| OLD | NEW |
| 1 #if defined(__x86_64__) | 1 #if defined(__x86_64__) |
| 2 .text | 2 .text |
| 3 | 3 |
| 4 | 4 |
| 5 | 5 |
| 6 .globl _bn_mul_mont_gather5 | 6 .globl _bn_mul_mont_gather5 |
| 7 .private_extern _bn_mul_mont_gather5 | 7 .private_extern _bn_mul_mont_gather5 |
| 8 | 8 |
| 9 .p2align 6 | 9 .p2align 6 |
| 10 _bn_mul_mont_gather5: | 10 _bn_mul_mont_gather5: |
| 11 testl $7,%r9d | 11 testl $7,%r9d |
| 12 jnz L$mul_enter | 12 jnz L$mul_enter |
| 13 jmp L$mul4x_enter | 13 jmp L$mul4x_enter |
| 14 | 14 |
| 15 .p2align 4 | 15 .p2align 4 |
| 16 L$mul_enter: | 16 L$mul_enter: |
| 17 movl %r9d,%r9d | 17 movl %r9d,%r9d |
| 18 movq %rsp,%rax | 18 movq %rsp,%rax |
| 19 » movl» 8(%rsp),%r10d | 19 » movd» 8(%rsp),%xmm5 |
| 20 » leaq» L$inc(%rip),%r10 |
| 20 pushq %rbx | 21 pushq %rbx |
| 21 pushq %rbp | 22 pushq %rbp |
| 22 pushq %r12 | 23 pushq %r12 |
| 23 pushq %r13 | 24 pushq %r13 |
| 24 pushq %r14 | 25 pushq %r14 |
| 25 pushq %r15 | 26 pushq %r15 |
| 27 |
| 26 leaq 2(%r9),%r11 | 28 leaq 2(%r9),%r11 |
| 27 negq %r11 | 29 negq %r11 |
| 28 » leaq» (%rsp,%r11,8),%rsp | 30 » leaq» -264(%rsp,%r11,8),%rsp |
| 29 andq $-1024,%rsp | 31 andq $-1024,%rsp |
| 30 | 32 |
| 31 movq %rax,8(%rsp,%r9,8) | 33 movq %rax,8(%rsp,%r9,8) |
| 32 L$mul_body: | 34 L$mul_body: |
| 33 » movq» %rdx,%r12 | 35 » leaq» 128(%rdx),%r12 |
| 34 » movq» %r10,%r11 | 36 » movdqa» 0(%r10),%xmm0 |
| 35 » shrq» $3,%r10 | 37 » movdqa» 16(%r10),%xmm1 |
| 36 » andq» $7,%r11 | 38 » leaq» 24-112(%rsp,%r9,8),%r10 |
| 37 » notq» %r10 | 39 » andq» $-16,%r10 |
| 38 » leaq» L$magic_masks(%rip),%rax | |
| 39 » andq» $3,%r10 | |
| 40 » leaq» 96(%r12,%r11,8),%r12 | |
| 41 » movq» 0(%rax,%r10,8),%xmm4 | |
| 42 » movq» 8(%rax,%r10,8),%xmm5 | |
| 43 » movq» 16(%rax,%r10,8),%xmm6 | |
| 44 » movq» 24(%rax,%r10,8),%xmm7 | |
| 45 | 40 |
| 46 » movq» -96(%r12),%xmm0 | 41 » pshufd» $0,%xmm5,%xmm5 |
| 47 » movq» -32(%r12),%xmm1 | 42 » movdqa» %xmm1,%xmm4 |
| 48 » pand» %xmm4,%xmm0 | 43 » movdqa» %xmm1,%xmm2 |
| 49 » movq» 32(%r12),%xmm2 | 44 » paddd» %xmm0,%xmm1 |
| 50 » pand» %xmm5,%xmm1 | 45 » pcmpeqd»%xmm5,%xmm0 |
| 51 » movq» 96(%r12),%xmm3 | 46 .byte» 0x67 |
| 52 » pand» %xmm6,%xmm2 | 47 » movdqa» %xmm4,%xmm3 |
| 48 » paddd» %xmm1,%xmm2 |
| 49 » pcmpeqd»%xmm5,%xmm1 |
| 50 » movdqa» %xmm0,112(%r10) |
| 51 » movdqa» %xmm4,%xmm0 |
| 52 |
| 53 » paddd» %xmm2,%xmm3 |
| 54 » pcmpeqd»%xmm5,%xmm2 |
| 55 » movdqa» %xmm1,128(%r10) |
| 56 » movdqa» %xmm4,%xmm1 |
| 57 |
| 58 » paddd» %xmm3,%xmm0 |
| 59 » pcmpeqd»%xmm5,%xmm3 |
| 60 » movdqa» %xmm2,144(%r10) |
| 61 » movdqa» %xmm4,%xmm2 |
| 62 |
| 63 » paddd» %xmm0,%xmm1 |
| 64 » pcmpeqd»%xmm5,%xmm0 |
| 65 » movdqa» %xmm3,160(%r10) |
| 66 » movdqa» %xmm4,%xmm3 |
| 67 » paddd» %xmm1,%xmm2 |
| 68 » pcmpeqd»%xmm5,%xmm1 |
| 69 » movdqa» %xmm0,176(%r10) |
| 70 » movdqa» %xmm4,%xmm0 |
| 71 |
| 72 » paddd» %xmm2,%xmm3 |
| 73 » pcmpeqd»%xmm5,%xmm2 |
| 74 » movdqa» %xmm1,192(%r10) |
| 75 » movdqa» %xmm4,%xmm1 |
| 76 |
| 77 » paddd» %xmm3,%xmm0 |
| 78 » pcmpeqd»%xmm5,%xmm3 |
| 79 » movdqa» %xmm2,208(%r10) |
| 80 » movdqa» %xmm4,%xmm2 |
| 81 |
| 82 » paddd» %xmm0,%xmm1 |
| 83 » pcmpeqd»%xmm5,%xmm0 |
| 84 » movdqa» %xmm3,224(%r10) |
| 85 » movdqa» %xmm4,%xmm3 |
| 86 » paddd» %xmm1,%xmm2 |
| 87 » pcmpeqd»%xmm5,%xmm1 |
| 88 » movdqa» %xmm0,240(%r10) |
| 89 » movdqa» %xmm4,%xmm0 |
| 90 |
| 91 » paddd» %xmm2,%xmm3 |
| 92 » pcmpeqd»%xmm5,%xmm2 |
| 93 » movdqa» %xmm1,256(%r10) |
| 94 » movdqa» %xmm4,%xmm1 |
| 95 |
| 96 » paddd» %xmm3,%xmm0 |
| 97 » pcmpeqd»%xmm5,%xmm3 |
| 98 » movdqa» %xmm2,272(%r10) |
| 99 » movdqa» %xmm4,%xmm2 |
| 100 |
| 101 » paddd» %xmm0,%xmm1 |
| 102 » pcmpeqd»%xmm5,%xmm0 |
| 103 » movdqa» %xmm3,288(%r10) |
| 104 » movdqa» %xmm4,%xmm3 |
| 105 » paddd» %xmm1,%xmm2 |
| 106 » pcmpeqd»%xmm5,%xmm1 |
| 107 » movdqa» %xmm0,304(%r10) |
| 108 |
| 109 » paddd» %xmm2,%xmm3 |
| 110 .byte» 0x67 |
| 111 » pcmpeqd»%xmm5,%xmm2 |
| 112 » movdqa» %xmm1,320(%r10) |
| 113 |
| 114 » pcmpeqd»%xmm5,%xmm3 |
| 115 » movdqa» %xmm2,336(%r10) |
| 116 » pand» 64(%r12),%xmm0 |
| 117 |
| 118 » pand» 80(%r12),%xmm1 |
| 119 » pand» 96(%r12),%xmm2 |
| 120 » movdqa» %xmm3,352(%r10) |
| 121 » pand» 112(%r12),%xmm3 |
| 122 » por» %xmm2,%xmm0 |
| 123 » por» %xmm3,%xmm1 |
| 124 » movdqa» -128(%r12),%xmm4 |
| 125 » movdqa» -112(%r12),%xmm5 |
| 126 » movdqa» -96(%r12),%xmm2 |
| 127 » pand» 112(%r10),%xmm4 |
| 128 » movdqa» -80(%r12),%xmm3 |
| 129 » pand» 128(%r10),%xmm5 |
| 130 » por» %xmm4,%xmm0 |
| 131 » pand» 144(%r10),%xmm2 |
| 132 » por» %xmm5,%xmm1 |
| 133 » pand» 160(%r10),%xmm3 |
| 134 » por» %xmm2,%xmm0 |
| 135 » por» %xmm3,%xmm1 |
| 136 » movdqa» -64(%r12),%xmm4 |
| 137 » movdqa» -48(%r12),%xmm5 |
| 138 » movdqa» -32(%r12),%xmm2 |
| 139 » pand» 176(%r10),%xmm4 |
| 140 » movdqa» -16(%r12),%xmm3 |
| 141 » pand» 192(%r10),%xmm5 |
| 142 » por» %xmm4,%xmm0 |
| 143 » pand» 208(%r10),%xmm2 |
| 144 » por» %xmm5,%xmm1 |
| 145 » pand» 224(%r10),%xmm3 |
| 146 » por» %xmm2,%xmm0 |
| 147 » por» %xmm3,%xmm1 |
| 148 » movdqa» 0(%r12),%xmm4 |
| 149 » movdqa» 16(%r12),%xmm5 |
| 150 » movdqa» 32(%r12),%xmm2 |
| 151 » pand» 240(%r10),%xmm4 |
| 152 » movdqa» 48(%r12),%xmm3 |
| 153 » pand» 256(%r10),%xmm5 |
| 154 » por» %xmm4,%xmm0 |
| 155 » pand» 272(%r10),%xmm2 |
| 156 » por» %xmm5,%xmm1 |
| 157 » pand» 288(%r10),%xmm3 |
| 158 » por» %xmm2,%xmm0 |
| 159 » por» %xmm3,%xmm1 |
| 53 por %xmm1,%xmm0 | 160 por %xmm1,%xmm0 |
| 54 » pand» %xmm7,%xmm3 | 161 » pshufd» $0x4e,%xmm0,%xmm1 |
| 55 » por» %xmm2,%xmm0 | 162 » por» %xmm1,%xmm0 |
| 56 leaq 256(%r12),%r12 | 163 leaq 256(%r12),%r12 |
| 57 por %xmm3,%xmm0 | |
| 58 | |
| 59 .byte 102,72,15,126,195 | 164 .byte 102,72,15,126,195 |
| 60 | 165 |
| 61 movq (%r8),%r8 | 166 movq (%r8),%r8 |
| 62 movq (%rsi),%rax | 167 movq (%rsi),%rax |
| 63 | 168 |
| 64 xorq %r14,%r14 | 169 xorq %r14,%r14 |
| 65 xorq %r15,%r15 | 170 xorq %r15,%r15 |
| 66 | 171 |
| 67 movq -96(%r12),%xmm0 | |
| 68 movq -32(%r12),%xmm1 | |
| 69 pand %xmm4,%xmm0 | |
| 70 movq 32(%r12),%xmm2 | |
| 71 pand %xmm5,%xmm1 | |
| 72 | |
| 73 movq %r8,%rbp | 172 movq %r8,%rbp |
| 74 mulq %rbx | 173 mulq %rbx |
| 75 movq %rax,%r10 | 174 movq %rax,%r10 |
| 76 movq (%rcx),%rax | 175 movq (%rcx),%rax |
| 77 | 176 |
| 78 movq 96(%r12),%xmm3 | |
| 79 pand %xmm6,%xmm2 | |
| 80 por %xmm1,%xmm0 | |
| 81 pand %xmm7,%xmm3 | |
| 82 | |
| 83 imulq %r10,%rbp | 177 imulq %r10,%rbp |
| 84 movq %rdx,%r11 | 178 movq %rdx,%r11 |
| 85 | 179 |
| 86 por %xmm2,%xmm0 | |
| 87 leaq 256(%r12),%r12 | |
| 88 por %xmm3,%xmm0 | |
| 89 | |
| 90 mulq %rbp | 180 mulq %rbp |
| 91 addq %rax,%r10 | 181 addq %rax,%r10 |
| 92 movq 8(%rsi),%rax | 182 movq 8(%rsi),%rax |
| 93 adcq $0,%rdx | 183 adcq $0,%rdx |
| 94 movq %rdx,%r13 | 184 movq %rdx,%r13 |
| 95 | 185 |
| 96 leaq 1(%r15),%r15 | 186 leaq 1(%r15),%r15 |
| 97 jmp L$1st_enter | 187 jmp L$1st_enter |
| 98 | 188 |
| 99 .p2align 4 | 189 .p2align 4 |
| (...skipping 12 matching lines...) Expand all Loading... |
| 112 addq %rax,%r11 | 202 addq %rax,%r11 |
| 113 movq (%rcx,%r15,8),%rax | 203 movq (%rcx,%r15,8),%rax |
| 114 adcq $0,%rdx | 204 adcq $0,%rdx |
| 115 leaq 1(%r15),%r15 | 205 leaq 1(%r15),%r15 |
| 116 movq %rdx,%r10 | 206 movq %rdx,%r10 |
| 117 | 207 |
| 118 mulq %rbp | 208 mulq %rbp |
| 119 cmpq %r9,%r15 | 209 cmpq %r9,%r15 |
| 120 jne L$1st | 210 jne L$1st |
| 121 | 211 |
| 122 .byte 102,72,15,126,195 | |
| 123 | 212 |
| 124 addq %rax,%r13 | 213 addq %rax,%r13 |
| 125 movq (%rsi),%rax | |
| 126 adcq $0,%rdx | 214 adcq $0,%rdx |
| 127 addq %r11,%r13 | 215 addq %r11,%r13 |
| 128 adcq $0,%rdx | 216 adcq $0,%rdx |
| 129 » movq» %r13,-16(%rsp,%r15,8) | 217 » movq» %r13,-16(%rsp,%r9,8) |
| 130 movq %rdx,%r13 | 218 movq %rdx,%r13 |
| 131 movq %r10,%r11 | 219 movq %r10,%r11 |
| 132 | 220 |
| 133 xorq %rdx,%rdx | 221 xorq %rdx,%rdx |
| 134 addq %r11,%r13 | 222 addq %r11,%r13 |
| 135 adcq $0,%rdx | 223 adcq $0,%rdx |
| 136 movq %r13,-8(%rsp,%r9,8) | 224 movq %r13,-8(%rsp,%r9,8) |
| 137 movq %rdx,(%rsp,%r9,8) | 225 movq %rdx,(%rsp,%r9,8) |
| 138 | 226 |
| 139 leaq 1(%r14),%r14 | 227 leaq 1(%r14),%r14 |
| 140 jmp L$outer | 228 jmp L$outer |
| 141 .p2align 4 | 229 .p2align 4 |
| 142 L$outer: | 230 L$outer: |
| 231 leaq 24+128(%rsp,%r9,8),%rdx |
| 232 andq $-16,%rdx |
| 233 pxor %xmm4,%xmm4 |
| 234 pxor %xmm5,%xmm5 |
| 235 movdqa -128(%r12),%xmm0 |
| 236 movdqa -112(%r12),%xmm1 |
| 237 movdqa -96(%r12),%xmm2 |
| 238 movdqa -80(%r12),%xmm3 |
| 239 pand -128(%rdx),%xmm0 |
| 240 pand -112(%rdx),%xmm1 |
| 241 por %xmm0,%xmm4 |
| 242 pand -96(%rdx),%xmm2 |
| 243 por %xmm1,%xmm5 |
| 244 pand -80(%rdx),%xmm3 |
| 245 por %xmm2,%xmm4 |
| 246 por %xmm3,%xmm5 |
| 247 movdqa -64(%r12),%xmm0 |
| 248 movdqa -48(%r12),%xmm1 |
| 249 movdqa -32(%r12),%xmm2 |
| 250 movdqa -16(%r12),%xmm3 |
| 251 pand -64(%rdx),%xmm0 |
| 252 pand -48(%rdx),%xmm1 |
| 253 por %xmm0,%xmm4 |
| 254 pand -32(%rdx),%xmm2 |
| 255 por %xmm1,%xmm5 |
| 256 pand -16(%rdx),%xmm3 |
| 257 por %xmm2,%xmm4 |
| 258 por %xmm3,%xmm5 |
| 259 movdqa 0(%r12),%xmm0 |
| 260 movdqa 16(%r12),%xmm1 |
| 261 movdqa 32(%r12),%xmm2 |
| 262 movdqa 48(%r12),%xmm3 |
| 263 pand 0(%rdx),%xmm0 |
| 264 pand 16(%rdx),%xmm1 |
| 265 por %xmm0,%xmm4 |
| 266 pand 32(%rdx),%xmm2 |
| 267 por %xmm1,%xmm5 |
| 268 pand 48(%rdx),%xmm3 |
| 269 por %xmm2,%xmm4 |
| 270 por %xmm3,%xmm5 |
| 271 movdqa 64(%r12),%xmm0 |
| 272 movdqa 80(%r12),%xmm1 |
| 273 movdqa 96(%r12),%xmm2 |
| 274 movdqa 112(%r12),%xmm3 |
| 275 pand 64(%rdx),%xmm0 |
| 276 pand 80(%rdx),%xmm1 |
| 277 por %xmm0,%xmm4 |
| 278 pand 96(%rdx),%xmm2 |
| 279 por %xmm1,%xmm5 |
| 280 pand 112(%rdx),%xmm3 |
| 281 por %xmm2,%xmm4 |
| 282 por %xmm3,%xmm5 |
| 283 por %xmm5,%xmm4 |
| 284 pshufd $0x4e,%xmm4,%xmm0 |
| 285 por %xmm4,%xmm0 |
| 286 leaq 256(%r12),%r12 |
| 287 |
| 288 movq (%rsi),%rax |
| 289 .byte 102,72,15,126,195 |
| 290 |
| 143 xorq %r15,%r15 | 291 xorq %r15,%r15 |
| 144 movq %r8,%rbp | 292 movq %r8,%rbp |
| 145 movq (%rsp),%r10 | 293 movq (%rsp),%r10 |
| 146 | 294 |
| 147 movq -96(%r12),%xmm0 | |
| 148 movq -32(%r12),%xmm1 | |
| 149 pand %xmm4,%xmm0 | |
| 150 movq 32(%r12),%xmm2 | |
| 151 pand %xmm5,%xmm1 | |
| 152 | |
| 153 mulq %rbx | 295 mulq %rbx |
| 154 addq %rax,%r10 | 296 addq %rax,%r10 |
| 155 movq (%rcx),%rax | 297 movq (%rcx),%rax |
| 156 adcq $0,%rdx | 298 adcq $0,%rdx |
| 157 | 299 |
| 158 movq 96(%r12),%xmm3 | |
| 159 pand %xmm6,%xmm2 | |
| 160 por %xmm1,%xmm0 | |
| 161 pand %xmm7,%xmm3 | |
| 162 | |
| 163 imulq %r10,%rbp | 300 imulq %r10,%rbp |
| 164 movq %rdx,%r11 | 301 movq %rdx,%r11 |
| 165 | 302 |
| 166 por %xmm2,%xmm0 | |
| 167 leaq 256(%r12),%r12 | |
| 168 por %xmm3,%xmm0 | |
| 169 | |
| 170 mulq %rbp | 303 mulq %rbp |
| 171 addq %rax,%r10 | 304 addq %rax,%r10 |
| 172 movq 8(%rsi),%rax | 305 movq 8(%rsi),%rax |
| 173 adcq $0,%rdx | 306 adcq $0,%rdx |
| 174 movq 8(%rsp),%r10 | 307 movq 8(%rsp),%r10 |
| 175 movq %rdx,%r13 | 308 movq %rdx,%r13 |
| 176 | 309 |
| 177 leaq 1(%r15),%r15 | 310 leaq 1(%r15),%r15 |
| 178 jmp L$inner_enter | 311 jmp L$inner_enter |
| 179 | 312 |
| (...skipping 15 matching lines...) Expand all Loading... |
| 195 adcq $0,%rdx | 328 adcq $0,%rdx |
| 196 addq %r11,%r10 | 329 addq %r11,%r10 |
| 197 movq %rdx,%r11 | 330 movq %rdx,%r11 |
| 198 adcq $0,%r11 | 331 adcq $0,%r11 |
| 199 leaq 1(%r15),%r15 | 332 leaq 1(%r15),%r15 |
| 200 | 333 |
| 201 mulq %rbp | 334 mulq %rbp |
| 202 cmpq %r9,%r15 | 335 cmpq %r9,%r15 |
| 203 jne L$inner | 336 jne L$inner |
| 204 | 337 |
| 205 .byte 102,72,15,126,195 | |
| 206 | |
| 207 addq %rax,%r13 | 338 addq %rax,%r13 |
| 208 movq (%rsi),%rax | |
| 209 adcq $0,%rdx | 339 adcq $0,%rdx |
| 210 addq %r10,%r13 | 340 addq %r10,%r13 |
| 211 » movq» (%rsp,%r15,8),%r10 | 341 » movq» (%rsp,%r9,8),%r10 |
| 212 adcq $0,%rdx | 342 adcq $0,%rdx |
| 213 » movq» %r13,-16(%rsp,%r15,8) | 343 » movq» %r13,-16(%rsp,%r9,8) |
| 214 movq %rdx,%r13 | 344 movq %rdx,%r13 |
| 215 | 345 |
| 216 xorq %rdx,%rdx | 346 xorq %rdx,%rdx |
| 217 addq %r11,%r13 | 347 addq %r11,%r13 |
| 218 adcq $0,%rdx | 348 adcq $0,%rdx |
| 219 addq %r10,%r13 | 349 addq %r10,%r13 |
| 220 adcq $0,%rdx | 350 adcq $0,%rdx |
| 221 movq %r13,-8(%rsp,%r9,8) | 351 movq %r13,-8(%rsp,%r9,8) |
| 222 movq %rdx,(%rsp,%r9,8) | 352 movq %rdx,(%rsp,%r9,8) |
| 223 | 353 |
| (...skipping 25 matching lines...) Expand all Loading... |
| 249 andq %rax,%rsi | 379 andq %rax,%rsi |
| 250 xorq %rcx,%rsi | 380 xorq %rcx,%rsi |
| 251 movq %r14,(%rsp,%r14,8) | 381 movq %r14,(%rsp,%r14,8) |
| 252 movq %rsi,(%rdi,%r14,8) | 382 movq %rsi,(%rdi,%r14,8) |
| 253 leaq 1(%r14),%r14 | 383 leaq 1(%r14),%r14 |
| 254 subq $1,%r15 | 384 subq $1,%r15 |
| 255 jnz L$copy | 385 jnz L$copy |
| 256 | 386 |
| 257 movq 8(%rsp,%r9,8),%rsi | 387 movq 8(%rsp,%r9,8),%rsi |
| 258 movq $1,%rax | 388 movq $1,%rax |
| 389 |
| 259 movq -48(%rsi),%r15 | 390 movq -48(%rsi),%r15 |
| 260 movq -40(%rsi),%r14 | 391 movq -40(%rsi),%r14 |
| 261 movq -32(%rsi),%r13 | 392 movq -32(%rsi),%r13 |
| 262 movq -24(%rsi),%r12 | 393 movq -24(%rsi),%r12 |
| 263 movq -16(%rsi),%rbp | 394 movq -16(%rsi),%rbp |
| 264 movq -8(%rsi),%rbx | 395 movq -8(%rsi),%rbx |
| 265 leaq (%rsi),%rsp | 396 leaq (%rsi),%rsp |
| 266 L$mul_epilogue: | 397 L$mul_epilogue: |
| 267 .byte 0xf3,0xc3 | 398 .byte 0xf3,0xc3 |
| 268 | 399 |
| 269 | 400 |
| 270 .p2align 5 | 401 .p2align 5 |
| 271 bn_mul4x_mont_gather5: | 402 bn_mul4x_mont_gather5: |
| 272 L$mul4x_enter: | 403 L$mul4x_enter: |
| 273 .byte 0x67 | 404 .byte 0x67 |
| 274 movq %rsp,%rax | 405 movq %rsp,%rax |
| 275 pushq %rbx | 406 pushq %rbx |
| 276 pushq %rbp | 407 pushq %rbp |
| 277 pushq %r12 | 408 pushq %r12 |
| 278 pushq %r13 | 409 pushq %r13 |
| 279 pushq %r14 | 410 pushq %r14 |
| 280 pushq %r15 | 411 pushq %r15 |
| 412 |
| 281 .byte 0x67 | 413 .byte 0x67 |
| 282 movl %r9d,%r10d | |
| 283 shll $3,%r9d | 414 shll $3,%r9d |
| 284 » shll» $3+2,%r10d | 415 » leaq» (%r9,%r9,2),%r10 |
| 285 negq %r9 | 416 negq %r9 |
| 286 | 417 |
| 287 | 418 |
| 288 | 419 |
| 289 | 420 |
| 290 | 421 |
| 291 | 422 |
| 292 | 423 |
| 293 | 424 |
| 294 » leaq» -64(%rsp,%r9,2),%r11 | 425 |
| 295 » subq» %rsi,%r11 | 426 |
| 427 » leaq» -320(%rsp,%r9,2),%r11 |
| 428 » subq» %rdi,%r11 |
| 296 andq $4095,%r11 | 429 andq $4095,%r11 |
| 297 cmpq %r11,%r10 | 430 cmpq %r11,%r10 |
| 298 jb L$mul4xsp_alt | 431 jb L$mul4xsp_alt |
| 299 subq %r11,%rsp | 432 subq %r11,%rsp |
| 300 » leaq» -64(%rsp,%r9,2),%rsp | 433 » leaq» -320(%rsp,%r9,2),%rsp |
| 301 jmp L$mul4xsp_done | 434 jmp L$mul4xsp_done |
| 302 | 435 |
| 303 .p2align 5 | 436 .p2align 5 |
| 304 L$mul4xsp_alt: | 437 L$mul4xsp_alt: |
| 305 » leaq» 4096-64(,%r9,2),%r10 | 438 » leaq» 4096-320(,%r9,2),%r10 |
| 306 » leaq» -64(%rsp,%r9,2),%rsp | 439 » leaq» -320(%rsp,%r9,2),%rsp |
| 307 subq %r10,%r11 | 440 subq %r10,%r11 |
| 308 movq $0,%r10 | 441 movq $0,%r10 |
| 309 cmovcq %r10,%r11 | 442 cmovcq %r10,%r11 |
| 310 subq %r11,%rsp | 443 subq %r11,%rsp |
| 311 L$mul4xsp_done: | 444 L$mul4xsp_done: |
| 312 andq $-64,%rsp | 445 andq $-64,%rsp |
| 313 negq %r9 | 446 negq %r9 |
| 314 | 447 |
| 315 movq %rax,40(%rsp) | 448 movq %rax,40(%rsp) |
| 316 L$mul4x_body: | 449 L$mul4x_body: |
| 317 | 450 |
| 318 call mul4x_internal | 451 call mul4x_internal |
| 319 | 452 |
| 320 movq 40(%rsp),%rsi | 453 movq 40(%rsp),%rsi |
| 321 movq $1,%rax | 454 movq $1,%rax |
| 455 |
| 322 movq -48(%rsi),%r15 | 456 movq -48(%rsi),%r15 |
| 323 movq -40(%rsi),%r14 | 457 movq -40(%rsi),%r14 |
| 324 movq -32(%rsi),%r13 | 458 movq -32(%rsi),%r13 |
| 325 movq -24(%rsi),%r12 | 459 movq -24(%rsi),%r12 |
| 326 movq -16(%rsi),%rbp | 460 movq -16(%rsi),%rbp |
| 327 movq -8(%rsi),%rbx | 461 movq -8(%rsi),%rbx |
| 328 leaq (%rsi),%rsp | 462 leaq (%rsi),%rsp |
| 329 L$mul4x_epilogue: | 463 L$mul4x_epilogue: |
| 330 .byte 0xf3,0xc3 | 464 .byte 0xf3,0xc3 |
| 331 | 465 |
| 332 | 466 |
| 333 | 467 |
| 334 .p2align 5 | 468 .p2align 5 |
| 335 mul4x_internal: | 469 mul4x_internal: |
| 336 shlq $5,%r9 | 470 shlq $5,%r9 |
| 337 » movl» 8(%rax),%r10d | 471 » movd» 8(%rax),%xmm5 |
| 338 » leaq» 256(%rdx,%r9,1),%r13 | 472 » leaq» L$inc(%rip),%rax |
| 473 » leaq» 128(%rdx,%r9,1),%r13 |
| 339 shrq $5,%r9 | 474 shrq $5,%r9 |
| 340 » movq» %r10,%r11 | 475 » movdqa» 0(%rax),%xmm0 |
| 341 » shrq» $3,%r10 | 476 » movdqa» 16(%rax),%xmm1 |
| 342 » andq» $7,%r11 | 477 » leaq» 88-112(%rsp,%r9,1),%r10 |
| 343 » notq» %r10 | 478 » leaq» 128(%rdx),%r12 |
| 344 » leaq» L$magic_masks(%rip),%rax | |
| 345 » andq» $3,%r10 | |
| 346 » leaq» 96(%rdx,%r11,8),%r12 | |
| 347 » movq» 0(%rax,%r10,8),%xmm4 | |
| 348 » movq» 8(%rax,%r10,8),%xmm5 | |
| 349 » addq» $7,%r11 | |
| 350 » movq» 16(%rax,%r10,8),%xmm6 | |
| 351 » movq» 24(%rax,%r10,8),%xmm7 | |
| 352 » andq» $7,%r11 | |
| 353 | 479 |
| 354 » movq» -96(%r12),%xmm0 | 480 » pshufd» $0,%xmm5,%xmm5 |
| 355 » leaq» 256(%r12),%r14 | 481 » movdqa» %xmm1,%xmm4 |
| 356 » movq» -32(%r12),%xmm1 | 482 .byte» 0x67,0x67 |
| 357 » pand» %xmm4,%xmm0 | 483 » movdqa» %xmm1,%xmm2 |
| 358 » movq» 32(%r12),%xmm2 | 484 » paddd» %xmm0,%xmm1 |
| 359 » pand» %xmm5,%xmm1 | 485 » pcmpeqd»%xmm5,%xmm0 |
| 360 » movq» 96(%r12),%xmm3 | |
| 361 » pand» %xmm6,%xmm2 | |
| 362 .byte 0x67 | 486 .byte 0x67 |
| 487 movdqa %xmm4,%xmm3 |
| 488 paddd %xmm1,%xmm2 |
| 489 pcmpeqd %xmm5,%xmm1 |
| 490 movdqa %xmm0,112(%r10) |
| 491 movdqa %xmm4,%xmm0 |
| 492 |
| 493 paddd %xmm2,%xmm3 |
| 494 pcmpeqd %xmm5,%xmm2 |
| 495 movdqa %xmm1,128(%r10) |
| 496 movdqa %xmm4,%xmm1 |
| 497 |
| 498 paddd %xmm3,%xmm0 |
| 499 pcmpeqd %xmm5,%xmm3 |
| 500 movdqa %xmm2,144(%r10) |
| 501 movdqa %xmm4,%xmm2 |
| 502 |
| 503 paddd %xmm0,%xmm1 |
| 504 pcmpeqd %xmm5,%xmm0 |
| 505 movdqa %xmm3,160(%r10) |
| 506 movdqa %xmm4,%xmm3 |
| 507 paddd %xmm1,%xmm2 |
| 508 pcmpeqd %xmm5,%xmm1 |
| 509 movdqa %xmm0,176(%r10) |
| 510 movdqa %xmm4,%xmm0 |
| 511 |
| 512 paddd %xmm2,%xmm3 |
| 513 pcmpeqd %xmm5,%xmm2 |
| 514 movdqa %xmm1,192(%r10) |
| 515 movdqa %xmm4,%xmm1 |
| 516 |
| 517 paddd %xmm3,%xmm0 |
| 518 pcmpeqd %xmm5,%xmm3 |
| 519 movdqa %xmm2,208(%r10) |
| 520 movdqa %xmm4,%xmm2 |
| 521 |
| 522 paddd %xmm0,%xmm1 |
| 523 pcmpeqd %xmm5,%xmm0 |
| 524 movdqa %xmm3,224(%r10) |
| 525 movdqa %xmm4,%xmm3 |
| 526 paddd %xmm1,%xmm2 |
| 527 pcmpeqd %xmm5,%xmm1 |
| 528 movdqa %xmm0,240(%r10) |
| 529 movdqa %xmm4,%xmm0 |
| 530 |
| 531 paddd %xmm2,%xmm3 |
| 532 pcmpeqd %xmm5,%xmm2 |
| 533 movdqa %xmm1,256(%r10) |
| 534 movdqa %xmm4,%xmm1 |
| 535 |
| 536 paddd %xmm3,%xmm0 |
| 537 pcmpeqd %xmm5,%xmm3 |
| 538 movdqa %xmm2,272(%r10) |
| 539 movdqa %xmm4,%xmm2 |
| 540 |
| 541 paddd %xmm0,%xmm1 |
| 542 pcmpeqd %xmm5,%xmm0 |
| 543 movdqa %xmm3,288(%r10) |
| 544 movdqa %xmm4,%xmm3 |
| 545 paddd %xmm1,%xmm2 |
| 546 pcmpeqd %xmm5,%xmm1 |
| 547 movdqa %xmm0,304(%r10) |
| 548 |
| 549 paddd %xmm2,%xmm3 |
| 550 .byte 0x67 |
| 551 pcmpeqd %xmm5,%xmm2 |
| 552 movdqa %xmm1,320(%r10) |
| 553 |
| 554 pcmpeqd %xmm5,%xmm3 |
| 555 movdqa %xmm2,336(%r10) |
| 556 pand 64(%r12),%xmm0 |
| 557 |
| 558 pand 80(%r12),%xmm1 |
| 559 pand 96(%r12),%xmm2 |
| 560 movdqa %xmm3,352(%r10) |
| 561 pand 112(%r12),%xmm3 |
| 562 por %xmm2,%xmm0 |
| 563 por %xmm3,%xmm1 |
| 564 movdqa -128(%r12),%xmm4 |
| 565 movdqa -112(%r12),%xmm5 |
| 566 movdqa -96(%r12),%xmm2 |
| 567 pand 112(%r10),%xmm4 |
| 568 movdqa -80(%r12),%xmm3 |
| 569 pand 128(%r10),%xmm5 |
| 570 por %xmm4,%xmm0 |
| 571 pand 144(%r10),%xmm2 |
| 572 por %xmm5,%xmm1 |
| 573 pand 160(%r10),%xmm3 |
| 574 por %xmm2,%xmm0 |
| 575 por %xmm3,%xmm1 |
| 576 movdqa -64(%r12),%xmm4 |
| 577 movdqa -48(%r12),%xmm5 |
| 578 movdqa -32(%r12),%xmm2 |
| 579 pand 176(%r10),%xmm4 |
| 580 movdqa -16(%r12),%xmm3 |
| 581 pand 192(%r10),%xmm5 |
| 582 por %xmm4,%xmm0 |
| 583 pand 208(%r10),%xmm2 |
| 584 por %xmm5,%xmm1 |
| 585 pand 224(%r10),%xmm3 |
| 586 por %xmm2,%xmm0 |
| 587 por %xmm3,%xmm1 |
| 588 movdqa 0(%r12),%xmm4 |
| 589 movdqa 16(%r12),%xmm5 |
| 590 movdqa 32(%r12),%xmm2 |
| 591 pand 240(%r10),%xmm4 |
| 592 movdqa 48(%r12),%xmm3 |
| 593 pand 256(%r10),%xmm5 |
| 594 por %xmm4,%xmm0 |
| 595 pand 272(%r10),%xmm2 |
| 596 por %xmm5,%xmm1 |
| 597 pand 288(%r10),%xmm3 |
| 598 por %xmm2,%xmm0 |
| 599 por %xmm3,%xmm1 |
| 363 por %xmm1,%xmm0 | 600 por %xmm1,%xmm0 |
| 364 » movq» -96(%r14),%xmm1 | 601 » pshufd» $0x4e,%xmm0,%xmm1 |
| 365 .byte» 0x67 | 602 » por» %xmm1,%xmm0 |
| 366 » pand» %xmm7,%xmm3 | 603 » leaq» 256(%r12),%r12 |
| 367 .byte» 0x67 | 604 .byte» 102,72,15,126,195 |
| 368 » por» %xmm2,%xmm0 | |
| 369 » movq» -32(%r14),%xmm2 | |
| 370 .byte» 0x67 | |
| 371 » pand» %xmm4,%xmm1 | |
| 372 .byte» 0x67 | |
| 373 » por» %xmm3,%xmm0 | |
| 374 » movq» 32(%r14),%xmm3 | |
| 375 | 605 |
| 376 .byte 102,72,15,126,195 | |
| 377 movq 96(%r14),%xmm0 | |
| 378 movq %r13,16+8(%rsp) | 606 movq %r13,16+8(%rsp) |
| 379 movq %rdi,56+8(%rsp) | 607 movq %rdi,56+8(%rsp) |
| 380 | 608 |
| 381 movq (%r8),%r8 | 609 movq (%r8),%r8 |
| 382 movq (%rsi),%rax | 610 movq (%rsi),%rax |
| 383 leaq (%rsi,%r9,1),%rsi | 611 leaq (%rsi,%r9,1),%rsi |
| 384 negq %r9 | 612 negq %r9 |
| 385 | 613 |
| 386 movq %r8,%rbp | 614 movq %r8,%rbp |
| 387 mulq %rbx | 615 mulq %rbx |
| 388 movq %rax,%r10 | 616 movq %rax,%r10 |
| 389 movq (%rcx),%rax | 617 movq (%rcx),%rax |
| 390 | 618 |
| 391 pand %xmm5,%xmm2 | |
| 392 pand %xmm6,%xmm3 | |
| 393 por %xmm2,%xmm1 | |
| 394 | |
| 395 imulq %r10,%rbp | 619 imulq %r10,%rbp |
| 396 | 620 » leaq» 64+8(%rsp),%r14 |
| 397 | |
| 398 | |
| 399 | |
| 400 | |
| 401 | |
| 402 | |
| 403 » leaq» 64+8(%rsp,%r11,8),%r14 | |
| 404 movq %rdx,%r11 | 621 movq %rdx,%r11 |
| 405 | 622 |
| 406 pand %xmm7,%xmm0 | |
| 407 por %xmm3,%xmm1 | |
| 408 leaq 512(%r12),%r12 | |
| 409 por %xmm1,%xmm0 | |
| 410 | |
| 411 mulq %rbp | 623 mulq %rbp |
| 412 addq %rax,%r10 | 624 addq %rax,%r10 |
| 413 movq 8(%rsi,%r9,1),%rax | 625 movq 8(%rsi,%r9,1),%rax |
| 414 adcq $0,%rdx | 626 adcq $0,%rdx |
| 415 movq %rdx,%rdi | 627 movq %rdx,%rdi |
| 416 | 628 |
| 417 mulq %rbx | 629 mulq %rbx |
| 418 addq %rax,%r11 | 630 addq %rax,%r11 |
| 419 » movq» 16(%rcx),%rax | 631 » movq» 8(%rcx),%rax |
| 420 adcq $0,%rdx | 632 adcq $0,%rdx |
| 421 movq %rdx,%r10 | 633 movq %rdx,%r10 |
| 422 | 634 |
| 423 mulq %rbp | 635 mulq %rbp |
| 424 addq %rax,%rdi | 636 addq %rax,%rdi |
| 425 movq 16(%rsi,%r9,1),%rax | 637 movq 16(%rsi,%r9,1),%rax |
| 426 adcq $0,%rdx | 638 adcq $0,%rdx |
| 427 addq %r11,%rdi | 639 addq %r11,%rdi |
| 428 leaq 32(%r9),%r15 | 640 leaq 32(%r9),%r15 |
| 429 » leaq» 64(%rcx),%rcx | 641 » leaq» 32(%rcx),%rcx |
| 430 adcq $0,%rdx | 642 adcq $0,%rdx |
| 431 movq %rdi,(%r14) | 643 movq %rdi,(%r14) |
| 432 movq %rdx,%r13 | 644 movq %rdx,%r13 |
| 433 jmp L$1st4x | 645 jmp L$1st4x |
| 434 | 646 |
| 435 .p2align 5 | 647 .p2align 5 |
| 436 L$1st4x: | 648 L$1st4x: |
| 437 mulq %rbx | 649 mulq %rbx |
| 438 addq %rax,%r10 | 650 addq %rax,%r10 |
| 439 » movq» -32(%rcx),%rax | 651 » movq» -16(%rcx),%rax |
| 440 leaq 32(%r14),%r14 | 652 leaq 32(%r14),%r14 |
| 441 adcq $0,%rdx | 653 adcq $0,%rdx |
| 442 movq %rdx,%r11 | 654 movq %rdx,%r11 |
| 443 | 655 |
| 444 mulq %rbp | 656 mulq %rbp |
| 445 addq %rax,%r13 | 657 addq %rax,%r13 |
| 446 movq -8(%rsi,%r15,1),%rax | 658 movq -8(%rsi,%r15,1),%rax |
| 447 adcq $0,%rdx | 659 adcq $0,%rdx |
| 448 addq %r10,%r13 | 660 addq %r10,%r13 |
| 449 adcq $0,%rdx | 661 adcq $0,%rdx |
| 450 movq %r13,-24(%r14) | 662 movq %r13,-24(%r14) |
| 451 movq %rdx,%rdi | 663 movq %rdx,%rdi |
| 452 | 664 |
| 453 mulq %rbx | 665 mulq %rbx |
| 454 addq %rax,%r11 | 666 addq %rax,%r11 |
| 455 » movq» -16(%rcx),%rax | 667 » movq» -8(%rcx),%rax |
| 456 adcq $0,%rdx | 668 adcq $0,%rdx |
| 457 movq %rdx,%r10 | 669 movq %rdx,%r10 |
| 458 | 670 |
| 459 mulq %rbp | 671 mulq %rbp |
| 460 addq %rax,%rdi | 672 addq %rax,%rdi |
| 461 movq (%rsi,%r15,1),%rax | 673 movq (%rsi,%r15,1),%rax |
| 462 adcq $0,%rdx | 674 adcq $0,%rdx |
| 463 addq %r11,%rdi | 675 addq %r11,%rdi |
| 464 adcq $0,%rdx | 676 adcq $0,%rdx |
| 465 movq %rdi,-16(%r14) | 677 movq %rdi,-16(%r14) |
| 466 movq %rdx,%r13 | 678 movq %rdx,%r13 |
| 467 | 679 |
| 468 mulq %rbx | 680 mulq %rbx |
| 469 addq %rax,%r10 | 681 addq %rax,%r10 |
| 470 movq 0(%rcx),%rax | 682 movq 0(%rcx),%rax |
| 471 adcq $0,%rdx | 683 adcq $0,%rdx |
| 472 movq %rdx,%r11 | 684 movq %rdx,%r11 |
| 473 | 685 |
| 474 mulq %rbp | 686 mulq %rbp |
| 475 addq %rax,%r13 | 687 addq %rax,%r13 |
| 476 movq 8(%rsi,%r15,1),%rax | 688 movq 8(%rsi,%r15,1),%rax |
| 477 adcq $0,%rdx | 689 adcq $0,%rdx |
| 478 addq %r10,%r13 | 690 addq %r10,%r13 |
| 479 adcq $0,%rdx | 691 adcq $0,%rdx |
| 480 movq %r13,-8(%r14) | 692 movq %r13,-8(%r14) |
| 481 movq %rdx,%rdi | 693 movq %rdx,%rdi |
| 482 | 694 |
| 483 mulq %rbx | 695 mulq %rbx |
| 484 addq %rax,%r11 | 696 addq %rax,%r11 |
| 485 » movq» 16(%rcx),%rax | 697 » movq» 8(%rcx),%rax |
| 486 adcq $0,%rdx | 698 adcq $0,%rdx |
| 487 movq %rdx,%r10 | 699 movq %rdx,%r10 |
| 488 | 700 |
| 489 mulq %rbp | 701 mulq %rbp |
| 490 addq %rax,%rdi | 702 addq %rax,%rdi |
| 491 movq 16(%rsi,%r15,1),%rax | 703 movq 16(%rsi,%r15,1),%rax |
| 492 adcq $0,%rdx | 704 adcq $0,%rdx |
| 493 addq %r11,%rdi | 705 addq %r11,%rdi |
| 494 » leaq» 64(%rcx),%rcx | 706 » leaq» 32(%rcx),%rcx |
| 495 adcq $0,%rdx | 707 adcq $0,%rdx |
| 496 movq %rdi,(%r14) | 708 movq %rdi,(%r14) |
| 497 movq %rdx,%r13 | 709 movq %rdx,%r13 |
| 498 | 710 |
| 499 addq $32,%r15 | 711 addq $32,%r15 |
| 500 jnz L$1st4x | 712 jnz L$1st4x |
| 501 | 713 |
| 502 mulq %rbx | 714 mulq %rbx |
| 503 addq %rax,%r10 | 715 addq %rax,%r10 |
| 504 » movq» -32(%rcx),%rax | 716 » movq» -16(%rcx),%rax |
| 505 leaq 32(%r14),%r14 | 717 leaq 32(%r14),%r14 |
| 506 adcq $0,%rdx | 718 adcq $0,%rdx |
| 507 movq %rdx,%r11 | 719 movq %rdx,%r11 |
| 508 | 720 |
| 509 mulq %rbp | 721 mulq %rbp |
| 510 addq %rax,%r13 | 722 addq %rax,%r13 |
| 511 movq -8(%rsi),%rax | 723 movq -8(%rsi),%rax |
| 512 adcq $0,%rdx | 724 adcq $0,%rdx |
| 513 addq %r10,%r13 | 725 addq %r10,%r13 |
| 514 adcq $0,%rdx | 726 adcq $0,%rdx |
| 515 movq %r13,-24(%r14) | 727 movq %r13,-24(%r14) |
| 516 movq %rdx,%rdi | 728 movq %rdx,%rdi |
| 517 | 729 |
| 518 mulq %rbx | 730 mulq %rbx |
| 519 addq %rax,%r11 | 731 addq %rax,%r11 |
| 520 » movq» -16(%rcx),%rax | 732 » movq» -8(%rcx),%rax |
| 521 adcq $0,%rdx | 733 adcq $0,%rdx |
| 522 movq %rdx,%r10 | 734 movq %rdx,%r10 |
| 523 | 735 |
| 524 mulq %rbp | 736 mulq %rbp |
| 525 addq %rax,%rdi | 737 addq %rax,%rdi |
| 526 movq (%rsi,%r9,1),%rax | 738 movq (%rsi,%r9,1),%rax |
| 527 adcq $0,%rdx | 739 adcq $0,%rdx |
| 528 addq %r11,%rdi | 740 addq %r11,%rdi |
| 529 adcq $0,%rdx | 741 adcq $0,%rdx |
| 530 movq %rdi,-16(%r14) | 742 movq %rdi,-16(%r14) |
| 531 movq %rdx,%r13 | 743 movq %rdx,%r13 |
| 532 | 744 |
| 533 .byte» 102,72,15,126,195 | 745 » leaq» (%rcx,%r9,1),%rcx |
| 534 » leaq» (%rcx,%r9,2),%rcx | |
| 535 | 746 |
| 536 xorq %rdi,%rdi | 747 xorq %rdi,%rdi |
| 537 addq %r10,%r13 | 748 addq %r10,%r13 |
| 538 adcq $0,%rdi | 749 adcq $0,%rdi |
| 539 movq %r13,-8(%r14) | 750 movq %r13,-8(%r14) |
| 540 | 751 |
| 541 jmp L$outer4x | 752 jmp L$outer4x |
| 542 | 753 |
| 543 .p2align 5 | 754 .p2align 5 |
| 544 L$outer4x: | 755 L$outer4x: |
| 756 leaq 16+128(%r14),%rdx |
| 757 pxor %xmm4,%xmm4 |
| 758 pxor %xmm5,%xmm5 |
| 759 movdqa -128(%r12),%xmm0 |
| 760 movdqa -112(%r12),%xmm1 |
| 761 movdqa -96(%r12),%xmm2 |
| 762 movdqa -80(%r12),%xmm3 |
| 763 pand -128(%rdx),%xmm0 |
| 764 pand -112(%rdx),%xmm1 |
| 765 por %xmm0,%xmm4 |
| 766 pand -96(%rdx),%xmm2 |
| 767 por %xmm1,%xmm5 |
| 768 pand -80(%rdx),%xmm3 |
| 769 por %xmm2,%xmm4 |
| 770 por %xmm3,%xmm5 |
| 771 movdqa -64(%r12),%xmm0 |
| 772 movdqa -48(%r12),%xmm1 |
| 773 movdqa -32(%r12),%xmm2 |
| 774 movdqa -16(%r12),%xmm3 |
| 775 pand -64(%rdx),%xmm0 |
| 776 pand -48(%rdx),%xmm1 |
| 777 por %xmm0,%xmm4 |
| 778 pand -32(%rdx),%xmm2 |
| 779 por %xmm1,%xmm5 |
| 780 pand -16(%rdx),%xmm3 |
| 781 por %xmm2,%xmm4 |
| 782 por %xmm3,%xmm5 |
| 783 movdqa 0(%r12),%xmm0 |
| 784 movdqa 16(%r12),%xmm1 |
| 785 movdqa 32(%r12),%xmm2 |
| 786 movdqa 48(%r12),%xmm3 |
| 787 pand 0(%rdx),%xmm0 |
| 788 pand 16(%rdx),%xmm1 |
| 789 por %xmm0,%xmm4 |
| 790 pand 32(%rdx),%xmm2 |
| 791 por %xmm1,%xmm5 |
| 792 pand 48(%rdx),%xmm3 |
| 793 por %xmm2,%xmm4 |
| 794 por %xmm3,%xmm5 |
| 795 movdqa 64(%r12),%xmm0 |
| 796 movdqa 80(%r12),%xmm1 |
| 797 movdqa 96(%r12),%xmm2 |
| 798 movdqa 112(%r12),%xmm3 |
| 799 pand 64(%rdx),%xmm0 |
| 800 pand 80(%rdx),%xmm1 |
| 801 por %xmm0,%xmm4 |
| 802 pand 96(%rdx),%xmm2 |
| 803 por %xmm1,%xmm5 |
| 804 pand 112(%rdx),%xmm3 |
| 805 por %xmm2,%xmm4 |
| 806 por %xmm3,%xmm5 |
| 807 por %xmm5,%xmm4 |
| 808 pshufd $0x4e,%xmm4,%xmm0 |
| 809 por %xmm4,%xmm0 |
| 810 leaq 256(%r12),%r12 |
| 811 .byte 102,72,15,126,195 |
| 812 |
| 545 movq (%r14,%r9,1),%r10 | 813 movq (%r14,%r9,1),%r10 |
| 546 movq %r8,%rbp | 814 movq %r8,%rbp |
| 547 mulq %rbx | 815 mulq %rbx |
| 548 addq %rax,%r10 | 816 addq %rax,%r10 |
| 549 movq (%rcx),%rax | 817 movq (%rcx),%rax |
| 550 adcq $0,%rdx | 818 adcq $0,%rdx |
| 551 | 819 |
| 552 movq -96(%r12),%xmm0 | |
| 553 movq -32(%r12),%xmm1 | |
| 554 pand %xmm4,%xmm0 | |
| 555 movq 32(%r12),%xmm2 | |
| 556 pand %xmm5,%xmm1 | |
| 557 movq 96(%r12),%xmm3 | |
| 558 | |
| 559 imulq %r10,%rbp | 820 imulq %r10,%rbp |
| 560 .byte 0x67 | |
| 561 movq %rdx,%r11 | 821 movq %rdx,%r11 |
| 562 movq %rdi,(%r14) | 822 movq %rdi,(%r14) |
| 563 | 823 |
| 564 pand %xmm6,%xmm2 | |
| 565 por %xmm1,%xmm0 | |
| 566 pand %xmm7,%xmm3 | |
| 567 por %xmm2,%xmm0 | |
| 568 leaq (%r14,%r9,1),%r14 | 824 leaq (%r14,%r9,1),%r14 |
| 569 leaq 256(%r12),%r12 | |
| 570 por %xmm3,%xmm0 | |
| 571 | 825 |
| 572 mulq %rbp | 826 mulq %rbp |
| 573 addq %rax,%r10 | 827 addq %rax,%r10 |
| 574 movq 8(%rsi,%r9,1),%rax | 828 movq 8(%rsi,%r9,1),%rax |
| 575 adcq $0,%rdx | 829 adcq $0,%rdx |
| 576 movq %rdx,%rdi | 830 movq %rdx,%rdi |
| 577 | 831 |
| 578 mulq %rbx | 832 mulq %rbx |
| 579 addq %rax,%r11 | 833 addq %rax,%r11 |
| 580 » movq» 16(%rcx),%rax | 834 » movq» 8(%rcx),%rax |
| 581 adcq $0,%rdx | 835 adcq $0,%rdx |
| 582 addq 8(%r14),%r11 | 836 addq 8(%r14),%r11 |
| 583 adcq $0,%rdx | 837 adcq $0,%rdx |
| 584 movq %rdx,%r10 | 838 movq %rdx,%r10 |
| 585 | 839 |
| 586 mulq %rbp | 840 mulq %rbp |
| 587 addq %rax,%rdi | 841 addq %rax,%rdi |
| 588 movq 16(%rsi,%r9,1),%rax | 842 movq 16(%rsi,%r9,1),%rax |
| 589 adcq $0,%rdx | 843 adcq $0,%rdx |
| 590 addq %r11,%rdi | 844 addq %r11,%rdi |
| 591 leaq 32(%r9),%r15 | 845 leaq 32(%r9),%r15 |
| 592 » leaq» 64(%rcx),%rcx | 846 » leaq» 32(%rcx),%rcx |
| 593 adcq $0,%rdx | 847 adcq $0,%rdx |
| 594 movq %rdx,%r13 | 848 movq %rdx,%r13 |
| 595 jmp L$inner4x | 849 jmp L$inner4x |
| 596 | 850 |
| 597 .p2align 5 | 851 .p2align 5 |
| 598 L$inner4x: | 852 L$inner4x: |
| 599 mulq %rbx | 853 mulq %rbx |
| 600 addq %rax,%r10 | 854 addq %rax,%r10 |
| 601 » movq» -32(%rcx),%rax | 855 » movq» -16(%rcx),%rax |
| 602 adcq $0,%rdx | 856 adcq $0,%rdx |
| 603 addq 16(%r14),%r10 | 857 addq 16(%r14),%r10 |
| 604 leaq 32(%r14),%r14 | 858 leaq 32(%r14),%r14 |
| 605 adcq $0,%rdx | 859 adcq $0,%rdx |
| 606 movq %rdx,%r11 | 860 movq %rdx,%r11 |
| 607 | 861 |
| 608 mulq %rbp | 862 mulq %rbp |
| 609 addq %rax,%r13 | 863 addq %rax,%r13 |
| 610 movq -8(%rsi,%r15,1),%rax | 864 movq -8(%rsi,%r15,1),%rax |
| 611 adcq $0,%rdx | 865 adcq $0,%rdx |
| 612 addq %r10,%r13 | 866 addq %r10,%r13 |
| 613 adcq $0,%rdx | 867 adcq $0,%rdx |
| 614 movq %rdi,-32(%r14) | 868 movq %rdi,-32(%r14) |
| 615 movq %rdx,%rdi | 869 movq %rdx,%rdi |
| 616 | 870 |
| 617 mulq %rbx | 871 mulq %rbx |
| 618 addq %rax,%r11 | 872 addq %rax,%r11 |
| 619 » movq» -16(%rcx),%rax | 873 » movq» -8(%rcx),%rax |
| 620 adcq $0,%rdx | 874 adcq $0,%rdx |
| 621 addq -8(%r14),%r11 | 875 addq -8(%r14),%r11 |
| 622 adcq $0,%rdx | 876 adcq $0,%rdx |
| 623 movq %rdx,%r10 | 877 movq %rdx,%r10 |
| 624 | 878 |
| 625 mulq %rbp | 879 mulq %rbp |
| 626 addq %rax,%rdi | 880 addq %rax,%rdi |
| 627 movq (%rsi,%r15,1),%rax | 881 movq (%rsi,%r15,1),%rax |
| 628 adcq $0,%rdx | 882 adcq $0,%rdx |
| 629 addq %r11,%rdi | 883 addq %r11,%rdi |
| (...skipping 13 matching lines...) Expand all Loading... |
| 643 addq %rax,%r13 | 897 addq %rax,%r13 |
| 644 movq 8(%rsi,%r15,1),%rax | 898 movq 8(%rsi,%r15,1),%rax |
| 645 adcq $0,%rdx | 899 adcq $0,%rdx |
| 646 addq %r10,%r13 | 900 addq %r10,%r13 |
| 647 adcq $0,%rdx | 901 adcq $0,%rdx |
| 648 movq %rdi,-16(%r14) | 902 movq %rdi,-16(%r14) |
| 649 movq %rdx,%rdi | 903 movq %rdx,%rdi |
| 650 | 904 |
| 651 mulq %rbx | 905 mulq %rbx |
| 652 addq %rax,%r11 | 906 addq %rax,%r11 |
| 653 » movq» 16(%rcx),%rax | 907 » movq» 8(%rcx),%rax |
| 654 adcq $0,%rdx | 908 adcq $0,%rdx |
| 655 addq 8(%r14),%r11 | 909 addq 8(%r14),%r11 |
| 656 adcq $0,%rdx | 910 adcq $0,%rdx |
| 657 movq %rdx,%r10 | 911 movq %rdx,%r10 |
| 658 | 912 |
| 659 mulq %rbp | 913 mulq %rbp |
| 660 addq %rax,%rdi | 914 addq %rax,%rdi |
| 661 movq 16(%rsi,%r15,1),%rax | 915 movq 16(%rsi,%r15,1),%rax |
| 662 adcq $0,%rdx | 916 adcq $0,%rdx |
| 663 addq %r11,%rdi | 917 addq %r11,%rdi |
| 664 » leaq» 64(%rcx),%rcx | 918 » leaq» 32(%rcx),%rcx |
| 665 adcq $0,%rdx | 919 adcq $0,%rdx |
| 666 movq %r13,-8(%r14) | 920 movq %r13,-8(%r14) |
| 667 movq %rdx,%r13 | 921 movq %rdx,%r13 |
| 668 | 922 |
| 669 addq $32,%r15 | 923 addq $32,%r15 |
| 670 jnz L$inner4x | 924 jnz L$inner4x |
| 671 | 925 |
| 672 mulq %rbx | 926 mulq %rbx |
| 673 addq %rax,%r10 | 927 addq %rax,%r10 |
| 674 » movq» -32(%rcx),%rax | 928 » movq» -16(%rcx),%rax |
| 675 adcq $0,%rdx | 929 adcq $0,%rdx |
| 676 addq 16(%r14),%r10 | 930 addq 16(%r14),%r10 |
| 677 leaq 32(%r14),%r14 | 931 leaq 32(%r14),%r14 |
| 678 adcq $0,%rdx | 932 adcq $0,%rdx |
| 679 movq %rdx,%r11 | 933 movq %rdx,%r11 |
| 680 | 934 |
| 681 mulq %rbp | 935 mulq %rbp |
| 682 addq %rax,%r13 | 936 addq %rax,%r13 |
| 683 movq -8(%rsi),%rax | 937 movq -8(%rsi),%rax |
| 684 adcq $0,%rdx | 938 adcq $0,%rdx |
| 685 addq %r10,%r13 | 939 addq %r10,%r13 |
| 686 adcq $0,%rdx | 940 adcq $0,%rdx |
| 687 movq %rdi,-32(%r14) | 941 movq %rdi,-32(%r14) |
| 688 movq %rdx,%rdi | 942 movq %rdx,%rdi |
| 689 | 943 |
| 690 mulq %rbx | 944 mulq %rbx |
| 691 addq %rax,%r11 | 945 addq %rax,%r11 |
| 692 movq %rbp,%rax | 946 movq %rbp,%rax |
| 693 » movq» -16(%rcx),%rbp | 947 » movq» -8(%rcx),%rbp |
| 694 adcq $0,%rdx | 948 adcq $0,%rdx |
| 695 addq -8(%r14),%r11 | 949 addq -8(%r14),%r11 |
| 696 adcq $0,%rdx | 950 adcq $0,%rdx |
| 697 movq %rdx,%r10 | 951 movq %rdx,%r10 |
| 698 | 952 |
| 699 mulq %rbp | 953 mulq %rbp |
| 700 addq %rax,%rdi | 954 addq %rax,%rdi |
| 701 movq (%rsi,%r9,1),%rax | 955 movq (%rsi,%r9,1),%rax |
| 702 adcq $0,%rdx | 956 adcq $0,%rdx |
| 703 addq %r11,%rdi | 957 addq %r11,%rdi |
| 704 adcq $0,%rdx | 958 adcq $0,%rdx |
| 705 movq %r13,-24(%r14) | 959 movq %r13,-24(%r14) |
| 706 movq %rdx,%r13 | 960 movq %rdx,%r13 |
| 707 | 961 |
| 708 .byte 102,72,15,126,195 | |
| 709 movq %rdi,-16(%r14) | 962 movq %rdi,-16(%r14) |
| 710 » leaq» (%rcx,%r9,2),%rcx | 963 » leaq» (%rcx,%r9,1),%rcx |
| 711 | 964 |
| 712 xorq %rdi,%rdi | 965 xorq %rdi,%rdi |
| 713 addq %r10,%r13 | 966 addq %r10,%r13 |
| 714 adcq $0,%rdi | 967 adcq $0,%rdi |
| 715 addq (%r14),%r13 | 968 addq (%r14),%r13 |
| 716 adcq $0,%rdi | 969 adcq $0,%rdi |
| 717 movq %r13,-8(%r14) | 970 movq %r13,-8(%r14) |
| 718 | 971 |
| 719 cmpq 16+8(%rsp),%r12 | 972 cmpq 16+8(%rsp),%r12 |
| 720 jb L$outer4x | 973 jb L$outer4x |
| 974 xorq %rax,%rax |
| 721 subq %r13,%rbp | 975 subq %r13,%rbp |
| 722 adcq %r15,%r15 | 976 adcq %r15,%r15 |
| 723 orq %r15,%rdi | 977 orq %r15,%rdi |
| 724 » xorq» $1,%rdi | 978 » subq» %rdi,%rax |
| 725 leaq (%r14,%r9,1),%rbx | 979 leaq (%r14,%r9,1),%rbx |
| 726 » leaq» (%rcx,%rdi,8),%rbp | 980 » movq» (%rcx),%r12 |
| 981 » leaq» (%rcx),%rbp |
| 727 movq %r9,%rcx | 982 movq %r9,%rcx |
| 728 sarq $3+2,%rcx | 983 sarq $3+2,%rcx |
| 729 movq 56+8(%rsp),%rdi | 984 movq 56+8(%rsp),%rdi |
| 730 » jmp» L$sqr4x_sub | 985 » decq» %r12 |
| 986 » xorq» %r10,%r10 |
| 987 » movq» 8(%rbp),%r13 |
| 988 » movq» 16(%rbp),%r14 |
| 989 » movq» 24(%rbp),%r15 |
| 990 » jmp» L$sqr4x_sub_entry |
| 731 | 991 |
| 732 .globl _bn_power5 | 992 .globl _bn_power5 |
| 733 .private_extern _bn_power5 | 993 .private_extern _bn_power5 |
| 734 | 994 |
| 735 .p2align 5 | 995 .p2align 5 |
| 736 _bn_power5: | 996 _bn_power5: |
| 737 movq %rsp,%rax | 997 movq %rsp,%rax |
| 738 pushq %rbx | 998 pushq %rbx |
| 739 pushq %rbp | 999 pushq %rbp |
| 740 pushq %r12 | 1000 pushq %r12 |
| 741 pushq %r13 | 1001 pushq %r13 |
| 742 pushq %r14 | 1002 pushq %r14 |
| 743 pushq %r15 | 1003 pushq %r15 |
| 744 » movl» %r9d,%r10d | 1004 |
| 745 shll $3,%r9d | 1005 shll $3,%r9d |
| 746 » shll» $3+2,%r10d | 1006 » leal» (%r9,%r9,2),%r10d |
| 747 negq %r9 | 1007 negq %r9 |
| 748 movq (%r8),%r8 | 1008 movq (%r8),%r8 |
| 749 | 1009 |
| 750 | 1010 |
| 751 | 1011 |
| 752 | 1012 |
| 753 | 1013 |
| 754 | 1014 |
| 755 | 1015 |
| 756 » leaq» -64(%rsp,%r9,2),%r11 | 1016 |
| 757 » subq» %rsi,%r11 | 1017 » leaq» -320(%rsp,%r9,2),%r11 |
| 1018 » subq» %rdi,%r11 |
| 758 andq $4095,%r11 | 1019 andq $4095,%r11 |
| 759 cmpq %r11,%r10 | 1020 cmpq %r11,%r10 |
| 760 jb L$pwr_sp_alt | 1021 jb L$pwr_sp_alt |
| 761 subq %r11,%rsp | 1022 subq %r11,%rsp |
| 762 » leaq» -64(%rsp,%r9,2),%rsp | 1023 » leaq» -320(%rsp,%r9,2),%rsp |
| 763 jmp L$pwr_sp_done | 1024 jmp L$pwr_sp_done |
| 764 | 1025 |
| 765 .p2align 5 | 1026 .p2align 5 |
| 766 L$pwr_sp_alt: | 1027 L$pwr_sp_alt: |
| 767 » leaq» 4096-64(,%r9,2),%r10 | 1028 » leaq» 4096-320(,%r9,2),%r10 |
| 768 » leaq» -64(%rsp,%r9,2),%rsp | 1029 » leaq» -320(%rsp,%r9,2),%rsp |
| 769 subq %r10,%r11 | 1030 subq %r10,%r11 |
| 770 movq $0,%r10 | 1031 movq $0,%r10 |
| 771 cmovcq %r10,%r11 | 1032 cmovcq %r10,%r11 |
| 772 subq %r11,%rsp | 1033 subq %r11,%rsp |
| 773 L$pwr_sp_done: | 1034 L$pwr_sp_done: |
| 774 andq $-64,%rsp | 1035 andq $-64,%rsp |
| 775 movq %r9,%r10 | 1036 movq %r9,%r10 |
| 776 negq %r9 | 1037 negq %r9 |
| 777 | 1038 |
| 778 | 1039 |
| 779 | 1040 |
| 780 | 1041 |
| 781 | 1042 |
| 782 | 1043 |
| 783 | 1044 |
| 784 | 1045 |
| 785 | 1046 |
| 786 | 1047 |
| 787 movq %r8,32(%rsp) | 1048 movq %r8,32(%rsp) |
| 788 movq %rax,40(%rsp) | 1049 movq %rax,40(%rsp) |
| 789 L$power5_body: | 1050 L$power5_body: |
| 790 .byte 102,72,15,110,207 | 1051 .byte 102,72,15,110,207 |
| 791 .byte 102,72,15,110,209 | 1052 .byte 102,72,15,110,209 |
| 792 .byte 102,73,15,110,218 | 1053 .byte 102,73,15,110,218 |
| 793 .byte 102,72,15,110,226 | 1054 .byte 102,72,15,110,226 |
| 794 | 1055 |
| 795 call __bn_sqr8x_internal | 1056 call __bn_sqr8x_internal |
| 1057 call __bn_post4x_internal |
| 796 call __bn_sqr8x_internal | 1058 call __bn_sqr8x_internal |
| 1059 call __bn_post4x_internal |
| 797 call __bn_sqr8x_internal | 1060 call __bn_sqr8x_internal |
| 1061 call __bn_post4x_internal |
| 798 call __bn_sqr8x_internal | 1062 call __bn_sqr8x_internal |
| 1063 call __bn_post4x_internal |
| 799 call __bn_sqr8x_internal | 1064 call __bn_sqr8x_internal |
| 1065 call __bn_post4x_internal |
| 800 | 1066 |
| 801 .byte 102,72,15,126,209 | 1067 .byte 102,72,15,126,209 |
| 802 .byte 102,72,15,126,226 | 1068 .byte 102,72,15,126,226 |
| 803 movq %rsi,%rdi | 1069 movq %rsi,%rdi |
| 804 movq 40(%rsp),%rax | 1070 movq 40(%rsp),%rax |
| 805 leaq 32(%rsp),%r8 | 1071 leaq 32(%rsp),%r8 |
| 806 | 1072 |
| 807 call mul4x_internal | 1073 call mul4x_internal |
| 808 | 1074 |
| 809 movq 40(%rsp),%rsi | 1075 movq 40(%rsp),%rsi |
| (...skipping 524 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1334 leaq (%rcx,%r11,2),%r8 | 1600 leaq (%rcx,%r11,2),%r8 |
| 1335 shrq $63,%r11 | 1601 shrq $63,%r11 |
| 1336 orq %r10,%r8 | 1602 orq %r10,%r8 |
| 1337 mulq %rax | 1603 mulq %rax |
| 1338 negq %r15 | 1604 negq %r15 |
| 1339 adcq %rax,%rbx | 1605 adcq %rax,%rbx |
| 1340 adcq %rdx,%r8 | 1606 adcq %rdx,%r8 |
| 1341 movq %rbx,-16(%rdi) | 1607 movq %rbx,-16(%rdi) |
| 1342 movq %r8,-8(%rdi) | 1608 movq %r8,-8(%rdi) |
| 1343 .byte 102,72,15,126,213 | 1609 .byte 102,72,15,126,213 |
| 1344 sqr8x_reduction: | 1610 __bn_sqr8x_reduction: |
| 1345 xorq %rax,%rax | 1611 xorq %rax,%rax |
| 1346 » leaq» (%rbp,%r9,2),%rcx | 1612 » leaq» (%r9,%rbp,1),%rcx |
| 1347 leaq 48+8(%rsp,%r9,2),%rdx | 1613 leaq 48+8(%rsp,%r9,2),%rdx |
| 1348 movq %rcx,0+8(%rsp) | 1614 movq %rcx,0+8(%rsp) |
| 1349 leaq 48+8(%rsp,%r9,1),%rdi | 1615 leaq 48+8(%rsp,%r9,1),%rdi |
| 1350 movq %rdx,8+8(%rsp) | 1616 movq %rdx,8+8(%rsp) |
| 1351 negq %r9 | 1617 negq %r9 |
| 1352 jmp L$8x_reduction_loop | 1618 jmp L$8x_reduction_loop |
| 1353 | 1619 |
| 1354 .p2align 5 | 1620 .p2align 5 |
| 1355 L$8x_reduction_loop: | 1621 L$8x_reduction_loop: |
| 1356 leaq (%rdi,%r9,1),%rdi | 1622 leaq (%rdi,%r9,1),%rdi |
| (...skipping 12 matching lines...) Expand all Loading... |
| 1369 .byte 0x67 | 1635 .byte 0x67 |
| 1370 movq %rbx,%r8 | 1636 movq %rbx,%r8 |
| 1371 imulq 32+8(%rsp),%rbx | 1637 imulq 32+8(%rsp),%rbx |
| 1372 movq 0(%rbp),%rax | 1638 movq 0(%rbp),%rax |
| 1373 movl $8,%ecx | 1639 movl $8,%ecx |
| 1374 jmp L$8x_reduce | 1640 jmp L$8x_reduce |
| 1375 | 1641 |
| 1376 .p2align 5 | 1642 .p2align 5 |
| 1377 L$8x_reduce: | 1643 L$8x_reduce: |
| 1378 mulq %rbx | 1644 mulq %rbx |
| 1379 » movq» 16(%rbp),%rax | 1645 » movq» 8(%rbp),%rax |
| 1380 negq %r8 | 1646 negq %r8 |
| 1381 movq %rdx,%r8 | 1647 movq %rdx,%r8 |
| 1382 adcq $0,%r8 | 1648 adcq $0,%r8 |
| 1383 | 1649 |
| 1384 mulq %rbx | 1650 mulq %rbx |
| 1385 addq %rax,%r9 | 1651 addq %rax,%r9 |
| 1386 » movq» 32(%rbp),%rax | 1652 » movq» 16(%rbp),%rax |
| 1387 adcq $0,%rdx | 1653 adcq $0,%rdx |
| 1388 addq %r9,%r8 | 1654 addq %r9,%r8 |
| 1389 movq %rbx,48-8+8(%rsp,%rcx,8) | 1655 movq %rbx,48-8+8(%rsp,%rcx,8) |
| 1390 movq %rdx,%r9 | 1656 movq %rdx,%r9 |
| 1391 adcq $0,%r9 | 1657 adcq $0,%r9 |
| 1392 | 1658 |
| 1393 mulq %rbx | 1659 mulq %rbx |
| 1394 addq %rax,%r10 | 1660 addq %rax,%r10 |
| 1395 » movq» 48(%rbp),%rax | 1661 » movq» 24(%rbp),%rax |
| 1396 adcq $0,%rdx | 1662 adcq $0,%rdx |
| 1397 addq %r10,%r9 | 1663 addq %r10,%r9 |
| 1398 movq 32+8(%rsp),%rsi | 1664 movq 32+8(%rsp),%rsi |
| 1399 movq %rdx,%r10 | 1665 movq %rdx,%r10 |
| 1400 adcq $0,%r10 | 1666 adcq $0,%r10 |
| 1401 | 1667 |
| 1402 mulq %rbx | 1668 mulq %rbx |
| 1403 addq %rax,%r11 | 1669 addq %rax,%r11 |
| 1404 » movq» 64(%rbp),%rax | 1670 » movq» 32(%rbp),%rax |
| 1405 adcq $0,%rdx | 1671 adcq $0,%rdx |
| 1406 imulq %r8,%rsi | 1672 imulq %r8,%rsi |
| 1407 addq %r11,%r10 | 1673 addq %r11,%r10 |
| 1408 movq %rdx,%r11 | 1674 movq %rdx,%r11 |
| 1409 adcq $0,%r11 | 1675 adcq $0,%r11 |
| 1410 | 1676 |
| 1411 mulq %rbx | 1677 mulq %rbx |
| 1412 addq %rax,%r12 | 1678 addq %rax,%r12 |
| 1413 » movq» 80(%rbp),%rax | 1679 » movq» 40(%rbp),%rax |
| 1414 adcq $0,%rdx | 1680 adcq $0,%rdx |
| 1415 addq %r12,%r11 | 1681 addq %r12,%r11 |
| 1416 movq %rdx,%r12 | 1682 movq %rdx,%r12 |
| 1417 adcq $0,%r12 | 1683 adcq $0,%r12 |
| 1418 | 1684 |
| 1419 mulq %rbx | 1685 mulq %rbx |
| 1420 addq %rax,%r13 | 1686 addq %rax,%r13 |
| 1421 » movq» 96(%rbp),%rax | 1687 » movq» 48(%rbp),%rax |
| 1422 adcq $0,%rdx | 1688 adcq $0,%rdx |
| 1423 addq %r13,%r12 | 1689 addq %r13,%r12 |
| 1424 movq %rdx,%r13 | 1690 movq %rdx,%r13 |
| 1425 adcq $0,%r13 | 1691 adcq $0,%r13 |
| 1426 | 1692 |
| 1427 mulq %rbx | 1693 mulq %rbx |
| 1428 addq %rax,%r14 | 1694 addq %rax,%r14 |
| 1429 » movq» 112(%rbp),%rax | 1695 » movq» 56(%rbp),%rax |
| 1430 adcq $0,%rdx | 1696 adcq $0,%rdx |
| 1431 addq %r14,%r13 | 1697 addq %r14,%r13 |
| 1432 movq %rdx,%r14 | 1698 movq %rdx,%r14 |
| 1433 adcq $0,%r14 | 1699 adcq $0,%r14 |
| 1434 | 1700 |
| 1435 mulq %rbx | 1701 mulq %rbx |
| 1436 movq %rsi,%rbx | 1702 movq %rsi,%rbx |
| 1437 addq %rax,%r15 | 1703 addq %rax,%r15 |
| 1438 movq 0(%rbp),%rax | 1704 movq 0(%rbp),%rax |
| 1439 adcq $0,%rdx | 1705 adcq $0,%rdx |
| 1440 addq %r15,%r14 | 1706 addq %r15,%r14 |
| 1441 movq %rdx,%r15 | 1707 movq %rdx,%r15 |
| 1442 adcq $0,%r15 | 1708 adcq $0,%r15 |
| 1443 | 1709 |
| 1444 decl %ecx | 1710 decl %ecx |
| 1445 jnz L$8x_reduce | 1711 jnz L$8x_reduce |
| 1446 | 1712 |
| 1447 » leaq» 128(%rbp),%rbp | 1713 » leaq» 64(%rbp),%rbp |
| 1448 xorq %rax,%rax | 1714 xorq %rax,%rax |
| 1449 movq 8+8(%rsp),%rdx | 1715 movq 8+8(%rsp),%rdx |
| 1450 cmpq 0+8(%rsp),%rbp | 1716 cmpq 0+8(%rsp),%rbp |
| 1451 jae L$8x_no_tail | 1717 jae L$8x_no_tail |
| 1452 | 1718 |
| 1453 .byte 0x66 | 1719 .byte 0x66 |
| 1454 addq 0(%rdi),%r8 | 1720 addq 0(%rdi),%r8 |
| 1455 adcq 8(%rdi),%r9 | 1721 adcq 8(%rdi),%r9 |
| 1456 adcq 16(%rdi),%r10 | 1722 adcq 16(%rdi),%r10 |
| 1457 adcq 24(%rdi),%r11 | 1723 adcq 24(%rdi),%r11 |
| 1458 adcq 32(%rdi),%r12 | 1724 adcq 32(%rdi),%r12 |
| 1459 adcq 40(%rdi),%r13 | 1725 adcq 40(%rdi),%r13 |
| 1460 adcq 48(%rdi),%r14 | 1726 adcq 48(%rdi),%r14 |
| 1461 adcq 56(%rdi),%r15 | 1727 adcq 56(%rdi),%r15 |
| 1462 sbbq %rsi,%rsi | 1728 sbbq %rsi,%rsi |
| 1463 | 1729 |
| 1464 movq 48+56+8(%rsp),%rbx | 1730 movq 48+56+8(%rsp),%rbx |
| 1465 movl $8,%ecx | 1731 movl $8,%ecx |
| 1466 movq 0(%rbp),%rax | 1732 movq 0(%rbp),%rax |
| 1467 jmp L$8x_tail | 1733 jmp L$8x_tail |
| 1468 | 1734 |
| 1469 .p2align 5 | 1735 .p2align 5 |
| 1470 L$8x_tail: | 1736 L$8x_tail: |
| 1471 mulq %rbx | 1737 mulq %rbx |
| 1472 addq %rax,%r8 | 1738 addq %rax,%r8 |
| 1473 » movq» 16(%rbp),%rax | 1739 » movq» 8(%rbp),%rax |
| 1474 movq %r8,(%rdi) | 1740 movq %r8,(%rdi) |
| 1475 movq %rdx,%r8 | 1741 movq %rdx,%r8 |
| 1476 adcq $0,%r8 | 1742 adcq $0,%r8 |
| 1477 | 1743 |
| 1478 mulq %rbx | 1744 mulq %rbx |
| 1479 addq %rax,%r9 | 1745 addq %rax,%r9 |
| 1480 » movq» 32(%rbp),%rax | 1746 » movq» 16(%rbp),%rax |
| 1481 adcq $0,%rdx | 1747 adcq $0,%rdx |
| 1482 addq %r9,%r8 | 1748 addq %r9,%r8 |
| 1483 leaq 8(%rdi),%rdi | 1749 leaq 8(%rdi),%rdi |
| 1484 movq %rdx,%r9 | 1750 movq %rdx,%r9 |
| 1485 adcq $0,%r9 | 1751 adcq $0,%r9 |
| 1486 | 1752 |
| 1487 mulq %rbx | 1753 mulq %rbx |
| 1488 addq %rax,%r10 | 1754 addq %rax,%r10 |
| 1489 » movq» 48(%rbp),%rax | 1755 » movq» 24(%rbp),%rax |
| 1490 adcq $0,%rdx | 1756 adcq $0,%rdx |
| 1491 addq %r10,%r9 | 1757 addq %r10,%r9 |
| 1492 movq %rdx,%r10 | 1758 movq %rdx,%r10 |
| 1493 adcq $0,%r10 | 1759 adcq $0,%r10 |
| 1494 | 1760 |
| 1495 mulq %rbx | 1761 mulq %rbx |
| 1496 addq %rax,%r11 | 1762 addq %rax,%r11 |
| 1497 » movq» 64(%rbp),%rax | 1763 » movq» 32(%rbp),%rax |
| 1498 adcq $0,%rdx | 1764 adcq $0,%rdx |
| 1499 addq %r11,%r10 | 1765 addq %r11,%r10 |
| 1500 movq %rdx,%r11 | 1766 movq %rdx,%r11 |
| 1501 adcq $0,%r11 | 1767 adcq $0,%r11 |
| 1502 | 1768 |
| 1503 mulq %rbx | 1769 mulq %rbx |
| 1504 addq %rax,%r12 | 1770 addq %rax,%r12 |
| 1505 » movq» 80(%rbp),%rax | 1771 » movq» 40(%rbp),%rax |
| 1506 adcq $0,%rdx | 1772 adcq $0,%rdx |
| 1507 addq %r12,%r11 | 1773 addq %r12,%r11 |
| 1508 movq %rdx,%r12 | 1774 movq %rdx,%r12 |
| 1509 adcq $0,%r12 | 1775 adcq $0,%r12 |
| 1510 | 1776 |
| 1511 mulq %rbx | 1777 mulq %rbx |
| 1512 addq %rax,%r13 | 1778 addq %rax,%r13 |
| 1513 » movq» 96(%rbp),%rax | 1779 » movq» 48(%rbp),%rax |
| 1514 adcq $0,%rdx | 1780 adcq $0,%rdx |
| 1515 addq %r13,%r12 | 1781 addq %r13,%r12 |
| 1516 movq %rdx,%r13 | 1782 movq %rdx,%r13 |
| 1517 adcq $0,%r13 | 1783 adcq $0,%r13 |
| 1518 | 1784 |
| 1519 mulq %rbx | 1785 mulq %rbx |
| 1520 addq %rax,%r14 | 1786 addq %rax,%r14 |
| 1521 » movq» 112(%rbp),%rax | 1787 » movq» 56(%rbp),%rax |
| 1522 adcq $0,%rdx | 1788 adcq $0,%rdx |
| 1523 addq %r14,%r13 | 1789 addq %r14,%r13 |
| 1524 movq %rdx,%r14 | 1790 movq %rdx,%r14 |
| 1525 adcq $0,%r14 | 1791 adcq $0,%r14 |
| 1526 | 1792 |
| 1527 mulq %rbx | 1793 mulq %rbx |
| 1528 movq 48-16+8(%rsp,%rcx,8),%rbx | 1794 movq 48-16+8(%rsp,%rcx,8),%rbx |
| 1529 addq %rax,%r15 | 1795 addq %rax,%r15 |
| 1530 adcq $0,%rdx | 1796 adcq $0,%rdx |
| 1531 addq %r15,%r14 | 1797 addq %r15,%r14 |
| 1532 movq 0(%rbp),%rax | 1798 movq 0(%rbp),%rax |
| 1533 movq %rdx,%r15 | 1799 movq %rdx,%r15 |
| 1534 adcq $0,%r15 | 1800 adcq $0,%r15 |
| 1535 | 1801 |
| 1536 decl %ecx | 1802 decl %ecx |
| 1537 jnz L$8x_tail | 1803 jnz L$8x_tail |
| 1538 | 1804 |
| 1539 » leaq» 128(%rbp),%rbp | 1805 » leaq» 64(%rbp),%rbp |
| 1540 movq 8+8(%rsp),%rdx | 1806 movq 8+8(%rsp),%rdx |
| 1541 cmpq 0+8(%rsp),%rbp | 1807 cmpq 0+8(%rsp),%rbp |
| 1542 jae L$8x_tail_done | 1808 jae L$8x_tail_done |
| 1543 | 1809 |
| 1544 movq 48+56+8(%rsp),%rbx | 1810 movq 48+56+8(%rsp),%rbx |
| 1545 negq %rsi | 1811 negq %rsi |
| 1546 movq 0(%rbp),%rax | 1812 movq 0(%rbp),%rax |
| 1547 adcq 0(%rdi),%r8 | 1813 adcq 0(%rdi),%r8 |
| 1548 adcq 8(%rdi),%r9 | 1814 adcq 8(%rdi),%r9 |
| 1549 adcq 16(%rdi),%r10 | 1815 adcq 16(%rdi),%r10 |
| 1550 adcq 24(%rdi),%r11 | 1816 adcq 24(%rdi),%r11 |
| 1551 adcq 32(%rdi),%r12 | 1817 adcq 32(%rdi),%r12 |
| 1552 adcq 40(%rdi),%r13 | 1818 adcq 40(%rdi),%r13 |
| 1553 adcq 48(%rdi),%r14 | 1819 adcq 48(%rdi),%r14 |
| 1554 adcq 56(%rdi),%r15 | 1820 adcq 56(%rdi),%r15 |
| 1555 sbbq %rsi,%rsi | 1821 sbbq %rsi,%rsi |
| 1556 | 1822 |
| 1557 movl $8,%ecx | 1823 movl $8,%ecx |
| 1558 jmp L$8x_tail | 1824 jmp L$8x_tail |
| 1559 | 1825 |
| 1560 .p2align 5 | 1826 .p2align 5 |
| 1561 L$8x_tail_done: | 1827 L$8x_tail_done: |
| 1562 addq (%rdx),%r8 | 1828 addq (%rdx),%r8 |
| 1829 adcq $0,%r9 |
| 1830 adcq $0,%r10 |
| 1831 adcq $0,%r11 |
| 1832 adcq $0,%r12 |
| 1833 adcq $0,%r13 |
| 1834 adcq $0,%r14 |
| 1835 adcq $0,%r15 |
| 1836 |
| 1837 |
| 1563 xorq %rax,%rax | 1838 xorq %rax,%rax |
| 1564 | 1839 |
| 1565 negq %rsi | 1840 negq %rsi |
| 1566 L$8x_no_tail: | 1841 L$8x_no_tail: |
| 1567 adcq 0(%rdi),%r8 | 1842 adcq 0(%rdi),%r8 |
| 1568 adcq 8(%rdi),%r9 | 1843 adcq 8(%rdi),%r9 |
| 1569 adcq 16(%rdi),%r10 | 1844 adcq 16(%rdi),%r10 |
| 1570 adcq 24(%rdi),%r11 | 1845 adcq 24(%rdi),%r11 |
| 1571 adcq 32(%rdi),%r12 | 1846 adcq 32(%rdi),%r12 |
| 1572 adcq 40(%rdi),%r13 | 1847 adcq 40(%rdi),%r13 |
| 1573 adcq 48(%rdi),%r14 | 1848 adcq 48(%rdi),%r14 |
| 1574 adcq 56(%rdi),%r15 | 1849 adcq 56(%rdi),%r15 |
| 1575 adcq $0,%rax | 1850 adcq $0,%rax |
| 1576 » movq» -16(%rbp),%rcx | 1851 » movq» -8(%rbp),%rcx |
| 1577 xorq %rsi,%rsi | 1852 xorq %rsi,%rsi |
| 1578 | 1853 |
| 1579 .byte 102,72,15,126,213 | 1854 .byte 102,72,15,126,213 |
| 1580 | 1855 |
| 1581 movq %r8,0(%rdi) | 1856 movq %r8,0(%rdi) |
| 1582 movq %r9,8(%rdi) | 1857 movq %r9,8(%rdi) |
| 1583 .byte 102,73,15,126,217 | 1858 .byte 102,73,15,126,217 |
| 1584 movq %r10,16(%rdi) | 1859 movq %r10,16(%rdi) |
| 1585 movq %r11,24(%rdi) | 1860 movq %r11,24(%rdi) |
| 1586 movq %r12,32(%rdi) | 1861 movq %r12,32(%rdi) |
| 1587 movq %r13,40(%rdi) | 1862 movq %r13,40(%rdi) |
| 1588 movq %r14,48(%rdi) | 1863 movq %r14,48(%rdi) |
| 1589 movq %r15,56(%rdi) | 1864 movq %r15,56(%rdi) |
| 1590 leaq 64(%rdi),%rdi | 1865 leaq 64(%rdi),%rdi |
| 1591 | 1866 |
| 1592 cmpq %rdx,%rdi | 1867 cmpq %rdx,%rdi |
| 1593 jb L$8x_reduction_loop | 1868 jb L$8x_reduction_loop |
| 1869 .byte 0xf3,0xc3 |
| 1594 | 1870 |
| 1595 subq %r15,%rcx | |
| 1596 leaq (%rdi,%r9,1),%rbx | |
| 1597 adcq %rsi,%rsi | |
| 1598 movq %r9,%rcx | |
| 1599 orq %rsi,%rax | |
| 1600 .byte 102,72,15,126,207 | |
| 1601 xorq $1,%rax | |
| 1602 .byte 102,72,15,126,206 | |
| 1603 leaq (%rbp,%rax,8),%rbp | |
| 1604 sarq $3+2,%rcx | |
| 1605 jmp L$sqr4x_sub | |
| 1606 | 1871 |
| 1607 .p2align 5 | 1872 .p2align 5 |
| 1873 __bn_post4x_internal: |
| 1874 movq 0(%rbp),%r12 |
| 1875 leaq (%rdi,%r9,1),%rbx |
| 1876 movq %r9,%rcx |
| 1877 .byte 102,72,15,126,207 |
| 1878 negq %rax |
| 1879 .byte 102,72,15,126,206 |
| 1880 sarq $3+2,%rcx |
| 1881 decq %r12 |
| 1882 xorq %r10,%r10 |
| 1883 movq 8(%rbp),%r13 |
| 1884 movq 16(%rbp),%r14 |
| 1885 movq 24(%rbp),%r15 |
| 1886 jmp L$sqr4x_sub_entry |
| 1887 |
| 1888 .p2align 4 |
| 1608 L$sqr4x_sub: | 1889 L$sqr4x_sub: |
| 1609 .byte» 0x66 | 1890 » movq» 0(%rbp),%r12 |
| 1610 » movq» 0(%rbx),%r12 | 1891 » movq» 8(%rbp),%r13 |
| 1611 » movq» 8(%rbx),%r13 | 1892 » movq» 16(%rbp),%r14 |
| 1612 » sbbq» 0(%rbp),%r12 | 1893 » movq» 24(%rbp),%r15 |
| 1613 » movq» 16(%rbx),%r14 | 1894 L$sqr4x_sub_entry: |
| 1614 » sbbq» 16(%rbp),%r13 | 1895 » leaq» 32(%rbp),%rbp |
| 1615 » movq» 24(%rbx),%r15 | 1896 » notq» %r12 |
| 1897 » notq» %r13 |
| 1898 » notq» %r14 |
| 1899 » notq» %r15 |
| 1900 » andq» %rax,%r12 |
| 1901 » andq» %rax,%r13 |
| 1902 » andq» %rax,%r14 |
| 1903 » andq» %rax,%r15 |
| 1904 |
| 1905 » negq» %r10 |
| 1906 » adcq» 0(%rbx),%r12 |
| 1907 » adcq» 8(%rbx),%r13 |
| 1908 » adcq» 16(%rbx),%r14 |
| 1909 » adcq» 24(%rbx),%r15 |
| 1910 » movq» %r12,0(%rdi) |
| 1616 leaq 32(%rbx),%rbx | 1911 leaq 32(%rbx),%rbx |
| 1617 sbbq 32(%rbp),%r14 | |
| 1618 movq %r12,0(%rdi) | |
| 1619 sbbq 48(%rbp),%r15 | |
| 1620 leaq 64(%rbp),%rbp | |
| 1621 movq %r13,8(%rdi) | 1912 movq %r13,8(%rdi) |
| 1913 sbbq %r10,%r10 |
| 1622 movq %r14,16(%rdi) | 1914 movq %r14,16(%rdi) |
| 1623 movq %r15,24(%rdi) | 1915 movq %r15,24(%rdi) |
| 1624 leaq 32(%rdi),%rdi | 1916 leaq 32(%rdi),%rdi |
| 1625 | 1917 |
| 1626 incq %rcx | 1918 incq %rcx |
| 1627 jnz L$sqr4x_sub | 1919 jnz L$sqr4x_sub |
| 1920 |
| 1628 movq %r9,%r10 | 1921 movq %r9,%r10 |
| 1629 negq %r9 | 1922 negq %r9 |
| 1630 .byte 0xf3,0xc3 | 1923 .byte 0xf3,0xc3 |
| 1631 | 1924 |
| 1632 .globl _bn_from_montgomery | 1925 .globl _bn_from_montgomery |
| 1633 .private_extern _bn_from_montgomery | 1926 .private_extern _bn_from_montgomery |
| 1634 | 1927 |
| 1635 .p2align 5 | 1928 .p2align 5 |
| 1636 _bn_from_montgomery: | 1929 _bn_from_montgomery: |
| 1637 testl $7,%r9d | 1930 testl $7,%r9d |
| 1638 jz bn_from_mont8x | 1931 jz bn_from_mont8x |
| 1639 xorl %eax,%eax | 1932 xorl %eax,%eax |
| 1640 .byte 0xf3,0xc3 | 1933 .byte 0xf3,0xc3 |
| 1641 | 1934 |
| 1642 | 1935 |
| 1643 | 1936 |
| 1644 .p2align 5 | 1937 .p2align 5 |
| 1645 bn_from_mont8x: | 1938 bn_from_mont8x: |
| 1646 .byte 0x67 | 1939 .byte 0x67 |
| 1647 movq %rsp,%rax | 1940 movq %rsp,%rax |
| 1648 pushq %rbx | 1941 pushq %rbx |
| 1649 pushq %rbp | 1942 pushq %rbp |
| 1650 pushq %r12 | 1943 pushq %r12 |
| 1651 pushq %r13 | 1944 pushq %r13 |
| 1652 pushq %r14 | 1945 pushq %r14 |
| 1653 pushq %r15 | 1946 pushq %r15 |
| 1654 .byte» 0x67 | 1947 |
| 1655 » movl» %r9d,%r10d | |
| 1656 shll $3,%r9d | 1948 shll $3,%r9d |
| 1657 » shll» $3+2,%r10d | 1949 » leaq» (%r9,%r9,2),%r10 |
| 1658 negq %r9 | 1950 negq %r9 |
| 1659 movq (%r8),%r8 | 1951 movq (%r8),%r8 |
| 1660 | 1952 |
| 1661 | 1953 |
| 1662 | 1954 |
| 1663 | 1955 |
| 1664 | 1956 |
| 1665 | 1957 |
| 1666 | 1958 |
| 1667 » leaq» -64(%rsp,%r9,2),%r11 | 1959 |
| 1668 » subq» %rsi,%r11 | 1960 » leaq» -320(%rsp,%r9,2),%r11 |
| 1961 » subq» %rdi,%r11 |
| 1669 andq $4095,%r11 | 1962 andq $4095,%r11 |
| 1670 cmpq %r11,%r10 | 1963 cmpq %r11,%r10 |
| 1671 jb L$from_sp_alt | 1964 jb L$from_sp_alt |
| 1672 subq %r11,%rsp | 1965 subq %r11,%rsp |
| 1673 » leaq» -64(%rsp,%r9,2),%rsp | 1966 » leaq» -320(%rsp,%r9,2),%rsp |
| 1674 jmp L$from_sp_done | 1967 jmp L$from_sp_done |
| 1675 | 1968 |
| 1676 .p2align 5 | 1969 .p2align 5 |
| 1677 L$from_sp_alt: | 1970 L$from_sp_alt: |
| 1678 » leaq» 4096-64(,%r9,2),%r10 | 1971 » leaq» 4096-320(,%r9,2),%r10 |
| 1679 » leaq» -64(%rsp,%r9,2),%rsp | 1972 » leaq» -320(%rsp,%r9,2),%rsp |
| 1680 subq %r10,%r11 | 1973 subq %r10,%r11 |
| 1681 movq $0,%r10 | 1974 movq $0,%r10 |
| 1682 cmovcq %r10,%r11 | 1975 cmovcq %r10,%r11 |
| 1683 subq %r11,%rsp | 1976 subq %r11,%rsp |
| 1684 L$from_sp_done: | 1977 L$from_sp_done: |
| 1685 andq $-64,%rsp | 1978 andq $-64,%rsp |
| 1686 movq %r9,%r10 | 1979 movq %r9,%r10 |
| 1687 negq %r9 | 1980 negq %r9 |
| 1688 | 1981 |
| 1689 | 1982 |
| (...skipping 30 matching lines...) Expand all Loading... |
| 1720 movdqa %xmm4,48(%rax) | 2013 movdqa %xmm4,48(%rax) |
| 1721 leaq 64(%rax),%rax | 2014 leaq 64(%rax),%rax |
| 1722 subq $64,%r11 | 2015 subq $64,%r11 |
| 1723 jnz L$mul_by_1 | 2016 jnz L$mul_by_1 |
| 1724 | 2017 |
| 1725 .byte 102,72,15,110,207 | 2018 .byte 102,72,15,110,207 |
| 1726 .byte 102,72,15,110,209 | 2019 .byte 102,72,15,110,209 |
| 1727 .byte 0x67 | 2020 .byte 0x67 |
| 1728 movq %rcx,%rbp | 2021 movq %rcx,%rbp |
| 1729 .byte 102,73,15,110,218 | 2022 .byte 102,73,15,110,218 |
| 1730 » call» sqr8x_reduction | 2023 » call» __bn_sqr8x_reduction |
| 2024 » call» __bn_post4x_internal |
| 1731 | 2025 |
| 1732 pxor %xmm0,%xmm0 | 2026 pxor %xmm0,%xmm0 |
| 1733 leaq 48(%rsp),%rax | 2027 leaq 48(%rsp),%rax |
| 1734 movq 40(%rsp),%rsi | 2028 movq 40(%rsp),%rsi |
| 1735 jmp L$from_mont_zero | 2029 jmp L$from_mont_zero |
| 1736 | 2030 |
| 1737 .p2align 5 | 2031 .p2align 5 |
| 1738 L$from_mont_zero: | 2032 L$from_mont_zero: |
| 1739 movdqa %xmm0,0(%rax) | 2033 movdqa %xmm0,0(%rax) |
| 1740 movdqa %xmm0,16(%rax) | 2034 movdqa %xmm0,16(%rax) |
| (...skipping 29 matching lines...) Expand all Loading... |
| 1770 leaq 256(%rdx),%rdx | 2064 leaq 256(%rdx),%rdx |
| 1771 subl $1,%esi | 2065 subl $1,%esi |
| 1772 jnz L$scatter | 2066 jnz L$scatter |
| 1773 L$scatter_epilogue: | 2067 L$scatter_epilogue: |
| 1774 .byte 0xf3,0xc3 | 2068 .byte 0xf3,0xc3 |
| 1775 | 2069 |
| 1776 | 2070 |
| 1777 .globl _bn_gather5 | 2071 .globl _bn_gather5 |
| 1778 .private_extern _bn_gather5 | 2072 .private_extern _bn_gather5 |
| 1779 | 2073 |
| 1780 .p2align» 4 | 2074 .p2align» 5 |
| 1781 _bn_gather5: | 2075 _bn_gather5: |
| 1782 » movl» %ecx,%r11d | 2076 L$SEH_begin_bn_gather5: |
| 1783 » shrl» $3,%ecx | 2077 |
| 1784 » andq» $7,%r11 | 2078 .byte» 0x4c,0x8d,0x14,0x24 |
| 1785 » notl» %ecx | 2079 .byte» 0x48,0x81,0xec,0x08,0x01,0x00,0x00 |
| 1786 » leaq» L$magic_masks(%rip),%rax | 2080 » leaq» L$inc(%rip),%rax |
| 1787 » andl» $3,%ecx | 2081 » andq» $-16,%rsp |
| 1788 » leaq» 128(%rdx,%r11,8),%rdx | 2082 |
| 1789 » movq» 0(%rax,%rcx,8),%xmm4 | 2083 » movd» %ecx,%xmm5 |
| 1790 » movq» 8(%rax,%rcx,8),%xmm5 | 2084 » movdqa» 0(%rax),%xmm0 |
| 1791 » movq» 16(%rax,%rcx,8),%xmm6 | 2085 » movdqa» 16(%rax),%xmm1 |
| 1792 » movq» 24(%rax,%rcx,8),%xmm7 | 2086 » leaq» 128(%rdx),%r11 |
| 2087 » leaq» 128(%rsp),%rax |
| 2088 |
| 2089 » pshufd» $0,%xmm5,%xmm5 |
| 2090 » movdqa» %xmm1,%xmm4 |
| 2091 » movdqa» %xmm1,%xmm2 |
| 2092 » paddd» %xmm0,%xmm1 |
| 2093 » pcmpeqd»%xmm5,%xmm0 |
| 2094 » movdqa» %xmm4,%xmm3 |
| 2095 |
| 2096 » paddd» %xmm1,%xmm2 |
| 2097 » pcmpeqd»%xmm5,%xmm1 |
| 2098 » movdqa» %xmm0,-128(%rax) |
| 2099 » movdqa» %xmm4,%xmm0 |
| 2100 |
| 2101 » paddd» %xmm2,%xmm3 |
| 2102 » pcmpeqd»%xmm5,%xmm2 |
| 2103 » movdqa» %xmm1,-112(%rax) |
| 2104 » movdqa» %xmm4,%xmm1 |
| 2105 |
| 2106 » paddd» %xmm3,%xmm0 |
| 2107 » pcmpeqd»%xmm5,%xmm3 |
| 2108 » movdqa» %xmm2,-96(%rax) |
| 2109 » movdqa» %xmm4,%xmm2 |
| 2110 » paddd» %xmm0,%xmm1 |
| 2111 » pcmpeqd»%xmm5,%xmm0 |
| 2112 » movdqa» %xmm3,-80(%rax) |
| 2113 » movdqa» %xmm4,%xmm3 |
| 2114 |
| 2115 » paddd» %xmm1,%xmm2 |
| 2116 » pcmpeqd»%xmm5,%xmm1 |
| 2117 » movdqa» %xmm0,-64(%rax) |
| 2118 » movdqa» %xmm4,%xmm0 |
| 2119 |
| 2120 » paddd» %xmm2,%xmm3 |
| 2121 » pcmpeqd»%xmm5,%xmm2 |
| 2122 » movdqa» %xmm1,-48(%rax) |
| 2123 » movdqa» %xmm4,%xmm1 |
| 2124 |
| 2125 » paddd» %xmm3,%xmm0 |
| 2126 » pcmpeqd»%xmm5,%xmm3 |
| 2127 » movdqa» %xmm2,-32(%rax) |
| 2128 » movdqa» %xmm4,%xmm2 |
| 2129 » paddd» %xmm0,%xmm1 |
| 2130 » pcmpeqd»%xmm5,%xmm0 |
| 2131 » movdqa» %xmm3,-16(%rax) |
| 2132 » movdqa» %xmm4,%xmm3 |
| 2133 |
| 2134 » paddd» %xmm1,%xmm2 |
| 2135 » pcmpeqd»%xmm5,%xmm1 |
| 2136 » movdqa» %xmm0,0(%rax) |
| 2137 » movdqa» %xmm4,%xmm0 |
| 2138 |
| 2139 » paddd» %xmm2,%xmm3 |
| 2140 » pcmpeqd»%xmm5,%xmm2 |
| 2141 » movdqa» %xmm1,16(%rax) |
| 2142 » movdqa» %xmm4,%xmm1 |
| 2143 |
| 2144 » paddd» %xmm3,%xmm0 |
| 2145 » pcmpeqd»%xmm5,%xmm3 |
| 2146 » movdqa» %xmm2,32(%rax) |
| 2147 » movdqa» %xmm4,%xmm2 |
| 2148 » paddd» %xmm0,%xmm1 |
| 2149 » pcmpeqd»%xmm5,%xmm0 |
| 2150 » movdqa» %xmm3,48(%rax) |
| 2151 » movdqa» %xmm4,%xmm3 |
| 2152 |
| 2153 » paddd» %xmm1,%xmm2 |
| 2154 » pcmpeqd»%xmm5,%xmm1 |
| 2155 » movdqa» %xmm0,64(%rax) |
| 2156 » movdqa» %xmm4,%xmm0 |
| 2157 |
| 2158 » paddd» %xmm2,%xmm3 |
| 2159 » pcmpeqd»%xmm5,%xmm2 |
| 2160 » movdqa» %xmm1,80(%rax) |
| 2161 » movdqa» %xmm4,%xmm1 |
| 2162 |
| 2163 » paddd» %xmm3,%xmm0 |
| 2164 » pcmpeqd»%xmm5,%xmm3 |
| 2165 » movdqa» %xmm2,96(%rax) |
| 2166 » movdqa» %xmm4,%xmm2 |
| 2167 » movdqa» %xmm3,112(%rax) |
| 1793 jmp L$gather | 2168 jmp L$gather |
| 1794 .p2align» 4 | 2169 |
| 2170 .p2align» 5 |
| 1795 L$gather: | 2171 L$gather: |
| 1796 » movq» -128(%rdx),%xmm0 | 2172 » pxor» %xmm4,%xmm4 |
| 1797 » movq» -64(%rdx),%xmm1 | 2173 » pxor» %xmm5,%xmm5 |
| 1798 » pand» %xmm4,%xmm0 | 2174 » movdqa» -128(%r11),%xmm0 |
| 1799 » movq» 0(%rdx),%xmm2 | 2175 » movdqa» -112(%r11),%xmm1 |
| 1800 » pand» %xmm5,%xmm1 | 2176 » movdqa» -96(%r11),%xmm2 |
| 1801 » movq» 64(%rdx),%xmm3 | 2177 » pand» -128(%rax),%xmm0 |
| 1802 » pand» %xmm6,%xmm2 | 2178 » movdqa» -80(%r11),%xmm3 |
| 1803 » por» %xmm1,%xmm0 | 2179 » pand» -112(%rax),%xmm1 |
| 1804 » pand» %xmm7,%xmm3 | 2180 » por» %xmm0,%xmm4 |
| 1805 .byte» 0x67,0x67 | 2181 » pand» -96(%rax),%xmm2 |
| 1806 » por» %xmm2,%xmm0 | 2182 » por» %xmm1,%xmm5 |
| 1807 » leaq» 256(%rdx),%rdx | 2183 » pand» -80(%rax),%xmm3 |
| 1808 » por» %xmm3,%xmm0 | 2184 » por» %xmm2,%xmm4 |
| 1809 | 2185 » por» %xmm3,%xmm5 |
| 2186 » movdqa» -64(%r11),%xmm0 |
| 2187 » movdqa» -48(%r11),%xmm1 |
| 2188 » movdqa» -32(%r11),%xmm2 |
| 2189 » pand» -64(%rax),%xmm0 |
| 2190 » movdqa» -16(%r11),%xmm3 |
| 2191 » pand» -48(%rax),%xmm1 |
| 2192 » por» %xmm0,%xmm4 |
| 2193 » pand» -32(%rax),%xmm2 |
| 2194 » por» %xmm1,%xmm5 |
| 2195 » pand» -16(%rax),%xmm3 |
| 2196 » por» %xmm2,%xmm4 |
| 2197 » por» %xmm3,%xmm5 |
| 2198 » movdqa» 0(%r11),%xmm0 |
| 2199 » movdqa» 16(%r11),%xmm1 |
| 2200 » movdqa» 32(%r11),%xmm2 |
| 2201 » pand» 0(%rax),%xmm0 |
| 2202 » movdqa» 48(%r11),%xmm3 |
| 2203 » pand» 16(%rax),%xmm1 |
| 2204 » por» %xmm0,%xmm4 |
| 2205 » pand» 32(%rax),%xmm2 |
| 2206 » por» %xmm1,%xmm5 |
| 2207 » pand» 48(%rax),%xmm3 |
| 2208 » por» %xmm2,%xmm4 |
| 2209 » por» %xmm3,%xmm5 |
| 2210 » movdqa» 64(%r11),%xmm0 |
| 2211 » movdqa» 80(%r11),%xmm1 |
| 2212 » movdqa» 96(%r11),%xmm2 |
| 2213 » pand» 64(%rax),%xmm0 |
| 2214 » movdqa» 112(%r11),%xmm3 |
| 2215 » pand» 80(%rax),%xmm1 |
| 2216 » por» %xmm0,%xmm4 |
| 2217 » pand» 96(%rax),%xmm2 |
| 2218 » por» %xmm1,%xmm5 |
| 2219 » pand» 112(%rax),%xmm3 |
| 2220 » por» %xmm2,%xmm4 |
| 2221 » por» %xmm3,%xmm5 |
| 2222 » por» %xmm5,%xmm4 |
| 2223 » leaq» 256(%r11),%r11 |
| 2224 » pshufd» $0x4e,%xmm4,%xmm0 |
| 2225 » por» %xmm4,%xmm0 |
| 1810 movq %xmm0,(%rdi) | 2226 movq %xmm0,(%rdi) |
| 1811 leaq 8(%rdi),%rdi | 2227 leaq 8(%rdi),%rdi |
| 1812 subl $1,%esi | 2228 subl $1,%esi |
| 1813 jnz L$gather | 2229 jnz L$gather |
| 2230 |
| 2231 leaq (%r10),%rsp |
| 1814 .byte 0xf3,0xc3 | 2232 .byte 0xf3,0xc3 |
| 1815 L$SEH_end_bn_gather5: | 2233 L$SEH_end_bn_gather5: |
| 1816 | 2234 |
| 1817 .p2align 6 | 2235 .p2align 6 |
| 1818 L$magic_masks: | 2236 L$inc: |
| 1819 .long» 0,0, 0,0, 0,0, -1,-1 | 2237 .long» 0,0, 1,1 |
| 1820 .long» 0,0, 0,0, 0,0, 0,0 | 2238 .long» 2,2, 2,2 |
| 1821 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105
,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97
,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71
,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,1
11,114,103,62,0 | 2239 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105
,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97
,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71
,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,1
11,114,103,62,0 |
| 1822 #endif | 2240 #endif |
| OLD | NEW |