| Index: gcc/gmp/mpn/x86_64/aorsmul_1.asm
|
| diff --git a/gcc/gmp/mpn/x86_64/aorsmul_1.asm b/gcc/gmp/mpn/x86_64/aorsmul_1.asm
|
| deleted file mode 100644
|
| index a25c74ebdcfd072255bc61d4d26b411f82a61b43..0000000000000000000000000000000000000000
|
| --- a/gcc/gmp/mpn/x86_64/aorsmul_1.asm
|
| +++ /dev/null
|
| @@ -1,147 +0,0 @@
|
| -dnl AMD64 mpn_addmul_1 and mpn_submul_1.
|
| -
|
| -dnl Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
|
| -
|
| -dnl This file is part of the GNU MP Library.
|
| -
|
| -dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
| -dnl it under the terms of the GNU Lesser General Public License as published
|
| -dnl by the Free Software Foundation; either version 3 of the License, or (at
|
| -dnl your option) any later version.
|
| -
|
| -dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
| -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
| -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
| -dnl License for more details.
|
| -
|
| -dnl You should have received a copy of the GNU Lesser General Public License
|
| -dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
|
| -
|
| -include(`../config.m4')
|
| -
|
| -C cycles/limb
|
| -C K8,K9: 2.5
|
| -C K10: 2.5
|
| -C P4: 14.9
|
| -C P6-15 (Core2): 5.09
|
| -C P6-28 (Atom): 21.3
|
| -
|
| -C The inner loop of this code is the result of running a code generation and
|
| -C optimization tool suite written by David Harvey and Torbjorn Granlund.
|
| -
|
| -C TODO:
|
| -C * The inner loop is great, but the prologue and epilogue code was
|
| -C quickly written. Tune it!
|
| -
|
| -C INPUT PARAMETERS
|
| -define(`rp', `%rdi')
|
| -define(`up', `%rsi')
|
| -define(`n_param',`%rdx')
|
| -define(`vl', `%rcx')
|
| -
|
| -define(`n', `%r11')
|
| -
|
| -ifdef(`OPERATION_addmul_1',`
|
| - define(`ADDSUB', `add')
|
| - define(`func', `mpn_addmul_1')
|
| -')
|
| -ifdef(`OPERATION_submul_1',`
|
| - define(`ADDSUB', `sub')
|
| - define(`func', `mpn_submul_1')
|
| -')
|
| -
|
| -MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
|
| -
|
| -ASM_START()
|
| - TEXT
|
| - ALIGN(16)
|
| -PROLOGUE(func)
|
| - mov (up), %rax C read first u limb early
|
| - push %rbx
|
| - mov n_param, %rbx C move away n from rdx, mul uses it
|
| - mul vl
|
| - mov %rbx, %r11
|
| -
|
| - and $3, R32(%rbx)
|
| - jz L(b0)
|
| - cmp $2, R32(%rbx)
|
| - jz L(b2)
|
| - jg L(b3)
|
| -
|
| -L(b1): dec n
|
| - jne L(gt1)
|
| - ADDSUB %rax, (rp)
|
| - jmp L(ret)
|
| -L(gt1): lea 8(up,n,8), up
|
| - lea -8(rp,n,8), rp
|
| - neg n
|
| - xor %r10, %r10
|
| - xor R32(%rbx), R32(%rbx)
|
| - mov %rax, %r9
|
| - mov (up,n,8), %rax
|
| - mov %rdx, %r8
|
| - jmp L(L1)
|
| -
|
| -L(b0): lea (up,n,8), up
|
| - lea -16(rp,n,8), rp
|
| - neg n
|
| - xor %r10, %r10
|
| - mov %rax, %r8
|
| - mov %rdx, %rbx
|
| - jmp L(L0)
|
| -
|
| -L(b3): lea -8(up,n,8), up
|
| - lea -24(rp,n,8), rp
|
| - neg n
|
| - mov %rax, %rbx
|
| - mov %rdx, %r10
|
| - jmp L(L3)
|
| -
|
| -L(b2): lea -16(up,n,8), up
|
| - lea -32(rp,n,8), rp
|
| - neg n
|
| - xor %r8, %r8
|
| - xor R32(%rbx), R32(%rbx)
|
| - mov %rax, %r10
|
| - mov 24(up,n,8), %rax
|
| - mov %rdx, %r9
|
| - jmp L(L2)
|
| -
|
| - ALIGN(16)
|
| -L(top): ADDSUB %r10, (rp,n,8)
|
| - adc %rax, %r9
|
| - mov (up,n,8), %rax
|
| - adc %rdx, %r8
|
| - mov $0, %r10d
|
| -L(L1): mul vl
|
| - ADDSUB %r9, 8(rp,n,8)
|
| - adc %rax, %r8
|
| - adc %rdx, %rbx
|
| -L(L0): mov 8(up,n,8), %rax
|
| - mul vl
|
| - ADDSUB %r8, 16(rp,n,8)
|
| - adc %rax, %rbx
|
| - adc %rdx, %r10
|
| -L(L3): mov 16(up,n,8), %rax
|
| - mul vl
|
| - ADDSUB %rbx, 24(rp,n,8)
|
| - mov $0, %r8d # zero
|
| - mov %r8, %rbx # zero
|
| - adc %rax, %r10
|
| - mov 24(up,n,8), %rax
|
| - mov %r8, %r9 # zero
|
| - adc %rdx, %r9
|
| -L(L2): mul vl
|
| - add $4, n
|
| - js L(top)
|
| -
|
| - ADDSUB %r10, (rp,n,8)
|
| - adc %rax, %r9
|
| - adc %r8, %rdx
|
| - ADDSUB %r9, 8(rp,n,8)
|
| -L(ret): adc $0, %rdx
|
| - mov %rdx, %rax
|
| -
|
| - pop %rbx
|
| - ret
|
| -EPILOGUE()
|
|
|