Index: gcc/gmp/mpn/x86_64/aorsmul_1.asm
diff --git a/gcc/gmp/mpn/x86_64/aorsmul_1.asm b/gcc/gmp/mpn/x86_64/aorsmul_1.asm
deleted file mode 100644
index a25c74ebdcfd072255bc61d4d26b411f82a61b43..0000000000000000000000000000000000000000
--- a/gcc/gmp/mpn/x86_64/aorsmul_1.asm
+++ /dev/null
@@ -1,147 +0,0 @@
-dnl AMD64 mpn_addmul_1 and mpn_submul_1.
-
-dnl Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C K8,K9: 2.5
-C K10: 2.5
-C P4: 14.9
-C P6-15 (Core2): 5.09
-C P6-28 (Atom): 21.3
-
-C The inner loop of this code is the result of running a code generation and
-C optimization tool suite written by David Harvey and Torbjorn Granlund.
-
-C TODO:
-C * The inner loop is great, but the prologue and epilogue code was
-C quickly written. Tune it!
-
-C INPUT PARAMETERS
-define(`rp', `%rdi')
-define(`up', `%rsi')
-define(`n_param',`%rdx')
-define(`vl', `%rcx')
-
-define(`n', `%r11')
-
-ifdef(`OPERATION_addmul_1',`
- define(`ADDSUB', `add')
- define(`func', `mpn_addmul_1')
-')
-ifdef(`OPERATION_submul_1',`
- define(`ADDSUB', `sub')
- define(`func', `mpn_submul_1')
-')
-
-MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
-
-ASM_START()
- TEXT
- ALIGN(16)
-PROLOGUE(func)
- mov (up), %rax C read first u limb early
- push %rbx
- mov n_param, %rbx C move away n from rdx, mul uses it
- mul vl
- mov %rbx, %r11
-
- and $3, R32(%rbx)
- jz L(b0)
- cmp $2, R32(%rbx)
- jz L(b2)
- jg L(b3)
-
-L(b1): dec n
- jne L(gt1)
- ADDSUB %rax, (rp)
- jmp L(ret)
-L(gt1): lea 8(up,n,8), up
- lea -8(rp,n,8), rp
- neg n
- xor %r10, %r10
- xor R32(%rbx), R32(%rbx)
- mov %rax, %r9
- mov (up,n,8), %rax
- mov %rdx, %r8
- jmp L(L1)
-
-L(b0): lea (up,n,8), up
- lea -16(rp,n,8), rp
- neg n
- xor %r10, %r10
- mov %rax, %r8
- mov %rdx, %rbx
- jmp L(L0)
-
-L(b3): lea -8(up,n,8), up
- lea -24(rp,n,8), rp
- neg n
- mov %rax, %rbx
- mov %rdx, %r10
- jmp L(L3)
-
-L(b2): lea -16(up,n,8), up
- lea -32(rp,n,8), rp
- neg n
- xor %r8, %r8
- xor R32(%rbx), R32(%rbx)
- mov %rax, %r10
- mov 24(up,n,8), %rax
- mov %rdx, %r9
- jmp L(L2)
-
- ALIGN(16)
-L(top): ADDSUB %r10, (rp,n,8)
- adc %rax, %r9
- mov (up,n,8), %rax
- adc %rdx, %r8
- mov $0, %r10d
-L(L1): mul vl
- ADDSUB %r9, 8(rp,n,8)
- adc %rax, %r8
- adc %rdx, %rbx
-L(L0): mov 8(up,n,8), %rax
- mul vl
- ADDSUB %r8, 16(rp,n,8)
- adc %rax, %rbx
- adc %rdx, %r10
-L(L3): mov 16(up,n,8), %rax
- mul vl
- ADDSUB %rbx, 24(rp,n,8)
- mov $0, %r8d # zero
- mov %r8, %rbx # zero
- adc %rax, %r10
- mov 24(up,n,8), %rax
- mov %r8, %r9 # zero
- adc %rdx, %r9
-L(L2): mul vl
- add $4, n
- js L(top)
-
- ADDSUB %r10, (rp,n,8)
- adc %rax, %r9
- adc %r8, %rdx
- ADDSUB %r9, 8(rp,n,8)
-L(ret): adc $0, %rdx
- mov %rdx, %rax
-
- pop %rbx
- ret
-EPILOGUE()
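
Note (not part of the patch): the deleted routines compute {rp,n} += {up,n} * vl (mpn_addmul_1) or {rp,n} -= {up,n} * vl (mpn_submul_1) and return the most significant limb of the product plus the carry/borrow out of the add/subtract. The plain C sketch below is only meant to document that arithmetic, assuming 64-bit limbs and a compiler with unsigned __int128 (GCC/Clang on x86_64); the names ref_addmul_1, ref_submul_1 and limb_t are illustrative and are not GMP identifiers.

    /* Illustrative sketch only -- not GMP code and not part of this patch. */
    #include <stdint.h>
    #include <stddef.h>

    typedef uint64_t limb_t;

    /* {rp, n} += {up, n} * vl; return the high limb of the product plus the
       carry out of the addition (what the assembly leaves in %rax). */
    static limb_t ref_addmul_1(limb_t *rp, const limb_t *up, size_t n, limb_t vl)
    {
        limb_t carry = 0;
        for (size_t i = 0; i < n; i++) {
            unsigned __int128 t = (unsigned __int128)up[i] * vl + rp[i] + carry;
            rp[i] = (limb_t)t;          /* low 64 bits stay in the result limb */
            carry = (limb_t)(t >> 64);  /* high 64 bits propagate to the next limb */
        }
        return carry;
    }

    /* {rp, n} -= {up, n} * vl; return the high limb of the product plus the
       borrow out of the subtraction. */
    static limb_t ref_submul_1(limb_t *rp, const limb_t *up, size_t n, limb_t vl)
    {
        limb_t borrow = 0;
        for (size_t i = 0; i < n; i++) {
            unsigned __int128 t = (unsigned __int128)up[i] * vl;
            limb_t lo = (limb_t)t + borrow;                /* low product limb + previous borrow */
            limb_t hi = (limb_t)(t >> 64) + (lo < borrow); /* carry from that addition */
            borrow = hi + (rp[i] < lo);                    /* borrow out of this limb */
            rp[i] -= lo;
        }
        return borrow;
    }

The assembly unrolls this one-limb loop four ways and dispatches on n mod 4 through the L(b0)..L(b3) entry points (the `and $3, R32(%rbx)` / `add $4, n` pair), which is why its prologue is so much larger than the loop body.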