Index: gcc/gmp/mpn/x86/k7/mod_34lsub1.asm |
diff --git a/gcc/gmp/mpn/x86/k7/mod_34lsub1.asm b/gcc/gmp/mpn/x86/k7/mod_34lsub1.asm |
deleted file mode 100644 |
index f00e84dc42165592abf2d71f18257eaa62c75fe6..0000000000000000000000000000000000000000 |
--- a/gcc/gmp/mpn/x86/k7/mod_34lsub1.asm |
+++ /dev/null |
@@ -1,178 +0,0 @@ |
-dnl AMD K7 mpn_mod_34lsub1 -- remainder modulo 2^24-1. |
- |
-dnl Copyright 2000, 2001, 2002, 2004, 2005, 2008 Free Software Foundation, |
-dnl Inc. |
-dnl |
-dnl This file is part of the GNU MP Library. |
-dnl |
-dnl The GNU MP Library is free software; you can redistribute it and/or |
-dnl modify it under the terms of the GNU Lesser General Public License as |
-dnl published by the Free Software Foundation; either version 3 of the |
-dnl License, or (at your option) any later version. |
-dnl |
-dnl The GNU MP Library is distributed in the hope that it will be useful, |
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of |
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
-dnl Lesser General Public License for more details. |
-dnl |
-dnl You should have received a copy of the GNU Lesser General Public License |
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. |
- |
-include(`../config.m4') |
- |
- |
-C cycles/limb |
-C Athlon: 1 |
-C Hammer: 1 |
- |
- |
-C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size) |
-C |
-C The loop form below and the 64 byte code alignment seem necessary for the |
-C claimed speed. This is a bit strange, since normally k7 isn't very |
-C sensitive to such things. Perhaps there has to be 6 instructions in the |
-C first 16 bytes for the BTB entry or something. |
- |
-defframe(PARAM_SIZE, 8) |
-defframe(PARAM_SRC, 4) |
- |
-dnl re-use parameter space |
-define(SAVE_EDI, `PARAM_SIZE') |
- |
- TEXT |
- ALIGN(64) |
-PROLOGUE(mpn_mod_34lsub1) |
-deflit(`FRAME',0) |
- |
- movl PARAM_SIZE, %ecx |
- movl PARAM_SRC, %edx |
- |
- subl $2, %ecx |
- ja L(three_or_more) |
- |
- movl (%edx), %eax |
- jb L(one) |
- |
- movl 4(%edx), %ecx |
- movl %eax, %edx |
- shrl $24, %eax C src[0] low |
- |
- andl $0xFFFFFF, %edx C src[0] high |
- addl %edx, %eax |
- movl %ecx, %edx |
- |
- andl $0xFFFF, %ecx |
- shrl $16, %edx C src[1] high |
- addl %edx, %eax |
- |
- shll $8, %ecx C src[1] low |
- addl %ecx, %eax |
- |
-L(one): |
- ret |
- |
- |
-L(three_or_more): |
- C eax |
- C ebx |
- C ecx size-2 |
- C edx src |
- C esi |
- C edi |
- |
- pushl %ebx FRAME_pushl() |
- xorl %eax, %eax |
- xorl %ebx, %ebx |
- |
- movl %edi, SAVE_EDI |
- pushl %esi FRAME_pushl() |
- xorl %esi, %esi C and clear carry flag |
- |
- |
- C code offset 0x40 at this point |
-L(top): |
- C eax acc 0mod3 |
- C ebx acc 1mod3 |
- C ecx counter, limbs |
- C edx src |
- C esi acc 2mod3 |
- C edi |
- |
- leal 24(%edx), %edx |
- leal -2(%ecx), %ecx |
- adcl -24(%edx), %eax |
- adcl -20(%edx), %ebx |
- adcl -16(%edx), %esi |
- |
- decl %ecx |
- jng L(done_loop) |
- |
- leal -2(%ecx), %ecx |
- adcl -12(%edx), %eax |
- adcl -8(%edx), %ebx |
- adcl -4(%edx), %esi |
- |
- decl %ecx |
- jg L(top) |
- |
- |
- leal 12(%edx), %edx |
- |
- |
-L(done_loop): |
- C ecx is -2, -1 or 0 representing 0, 1 or 2 more limbs, respectively |
- |
- incl %ecx |
- movl $0xFFFFFFFF, %edi |
- js L(combine) |
- |
- adcl -12(%edx), %eax |
- decl %ecx |
- movl $0xFFFFFF00, %edi |
- js L(combine) |
- |
- adcl -8(%edx), %ebx |
- movl $0xFFFF0000, %edi |
- |
- |
-L(combine): |
- C eax acc 0mod3 |
- C ebx acc 1mod3 |
- C ecx |
- C edx |
- C esi acc 2mod3 |
- C edi mask |
- |
- sbbl %ecx, %ecx C carry |
- movl %eax, %edx C 0mod3 |
- shrl $24, %eax C 0mod3 high |
- |
- andl %edi, %ecx C carry masked |
- andl $0x00FFFFFF, %edx C 0mod3 low |
- movl %ebx, %edi C 1mod3 |
- |
- subl %ecx, %eax C apply carry |
- shrl $16, %ebx C 1mod3 high |
- andl $0xFFFF, %edi |
- |
- addl %edx, %eax C apply 0mod3 low |
- movl %esi, %edx C 2mod3 |
- shll $8, %edi C 1mod3 low |
- |
- addl %ebx, %eax C apply 1mod3 high |
- shrl $8, %esi C 2mod3 high |
- movzbl %dl, %edx C 2mod3 low |
- |
- addl %edi, %eax C apply 1mod3 low |
- shll $16, %edx C 2mod3 low |
- |
- addl %esi, %eax C apply 2mod3 high |
- popl %esi FRAME_popl() |
- |
- movl SAVE_EDI, %edi |
- addl %edx, %eax C apply 2mod3 low |
- popl %ebx FRAME_popl() |
- |
- ret |
- |
-EPILOGUE() |