Index: gcc/gmp/mpn/x86/mod_1.asm |
diff --git a/gcc/gmp/mpn/x86/mod_1.asm b/gcc/gmp/mpn/x86/mod_1.asm |
deleted file mode 100644 |
index 0fa3ce0def466a0b888ad67a674baeb34583c180..0000000000000000000000000000000000000000 |
--- a/gcc/gmp/mpn/x86/mod_1.asm |
+++ /dev/null |
@@ -1,163 +0,0 @@ |
-dnl x86 mpn_mod_1 -- mpn by limb remainder. |
- |
-dnl Copyright 1999, 2000, 2001, 2002, 2003, 2007 Free Software Foundation, |
-dnl Inc. |
-dnl |
-dnl This file is part of the GNU MP Library. |
-dnl |
-dnl The GNU MP Library is free software; you can redistribute it and/or |
-dnl modify it under the terms of the GNU Lesser General Public License as |
-dnl published by the Free Software Foundation; either version 3 of the |
-dnl License, or (at your option) any later version. |
-dnl |
-dnl The GNU MP Library is distributed in the hope that it will be useful, |
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of |
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
-dnl Lesser General Public License for more details. |
-dnl |
-dnl You should have received a copy of the GNU Lesser General Public License |
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. |
- |
-include(`../config.m4') |
- |
- |
-C cycles/limb |
-C 486 42 approx, maybe |
-C P5 44 |
-C P6 39 |
-C K6 20 |
-C K7 41 |
-C P4 58 |
- |
- |
-C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor); |
-C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor, |
-C mp_limb_t carry); |
-C |
-C Essentially this code is the same as the division based part of |
-C mpn/generic/mod_1.c, but has the advantage that we get the desired divl |
-C instruction even when gcc is not being used (where longlong.h only has the |
-C rather slow generic C udiv_qrnnd(). |
-C |
-C A test is done to see if the high limb is less than the divisor, and if so |
-C one less div is done. A div is between 20 and 40 cycles on the various |
-C x86s, so assuming high<divisor about half the time, then this test saves |
-C half that amount. The branch misprediction penalty on each chip is less |
-C than half a div. |
-C |
-C |
-C Notes for K6: |
-C |
-C Back-to-back div instructions take 20 cycles, the same as the loop here, |
-C so it seems there's nothing to gain by rearranging. Pairing the mov and |
-C loop instructions was found to gain nothing. Normally we use a loop |
-C instruction rather than decl/jnz, but it gains nothing here. |
-C |
-C A multiply-by-inverse is used in mpn/x86/k6/pre_mod_1.asm, but it saves |
-C only 2 c/l so currently we haven't bothered with the same for mpn_mod_1. |
-C If an inverse takes about 40 cycles for normalized or perhaps 60 for |
-C unnormalized (due to bsfl being slow on k6) then the threshold would be at |
-C least 20 or 30 limbs. |
-C |
- |
-defframe(PARAM_CARRY, 16) |
-defframe(PARAM_DIVISOR,12) |
-defframe(PARAM_SIZE, 8) |
-defframe(PARAM_SRC, 4) |
- |
- TEXT |
- |
- ALIGN(16) |
-PROLOGUE(mpn_mod_1) |
-deflit(`FRAME',0) |
- |
- movl PARAM_SIZE, %ecx |
- pushl %ebx FRAME_pushl() |
- |
- movl PARAM_SRC, %ebx |
- pushl %esi FRAME_pushl() |
- |
- orl %ecx, %ecx |
- jz L(done_zero) |
- |
- movl PARAM_DIVISOR, %esi |
- movl -4(%ebx,%ecx,4), %eax C src high limb |
- |
- cmpl %esi, %eax |
- |
- sbbl %edx, %edx C -1 if high<divisor |
- |
- addl %edx, %ecx C skip one division if high<divisor |
- jz L(done_eax) |
- |
- andl %eax, %edx C carry if high<divisor |
- |
- |
-L(top): |
- C eax scratch (quotient) |
- C ebx src |
- C ecx counter |
- C edx carry (remainder) |
- C esi divisor |
- C edi |
- C ebp |
- |
- movl -4(%ebx,%ecx,4), %eax |
- |
- divl %esi |
- |
- decl %ecx |
- jnz L(top) |
- |
- |
- movl %edx, %eax |
-L(done_eax): |
- popl %esi |
- |
- popl %ebx |
- |
- ret |
- |
-EPILOGUE() |
- |
- |
- C This code located after mpn_mod_1, so the jump to L(top) here is |
- C back and hence will be predicted as taken. (size==0 is considered |
- C unlikely.) |
- |
- ALIGN(16) |
-PROLOGUE(mpn_mod_1c) |
-deflit(`FRAME',0) |
- |
- movl PARAM_SIZE, %ecx |
- pushl %ebx FRAME_pushl() |
- |
- movl PARAM_SRC, %ebx |
- pushl %esi FRAME_pushl() |
- |
- movl PARAM_DIVISOR, %esi |
- orl %ecx, %ecx |
- |
- movl PARAM_CARRY, %edx |
- jnz L(top) |
- |
- popl %esi |
- movl %edx, %eax |
- |
- popl %ebx |
- |
- ret |
- |
- |
- C This code is for mpn_mod_1, but is positioned here to save some |
- C space in the alignment padding. |
- C |
-L(done_zero): |
- popl %esi |
- xorl %eax, %eax |
- |
- popl %ebx |
- |
- ret |
- |
-EPILOGUE() |