Index: gcc/gmp/mpn/sparc64/mode1o.c
diff --git a/gcc/gmp/mpn/sparc64/mode1o.c b/gcc/gmp/mpn/sparc64/mode1o.c
deleted file mode 100644
index 5ec97c5cd40771c298171e072e6948fa15ad7403..0000000000000000000000000000000000000000
--- a/gcc/gmp/mpn/sparc64/mode1o.c
+++ /dev/null
@@ -1,186 +0,0 @@
-/* UltraSPARC 64 mpn_modexact_1c_odd -- mpn by limb exact style remainder.
-
-   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
-   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
-   FUTURE GNU MP RELEASES.
-
-Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-#include "mpn/sparc64/sparc64.h"
-
-
-/*                  64-bit divisor   32-bit divisor
-                     cycles/limb      cycles/limb
-                      (approx)         (approx)
-   Ultrasparc 2i:        ?                ?
-*/
-
-
-/* This implementation reduces the number of multiplies done, knowing that
-   on ultrasparc 1 and 2 the mulx instruction stalls the whole chip.
-
-   The key idea is to use the fact that the low limb of q*d equals l, that
-   being the whole purpose of calculating q.  This means there's no need to
-   calculate the lowest 32x32->64 part of q*d; instead it can be
-   inferred from l and the other three 32x32->64 parts.  See sparc64.h for
-   details.
-
-   When d is 32 bits, the same applies, but in this case there's only one
-   other 32x32->64 part (i.e. HIGH(q)*d).
-
-   The net effect is that a 64-bit divisor costs 4 mulx per limb, and a
-   32-bit divisor costs 2 mulx per limb.
-
-   Enhancements:
-
-   No doubt this could be done in assembler, if that helped the scheduling,
-   or perhaps guaranteed good code irrespective of the compiler.
-
-   Alternatives:
-
-   It might be possible to use floating point.  The loop is dominated by
-   multiply latency, so it's not clear that floats would improve that.  One
-   possibility would be to take two limbs at a time, with a 128 bit inverse,
-   if there are enough registers, which could effectively use float
-   throughput to reduce total latency across two limbs. */
-
-#define ASSERT_RETVAL(r)                \
-  ASSERT (orig_c < d ? r < d : r <= d)
-
-mp_limb_t
-mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t orig_c)
-{
-  mp_limb_t c = orig_c;
-  mp_limb_t s, l, q, h, inverse;
-
-  ASSERT (size >= 1);
-  ASSERT (d & 1);
-  ASSERT_MPN (src, size);
-  ASSERT_LIMB (d);
-  ASSERT_LIMB (c);
-
-  /* udivx is faster than 10 or 12 mulx's for one limb via an inverse */
-  if (size == 1)
-    {
-      s = src[0];
-      if (s > c)
-        {
-          l = s-c;
-          h = l % d;
-          if (h != 0)
-            h = d - h;
-        }
-      else
-        {
-          l = c-s;
-          h = l % d;
-        }
-      return h;
-    }
-
-  binvert_limb (inverse, d);
-
-  if (d <= 0xFFFFFFFF)
-    {
-      s = *src++;
-      size--;
-      do
-        {
-          SUBC_LIMB (c, l, s, c);
-          s = *src++;
-          q = l * inverse;
-          umul_ppmm_half_lowequal (h, q, d, l);
-          c += h;
-          size--;
-        }
-      while (size != 0);
-
-      if (s <= d)
-        {
-          /* With high s <= d the final step can be a subtract and addback.
-             If c==0 then the addback will restore to l>=0.  If c==d then
-             we get l==d when s==0, but that's ok per the function
-             definition.  */
-
-          l = c - s;
-          l += (l > c ? d : 0);
-
-          ASSERT_RETVAL (l);
-          return l;
-        }
-      else
-        {
-          /* Can't skip a divide, just do the loop code once more. */
-          SUBC_LIMB (c, l, s, c);
-          q = l * inverse;
-          umul_ppmm_half_lowequal (h, q, d, l);
-          c += h;
-
-          ASSERT_RETVAL (c);
-          return c;
-        }
-    }
-  else
-    {
-      mp_limb_t dl = LOW32 (d);
-      mp_limb_t dh = HIGH32 (d);
-      long i;
-
-      s = *src++;
-      size--;
-      do
-        {
-          SUBC_LIMB (c, l, s, c);
-          s = *src++;
-          q = l * inverse;
-          umul_ppmm_lowequal (h, q, d, dh, dl, l);
-          c += h;
-          size--;
-        }
-      while (size != 0);
-
-      if (s <= d)
-        {
-          /* With high s <= d the final step can be a subtract and addback.
-             If c==0 then the addback will restore to l>=0.  If c==d then
-             we get l==d when s==0, but that's ok per the function
-             definition.  */
-
-          l = c - s;
-          l += (l > c ? d : 0);
-
-          ASSERT_RETVAL (l);
-          return l;
-        }
-      else
-        {
-          /* Can't skip a divide, just do the loop code once more. */
-          SUBC_LIMB (c, l, s, c);
-          q = l * inverse;
-          umul_ppmm_lowequal (h, q, d, dh, dl, l);
-          c += h;
-
-          ASSERT_RETVAL (c);
-          return c;
-        }
-    }
-}
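
The deleted code rests on two tricks that are easy to check in isolation: an
odd d is invertible modulo 2^64 (binvert_limb computes that inverse), and once
q = l * inverse is known, the low limb of q*d equals l by construction, so
only the high limb needs to be assembled from partial products.  The
stand-alone sketch below demonstrates both on a generic 64-bit machine; it is
not GMP code.  binvert() is a hand-rolled stand-in for binvert_limb, the
LOW32/HIGH32 macros are local re-definitions of the sparc64.h ones, and the
verification uses GCC/Clang's unsigned __int128, which the real code of
course avoids.

#include <stdint.h>
#include <stdio.h>

#define LOW32(x)   ((x) & 0xFFFFFFFF)
#define HIGH32(x)  ((x) >> 32)

/* Inverse of odd d modulo 2^64 by Newton iteration, playing the role of
   binvert_limb.  The seed (3*d)^2 is correct to 5 low bits and each step
   doubles that: 5 -> 10 -> 20 -> 40 -> 80 >= 64.  */
static uint64_t
binvert (uint64_t d)
{
  uint64_t i = (3 * d) ^ 2;
  i *= 2 - d * i;
  i *= 2 - d * i;
  i *= 2 - d * i;
  i *= 2 - d * i;
  return i;
}

int
main (void)
{
  uint64_t d = 0xFEDCBA97;              /* odd divisor fitting 32 bits */
  uint64_t l = 0x0123456789ABCDEF;      /* limb to reduce */
  uint64_t inverse = binvert (d);
  uint64_t q = l * inverse;             /* hence q*d == l mod 2^64 */

  /* The 32-bit divisor case: one real product HIGH32(q)*d, with the other
     product LOW32(q)*d inferred from l rather than multiplied out, in the
     spirit of umul_ppmm_half_lowequal.  */
  uint64_t a = HIGH32 (q) * d;          /* the one real multiply */
  uint64_t b = l - (a << 32);           /* LOW32(q)*d, recovered for free */
  uint64_t h = (a + HIGH32 (b)) >> 32;  /* high limb of q*d */

  unsigned __int128 full = (unsigned __int128) q * d;
  printf ("d*inverse == 1 mod 2^64:  %d\n", d * inverse == 1);
  printf ("low limb of q*d == l:     %d\n", (uint64_t) full == l);
  printf ("inferred high limb ok:    %d\n", h == (uint64_t) (full >> 64));
  return 0;
}

For a full 64-bit divisor the same inference applies, but three 32x32->64
partial products remain on top of the q = l * inverse multiply (hence the
4 mulx per limb versus 2 noted above); the umul_ppmm_lowequal macro in
sparc64.h carries that version.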