| Index: gcc/gmp/mpn/generic/mu_div_qr.c
|
| diff --git a/gcc/gmp/mpn/generic/mu_div_qr.c b/gcc/gmp/mpn/generic/mu_div_qr.c
|
| deleted file mode 100644
|
| index 9049e5907af24c9a92eff7ff44b4fad433ad347c..0000000000000000000000000000000000000000
|
| --- a/gcc/gmp/mpn/generic/mu_div_qr.c
|
| +++ /dev/null
|
| @@ -1,463 +0,0 @@
|
| -/* mpn_mu_div_qr, mpn_preinv_mu_div_qr.
|
| -
|
| - Compute Q = floor(N / D) and R = N-QD. N is nn limbs and D is dn limbs and
|
| - must be normalized, and Q must be nn-dn limbs. The requirement that Q is
|
| - nn-dn limbs (and not nn-dn+1 limbs) was put in place in order to allow us to
|
| - let N be unmodified during the operation.
|
| -
|
| - Contributed to the GNU project by Torbjorn Granlund.
|
| -
|
| - THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE. IT IS
|
| - ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
|
| - ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
|
| - RELEASE.
|
| -
|
| -Copyright 2005, 2006, 2007 Free Software Foundation, Inc.
|
| -
|
| -This file is part of the GNU MP Library.
|
| -
|
| -The GNU MP Library is free software; you can redistribute it and/or modify
|
| -it under the terms of the GNU Lesser General Public License as published by
|
| -the Free Software Foundation; either version 3 of the License, or (at your
|
| -option) any later version.
|
| -
|
| -The GNU MP Library is distributed in the hope that it will be useful, but
|
| -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
| -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
| -License for more details.
|
| -
|
| -You should have received a copy of the GNU Lesser General Public License
|
| -along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
|
| -
|
| -
|
| -/* We use the "misunderstanding algorithm" (MUA), discovered by Paul Zimmermann
|
| - and Torbjorn Granlund when Torbjorn misunderstood Paul's explanation of
|
| - Jebelean's bidirectional exact division algorithm.
|
| -
|
| - The idea of this algorithm is to compute a smaller inverted value than used
|
| - in the standard Barrett algorithm, and thus save time in the Newton
|
| - iterations, and pay just a small price when using the inverted value for
|
| - developing quotient bits.
|
| -
|
| - Written by Torbjorn Granlund. Paul Zimmermann suggested the use of the
|
| - "wrap around" trick. Based on the GMP divexact code and inspired by code
|
| - contributed to GMP by Karl Hasselstroem.
|
| -*/
|
| -
|
| -
|
| -/* CAUTION: This code and the code in mu_divappr_q.c should be edited in lockstep.
|
| -
|
| - Things to work on:
|
| -
|
| - * Passing k isn't a great interface. Either 'in' should be passed, or
|
| - determined by the code.
|
| -
|
| - * The current mpn_mu_div_qr_itch isn't exactly scientifically written.
|
| - Scratch space buffer overruns are not unlikely before some analysis is
|
| - applied. Since scratch requirements are expected to change, such an
|
| - analysis will have to wait til things settle.
|
| -
|
| - * This isn't optimal when the remainder isn't needed, since the final
|
| - multiplication could be made special and take O(1) time on average, in that
|
| - case. This is particularly bad when qn << dn. At some level, code as in
|
| - GMP 4 mpn_tdiv_qr should be used, effectively dividing the leading 2qn
|
| - dividend limbs by the qn divisor limbs.
|
| -
|
| - * This isn't optimal when the quotient isn't needed, as it might take a lot
|
| - of space. The computation is always needed, though, so there is not time
|
| - to save with special code.
|
| -
|
| - * The itch/scratch scheme isn't perhaps such a good idea as it once seemed,
|
| - demonstrated by the fact that the mpn_inv function's scratch needs means
|
| - that we need to keep a large allocation long after it is needed. Things
|
| - are worse as mpn_mul_fft does not accept any scratch parameter, which means
|
| - we'll have a large memory hole while in mpn_mul_fft. In general, a peak
|
| - scratch need in the beginning of a function isn't well-handled by the
|
| - itch/scratch scheme.
|
| -
|
| - * Some ideas from comments in divexact.c apply to this code too.
|
| -*/
|
| -
|
| -/* the NOSTAT stuff handles properly the case where files are concatenated */
|
| -#ifdef NOSTAT
|
| -#undef STAT
|
| -#endif
|
| -
|
| -#ifdef STAT
|
| -#undef STAT
|
| -#define STAT(x) x
|
| -#else
|
| -#define NOSTAT
|
| -#define STAT(x)
|
| -#endif
|
| -
|
| -#include <stdlib.h> /* for NULL */
|
| -#include "gmp.h"
|
| -#include "gmp-impl.h"
|
| -
|
| -
|
| -/* In case k=0 (automatic choice), we distinguish 3 cases:
|
| - (a) dn < qn: in = ceil(qn / ceil(qn/dn))
|
| - (b) dn/3 < qn <= dn: in = ceil(qn / 2)
|
| - (c) qn < dn/3: in = qn
|
| - In all cases we have in <= dn.
|
| - */
|
| -mp_size_t
|
| -mpn_mu_div_qr_choose_in (mp_size_t qn, mp_size_t dn, int k)
|
| -{
|
| - mp_size_t in;
|
| -
|
| - if (k == 0)
|
| - {
|
| - mp_size_t b;
|
| - if (qn > dn)
|
| - {
|
| - /* Compute an inverse size that is a nice partition of the quotient. */
|
| - b = (qn - 1) / dn + 1; /* ceil(qn/dn), number of blocks */
|
| - in = (qn - 1) / b + 1; /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
|
| - }
|
| - else if (3 * qn > dn)
|
| - {
|
| - in = (qn - 1) / 2 + 1; /* b = 2 */
|
| - }
|
| - else
|
| - {
|
| - in = (qn - 1) / 1 + 1; /* b = 1 */
|
| - }
|
| - }
|
| - else
|
| - {
|
| - mp_size_t xn;
|
| - xn = MIN (dn, qn);
|
| - in = (xn - 1) / k + 1;
|
| - }
|
| -
|
| - return in;
|
| -}
|
| -
|
| -static mp_limb_t
|
| -mpn_mu_div_qr2 (mp_ptr qp,
|
| - mp_ptr rp,
|
| - mp_ptr np,
|
| - mp_size_t nn,
|
| - mp_srcptr dp,
|
| - mp_size_t dn,
|
| - mp_ptr scratch)
|
| -{
|
| - mp_size_t qn, in;
|
| - mp_limb_t cy;
|
| - mp_ptr ip, tp;
|
| -
|
| - /* FIXME: We should probably not handle tiny operands, but do it for now. */
|
| - if (dn == 1)
|
| - {
|
| - rp[0] = mpn_divrem_1 (scratch, 0L, np, nn, dp[0]);
|
| - MPN_COPY (qp, scratch, nn - 1);
|
| - return scratch[nn - 1];
|
| - }
|
| -
|
| - qn = nn - dn;
|
| -
|
| - /* Compute the inverse size. */
|
| - in = mpn_mu_div_qr_choose_in (qn, dn, 0);
|
| - ASSERT (in <= dn);
|
| -
|
| -#if 1
|
| - /* This alternative inverse computation method gets slightly more accurate
|
| - results. FIXMEs: (1) Temp allocation needs not analysed (2) itch function
|
| - not adapted (3) mpn_invert scratch needs not met. */
|
| - ip = scratch;
|
| - tp = scratch + in + 1;
|
| -
|
| - /* compute an approximate inverse on (in+1) limbs */
|
| - if (dn == in)
|
| - {
|
| - MPN_COPY (tp + 1, dp, in);
|
| - tp[0] = 1;
|
| - mpn_invert (ip, tp, in + 1, NULL);
|
| - MPN_COPY_INCR (ip, ip + 1, in);
|
| - }
|
| - else
|
| - {
|
| - cy = mpn_add_1 (tp, dp + dn - (in + 1), in + 1, 1);
|
| - if (UNLIKELY (cy != 0))
|
| - MPN_ZERO (ip, in);
|
| - else
|
| - {
|
| - mpn_invert (ip, tp, in + 1, NULL);
|
| - MPN_COPY_INCR (ip, ip + 1, in);
|
| - }
|
| - }
|
| -#else
|
| - /* This older inverse computation method gets slightly worse results than the
|
| - one above. */
|
| - ip = scratch;
|
| - tp = scratch + in;
|
| -
|
| - /* Compute inverse of D to in+1 limbs, then round to 'in' limbs. Ideally the
|
| - inversion function should do this automatically. */
|
| - if (dn == in)
|
| - {
|
| - tp[in + 1] = 0;
|
| - MPN_COPY (tp + in + 2, dp, in);
|
| - mpn_invert (tp, tp + in + 1, in + 1, NULL);
|
| - }
|
| - else
|
| - {
|
| - mpn_invert (tp, dp + dn - (in + 1), in + 1, NULL);
|
| - }
|
| - cy = mpn_sub_1 (tp, tp, in + 1, GMP_NUMB_HIGHBIT);
|
| - if (UNLIKELY (cy != 0))
|
| - MPN_ZERO (tp + 1, in);
|
| - MPN_COPY (ip, tp + 1, in);
|
| -#endif
|
| -
|
| -/* We can't really handle qh = 1 like this since we'd here clobber N, which is
|
| - not allowed in the way we've defined this function's API. */
|
| -#if 0
|
| - qh = mpn_cmp (np + qn, dp, dn) >= 0;
|
| - if (qh != 0)
|
| - mpn_sub_n (np + qn, np + qn, dp, dn);
|
| -#endif
|
| -
|
| - mpn_preinv_mu_div_qr (qp, rp, np, nn, dp, dn, ip, in, scratch + in);
|
| -
|
| -/* return qh; */
|
| - return 0;
|
| -}
|
| -
|
| -void
|
| -mpn_preinv_mu_div_qr (mp_ptr qp,
|
| - mp_ptr rp,
|
| - mp_ptr np,
|
| - mp_size_t nn,
|
| - mp_srcptr dp,
|
| - mp_size_t dn,
|
| - mp_srcptr ip,
|
| - mp_size_t in,
|
| - mp_ptr scratch)
|
| -{
|
| - mp_size_t qn;
|
| - mp_limb_t cy;
|
| - mp_ptr tp;
|
| - mp_limb_t r;
|
| -
|
| - qn = nn - dn;
|
| -
|
| - if (qn == 0)
|
| - {
|
| - MPN_COPY (rp, np, dn);
|
| - return;
|
| - }
|
| -
|
| - tp = scratch;
|
| -
|
| - np += qn;
|
| - qp += qn;
|
| -
|
| - MPN_COPY (rp, np, dn);
|
| -
|
| - while (qn > 0)
|
| - {
|
| - if (qn < in)
|
| - {
|
| - ip += in - qn;
|
| - in = qn;
|
| - }
|
| - np -= in;
|
| - qp -= in;
|
| -
|
| - /* Compute the next block of quotient limbs by multiplying the inverse I
|
| - by the upper part of the partial remainder R. */
|
| - mpn_mul_n (tp, rp + dn - in, ip, in); /* mulhi */
|
| - cy = mpn_add_n (qp, tp + in, rp + dn - in, in); /* I's msb implicit */
|
| - ASSERT_ALWAYS (cy == 0); /* FIXME */
|
| -
|
| - /* Compute the product of the quotient block and the divisor D, to be
|
| - subtracted from the partial remainder combined with new limbs from the
|
| - dividend N. We only really need the low dn limbs. */
|
| -#if WANT_FFT
|
| - if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
|
| - {
|
| - /* Use the wrap-around trick. */
|
| - mp_size_t m, wn;
|
| - int k;
|
| -
|
| - k = mpn_fft_best_k (dn + 1, 0);
|
| - m = mpn_fft_next_size (dn + 1, k);
|
| - wn = dn + in - m; /* number of wrapped limbs */
|
| -
|
| - mpn_mul_fft (tp, m, dp, dn, qp, in, k);
|
| -
|
| - if (wn > 0)
|
| - {
|
| - cy = mpn_add_n (tp, tp, rp + dn - wn, wn);
|
| - mpn_incr_u (tp + wn, cy);
|
| -
|
| - cy = mpn_cmp (rp + dn - in, tp + dn, m - dn) < 0;
|
| - mpn_decr_u (tp, cy);
|
| - }
|
| - }
|
| - else
|
| -#endif
|
| - mpn_mul (tp, dp, dn, qp, in); /* dn+in limbs, high 'in' cancels */
|
| -
|
| - r = rp[dn - in] - tp[dn];
|
| -
|
| - /* Subtract the product from the partial remainder combined with new
|
| - limbs from the dividend N, generating a new partial remainder R. */
|
| - if (dn != in)
|
| - {
|
| - cy = mpn_sub_n (tp, np, tp, in); /* get next 'in' limbs from N */
|
| - cy = mpn_sub_nc (tp + in, rp, tp + in, dn - in, cy);
|
| - MPN_COPY (rp, tp, dn); /* FIXME: try to avoid this */
|
| - }
|
| - else
|
| - {
|
| - cy = mpn_sub_n (rp, np, tp, in); /* get next 'in' limbs from N */
|
| - }
|
| -
|
| - STAT (int i; int err = 0;
|
| - static int errarr[5]; static int err_rec; static int tot);
|
| -
|
| - /* Check the remainder R and adjust the quotient as needed. */
|
| - r -= cy;
|
| - while (r != 0)
|
| - {
|
| - /* We loop 0 times with about 69% probability, 1 time with about 31%
|
| - probability, 2 times with about 0.6% probability, if inverse is
|
| - computed as recommended. */
|
| - mpn_incr_u (qp, 1);
|
| - cy = mpn_sub_n (rp, rp, dp, dn);
|
| - r -= cy;
|
| - STAT (err++);
|
| - }
|
| - if (mpn_cmp (rp, dp, dn) >= 0)
|
| - {
|
| - /* This is executed with about 76% probability. */
|
| - mpn_incr_u (qp, 1);
|
| - cy = mpn_sub_n (rp, rp, dp, dn);
|
| - STAT (err++);
|
| - }
|
| -
|
| - STAT (
|
| - tot++;
|
| - errarr[err]++;
|
| - if (err > err_rec)
|
| - err_rec = err;
|
| - if (tot % 0x10000 == 0)
|
| - {
|
| - for (i = 0; i <= err_rec; i++)
|
| - printf (" %d(%.1f%%)", errarr[i], 100.0*errarr[i]/tot);
|
| - printf ("\n");
|
| - }
|
| - );
|
| -
|
| - qn -= in;
|
| - }
|
| -}
|
| -
|
| -#define THRES 100 /* FIXME: somewhat arbitrary */
|
| -
|
| -#ifdef CHECK
|
| -#undef THRES
|
| -#define THRES 1
|
| -#endif
|
| -
|
| -mp_limb_t
|
| -mpn_mu_div_qr (mp_ptr qp,
|
| - mp_ptr rp,
|
| - mp_ptr np,
|
| - mp_size_t nn,
|
| - mp_srcptr dp,
|
| - mp_size_t dn,
|
| - mp_ptr scratch)
|
| -{
|
| - mp_size_t qn;
|
| -
|
| - qn = nn - dn;
|
| - if (qn + THRES < dn)
|
| - {
|
| - /* |______________|________| dividend nn
|
| - |_______|________| divisor dn
|
| -
|
| - |______| quotient (prel) qn
|
| -
|
| - |_______________| quotient * ignored-part-of(divisor) dn-1
|
| - */
|
| -
|
| - mp_limb_t cy, x;
|
| -
|
| - if (mpn_cmp (np + nn - (qn + 1), dp + dn - (qn + 1), qn + 1) >= 0)
|
| - {
|
| - /* Quotient is 111...111, could optimize this rare case at some point. */
|
| - mpn_mu_div_qr2 (qp, rp, np, nn, dp, dn, scratch);
|
| - return 0;
|
| - }
|
| -
|
| - /* Compute a preliminary quotient and a partial remainder by dividing the
|
| - most significant limbs of each operand. */
|
| - mpn_mu_div_qr2 (qp, rp + nn - (2 * qn + 1),
|
| - np + nn - (2 * qn + 1), 2 * qn + 1,
|
| - dp + dn - (qn + 1), qn + 1,
|
| - scratch);
|
| -
|
| - /* Multiply the quotient by the divisor limbs ignored above. */
|
| - if (dn - (qn + 1) > qn)
|
| - mpn_mul (scratch, dp, dn - (qn + 1), qp, qn); /* prod is dn-1 limbs */
|
| - else
|
| - mpn_mul (scratch, qp, qn, dp, dn - (qn + 1)); /* prod is dn-1 limbs */
|
| -
|
| - cy = mpn_sub_n (rp, np, scratch, nn - (2 * qn + 1));
|
| - cy = mpn_sub_nc (rp + nn - (2 * qn + 1),
|
| - rp + nn - (2 * qn + 1),
|
| - scratch + nn - (2 * qn + 1),
|
| - qn, cy);
|
| - x = rp[dn - 1];
|
| - rp[dn - 1] = x - cy;
|
| - if (cy > x)
|
| - {
|
| - mpn_decr_u (qp, 1);
|
| - mpn_add_n (rp, rp, dp, dn);
|
| - }
|
| - }
|
| - else
|
| - {
|
| - return mpn_mu_div_qr2 (qp, rp, np, nn, dp, dn, scratch);
|
| - }
|
| -
|
| - return 0; /* FIXME */
|
| -}
|
| -
|
| -mp_size_t
|
| -mpn_mu_div_qr_itch (mp_size_t nn, mp_size_t dn, int mua_k)
|
| -{
|
| - mp_size_t qn, m;
|
| - int k;
|
| -
|
| - /* FIXME: This isn't very carefully written, and might grossly overestimate
|
| - the amount of scratch needed, and might perhaps also underestimate it,
|
| - leading to potential buffer overruns. In particular k=0 might lead to
|
| - gross overestimates. */
|
| -
|
| - if (dn == 1)
|
| - return nn;
|
| -
|
| - qn = nn - dn;
|
| - if (qn >= dn)
|
| - {
|
| - k = mpn_fft_best_k (dn + 1, 0);
|
| - m = mpn_fft_next_size (dn + 1, k);
|
| - return (mua_k <= 1
|
| - ? 6 * dn
|
| - : m + 2 * dn);
|
| - }
|
| - else
|
| - {
|
| - k = mpn_fft_best_k (dn + 1, 0);
|
| - m = mpn_fft_next_size (dn + 1, k);
|
| - return (mua_k <= 1
|
| - ? m + 4 * qn
|
| - : m + 2 * qn);
|
| - }
|
| -}
|
|
|