gcc/gmp/mpn/generic/mu_div_qr.c - Issue 3050029: [gcc] GCC 4.5.0=>4.5.1

Unified Diff: gcc/gmp/mpn/generic/mu_div_qr.c

Issue 3050029: [gcc] GCC 4.5.0=>4.5.1 (Closed) Base URL: ssh://git@gitrw.chromium.org:9222/nacl-toolchain.git

Patch Set: Created 10 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: gcc/gmp/mpn/generic/mu_div_qr.c

diff --git a/gcc/gmp/mpn/generic/mu_div_qr.c b/gcc/gmp/mpn/generic/mu_div_qr.c

deleted file mode 100644

index 9049e5907af24c9a92eff7ff44b4fad433ad347c..0000000000000000000000000000000000000000

--- a/gcc/gmp/mpn/generic/mu_div_qr.c

+++ /dev/null

@@ -1,463 +0,0 @@

-/* mpn_mu_div_qr, mpn_preinv_mu_div_qr.

- Compute Q = floor(N / D) and R = N-QD. N is nn limbs and D is dn limbs and

- must be normalized, and Q must be nn-dn limbs. The requirement that Q is

- nn-dn limbs (and not nn-dn+1 limbs) was put in place in order to allow us to

- let N be unmodified during the operation.

- Contributed to the GNU project by Torbjorn Granlund.

- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE. IT IS

- ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS

- ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP

- RELEASE.

-This file is part of the GNU MP Library.

-The GNU MP Library is free software; you can redistribute it and/or modify

-it under the terms of the GNU Lesser General Public License as published by

-the Free Software Foundation; either version 3 of the License, or (at your

-option) any later version.

-The GNU MP Library is distributed in the hope that it will be useful, but

-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY

-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public

-License for more details.

-You should have received a copy of the GNU Lesser General Public License

-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */

-/* We use the "misunderstanding algorithm" (MUA), discovered by Paul Zimmermann

- and Torbjorn Granlund when Torbjorn misunderstood Paul's explanation of

- Jebelean's bidirectional exact division algorithm.

- The idea of this algorithm is to compute a smaller inverted value than used

- in the standard Barrett algorithm, and thus save time in the Newton

- iterations, and pay just a small price when using the inverted value for

- developing quotient bits.

- Written by Torbjorn Granlund. Paul Zimmermann suggested the use of the

- "wrap around" trick. Based on the GMP divexact code and inspired by code

- contributed to GMP by Karl Hasselstroem.

-*/

-/* CAUTION: This code and the code in mu_divappr_q.c should be edited in lockstep.

- Things to work on:

- * Passing k isn't a great interface. Either 'in' should be passed, or

- determined by the code.

- * The current mpn_mu_div_qr_itch isn't exactly scientifically written.

- Scratch space buffer overruns are not unlikely before some analysis is

- applied. Since scratch requirements are expected to change, such an

- analysis will have to wait til things settle.

- * This isn't optimal when the remainder isn't needed, since the final

- multiplication could be made special and take O(1) time on average, in that

- case. This is particularly bad when qn << dn. At some level, code as in

- GMP 4 mpn_tdiv_qr should be used, effectively dividing the leading 2qn

- dividend limbs by the qn divisor limbs.

- * This isn't optimal when the quotient isn't needed, as it might take a lot

- of space. The computation is always needed, though, so there is not time

- to save with special code.

- * The itch/scratch scheme isn't perhaps such a good idea as it once seemed,

- demonstrated by the fact that the mpn_inv function's scratch needs means

- that we need to keep a large allocation long after it is needed. Things

- are worse as mpn_mul_fft does not accept any scratch parameter, which means

- we'll have a large memory hole while in mpn_mul_fft. In general, a peak

- scratch need in the beginning of a function isn't well-handled by the

- itch/scratch scheme.

- * Some ideas from comments in divexact.c apply to this code too.

-*/

-/* the NOSTAT stuff handles properly the case where files are concatenated */

-#ifdef NOSTAT

-#undef STAT

-#endif

-#ifdef STAT

-#undef STAT

-#define STAT(x) x

-#else

-#define NOSTAT

-#define STAT(x)

-#endif

-#include <stdlib.h> /* for NULL */

-#include "gmp.h"

-#include "gmp-impl.h"

-/* In case k=0 (automatic choice), we distinguish 3 cases:

- (a) dn < qn: in = ceil(qn / ceil(qn/dn))

- (b) dn/3 < qn <= dn: in = ceil(qn / 2)

- (c) qn < dn/3: in = qn

- In all cases we have in <= dn.

- */

-mp_size_t

-mpn_mu_div_qr_choose_in (mp_size_t qn, mp_size_t dn, int k)

- mp_size_t in;

- if (k == 0)

- {

- mp_size_t b;

- if (qn > dn)

- {

- /* Compute an inverse size that is a nice partition of the quotient. */

- b = (qn - 1) / dn + 1; /* ceil(qn/dn), number of blocks */

- in = (qn - 1) / b + 1; /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */

- }

- else if (3 * qn > dn)

- {

- in = (qn - 1) / 2 + 1; /* b = 2 */

- }

- else

- {

- in = (qn - 1) / 1 + 1; /* b = 1 */

- }

- else

- {

- mp_size_t xn;

- xn = MIN (dn, qn);

- in = (xn - 1) / k + 1;

- }

- return in;

-static mp_limb_t

-mpn_mu_div_qr2 (mp_ptr qp,

- mp_ptr rp,

- mp_ptr np,

- mp_size_t nn,

- mp_srcptr dp,

- mp_size_t dn,

- mp_ptr scratch)

- mp_size_t qn, in;

- mp_limb_t cy;

- mp_ptr ip, tp;

- /* FIXME: We should probably not handle tiny operands, but do it for now. */

- if (dn == 1)

- {

- rp[0] = mpn_divrem_1 (scratch, 0L, np, nn, dp[0]);

- MPN_COPY (qp, scratch, nn - 1);

- return scratch[nn - 1];

- }

- qn = nn - dn;

- /* Compute the inverse size. */

- in = mpn_mu_div_qr_choose_in (qn, dn, 0);

- ASSERT (in <= dn);

-#if 1

- /* This alternative inverse computation method gets slightly more accurate

- results. FIXMEs: (1) Temp allocation needs not analysed (2) itch function

- not adapted (3) mpn_invert scratch needs not met. */

- ip = scratch;

- tp = scratch + in + 1;

- /* compute an approximate inverse on (in+1) limbs */

- if (dn == in)

- {

- MPN_COPY (tp + 1, dp, in);

- tp[0] = 1;

- mpn_invert (ip, tp, in + 1, NULL);

- MPN_COPY_INCR (ip, ip + 1, in);

- }

- else

- {

- cy = mpn_add_1 (tp, dp + dn - (in + 1), in + 1, 1);

- if (UNLIKELY (cy != 0))

- MPN_ZERO (ip, in);

- else

- {

- mpn_invert (ip, tp, in + 1, NULL);

- MPN_COPY_INCR (ip, ip + 1, in);

- }

-#else

- /* This older inverse computation method gets slightly worse results than the

- one above. */

- ip = scratch;

- tp = scratch + in;

- /* Compute inverse of D to in+1 limbs, then round to 'in' limbs. Ideally the

- inversion function should do this automatically. */

- if (dn == in)

- {

- tp[in + 1] = 0;

- MPN_COPY (tp + in + 2, dp, in);

- mpn_invert (tp, tp + in + 1, in + 1, NULL);

- }

- else

- {

- mpn_invert (tp, dp + dn - (in + 1), in + 1, NULL);

- }

- cy = mpn_sub_1 (tp, tp, in + 1, GMP_NUMB_HIGHBIT);

- if (UNLIKELY (cy != 0))

- MPN_ZERO (tp + 1, in);

- MPN_COPY (ip, tp + 1, in);

-#endif

-/* We can't really handle qh = 1 like this since we'd here clobber N, which is

- not allowed in the way we've defined this function's API. */

-#if 0

- qh = mpn_cmp (np + qn, dp, dn) >= 0;

- if (qh != 0)

- mpn_sub_n (np + qn, np + qn, dp, dn);

-#endif

- mpn_preinv_mu_div_qr (qp, rp, np, nn, dp, dn, ip, in, scratch + in);

-/* return qh; */

- return 0;

-void

-mpn_preinv_mu_div_qr (mp_ptr qp,

- mp_ptr rp,

- mp_ptr np,

- mp_size_t nn,

- mp_srcptr dp,

- mp_size_t dn,

- mp_srcptr ip,

- mp_size_t in,

- mp_ptr scratch)

- mp_size_t qn;

- mp_limb_t cy;

- mp_ptr tp;

- mp_limb_t r;

- qn = nn - dn;

- if (qn == 0)

- {

- MPN_COPY (rp, np, dn);

- return;

- }

- tp = scratch;

- np += qn;

- qp += qn;

- MPN_COPY (rp, np, dn);

- while (qn > 0)

- {

- if (qn < in)

- {

- ip += in - qn;

- in = qn;

- }

- np -= in;

- qp -= in;

- /* Compute the next block of quotient limbs by multiplying the inverse I

- by the upper part of the partial remainder R. */

- mpn_mul_n (tp, rp + dn - in, ip, in); /* mulhi */

- cy = mpn_add_n (qp, tp + in, rp + dn - in, in); /* I's msb implicit */

- ASSERT_ALWAYS (cy == 0); /* FIXME */

- /* Compute the product of the quotient block and the divisor D, to be

- subtracted from the partial remainder combined with new limbs from the

- dividend N. We only really need the low dn limbs. */

-#if WANT_FFT

- if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))

- {

- /* Use the wrap-around trick. */

- mp_size_t m, wn;

- int k;

- k = mpn_fft_best_k (dn + 1, 0);

- m = mpn_fft_next_size (dn + 1, k);

- wn = dn + in - m; /* number of wrapped limbs */

- mpn_mul_fft (tp, m, dp, dn, qp, in, k);

- if (wn > 0)

- {

- cy = mpn_add_n (tp, tp, rp + dn - wn, wn);

- mpn_incr_u (tp + wn, cy);

- cy = mpn_cmp (rp + dn - in, tp + dn, m - dn) < 0;

- mpn_decr_u (tp, cy);

- }

- else

-#endif

- mpn_mul (tp, dp, dn, qp, in); /* dn+in limbs, high 'in' cancels */

- r = rp[dn - in] - tp[dn];

- /* Subtract the product from the partial remainder combined with new

- limbs from the dividend N, generating a new partial remainder R. */

- if (dn != in)

- {

- cy = mpn_sub_n (tp, np, tp, in); /* get next 'in' limbs from N */

- cy = mpn_sub_nc (tp + in, rp, tp + in, dn - in, cy);

- MPN_COPY (rp, tp, dn); /* FIXME: try to avoid this */

- }

- else

- {

- cy = mpn_sub_n (rp, np, tp, in); /* get next 'in' limbs from N */

- }

- STAT (int i; int err = 0;

- static int errarr[5]; static int err_rec; static int tot);

- /* Check the remainder R and adjust the quotient as needed. */

- r -= cy;

- while (r != 0)

- {

- /* We loop 0 times with about 69% probability, 1 time with about 31%

- probability, 2 times with about 0.6% probability, if inverse is

- computed as recommended. */

- mpn_incr_u (qp, 1);

- cy = mpn_sub_n (rp, rp, dp, dn);

- r -= cy;

- STAT (err++);

- }

- if (mpn_cmp (rp, dp, dn) >= 0)

- {

- /* This is executed with about 76% probability. */

- mpn_incr_u (qp, 1);

- cy = mpn_sub_n (rp, rp, dp, dn);

- STAT (err++);

- }

- STAT (

- tot++;

- errarr[err]++;

- if (err > err_rec)

- err_rec = err;

- if (tot % 0x10000 == 0)

- {

- for (i = 0; i <= err_rec; i++)

- printf (" %d(%.1f%%)", errarr[i], 100.0*errarr[i]/tot);

- printf ("\n");

- }

- );

- qn -= in;

- }

-#define THRES 100 /* FIXME: somewhat arbitrary */

-#ifdef CHECK

-#undef THRES

-#define THRES 1

-#endif

-mp_limb_t

-mpn_mu_div_qr (mp_ptr qp,

- mp_ptr rp,

- mp_ptr np,

- mp_size_t nn,

- mp_srcptr dp,

- mp_size_t dn,

- mp_ptr scratch)

- mp_size_t qn;

- qn = nn - dn;

- if (qn + THRES < dn)

- {

- /* |______________|________| dividend nn

- |_______|________| divisor dn

- |______| quotient (prel) qn

- |_______________| quotient * ignored-part-of(divisor) dn-1

- */

- mp_limb_t cy, x;

- if (mpn_cmp (np + nn - (qn + 1), dp + dn - (qn + 1), qn + 1) >= 0)

- {

- /* Quotient is 111...111, could optimize this rare case at some point. */

- mpn_mu_div_qr2 (qp, rp, np, nn, dp, dn, scratch);

- return 0;

- }

- /* Compute a preliminary quotient and a partial remainder by dividing the

- most significant limbs of each operand. */

- mpn_mu_div_qr2 (qp, rp + nn - (2 * qn + 1),

- np + nn - (2 * qn + 1), 2 * qn + 1,

- dp + dn - (qn + 1), qn + 1,

- scratch);

- /* Multiply the quotient by the divisor limbs ignored above. */

- if (dn - (qn + 1) > qn)

- mpn_mul (scratch, dp, dn - (qn + 1), qp, qn); /* prod is dn-1 limbs */

- else

- mpn_mul (scratch, qp, qn, dp, dn - (qn + 1)); /* prod is dn-1 limbs */

- cy = mpn_sub_n (rp, np, scratch, nn - (2 * qn + 1));

- cy = mpn_sub_nc (rp + nn - (2 * qn + 1),

- rp + nn - (2 * qn + 1),

- scratch + nn - (2 * qn + 1),

- qn, cy);

- x = rp[dn - 1];

- rp[dn - 1] = x - cy;

- if (cy > x)

- {

- mpn_decr_u (qp, 1);

- mpn_add_n (rp, rp, dp, dn);

- }

- else

- {

- return mpn_mu_div_qr2 (qp, rp, np, nn, dp, dn, scratch);

- }

- return 0; /* FIXME */

-mp_size_t

-mpn_mu_div_qr_itch (mp_size_t nn, mp_size_t dn, int mua_k)

- mp_size_t qn, m;

- int k;

- /* FIXME: This isn't very carefully written, and might grossly overestimate

- the amount of scratch needed, and might perhaps also underestimate it,

- leading to potential buffer overruns. In particular k=0 might lead to

- gross overestimates. */

- if (dn == 1)

- return nn;

- qn = nn - dn;

- if (qn >= dn)

- {

- k = mpn_fft_best_k (dn + 1, 0);

- m = mpn_fft_next_size (dn + 1, k);

- return (mua_k <= 1

- ? 6 * dn

- : m + 2 * dn);

- }

- else

- {

- k = mpn_fft_best_k (dn + 1, 0);

- m = mpn_fft_next_size (dn + 1, k);

- return (mua_k <= 1

- ? m + 4 * qn

- : m + 2 * qn);

- }

« no previous file with comments | « gcc/gmp/mpn/generic/mu_div_q.c ('k') | gcc/gmp/mpn/generic/mullow_basecase.c » ('j') | no next file with comments »