| OLD | NEW |
| (Empty) |
| 1 /* mpn_mod_1s_3p (ap, n, b, cps) | |
| 2 Divide (ap,,n) by b. Return the single-limb remainder. | |
| 3 Requires that b < B / 3. | |
| 4 | |
| 5 Contributed to the GNU project by Torbjorn Granlund. | |
| 6 | |
| 7 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY | |
| 8 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST | |
| 9 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. | |
| 10 | |
| 11 Copyright 2008, 2009 Free Software Foundation, Inc. | |
| 12 | |
| 13 This file is part of the GNU MP Library. | |
| 14 | |
| 15 The GNU MP Library is free software; you can redistribute it and/or modify | |
| 16 it under the terms of the GNU Lesser General Public License as published by | |
| 17 the Free Software Foundation; either version 3 of the License, or (at your | |
| 18 option) any later version. | |
| 19 | |
| 20 The GNU MP Library is distributed in the hope that it will be useful, but | |
| 21 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
| 22 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public | |
| 23 License for more details. | |
| 24 | |
| 25 You should have received a copy of the GNU Lesser General Public License | |
| 26 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ | |
| 27 | |
| 28 #include "gmp.h" | |
| 29 #include "gmp-impl.h" | |
| 30 #include "longlong.h" | |
| 31 | |
| 32 void | |
| 33 mpn_mod_1s_3p_cps (mp_limb_t cps[6], mp_limb_t b) | |
| 34 { | |
| 35 mp_limb_t bi; | |
| 36 mp_limb_t B1modb, B2modb, B3modb, B4modb; | |
| 37 int cnt; | |
| 38 | |
| 39 ASSERT (b <= GMP_NUMB_MAX / 3); | |
| 40 | |
| 41 count_leading_zeros (cnt, b); | |
| 42 | |
| 43 b <<= cnt; | |
| 44 invert_limb (bi, b); | |
| 45 | |
| 46 B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt)); | |
| 47 ASSERT (B1modb <= b); /* NB: not fully reduced mod b */ | |
| 48 udiv_rnd_preinv (B2modb, B1modb, b, bi); | |
| 49 udiv_rnd_preinv (B3modb, B2modb, b, bi); | |
| 50 udiv_rnd_preinv (B4modb, B3modb, b, bi); | |
| 51 | |
| 52 cps[0] = bi; | |
| 53 cps[1] = cnt; | |
| 54 cps[2] = B1modb >> cnt; | |
| 55 cps[3] = B2modb >> cnt; | |
| 56 cps[4] = B3modb >> cnt; | |
| 57 cps[5] = B4modb >> cnt; | |
| 58 } | |
| 59 | |
| 60 mp_limb_t | |
| 61 mpn_mod_1s_3p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[6]) | |
| 62 { | |
| 63 mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r; | |
| 64 mp_limb_t B1modb, B2modb, B3modb, B4modb; | |
| 65 mp_size_t i; | |
| 66 int cnt; | |
| 67 | |
| 68 B1modb = cps[2]; | |
| 69 B2modb = cps[3]; | |
| 70 B3modb = cps[4]; | |
| 71 B4modb = cps[5]; | |
| 72 | |
| 73 umul_ppmm (ph, pl, ap[n - 2], B1modb); | |
| 74 add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 3]); | |
| 75 umul_ppmm (ch, cl, ap[n - 1], B2modb); | |
| 76 add_ssaaaa (rh, rl, ph, pl, ch, cl); | |
| 77 | |
| 78 for (i = n - 6; i >= 0; i -= 3) | |
| 79 { | |
| 80 /* rr = ap[i] < B | |
| 81 + ap[i+1] * (B mod b) <= (B-1)(b-1) | |
| 82 + ap[i+2] * (B^2 mod b) <= (B-1)(b-1) | |
| 83 + LO(rr) * (B^3 mod b) <= (B-1)(b-1) | |
| 84 + HI(rr) * (B^4 mod b) <= (B-1)(b-1) | |
| 85 */ | |
| 86 umul_ppmm (ph, pl, ap[i + 1], B1modb); | |
| 87 add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]); | |
| 88 | |
| 89 umul_ppmm (ch, cl, ap[i + 2], B2modb); | |
| 90 add_ssaaaa (ph, pl, ph, pl, ch, cl); | |
| 91 | |
| 92 umul_ppmm (ch, cl, rl, B3modb); | |
| 93 add_ssaaaa (ph, pl, ph, pl, ch, cl); | |
| 94 | |
| 95 umul_ppmm (rh, rl, rh, B4modb); | |
| 96 add_ssaaaa (rh, rl, rh, rl, ph, pl); | |
| 97 } | |
| 98 | |
| 99 if (i >= -2) | |
| 100 { | |
| 101 umul_ppmm (ph, pl, rl, B1modb); | |
| 102 add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 2]); | |
| 103 umul_ppmm (rh, rl, rh, B2modb); | |
| 104 add_ssaaaa (rh, rl, rh, rl, ph, pl); | |
| 105 if (i >= -1) | |
| 106 { | |
| 107 umul_ppmm (ph, pl, rl, B1modb); | |
| 108 add_ssaaaa (ph, pl, ph, pl, 0, ap[0]); | |
| 109 umul_ppmm (rh, rl, rh, B2modb); | |
| 110 add_ssaaaa (rh, rl, rh, rl, ph, pl); | |
| 111 } | |
| 112 } | |
| 113 | |
| 114 bi = cps[0]; | |
| 115 cnt = cps[1]; | |
| 116 | |
| 117 #if 1 | |
| 118 umul_ppmm (rh, cl, rh, B1modb); | |
| 119 add_ssaaaa (rh, rl, rh, rl, 0, cl); | |
| 120 r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)); | |
| 121 #else | |
| 122 udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt), | |
| 123 (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi); | |
| 124 ASSERT (q <= 3); /* optimize for small quotient? */ | |
| 125 #endif | |
| 126 | |
| 127 udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi); | |
| 128 | |
| 129 return r >> cnt; | |
| 130 } | |
| OLD | NEW |