|
|
Created:
4 years, 6 months ago by mvstanton Modified:
4 years, 6 months ago Reviewers:
Benedikt Meurer CC:
v8-reviews_googlegroups.com Base URL:
https://chromium.googlesource.com/v8/v8.git@master Target Ref:
refs/pending/heads/master Project:
v8 Visibility:
Public. |
Description[Math builtins]: Cleanup in ieee754, restoring MSUN version of log2().
Also added comments for the fdlibm.js port of log10, chosen over the MSUN
version for greater precision.
R=bmeurer@chromium.org
BUG=
Committed: https://crrev.com/42279f16aff29ad5f52ce77019c3ff19ed5a35ec
Cr-Commit-Position: refs/heads/master@{#37060}
Patch Set 1 #
Total comments: 1
Patch Set 2 : Code comments. #Patch Set 3 : REBASE. #
Messages
Total messages: 13 (6 generated)
Hi Benedikt, This restores MSUN log2(), adds a test for log10(100000.0) (which was failing on the windows bots), and adds correct comments on the log10 implementation. Thanks! --Mike
LGTM with nit. https://codereview.chromium.org/2071823002/diff/1/src/base/ieee754.cc File src/base/ieee754.cc (right): https://codereview.chromium.org/2071823002/diff/1/src/base/ieee754.cc#newcode... src/base/ieee754.cc:1005: // ES6 draft 09-27-13, section 20.2.2.21. Nit: Remove the ES6 hint here. And use C-style /* */ commentary for consistency with the rest of the file.
The CQ bit was checked by mvstanton@chromium.org
The patchset sent to the CQ was uploaded after l-g-t-m from bmeurer@chromium.org Link to the patchset: https://codereview.chromium.org/2071823002/#ps20001 (title: "Code comments.")
CQ is trying da patch. Follow status at https://chromium-cq-status.appspot.com/patch-status/2071823002/20001
The CQ bit was unchecked by commit-bot@chromium.org
Failed to apply patch for src/base/ieee754.cc: While running git apply --index -3 -p1; error: patch failed: src/base/ieee754.cc:913 Falling back to three-way merge... Applied patch to 'src/base/ieee754.cc' with conflicts. U src/base/ieee754.cc Patch: src/base/ieee754.cc Index: src/base/ieee754.cc diff --git a/src/base/ieee754.cc b/src/base/ieee754.cc index e642b6327a9b29360db3c09ebc0235b15b3c9f0e..c0e9ebf0c5d2139d2fc0255e8d3f270fa6720e39 100644 --- a/src/base/ieee754.cc +++ b/src/base/ieee754.cc @@ -913,138 +913,25 @@ static inline double k_log1p(double f) { return s * (hfsq + R); } -// ES6 draft 09-27-13, section 20.2.2.22. -// Return the base 2 logarithm of x -// -// fdlibm does not have an explicit log2 function, but fdlibm's pow -// function does implement an accurate log2 function as part of the -// pow implementation. This extracts the core parts of that as a -// separate log2 function. -// -// Method: -// Compute log2(x) in two pieces: -// log2(x) = w1 + w2 -// where w1 has 53-24 = 29 bits of trailing zeroes. -double log2(double x) { - static const double - bp[] = {1.0, 1.5}, - dp_h[] = {0.0, 5.84962487220764160156e-01}, /* 0x3FE2B803, 0x40000000 */ - dp_l[] = {0.0, 1.35003920212974897128e-08}, /* 0x3E4CFDEB, 0x43CFD006 */ - zero = 0.0, one = 1.0, - // Polynomial coefficients for (3/2)*(log2(x) - 2*s - 2/3*s^3) - L1 = 5.99999999999994648725e-01, L2 = 4.28571428578550184252e-01, - L3 = 3.33333329818377432918e-01, L4 = 2.72728123808534006489e-01, - L5 = 2.30660745775561754067e-01, L6 = 2.06975017800338417784e-01, - // cp = 2/(3*ln(2)). Note that cp_h + cp_l is cp, but with more accuracy. - cp = 9.61796693925975554329e-01, cp_h = 9.61796700954437255859e-01, - cp_l = -7.02846165095275826516e-09, two53 = 9007199254740992, /* 2^53 */ - two54 = 1.80143985094819840000e+16; /* 0x43500000, 0x00000000 */ - - static volatile double vzero = 0.0; - double ax, z_h, z_l, p_h, p_l; - double t1, t2, r, t, u, v; - int32_t j, k, n; - int32_t ix, hx; - u_int32_t lx; - - EXTRACT_WORDS(hx, lx, x); - ix = hx & 0x7fffffff; - - // Handle special cases. - // log2(+/- 0) = -Infinity - if ((ix | lx) == 0) return -two54 / vzero; /* log(+-0)=-inf */ - - // log(x) = NaN, if x < 0 - if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */ - - // log2(Infinity) = Infinity, log2(NaN) = NaN - if (ix >= 0x7ff00000) return x; - - ax = fabs(x); - - double ss, s2, s_h, s_l, t_h, t_l; - n = 0; - - /* take care subnormal number */ - if (ix < 0x00100000) { - ax *= two53; - n -= 53; - GET_HIGH_WORD(ix, ax); - } - - n += ((ix) >> 20) - 0x3ff; - j = ix & 0x000fffff; - - /* determine interval */ - ix = j | 0x3ff00000; /* normalize ix */ - if (j <= 0x3988E) { - k = 0; /* |x|<sqrt(3/2) */ - } else if (j < 0xBB67A) { - k = 1; /* |x|<sqrt(3) */ - } else { - k = 0; - n += 1; - ix -= 0x00100000; - } - SET_HIGH_WORD(ax, ix); - - /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ - u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */ - v = one / (ax + bp[k]); - ss = u * v; - s_h = ss; - SET_LOW_WORD(s_h, 0); - /* t_h=ax+bp[k] High */ - t_h = zero; - SET_HIGH_WORD(t_h, ((ix >> 1) | 0x20000000) + 0x00080000 + (k << 18)); - t_l = ax - (t_h - bp[k]); - s_l = v * ((u - s_h * t_h) - s_h * t_l); - /* compute log(ax) */ - s2 = ss * ss; - r = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6))))); - r += s_l * (s_h + ss); - s2 = s_h * s_h; - t_h = 3.0 + s2 + r; - SET_LOW_WORD(t_h, 0); - t_l = r - ((t_h - 3.0) - s2); - /* u+v = ss*(1+...) */ - u = s_h * t_h; - v = s_l * t_h + t_l * ss; - /* 2/(3log2)*(ss+...) */ - p_h = u + v; - SET_LOW_WORD(p_h, 0); - p_l = v - (p_h - u); - z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3*log2) */ - z_l = cp_l * p_h + p_l * cp + dp_l[k]; - /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */ - t = static_cast<double>(n); - t1 = (((z_h + z_l) + dp_h[k]) + t); - SET_LOW_WORD(t1, 0); - t2 = z_l - (((t1 - t) - dp_h[k]) - z_h); - - // t1 + t2 = log2(ax), sum up because we do not care about extra precision. - return t1 + t2; -} - /* - * Return the base 10 logarithm of x. See e_log.c and k_log.h for most + * Return the base 2 logarithm of x. See e_log.c and k_log.h for most * comments. * - * log10(x) = (f - 0.5*f*f + k_log1p(f)) / ln10 + k * log10(2) + * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel, + * then does the combining and scaling steps + * log2(x) = (f - 0.5*f*f + k_log1p(f)) / ln2 + k * in not-quite-routine extra precision. */ -double log10Old(double x) { +double log2(double x) { static const double - two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ - ivln10hi = 4.34294481878168880939e-01, /* 0x3fdbcb7b, 0x15200000 */ - ivln10lo = 2.50829467116452752298e-11, /* 0x3dbb9438, 0xca9aadd5 */ - log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */ - log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */ + two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ + ivln2hi = 1.44269504072144627571e+00, /* 0x3ff71547, 0x65200000 */ + ivln2lo = 1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */ static const double zero = 0.0; static volatile double vzero = 0.0; - double f, hfsq, hi, lo, r, val_hi, val_lo, w, y, y2; + double f, hfsq, hi, lo, r, val_hi, val_lo, w, y; int32_t i, k, hx; u_int32_t lx; @@ -1071,27 +958,76 @@ double log10Old(double x) { hfsq = 0.5 * f * f; r = k_log1p(f); - /* See e_log2.c for most details. */ + /* + * f-hfsq must (for args near 1) be evaluated in extra precision + * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2). + * This is fairly efficient since f-hfsq only depends on f, so can + * be evaluated in parallel with R. Not combining hfsq with R also + * keeps R small (though not as small as a true `lo' term would be), + * so that extra precision is not needed for terms involving R. + * + * Compiler bugs involving extra precision used to break Dekker's + * theorem for spitting f-hfsq as hi+lo, unless double_t was used + * or the multi-precision calculations were avoided when double_t + * has extra precision. These problems are now automatically + * avoided as a side effect of the optimization of combining the + * Dekker splitting step with the clear-low-bits step. + * + * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra + * precision to avoid a very large cancellation when x is very near + * these values. Unlike the above cancellations, this problem is + * specific to base 2. It is strange that adding +-1 is so much + * harder than adding +-ln2 or +-log10_2. + * + * This uses Dekker's theorem to normalize y+val_hi, so the + * compiler bugs are back in some configurations, sigh. And I + * don't want to used double_t to avoid them, since that gives a + * pessimization and the support for avoiding the pessimization + * is not yet available. + * + * The multi-precision calculations for the multiplications are + * routine. + */ hi = f - hfsq; SET_LOW_WORD(hi, 0); lo = (f - hi) - hfsq + r; - val_hi = hi * ivln10hi; - y2 = y * log10_2hi; - val_lo = y * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi; + val_hi = hi * ivln2hi; + val_lo = (lo + hi) * ivln2lo + lo * ivln2hi; - /* - * Extra precision in for adding y*log10_2hi is not strictly needed - * since there is no very large cancellation near x = sqrt(2) or - * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs - * with some parallelism and it reduces the error for many args. - */ - w = y2 + val_hi; - val_lo += (y2 - w) + val_hi; + /* spadd(val_hi, val_lo, y), except for not using double_t: */ + w = y + val_hi; + val_lo += (y - w) + val_hi; val_hi = w; return val_lo + val_hi; } +/* + * Return the base 10 logarithm of x + * + * Method : + * Let log10_2hi = leading 40 bits of log10(2) and + * log10_2lo = log10(2) - log10_2hi, + * ivln10 = 1/log(10) rounded. + * Then + * n = ilogb(x), + * if(n<0) n = n+1; + * x = scalbn(x,-n); + * log10(x) := n*log10_2hi + (n*log10_2lo + ivln10*log(x)) + * + * Note 1: + * To guarantee log10(10**n)=n, where 10**n is normal, the rounding + * mode must set to Round-to-Nearest. + * Note 2: + * [1/log(10)] rounded to 53 bits has error .198 ulps; + * log10 is monotonic at all binary break points. + * + * Special cases: + * log10(x) is NaN if x < 0; + * log10(+INF) is +INF; log10(0) is -INF; + * log10(NaN) is that NaN; + * log10(10**N) = N for N=0,1,...,22. + */ double log10(double x) { static const double two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
The CQ bit was checked by mvstanton@chromium.org
The patchset sent to the CQ was uploaded after l-g-t-m from bmeurer@chromium.org Link to the patchset: https://codereview.chromium.org/2071823002/#ps40001 (title: "REBASE.")
CQ is trying da patch. Follow status at https://chromium-cq-status.appspot.com/patch-status/2071823002/40001
Message was sent while issue was closed.
Committed patchset #3 (id:40001)
Message was sent while issue was closed.
Description was changed from ========== [Math builtins]: Cleanup in ieee754, restoring MSUN version of log2(). Also added comments for the fdlibm.js port of log10, chosen over the MSUN version for greater precision. R=bmeurer@chromium.org BUG= ========== to ========== [Math builtins]: Cleanup in ieee754, restoring MSUN version of log2(). Also added comments for the fdlibm.js port of log10, chosen over the MSUN version for greater precision. R=bmeurer@chromium.org BUG= Committed: https://crrev.com/42279f16aff29ad5f52ce77019c3ff19ed5a35ec Cr-Commit-Position: refs/heads/master@{#37060} ==========
Message was sent while issue was closed.
Patchset 3 (id:??) landed as https://crrev.com/42279f16aff29ad5f52ce77019c3ff19ed5a35ec Cr-Commit-Position: refs/heads/master@{#37060} |