src/base/ieee754.cc - Issue 2071823002: [Math builtins]: Cleanup in ieee754, restoring MSUN version of log2().

Unified Diff: src/base/ieee754.cc

Issue 2071823002: [Math builtins]: Cleanup in ieee754, restoring MSUN version of log2(). (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/base/ieee754.cc

diff --git a/src/base/ieee754.cc b/src/base/ieee754.cc

index e642b6327a9b29360db3c09ebc0235b15b3c9f0e..6864d89b6cd53bc22ae5b07461d1eb950b89ab98 100644

--- a/src/base/ieee754.cc

+++ b/src/base/ieee754.cc

@@ -913,138 +913,25 @@ static inline double k_log1p(double f) {

return s * (hfsq + R);

}

-// ES6 draft 09-27-13, section 20.2.2.22.

-// Return the base 2 logarithm of x

-//

-// fdlibm does not have an explicit log2 function, but fdlibm's pow

-// function does implement an accurate log2 function as part of the

-// pow implementation. This extracts the core parts of that as a

-// separate log2 function.

-//

-// Method:

-// Compute log2(x) in two pieces:

-// log2(x) = w1 + w2

-// where w1 has 53-24 = 29 bits of trailing zeroes.

-double log2(double x) {

- static const double

- bp[] = {1.0, 1.5},

- dp_h[] = {0.0, 5.84962487220764160156e-01}, /* 0x3FE2B803, 0x40000000 */

- dp_l[] = {0.0, 1.35003920212974897128e-08}, /* 0x3E4CFDEB, 0x43CFD006 */

- zero = 0.0, one = 1.0,

- // Polynomial coefficients for (3/2)*(log2(x) - 2*s - 2/3*s^3)

- L1 = 5.99999999999994648725e-01, L2 = 4.28571428578550184252e-01,

- L3 = 3.33333329818377432918e-01, L4 = 2.72728123808534006489e-01,

- L5 = 2.30660745775561754067e-01, L6 = 2.06975017800338417784e-01,

- // cp = 2/(3*ln(2)). Note that cp_h + cp_l is cp, but with more accuracy.

- cp = 9.61796693925975554329e-01, cp_h = 9.61796700954437255859e-01,

- cp_l = -7.02846165095275826516e-09, two53 = 9007199254740992, /* 2^53 */

- two54 = 1.80143985094819840000e+16; /* 0x43500000, 0x00000000 */

- static volatile double vzero = 0.0;

- double ax, z_h, z_l, p_h, p_l;

- double t1, t2, r, t, u, v;

- int32_t j, k, n;

- int32_t ix, hx;

- u_int32_t lx;

- EXTRACT_WORDS(hx, lx, x);

- ix = hx & 0x7fffffff;

- // Handle special cases.

- // log2(+/- 0) = -Infinity

- if ((ix | lx) == 0) return -two54 / vzero; /* log(+-0)=-inf */

- // log(x) = NaN, if x < 0

- if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */

- // log2(Infinity) = Infinity, log2(NaN) = NaN

- if (ix >= 0x7ff00000) return x;

- ax = fabs(x);

- double ss, s2, s_h, s_l, t_h, t_l;

- n = 0;

- /* take care subnormal number */

- if (ix < 0x00100000) {

- ax *= two53;

- n -= 53;

- GET_HIGH_WORD(ix, ax);

- }

- n += ((ix) >> 20) - 0x3ff;

- j = ix & 0x000fffff;

- /* determine interval */

- ix = j | 0x3ff00000; /* normalize ix */

- if (j <= 0x3988E) {

- k = 0; /* |x|<sqrt(3/2) */

- } else if (j < 0xBB67A) {

- k = 1; /* |x|<sqrt(3) */

- } else {

- k = 0;

- n += 1;

- ix -= 0x00100000;

- }

- SET_HIGH_WORD(ax, ix);

- /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */

- u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */

- v = one / (ax + bp[k]);

- ss = u * v;

- s_h = ss;

- SET_LOW_WORD(s_h, 0);

- /* t_h=ax+bp[k] High */

- t_h = zero;

- SET_HIGH_WORD(t_h, ((ix >> 1) | 0x20000000) + 0x00080000 + (k << 18));

- t_l = ax - (t_h - bp[k]);

- s_l = v * ((u - s_h * t_h) - s_h * t_l);

- /* compute log(ax) */

- s2 = ss * ss;

- r = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6)))));

- r += s_l * (s_h + ss);

- s2 = s_h * s_h;

- t_h = 3.0 + s2 + r;

- SET_LOW_WORD(t_h, 0);

- t_l = r - ((t_h - 3.0) - s2);

- /* u+v = ss*(1+...) */

- u = s_h * t_h;

- v = s_l * t_h + t_l * ss;

- /* 2/(3log2)*(ss+...) */

- p_h = u + v;

- SET_LOW_WORD(p_h, 0);

- p_l = v - (p_h - u);

- z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3*log2) */

- z_l = cp_l * p_h + p_l * cp + dp_l[k];

- /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */

- t = static_cast<double>(n);

- t1 = (((z_h + z_l) + dp_h[k]) + t);

- SET_LOW_WORD(t1, 0);

- t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);

- // t1 + t2 = log2(ax), sum up because we do not care about extra precision.

- return t1 + t2;

- * Return the base 10 logarithm of x. See e_log.c and k_log.h for most

+ * Return the base 2 logarithm of x. See e_log.c and k_log.h for most

* comments.

- * log10(x) = (f - 0.5*f*f + k_log1p(f)) / ln10 + k * log10(2)

+ * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel,

+ * then does the combining and scaling steps

+ * log2(x) = (f - 0.5*f*f + k_log1p(f)) / ln2 + k

* in not-quite-routine extra precision.

-double log10Old(double x) {

+double log2(double x) {

static const double

- two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */

- ivln10hi = 4.34294481878168880939e-01, /* 0x3fdbcb7b, 0x15200000 */

- ivln10lo = 2.50829467116452752298e-11, /* 0x3dbb9438, 0xca9aadd5 */

- log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */

- log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */

+ two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */

+ ivln2hi = 1.44269504072144627571e+00, /* 0x3ff71547, 0x65200000 */

+ ivln2lo = 1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */

static const double zero = 0.0;

static volatile double vzero = 0.0;

- double f, hfsq, hi, lo, r, val_hi, val_lo, w, y, y2;

+ double f, hfsq, hi, lo, r, val_hi, val_lo, w, y;

int32_t i, k, hx;

u_int32_t lx;

@@ -1071,27 +958,75 @@ double log10Old(double x) {

hfsq = 0.5 * f * f;

r = k_log1p(f);

- /* See e_log2.c for most details. */

+ /*

+ * f-hfsq must (for args near 1) be evaluated in extra precision

+ * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2).

+ * This is fairly efficient since f-hfsq only depends on f, so can

+ * be evaluated in parallel with R. Not combining hfsq with R also

+ * keeps R small (though not as small as a true `lo' term would be),

+ * so that extra precision is not needed for terms involving R.

+ *

+ * Compiler bugs involving extra precision used to break Dekker's

+ * theorem for spitting f-hfsq as hi+lo, unless double_t was used

+ * or the multi-precision calculations were avoided when double_t

+ * has extra precision. These problems are now automatically

+ * avoided as a side effect of the optimization of combining the

+ * Dekker splitting step with the clear-low-bits step.

+ *

+ * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra

+ * precision to avoid a very large cancellation when x is very near

+ * these values. Unlike the above cancellations, this problem is

+ * specific to base 2. It is strange that adding +-1 is so much

+ * harder than adding +-ln2 or +-log10_2.

+ *

+ * This uses Dekker's theorem to normalize y+val_hi, so the

+ * compiler bugs are back in some configurations, sigh. And I

+ * don't want to used double_t to avoid them, since that gives a

+ * pessimization and the support for avoiding the pessimization

+ * is not yet available.

+ *

+ * The multi-precision calculations for the multiplications are

+ * routine.

+ */

hi = f - hfsq;

SET_LOW_WORD(hi, 0);

lo = (f - hi) - hfsq + r;

- val_hi = hi * ivln10hi;

- y2 = y * log10_2hi;

- val_lo = y * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi;

+ val_hi = hi * ivln2hi;

+ val_lo = (lo + hi) * ivln2lo + lo * ivln2hi;

- /*

- * Extra precision in for adding y*log10_2hi is not strictly needed

- * since there is no very large cancellation near x = sqrt(2) or

- * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs

- * with some parallelism and it reduces the error for many args.

- */

- w = y2 + val_hi;

- val_lo += (y2 - w) + val_hi;

+ /* spadd(val_hi, val_lo, y), except for not using double_t: */

+ w = y + val_hi;

+ val_lo += (y - w) + val_hi;

val_hi = w;

return val_lo + val_hi;

}

+// ES6 draft 09-27-13, section 20.2.2.21.

Benedikt Meurer 2016/06/16 16:47:34 Nit: Remove the ES6 hint here. And use C-style /*

+// Return the base 10 logarithm of x

+//

+// Method :

+// Let log10_2hi = leading 40 bits of log10(2) and

+// log10_2lo = log10(2) - log10_2hi,

+// ivln10 = 1/log(10) rounded.

+// Then

+// n = ilogb(x),

+// if(n<0) n = n+1;

+// x = scalbn(x,-n);

+// log10(x) := n*log10_2hi + (n*log10_2lo + ivln10*log(x))

+//

+// Note 1:

+// To guarantee log10(10**n)=n, where 10**n is normal, the rounding

+// mode must set to Round-to-Nearest.

+// Note 2:

+// [1/log(10)] rounded to 53 bits has error .198 ulps;

+// log10 is monotonic at all binary break points.

+//

+// Special cases:

+// log10(x) is NaN if x < 0;

+// log10(+INF) is +INF; log10(0) is -INF;

+// log10(NaN) is that NaN;

+// log10(10**N) = N for N=0,1,...,22.

double log10(double x) {

static const double

two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */

« no previous file with comments | « no previous file | test/unittests/base/ieee754-unittest.cc » ('j') | no next file with comments »