src/base/ieee754.cc - Issue 2063693002: [builtins] Introduce proper Float64Log2 and Float64Log10 operators.

Side by Side Diff: src/base/ieee754.cc

Issue 2063693002: [builtins] Introduce proper Float64Log2 and Float64Log10 operators. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: More precise log10. Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm).	1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm).

2 //	2 //

3 // ====================================================	3 // ====================================================

4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.	4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.

5 //	5 //

6 // Developed at SunSoft, a Sun Microsystems, Inc. business.	6 // Developed at SunSoft, a Sun Microsystems, Inc. business.

7 // Permission to use, copy, modify, and distribute this	7 // Permission to use, copy, modify, and distribute this

8 // software is freely granted, provided that this notice	8 // software is freely granted, provided that this notice

9 // is preserved.	9 // is preserved.

10 // ====================================================	10 // ====================================================

(...skipping 379 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
390 default: /* case 3 */	390 default: /* case 3 */

391 return (z - pi_lo) - pi; /* atan(-,-) */	391 return (z - pi_lo) - pi; /* atan(-,-) */

392 }	392 }

393 }	393 }

394	394

395 /* log(x)	395 /* log(x)

396 * Return the logrithm of x	396 * Return the logrithm of x

397 *	397 *

398 * Method :	398 * Method :

399 * 1. Argument Reduction: find k and f such that	399 * 1. Argument Reduction: find k and f such that

400 * x = 2^k * (1+f),	400 * x = 2^k * (1+f),

401 * where sqrt(2)/2 < 1+f < sqrt(2) .	401 * where sqrt(2)/2 < 1+f < sqrt(2) .

402 *	402 *

403 * 2. Approximation of log(1+f).	403 * 2. Approximation of log(1+f).

404 * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)	404 * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)

405 * = 2s + 2/3 s3 + 2/5 s5 + .....,	405 * = 2s + 2/3 s3 + 2/5 s5 + .....,

406 * = 2s + s*R	406 * = 2s + s*R

407 * We use a special Reme algorithm on [0,0.1716] to generate	407 * We use a special Reme algorithm on [0,0.1716] to generate

408 * a polynomial of degree 14 to approximate R The maximum error	408 * a polynomial of degree 14 to approximate R The maximum error

409 * of this polynomial approximation is bounded by 2**-58.45. In	409 * of this polynomial approximation is bounded by 2**-58.45. In

410 * other words,	410 * other words,

(...skipping 267 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
678 s = f / (2.0 + f);	678 s = f / (2.0 + f);

679 z = s * s;	679 z = s * s;

680 R = z * (Lp1 +	680 R = z * (Lp1 +

681 z * (Lp2 + z * (Lp3 + z * (Lp4 + z * (Lp5 + z * (Lp6 + z * Lp7))))));	681 z * (Lp2 + z * (Lp3 + z * (Lp4 + z * (Lp5 + z * (Lp6 + z * Lp7))))));

682 if (k == 0)	682 if (k == 0)

683 return f - (hfsq - s * (hfsq + R));	683 return f - (hfsq - s * (hfsq + R));

684 else	684 else

685 return k * ln2_hi - ((hfsq - (s * (hfsq + R) + (k * ln2_lo + c))) - f);	685 return k * ln2_hi - ((hfsq - (s * (hfsq + R) + (k * ln2_lo + c))) - f);

686 }	686 }

687	687

	688 /*

	689 * k_log1p(f):

	690 * Return log(1+f) - f for 1+f in ~[sqrt(2)/2, sqrt(2)].

	691 *

	692 * The following describes the overall strategy for computing

	693 * logarithms in base e. The argument reduction and adding the final

	694 * term of the polynomial are done by the caller for increased accuracy

	695 * when different bases are used.

	696 *

	697 * Method :

	698 * 1. Argument Reduction: find k and f such that

	699 * x = 2^k * (1+f),

	700 * where sqrt(2)/2 < 1+f < sqrt(2) .

	701 *

	702 * 2. Approximation of log(1+f).

	703 * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)

	704 * = 2s + 2/3 s3 + 2/5 s5 + .....,

	705 * = 2s + s*R

	706 * We use a special Reme algorithm on [0,0.1716] to generate

	707 * a polynomial of degree 14 to approximate R The maximum error

	708 * of this polynomial approximation is bounded by 2**-58.45. In

	709 * other words,

	710 * 2 4 6 8 10 12 14

	711 * R(z) ~ Lg1s +Lg2s +Lg3s +Lg4s +Lg5s +Lg6s +Lg7*s

	712 * (the values of Lg1 to Lg7 are listed in the program)

	713 * and

	714 * \| 2 14 \| -58.45

	715 * \| Lg1s +...+Lg7s - R(z) \| <= 2

	716 * \| \|

	717 * Note that 2s = f - sf = f - hfsq + shfsq, where hfsq = f*f/2.

	718 * In order to guarantee error in log below 1ulp, we compute log

	719 * by

	720 * log(1+f) = f - s*(f - R) (if f is not too large)

	721 * log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)

	722 *

	723 * 3. Finally, log(x) = k*ln2 + log(1+f).

	724 * = kln2_hi+(f-(hfsq-(s(hfsq+R)+k*ln2_lo)))

	725 * Here ln2 is split into two floating point number:

	726 * ln2_hi + ln2_lo,

	727 * where n*ln2_hi is always exact for \|n\| < 2000.

	728 *

	729 * Special cases:

	730 * log(x) is NaN with signal if x < 0 (including -INF) ;

	731 * log(+INF) is +INF; log(0) is -INF with signal;

	732 * log(NaN) is that NaN with no signal.

	733 *

	734 * Accuracy:

	735 * according to an error analysis, the error is always less than

	736 * 1 ulp (unit in the last place).

	737 *

	738 * Constants:

	739 * The hexadecimal values are the intended ones for the following

	740 * constants. The decimal values may be used, provided that the

	741 * compiler will convert from decimal to binary accurately enough

	742 * to produce the hexadecimal values shown.

	743 */

	744

	745 static const double Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */

	746 Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */

	747 Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */

	748 Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */

	749 Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */

	750 Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */

	751 Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */

	752

	753 /*

	754 * We always inline k_log1p(), since doing so produces a

	755 * substantial performance improvement (~40% on amd64).

	756 */

	757 static inline double k_log1p(double f) {

	758 double hfsq, s, z, R, w, t1, t2;

	759

	760 s = f / (2.0 + f);

	761 z = s * s;

	762 w = z * z;

	763 t1 = w * (Lg2 + w * (Lg4 + w * Lg6));

	764 t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));

	765 R = t2 + t1;

	766 hfsq = 0.5 * f * f;

	767 return s * (hfsq + R);

	768 }

	769

	770 // ES6 draft 09-27-13, section 20.2.2.22.

	771 // Return the base 2 logarithm of x

	772 //

	773 // fdlibm does not have an explicit log2 function, but fdlibm's pow

	774 // function does implement an accurate log2 function as part of the

	775 // pow implementation. This extracts the core parts of that as a

	776 // separate log2 function.

	777 //

	778 // Method:

	779 // Compute log2(x) in two pieces:

	780 // log2(x) = w1 + w2

	781 // where w1 has 53-24 = 29 bits of trailing zeroes.

	782 double log2(double x) {

	783 static const double

	784 bp[] = {1.0, 1.5},

	785 dp_h[] = {0.0, 5.84962487220764160156e-01}, /* 0x3FE2B803, 0x40000000 */

	786 dp_l[] = {0.0, 1.35003920212974897128e-08}, /* 0x3E4CFDEB, 0x43CFD006 */

	787 zero = 0.0, one = 1.0,

	788 // Polynomial coefficients for (3/2)(log2(x) - 2s - 2/3*s^3)

	789 L1 = 5.99999999999994648725e-01, L2 = 4.28571428578550184252e-01,

	790 L3 = 3.33333329818377432918e-01, L4 = 2.72728123808534006489e-01,

	791 L5 = 2.30660745775561754067e-01, L6 = 2.06975017800338417784e-01,

	792 // cp = 2/(3*ln(2)). Note that cp_h + cp_l is cp, but with more accuracy.

	793 cp = 9.61796693925975554329e-01, cp_h = 9.61796700954437255859e-01,

	794 cp_l = -7.02846165095275826516e-09, two53 = 9007199254740992, /* 2^53 */

	795 two54 = 1.80143985094819840000e+16; /* 0x43500000, 0x00000000 */

	796

	797 static volatile double vzero = 0.0;

	798 double ax, z_h, z_l, p_h, p_l;

	799 double t1, t2, r, t, u, v;

	800 int32_t j, k, n;

	801 int32_t ix, hx;

	802 u_int32_t lx;

	803

	804 EXTRACT_WORDS(hx, lx, x);

	805 ix = hx & 0x7fffffff;

	806

	807 // Handle special cases.

	808 // log2(+/- 0) = -Infinity

	809 if ((ix \| lx) == 0) return -two54 / vzero; /* log(+-0)=-inf */

	810

	811 // log(x) = NaN, if x < 0

	812 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */

	813

	814 // log2(Infinity) = Infinity, log2(NaN) = NaN

	815 if (ix >= 0x7ff00000) return x;

	816

	817 ax = fabs(x);

	818

	819 double ss, s2, s_h, s_l, t_h, t_l;

	820 n = 0;

	821

	822 /* take care subnormal number */

	823 if (ix < 0x00100000) {

	824 ax *= two53;

	825 n -= 53;

	826 GET_HIGH_WORD(ix, ax);

	827 }

	828

	829 n += ((ix) >> 20) - 0x3ff;

	830 j = ix & 0x000fffff;

	831

	832 /* determine interval */

	833 ix = j \| 0x3ff00000; /* normalize ix */

	834 if (j <= 0x3988E) {

	835 k = 0; /* \|x\|<sqrt(3/2) */

	836 } else if (j < 0xBB67A) {

	837 k = 1; /* \|x\|<sqrt(3) */

	838 } else {

	839 k = 0;

	840 n += 1;

	841 ix -= 0x00100000;

	842 }

	843 SET_HIGH_WORD(ax, ix);

	844

	845 /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */

	846 u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */

	847 v = one / (ax + bp[k]);

	848 ss = u * v;

	849 s_h = ss;

	850 SET_LOW_WORD(s_h, 0);

	851 /* t_h=ax+bp[k] High */

	852 t_h = zero;

	853 SET_HIGH_WORD(t_h, ((ix >> 1) \| 0x20000000) + 0x00080000 + (k << 18));

	854 t_l = ax - (t_h - bp[k]);

	855 s_l = v * ((u - s_h * t_h) - s_h * t_l);

	856 /* compute log(ax) */

	857 s2 = ss * ss;

	858 r = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6)))));

	859 r += s_l * (s_h + ss);

	860 s2 = s_h * s_h;

	861 t_h = 3.0 + s2 + r;

	862 SET_LOW_WORD(t_h, 0);

	863 t_l = r - ((t_h - 3.0) - s2);

	864 /* u+v = ss(1+...) /

	865 u = s_h * t_h;

	866 v = s_l * t_h + t_l * ss;

	867 /* 2/(3log2)(ss+...) /

	868 p_h = u + v;

	869 SET_LOW_WORD(p_h, 0);

	870 p_l = v - (p_h - u);

	871 z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3log2) /

	872 z_l = cp_l * p_h + p_l * cp + dp_l[k];

	873 /* log2(ax) = (ss+..)2/(3log2) = n + dp_h + z_h + z_l */

	874 t = static_cast<double>(n);

	875 t1 = (((z_h + z_l) + dp_h[k]) + t);

	876 SET_LOW_WORD(t1, 0);

	877 t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);

	878

	879 // t1 + t2 = log2(ax), sum up because we do not care about extra precision.

	880 return t1 + t2;

	881 }

	882

	883 /*

	884 * Return the base 10 logarithm of x. See e_log.c and k_log.h for most

	885 * comments.

	886 *

	887 * log10(x) = (f - 0.5ff + k_log1p(f)) / ln10 + k * log10(2)

	888 * in not-quite-routine extra precision.

	889 */

	890 double log10Old(double x) {

	891 static const double

	892 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */

	893 ivln10hi = 4.34294481878168880939e-01, /* 0x3fdbcb7b, 0x15200000 */

	894 ivln10lo = 2.50829467116452752298e-11, /* 0x3dbb9438, 0xca9aadd5 */

	895 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */

	896 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */

	897

	898 static const double zero = 0.0;

	899 static volatile double vzero = 0.0;

	900

	901 double f, hfsq, hi, lo, r, val_hi, val_lo, w, y, y2;

	902 int32_t i, k, hx;

	903 u_int32_t lx;

	904

	905 EXTRACT_WORDS(hx, lx, x);

	906

	907 k = 0;

	908 if (hx < 0x00100000) { /* x < 2*-1022 /

	909 if (((hx & 0x7fffffff) \| lx) == 0)

	910 return -two54 / vzero; /* log(+-0)=-inf */

	911 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */

	912 k -= 54;

	913 x = two54; / subnormal number, scale up x */

	914 GET_HIGH_WORD(hx, x);

	915 }

	916 if (hx >= 0x7ff00000) return x + x;

	917 if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */

	918 k += (hx >> 20) - 1023;

	919 hx &= 0x000fffff;

	920 i = (hx + 0x95f64) & 0x100000;

	921 SET_HIGH_WORD(x, hx \| (i ^ 0x3ff00000)); /* normalize x or x/2 */

	922 k += (i >> 20);

	923 y = static_cast<double>(k);

	924 f = x - 1.0;

	925 hfsq = 0.5 * f * f;

	926 r = k_log1p(f);

	927

	928 /* See e_log2.c for most details. */

	929 hi = f - hfsq;

	930 SET_LOW_WORD(hi, 0);

	931 lo = (f - hi) - hfsq + r;

	932 val_hi = hi * ivln10hi;

	933 y2 = y * log10_2hi;

	934 val_lo = y * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi;

	935

	936 /*

	937 * Extra precision in for adding y*log10_2hi is not strictly needed

	938 * since there is no very large cancellation near x = sqrt(2) or

	939 * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs

	940 * with some parallelism and it reduces the error for many args.

	941 */

	942 w = y2 + val_hi;

	943 val_lo += (y2 - w) + val_hi;

	944 val_hi = w;

	945

	946 return val_lo + val_hi;

	947 }

	948

	949 double log10(double x) {

	950 static const double

	951 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */

	952 ivln10 = 4.34294481903251816668e-01,

	953 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */

	954 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */

	955

	956 static const double zero = 0.0;

	957 static volatile double vzero = 0.0;

	958

	959 double y;

	960 int32_t i, k, hx;

	961 u_int32_t lx;

	962

	963 EXTRACT_WORDS(hx, lx, x);

	964

	965 k = 0;

	966 if (hx < 0x00100000) { /* x < 2*-1022 /

	967 if (((hx & 0x7fffffff) \| lx) == 0)

	968 return -two54 / vzero; /* log(+-0)=-inf */

	969 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */

	970 k -= 54;

	971 x = two54; / subnormal number, scale up x */

	972 GET_HIGH_WORD(hx, x);

	973 GET_LOW_WORD(lx, x);

	974 }

	975 if (hx >= 0x7ff00000) return x + x;

	976 if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */

	977 k += (hx >> 20) - 1023;

	978

	979 i = (k & 0x80000000) >> 31;

	980 hx = (hx & 0x000fffff) \| ((0x3ff - i) << 20);

	981 y = k + i;

	982 SET_HIGH_WORD(x, hx);

	983 SET_LOW_WORD(x, lx);

	984

	985 double z = y * log10_2lo + ivln10 * log(x);

	986 return z + y * log10_2hi;

	987 }

	988

688 } // namespace ieee754	989 } // namespace ieee754

689 } // namespace base	990 } // namespace base

690 } // namespace v8	991 } // namespace v8

OLD	NEW

« no previous file with comments | « src/base/ieee754.h ('k') | src/bootstrapper.cc » ('j') | no next file with comments »