src/base/ieee754.cc - Issue 2071823002: [Math builtins]: Cleanup in ieee754, restoring MSUN version of log2().

Side by Side Diff: src/base/ieee754.cc

Issue 2071823002: [Math builtins]: Cleanup in ieee754, restoring MSUN version of log2(). (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: REBASE. Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm).	1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm).

2 //	2 //

3 // ====================================================	3 // ====================================================

4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.	4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.

5 //	5 //

6 // Developed at SunSoft, a Sun Microsystems, Inc. business.	6 // Developed at SunSoft, a Sun Microsystems, Inc. business.

7 // Permission to use, copy, modify, and distribute this	7 // Permission to use, copy, modify, and distribute this

8 // software is freely granted, provided that this notice	8 // software is freely granted, provided that this notice

9 // is preserved.	9 // is preserved.

10 // ====================================================	10 // ====================================================

(...skipping 856 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
867 s = f / (2.0 + f);	867 s = f / (2.0 + f);

868 z = s * s;	868 z = s * s;

869 R = z * (Lp1 +	869 R = z * (Lp1 +

870 z * (Lp2 + z * (Lp3 + z * (Lp4 + z * (Lp5 + z * (Lp6 + z * Lp7))))));	870 z * (Lp2 + z * (Lp3 + z * (Lp4 + z * (Lp5 + z * (Lp6 + z * Lp7))))));

871 if (k == 0)	871 if (k == 0)

872 return f - (hfsq - s * (hfsq + R));	872 return f - (hfsq - s * (hfsq + R));

873 else	873 else

874 return k * ln2_hi - ((hfsq - (s * (hfsq + R) + (k * ln2_lo + c))) - f);	874 return k * ln2_hi - ((hfsq - (s * (hfsq + R) + (k * ln2_lo + c))) - f);

875 }	875 }

876	876

877 // ES6 draft 09-27-13, section 20.2.2.22.	877 /*

878 // Return the base 2 logarithm of x	878 * k_log1p(f):

879 //	879 * Return log(1+f) - f for 1+f in ~[sqrt(2)/2, sqrt(2)].

880 // fdlibm does not have an explicit log2 function, but fdlibm's pow	880 *

881 // function does implement an accurate log2 function as part of the	881 * The following describes the overall strategy for computing

882 // pow implementation. This extracts the core parts of that as a	882 * logarithms in base e. The argument reduction and adding the final

883 // separate log2 function.	883 * term of the polynomial are done by the caller for increased accuracy

884 //	884 * when different bases are used.

885 // Method:	885 *

886 // Compute log2(x) in two pieces:	886 * Method :

887 // log2(x) = w1 + w2	887 * 1. Argument Reduction: find k and f such that

888 // where w1 has 53-24 = 29 bits of trailing zeroes.	888 * x = 2^k * (1+f),

	889 * where sqrt(2)/2 < 1+f < sqrt(2) .

	890 *

	891 * 2. Approximation of log(1+f).

	892 * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)

	893 * = 2s + 2/3 s3 + 2/5 s5 + .....,

	894 * = 2s + s*R

	895 * We use a special Reme algorithm on [0,0.1716] to generate

	896 * a polynomial of degree 14 to approximate R The maximum error

	897 * of this polynomial approximation is bounded by 2**-58.45. In

	898 * other words,

	899 * 2 4 6 8 10 12 14

	900 * R(z) ~ Lg1s +Lg2s +Lg3s +Lg4s +Lg5s +Lg6s +Lg7*s

	901 * (the values of Lg1 to Lg7 are listed in the program)

	902 * and

	903 * \| 2 14 \| -58.45

	904 * \| Lg1s +...+Lg7s - R(z) \| <= 2

	905 * \| \|

	906 * Note that 2s = f - sf = f - hfsq + shfsq, where hfsq = f*f/2.

	907 * In order to guarantee error in log below 1ulp, we compute log

	908 * by

	909 * log(1+f) = f - s*(f - R) (if f is not too large)

	910 * log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)

	911 *

	912 * 3. Finally, log(x) = k*ln2 + log(1+f).

	913 * = kln2_hi+(f-(hfsq-(s(hfsq+R)+k*ln2_lo)))

	914 * Here ln2 is split into two floating point number:

	915 * ln2_hi + ln2_lo,

	916 * where n*ln2_hi is always exact for \|n\| < 2000.

	917 *

	918 * Special cases:

	919 * log(x) is NaN with signal if x < 0 (including -INF) ;

	920 * log(+INF) is +INF; log(0) is -INF with signal;

	921 * log(NaN) is that NaN with no signal.

	922 *

	923 * Accuracy:

	924 * according to an error analysis, the error is always less than

	925 * 1 ulp (unit in the last place).

	926 *

	927 * Constants:

	928 * The hexadecimal values are the intended ones for the following

	929 * constants. The decimal values may be used, provided that the

	930 * compiler will convert from decimal to binary accurately enough

	931 * to produce the hexadecimal values shown.

	932 */

	933

	934 static const double Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */

	935 Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */

	936 Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */

	937 Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */

	938 Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */

	939 Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */

	940 Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */

	941

	942 /*

	943 * We always inline k_log1p(), since doing so produces a

	944 * substantial performance improvement (~40% on amd64).

	945 */

	946 static inline double k_log1p(double f) {

	947 double hfsq, s, z, R, w, t1, t2;

	948

	949 s = f / (2.0 + f);

	950 z = s * s;

	951 w = z * z;

	952 t1 = w * (Lg2 + w * (Lg4 + w * Lg6));

	953 t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));

	954 R = t2 + t1;

	955 hfsq = 0.5 * f * f;

	956 return s * (hfsq + R);

	957 }

	958

	959 /*

	960 * Return the base 2 logarithm of x. See e_log.c and k_log.h for most

	961 * comments.

	962 *

	963 * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel,

	964 * then does the combining and scaling steps

	965 * log2(x) = (f - 0.5ff + k_log1p(f)) / ln2 + k

	966 * in not-quite-routine extra precision.

	967 */

889 double log2(double x) {	968 double log2(double x) {

890 static const double	969 static const double

891 bp[] = {1.0, 1.5},	970 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */

892 dp_h[] = {0.0, 5.84962487220764160156e-01}, /* 0x3FE2B803, 0x40000000 */	971 ivln2hi = 1.44269504072144627571e+00, /* 0x3ff71547, 0x65200000 */

893 dp_l[] = {0.0, 1.35003920212974897128e-08}, /* 0x3E4CFDEB, 0x43CFD006 */	972 ivln2lo = 1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */

894 zero = 0.0, one = 1.0,	973

895 // Polynomial coefficients for (3/2)(log2(x) - 2s - 2/3*s^3)	974 static const double zero = 0.0;

896 L1 = 5.99999999999994648725e-01, L2 = 4.28571428578550184252e-01,

897 L3 = 3.33333329818377432918e-01, L4 = 2.72728123808534006489e-01,

898 L5 = 2.30660745775561754067e-01, L6 = 2.06975017800338417784e-01,

899 // cp = 2/(3*ln(2)). Note that cp_h + cp_l is cp, but with more accuracy.

900 cp = 9.61796693925975554329e-01, cp_h = 9.61796700954437255859e-01,

901 cp_l = -7.02846165095275826516e-09, two53 = 9007199254740992, /* 2^53 */

902 two54 = 1.80143985094819840000e+16; /* 0x43500000, 0x00000000 */

903

904 static volatile double vzero = 0.0;	975 static volatile double vzero = 0.0;

905 double ax, z_h, z_l, p_h, p_l;	976

906 double t1, t2, r, t, u, v;	977 double f, hfsq, hi, lo, r, val_hi, val_lo, w, y;

907 int32_t j, k, n;	978 int32_t i, k, hx;

908 int32_t ix, hx;

909 u_int32_t lx;	979 u_int32_t lx;

910	980

911 EXTRACT_WORDS(hx, lx, x);	981 EXTRACT_WORDS(hx, lx, x);

912 ix = hx & 0x7fffffff;	982

913	983 k = 0;

914 // Handle special cases.	984 if (hx < 0x00100000) { /* x < 2*-1022 /

915 // log2(+/- 0) = -Infinity	985 if (((hx & 0x7fffffff) \| lx) == 0)

916 if ((ix \| lx) == 0) return -two54 / vzero; /* log(+-0)=-inf */	986 return -two54 / vzero; /* log(+-0)=-inf */

917	987 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */

918 // log(x) = NaN, if x < 0	988 k -= 54;

919 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */	989 x = two54; / subnormal number, scale up x */

920	990 GET_HIGH_WORD(hx, x);

921 // log2(Infinity) = Infinity, log2(NaN) = NaN

922 if (ix >= 0x7ff00000) return x;

923

924 ax = fabs(x);

925

926 double ss, s2, s_h, s_l, t_h, t_l;

927 n = 0;

928

929 /* take care subnormal number */

930 if (ix < 0x00100000) {

931 ax *= two53;

932 n -= 53;

933 GET_HIGH_WORD(ix, ax);

934 }	991 }

935	992 if (hx >= 0x7ff00000) return x + x;

936 n += ((ix) >> 20) - 0x3ff;	993 if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */

937 j = ix & 0x000fffff;	994 k += (hx >> 20) - 1023;

938	995 hx &= 0x000fffff;

939 /* determine interval */	996 i = (hx + 0x95f64) & 0x100000;

940 ix = j \| 0x3ff00000; /* normalize ix */	997 SET_HIGH_WORD(x, hx \| (i ^ 0x3ff00000)); /* normalize x or x/2 */

941 if (j <= 0x3988E) {	998 k += (i >> 20);

942 k = 0; /* \|x\|<sqrt(3/2) */	999 y = static_cast<double>(k);

943 } else if (j < 0xBB67A) {	1000 f = x - 1.0;

944 k = 1; /* \|x\|<sqrt(3) */	1001 hfsq = 0.5 * f * f;

945 } else {	1002 r = k_log1p(f);

946 k = 0;	1003

947 n += 1;	1004 /*

948 ix -= 0x00100000;	1005 * f-hfsq must (for args near 1) be evaluated in extra precision

949 }	1006 * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2).

950 SET_HIGH_WORD(ax, ix);	1007 * This is fairly efficient since f-hfsq only depends on f, so can

951	1008 * be evaluated in parallel with R. Not combining hfsq with R also

952 /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */	1009 * keeps R small (though not as small as a true `lo' term would be),

953 u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */	1010 * so that extra precision is not needed for terms involving R.

954 v = one / (ax + bp[k]);	1011 *

955 ss = u * v;	1012 * Compiler bugs involving extra precision used to break Dekker's

956 s_h = ss;	1013 * theorem for spitting f-hfsq as hi+lo, unless double_t was used

957 SET_LOW_WORD(s_h, 0);	1014 * or the multi-precision calculations were avoided when double_t

958 /* t_h=ax+bp[k] High */	1015 * has extra precision. These problems are now automatically

959 t_h = zero;	1016 * avoided as a side effect of the optimization of combining the

960 SET_HIGH_WORD(t_h, ((ix >> 1) \| 0x20000000) + 0x00080000 + (k << 18));	1017 * Dekker splitting step with the clear-low-bits step.

961 t_l = ax - (t_h - bp[k]);	1018 *

962 s_l = v * ((u - s_h * t_h) - s_h * t_l);	1019 * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra

963 /* compute log(ax) */	1020 * precision to avoid a very large cancellation when x is very near

964 s2 = ss * ss;	1021 * these values. Unlike the above cancellations, this problem is

965 r = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6)))));	1022 * specific to base 2. It is strange that adding +-1 is so much

966 r += s_l * (s_h + ss);	1023 * harder than adding +-ln2 or +-log10_2.

967 s2 = s_h * s_h;	1024 *

968 t_h = 3.0 + s2 + r;	1025 * This uses Dekker's theorem to normalize y+val_hi, so the

969 SET_LOW_WORD(t_h, 0);	1026 * compiler bugs are back in some configurations, sigh. And I

970 t_l = r - ((t_h - 3.0) - s2);	1027 * don't want to used double_t to avoid them, since that gives a

971 /* u+v = ss(1+...) /	1028 * pessimization and the support for avoiding the pessimization

972 u = s_h * t_h;	1029 * is not yet available.

973 v = s_l * t_h + t_l * ss;	1030 *

974 /* 2/(3log2)(ss+...) /	1031 * The multi-precision calculations for the multiplications are

975 p_h = u + v;	1032 * routine.

976 SET_LOW_WORD(p_h, 0);	1033 */

977 p_l = v - (p_h - u);	1034 hi = f - hfsq;

978 z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3log2) /	1035 SET_LOW_WORD(hi, 0);

979 z_l = cp_l * p_h + p_l * cp + dp_l[k];	1036 lo = (f - hi) - hfsq + r;

980 /* log2(ax) = (ss+..)2/(3log2) = n + dp_h + z_h + z_l */	1037 val_hi = hi * ivln2hi;

981 t = static_cast<double>(n);	1038 val_lo = (lo + hi) * ivln2lo + lo * ivln2hi;

982 t1 = (((z_h + z_l) + dp_h[k]) + t);	1039

983 SET_LOW_WORD(t1, 0);	1040 /* spadd(val_hi, val_lo, y), except for not using double_t: */

984 t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);	1041 w = y + val_hi;

985	1042 val_lo += (y - w) + val_hi;

986 // t1 + t2 = log2(ax), sum up because we do not care about extra precision.	1043 val_hi = w;

987 return t1 + t2;	1044

	1045 return val_lo + val_hi;

988 }	1046 }

989	1047

	1048 /*

	1049 * Return the base 10 logarithm of x

	1050 *

	1051 * Method :

	1052 * Let log10_2hi = leading 40 bits of log10(2) and

	1053 * log10_2lo = log10(2) - log10_2hi,

	1054 * ivln10 = 1/log(10) rounded.

	1055 * Then

	1056 * n = ilogb(x),

	1057 * if(n<0) n = n+1;

	1058 * x = scalbn(x,-n);

	1059 * log10(x) := nlog10_2hi + (nlog10_2lo + ivln10*log(x))

	1060 *

	1061 * Note 1:

	1062 * To guarantee log10(10n)=n, where 10n is normal, the rounding

	1063 * mode must set to Round-to-Nearest.

	1064 * Note 2:

	1065 * [1/log(10)] rounded to 53 bits has error .198 ulps;

	1066 * log10 is monotonic at all binary break points.

	1067 *

	1068 * Special cases:

	1069 * log10(x) is NaN if x < 0;

	1070 * log10(+INF) is +INF; log10(0) is -INF;

	1071 * log10(NaN) is that NaN;

	1072 * log10(10**N) = N for N=0,1,...,22.

	1073 */

990 double log10(double x) {	1074 double log10(double x) {

991 static const double	1075 static const double

992 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */	1076 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */

993 ivln10 = 4.34294481903251816668e-01,	1077 ivln10 = 4.34294481903251816668e-01,

994 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */	1078 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */

995 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */	1079 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */

996	1080

997 static const double zero = 0.0;	1081 static const double zero = 0.0;

998 static volatile double vzero = 0.0;	1082 static volatile double vzero = 0.0;

999	1083

(...skipping 322 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1322 w = t + t; /* t+t is exact */	1406 w = t + t; /* t+t is exact */

1323 r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3t /	1407 r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3t /

1324 t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */	1408 t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */

1325	1409

1326 return (t);	1410 return (t);

1327 }	1411 }

1328	1412

1329 } // namespace ieee754	1413 } // namespace ieee754

1330 } // namespace base	1414 } // namespace base

1331 } // namespace v8	1415 } // namespace v8

OLD	NEW

« no previous file with comments | « no previous file | test/unittests/base/ieee754-unittest.cc » ('j') | no next file with comments »