src/base/ieee754.cc - Issue 2071823002: [Math builtins]: Cleanup in ieee754, restoring MSUN version of log2().

Side by Side Diff: src/base/ieee754.cc

Issue 2071823002: [Math builtins]: Cleanup in ieee754, restoring MSUN version of log2(). (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm).	1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm).

2 //	2 //

3 // ====================================================	3 // ====================================================

4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.	4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.

5 //	5 //

6 // Developed at SunSoft, a Sun Microsystems, Inc. business.	6 // Developed at SunSoft, a Sun Microsystems, Inc. business.

7 // Permission to use, copy, modify, and distribute this	7 // Permission to use, copy, modify, and distribute this

8 // software is freely granted, provided that this notice	8 // software is freely granted, provided that this notice

9 // is preserved.	9 // is preserved.

10 // ====================================================	10 // ====================================================

(...skipping 895 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
906 s = f / (2.0 + f);	906 s = f / (2.0 + f);

907 z = s * s;	907 z = s * s;

908 w = z * z;	908 w = z * z;

909 t1 = w * (Lg2 + w * (Lg4 + w * Lg6));	909 t1 = w * (Lg2 + w * (Lg4 + w * Lg6));

910 t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));	910 t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));

911 R = t2 + t1;	911 R = t2 + t1;

912 hfsq = 0.5 * f * f;	912 hfsq = 0.5 * f * f;

913 return s * (hfsq + R);	913 return s * (hfsq + R);

914 }	914 }

915	915

916 // ES6 draft 09-27-13, section 20.2.2.22.	916 /*

917 // Return the base 2 logarithm of x	917 * Return the base 2 logarithm of x. See e_log.c and k_log.h for most

918 //	918 * comments.

919 // fdlibm does not have an explicit log2 function, but fdlibm's pow	919 *

920 // function does implement an accurate log2 function as part of the	920 * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel,

921 // pow implementation. This extracts the core parts of that as a	921 * then does the combining and scaling steps

922 // separate log2 function.	922 * log2(x) = (f - 0.5ff + k_log1p(f)) / ln2 + k

923 //	923 * in not-quite-routine extra precision.

924 // Method:	924 */

925 // Compute log2(x) in two pieces:

926 // log2(x) = w1 + w2

927 // where w1 has 53-24 = 29 bits of trailing zeroes.

928 double log2(double x) {	925 double log2(double x) {

929 static const double	926 static const double

930 bp[] = {1.0, 1.5},	927 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */

931 dp_h[] = {0.0, 5.84962487220764160156e-01}, /* 0x3FE2B803, 0x40000000 */	928 ivln2hi = 1.44269504072144627571e+00, /* 0x3ff71547, 0x65200000 */

932 dp_l[] = {0.0, 1.35003920212974897128e-08}, /* 0x3E4CFDEB, 0x43CFD006 */	929 ivln2lo = 1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */

933 zero = 0.0, one = 1.0,

934 // Polynomial coefficients for (3/2)(log2(x) - 2s - 2/3*s^3)

935 L1 = 5.99999999999994648725e-01, L2 = 4.28571428578550184252e-01,

936 L3 = 3.33333329818377432918e-01, L4 = 2.72728123808534006489e-01,

937 L5 = 2.30660745775561754067e-01, L6 = 2.06975017800338417784e-01,

938 // cp = 2/(3*ln(2)). Note that cp_h + cp_l is cp, but with more accuracy.

939 cp = 9.61796693925975554329e-01, cp_h = 9.61796700954437255859e-01,

940 cp_l = -7.02846165095275826516e-09, two53 = 9007199254740992, /* 2^53 */

941 two54 = 1.80143985094819840000e+16; /* 0x43500000, 0x00000000 */

942

943 static volatile double vzero = 0.0;

944 double ax, z_h, z_l, p_h, p_l;

945 double t1, t2, r, t, u, v;

946 int32_t j, k, n;

947 int32_t ix, hx;

948 u_int32_t lx;

949

950 EXTRACT_WORDS(hx, lx, x);

951 ix = hx & 0x7fffffff;

952

953 // Handle special cases.

954 // log2(+/- 0) = -Infinity

955 if ((ix \| lx) == 0) return -two54 / vzero; /* log(+-0)=-inf */

956

957 // log(x) = NaN, if x < 0

958 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */

959

960 // log2(Infinity) = Infinity, log2(NaN) = NaN

961 if (ix >= 0x7ff00000) return x;

962

963 ax = fabs(x);

964

965 double ss, s2, s_h, s_l, t_h, t_l;

966 n = 0;

967

968 /* take care subnormal number */

969 if (ix < 0x00100000) {

970 ax *= two53;

971 n -= 53;

972 GET_HIGH_WORD(ix, ax);

973 }

974

975 n += ((ix) >> 20) - 0x3ff;

976 j = ix & 0x000fffff;

977

978 /* determine interval */

979 ix = j \| 0x3ff00000; /* normalize ix */

980 if (j <= 0x3988E) {

981 k = 0; /* \|x\|<sqrt(3/2) */

982 } else if (j < 0xBB67A) {

983 k = 1; /* \|x\|<sqrt(3) */

984 } else {

985 k = 0;

986 n += 1;

987 ix -= 0x00100000;

988 }

989 SET_HIGH_WORD(ax, ix);

990

991 /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */

992 u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */

993 v = one / (ax + bp[k]);

994 ss = u * v;

995 s_h = ss;

996 SET_LOW_WORD(s_h, 0);

997 /* t_h=ax+bp[k] High */

998 t_h = zero;

999 SET_HIGH_WORD(t_h, ((ix >> 1) \| 0x20000000) + 0x00080000 + (k << 18));

1000 t_l = ax - (t_h - bp[k]);

1001 s_l = v * ((u - s_h * t_h) - s_h * t_l);

1002 /* compute log(ax) */

1003 s2 = ss * ss;

1004 r = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6)))));

1005 r += s_l * (s_h + ss);

1006 s2 = s_h * s_h;

1007 t_h = 3.0 + s2 + r;

1008 SET_LOW_WORD(t_h, 0);

1009 t_l = r - ((t_h - 3.0) - s2);

1010 /* u+v = ss(1+...) /

1011 u = s_h * t_h;

1012 v = s_l * t_h + t_l * ss;

1013 /* 2/(3log2)(ss+...) /

1014 p_h = u + v;

1015 SET_LOW_WORD(p_h, 0);

1016 p_l = v - (p_h - u);

1017 z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3log2) /

1018 z_l = cp_l * p_h + p_l * cp + dp_l[k];

1019 /* log2(ax) = (ss+..)2/(3log2) = n + dp_h + z_h + z_l */

1020 t = static_cast<double>(n);

1021 t1 = (((z_h + z_l) + dp_h[k]) + t);

1022 SET_LOW_WORD(t1, 0);

1023 t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);

1024

1025 // t1 + t2 = log2(ax), sum up because we do not care about extra precision.

1026 return t1 + t2;

1027 }

1028

1029 /*

1030 * Return the base 10 logarithm of x. See e_log.c and k_log.h for most

1031 * comments.

1032 *

1033 * log10(x) = (f - 0.5ff + k_log1p(f)) / ln10 + k * log10(2)

1034 * in not-quite-routine extra precision.

1035 */

1036 double log10Old(double x) {

1037 static const double

1038 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */

1039 ivln10hi = 4.34294481878168880939e-01, /* 0x3fdbcb7b, 0x15200000 */

1040 ivln10lo = 2.50829467116452752298e-11, /* 0x3dbb9438, 0xca9aadd5 */

1041 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */

1042 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */

1043	930

1044 static const double zero = 0.0;	931 static const double zero = 0.0;

1045 static volatile double vzero = 0.0;	932 static volatile double vzero = 0.0;

1046	933

1047 double f, hfsq, hi, lo, r, val_hi, val_lo, w, y, y2;	934 double f, hfsq, hi, lo, r, val_hi, val_lo, w, y;

1048 int32_t i, k, hx;	935 int32_t i, k, hx;

1049 u_int32_t lx;	936 u_int32_t lx;

1050	937

1051 EXTRACT_WORDS(hx, lx, x);	938 EXTRACT_WORDS(hx, lx, x);

1052	939

1053 k = 0;	940 k = 0;

1054 if (hx < 0x00100000) { /* x < 2*-1022 /	941 if (hx < 0x00100000) { /* x < 2*-1022 /

1055 if (((hx & 0x7fffffff) \| lx) == 0)	942 if (((hx & 0x7fffffff) \| lx) == 0)

1056 return -two54 / vzero; /* log(+-0)=-inf */	943 return -two54 / vzero; /* log(+-0)=-inf */

1057 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */	944 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */

1058 k -= 54;	945 k -= 54;

1059 x = two54; / subnormal number, scale up x */	946 x = two54; / subnormal number, scale up x */

1060 GET_HIGH_WORD(hx, x);	947 GET_HIGH_WORD(hx, x);

1061 }	948 }

1062 if (hx >= 0x7ff00000) return x + x;	949 if (hx >= 0x7ff00000) return x + x;

1063 if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */	950 if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */

1064 k += (hx >> 20) - 1023;	951 k += (hx >> 20) - 1023;

1065 hx &= 0x000fffff;	952 hx &= 0x000fffff;

1066 i = (hx + 0x95f64) & 0x100000;	953 i = (hx + 0x95f64) & 0x100000;

1067 SET_HIGH_WORD(x, hx \| (i ^ 0x3ff00000)); /* normalize x or x/2 */	954 SET_HIGH_WORD(x, hx \| (i ^ 0x3ff00000)); /* normalize x or x/2 */

1068 k += (i >> 20);	955 k += (i >> 20);

1069 y = static_cast<double>(k);	956 y = static_cast<double>(k);

1070 f = x - 1.0;	957 f = x - 1.0;

1071 hfsq = 0.5 * f * f;	958 hfsq = 0.5 * f * f;

1072 r = k_log1p(f);	959 r = k_log1p(f);

1073	960

1074 /* See e_log2.c for most details. */	961 /*

	962 * f-hfsq must (for args near 1) be evaluated in extra precision

	963 * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2).

	964 * This is fairly efficient since f-hfsq only depends on f, so can

	965 * be evaluated in parallel with R. Not combining hfsq with R also

	966 * keeps R small (though not as small as a true `lo' term would be),

	967 * so that extra precision is not needed for terms involving R.

	968 *

	969 * Compiler bugs involving extra precision used to break Dekker's

	970 * theorem for spitting f-hfsq as hi+lo, unless double_t was used

	971 * or the multi-precision calculations were avoided when double_t

	972 * has extra precision. These problems are now automatically

	973 * avoided as a side effect of the optimization of combining the

	974 * Dekker splitting step with the clear-low-bits step.

	975 *

	976 * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra

	977 * precision to avoid a very large cancellation when x is very near

	978 * these values. Unlike the above cancellations, this problem is

	979 * specific to base 2. It is strange that adding +-1 is so much

	980 * harder than adding +-ln2 or +-log10_2.

	981 *

	982 * This uses Dekker's theorem to normalize y+val_hi, so the

	983 * compiler bugs are back in some configurations, sigh. And I

	984 * don't want to used double_t to avoid them, since that gives a

	985 * pessimization and the support for avoiding the pessimization

	986 * is not yet available.

	987 *

	988 * The multi-precision calculations for the multiplications are

	989 * routine.

	990 */

1075 hi = f - hfsq;	991 hi = f - hfsq;

1076 SET_LOW_WORD(hi, 0);	992 SET_LOW_WORD(hi, 0);

1077 lo = (f - hi) - hfsq + r;	993 lo = (f - hi) - hfsq + r;

1078 val_hi = hi * ivln10hi;	994 val_hi = hi * ivln2hi;

1079 y2 = y * log10_2hi;	995 val_lo = (lo + hi) * ivln2lo + lo * ivln2hi;

1080 val_lo = y * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi;

1081	996

1082 /*	997 /* spadd(val_hi, val_lo, y), except for not using double_t: */

1083 * Extra precision in for adding y*log10_2hi is not strictly needed	998 w = y + val_hi;

1084 * since there is no very large cancellation near x = sqrt(2) or	999 val_lo += (y - w) + val_hi;

1085 * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs

1086 * with some parallelism and it reduces the error for many args.

1087 */

1088 w = y2 + val_hi;

1089 val_lo += (y2 - w) + val_hi;

1090 val_hi = w;	1000 val_hi = w;

1091	1001

1092 return val_lo + val_hi;	1002 return val_lo + val_hi;

1093 }	1003 }

1094	1004

	1005 // ES6 draft 09-27-13, section 20.2.2.21.
	Benedikt Meurer 2016/06/16 16:47:34 Nit: Remove the ES6 hint here. And use C-style /* Nit: Remove the ES6 hint here. And use C-style /* */ commentary for consistency with the rest of the file.
	1006 // Return the base 10 logarithm of x

	1007 //

	1008 // Method :

	1009 // Let log10_2hi = leading 40 bits of log10(2) and

	1010 // log10_2lo = log10(2) - log10_2hi,

	1011 // ivln10 = 1/log(10) rounded.

	1012 // Then

	1013 // n = ilogb(x),

	1014 // if(n<0) n = n+1;

	1015 // x = scalbn(x,-n);

	1016 // log10(x) := nlog10_2hi + (nlog10_2lo + ivln10*log(x))

	1017 //

	1018 // Note 1:

	1019 // To guarantee log10(10n)=n, where 10n is normal, the rounding

	1020 // mode must set to Round-to-Nearest.

	1021 // Note 2:

	1022 // [1/log(10)] rounded to 53 bits has error .198 ulps;

	1023 // log10 is monotonic at all binary break points.

	1024 //

	1025 // Special cases:

	1026 // log10(x) is NaN if x < 0;

	1027 // log10(+INF) is +INF; log10(0) is -INF;

	1028 // log10(NaN) is that NaN;

	1029 // log10(10**N) = N for N=0,1,...,22.

1095 double log10(double x) {	1030 double log10(double x) {

1096 static const double	1031 static const double

1097 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */	1032 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */

1098 ivln10 = 4.34294481903251816668e-01,	1033 ivln10 = 4.34294481903251816668e-01,

1099 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */	1034 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */

1100 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */	1035 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */

1101	1036

1102 static const double zero = 0.0;	1037 static const double zero = 0.0;

1103 static volatile double vzero = 0.0;	1038 static volatile double vzero = 0.0;

1104	1039

(...skipping 23 matching lines...) Expand all Loading...
1128 SET_HIGH_WORD(x, hx);	1063 SET_HIGH_WORD(x, hx);

1129 SET_LOW_WORD(x, lx);	1064 SET_LOW_WORD(x, lx);

1130	1065

1131 double z = y * log10_2lo + ivln10 * log(x);	1066 double z = y * log10_2lo + ivln10 * log(x);

1132 return z + y * log10_2hi;	1067 return z + y * log10_2hi;

1133 }	1068 }

1134	1069

1135 } // namespace ieee754	1070 } // namespace ieee754

1136 } // namespace base	1071 } // namespace base

1137 } // namespace v8	1072 } // namespace v8

OLD	NEW

« no previous file with comments | « no previous file | test/unittests/base/ieee754-unittest.cc » ('j') | no next file with comments »