Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(97)

Side by Side Diff: src/base/ieee754.cc

Issue 2071823002: [Math builtins]: Cleanup in ieee754, restoring MSUN version of log2(). (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | test/unittests/base/ieee754-unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm). 1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm).
2 // 2 //
3 // ==================================================== 3 // ====================================================
4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5 // 5 //
6 // Developed at SunSoft, a Sun Microsystems, Inc. business. 6 // Developed at SunSoft, a Sun Microsystems, Inc. business.
7 // Permission to use, copy, modify, and distribute this 7 // Permission to use, copy, modify, and distribute this
8 // software is freely granted, provided that this notice 8 // software is freely granted, provided that this notice
9 // is preserved. 9 // is preserved.
10 // ==================================================== 10 // ====================================================
(...skipping 895 matching lines...) Expand 10 before | Expand all | Expand 10 after
906 s = f / (2.0 + f); 906 s = f / (2.0 + f);
907 z = s * s; 907 z = s * s;
908 w = z * z; 908 w = z * z;
909 t1 = w * (Lg2 + w * (Lg4 + w * Lg6)); 909 t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
910 t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7))); 910 t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
911 R = t2 + t1; 911 R = t2 + t1;
912 hfsq = 0.5 * f * f; 912 hfsq = 0.5 * f * f;
913 return s * (hfsq + R); 913 return s * (hfsq + R);
914 } 914 }
915 915
916 // ES6 draft 09-27-13, section 20.2.2.22. 916 /*
917 // Return the base 2 logarithm of x 917 * Return the base 2 logarithm of x. See e_log.c and k_log.h for most
918 // 918 * comments.
919 // fdlibm does not have an explicit log2 function, but fdlibm's pow 919 *
920 // function does implement an accurate log2 function as part of the 920 * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel,
921 // pow implementation. This extracts the core parts of that as a 921 * then does the combining and scaling steps
922 // separate log2 function. 922 * log2(x) = (f - 0.5*f*f + k_log1p(f)) / ln2 + k
923 // 923 * in not-quite-routine extra precision.
924 // Method: 924 */
925 // Compute log2(x) in two pieces:
926 // log2(x) = w1 + w2
927 // where w1 has 53-24 = 29 bits of trailing zeroes.
928 double log2(double x) { 925 double log2(double x) {
929 static const double 926 static const double
930 bp[] = {1.0, 1.5}, 927 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
931 dp_h[] = {0.0, 5.84962487220764160156e-01}, /* 0x3FE2B803, 0x40000000 */ 928 ivln2hi = 1.44269504072144627571e+00, /* 0x3ff71547, 0x65200000 */
932 dp_l[] = {0.0, 1.35003920212974897128e-08}, /* 0x3E4CFDEB, 0x43CFD006 */ 929 ivln2lo = 1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */
933 zero = 0.0, one = 1.0,
934 // Polynomial coefficients for (3/2)*(log2(x) - 2*s - 2/3*s^3)
935 L1 = 5.99999999999994648725e-01, L2 = 4.28571428578550184252e-01,
936 L3 = 3.33333329818377432918e-01, L4 = 2.72728123808534006489e-01,
937 L5 = 2.30660745775561754067e-01, L6 = 2.06975017800338417784e-01,
938 // cp = 2/(3*ln(2)). Note that cp_h + cp_l is cp, but with more accuracy.
939 cp = 9.61796693925975554329e-01, cp_h = 9.61796700954437255859e-01,
940 cp_l = -7.02846165095275826516e-09, two53 = 9007199254740992, /* 2^53 */
941 two54 = 1.80143985094819840000e+16; /* 0x43500000, 0x00000000 */
942
943 static volatile double vzero = 0.0;
944 double ax, z_h, z_l, p_h, p_l;
945 double t1, t2, r, t, u, v;
946 int32_t j, k, n;
947 int32_t ix, hx;
948 u_int32_t lx;
949
950 EXTRACT_WORDS(hx, lx, x);
951 ix = hx & 0x7fffffff;
952
953 // Handle special cases.
954 // log2(+/- 0) = -Infinity
955 if ((ix | lx) == 0) return -two54 / vzero; /* log(+-0)=-inf */
956
957 // log(x) = NaN, if x < 0
958 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
959
960 // log2(Infinity) = Infinity, log2(NaN) = NaN
961 if (ix >= 0x7ff00000) return x;
962
963 ax = fabs(x);
964
965 double ss, s2, s_h, s_l, t_h, t_l;
966 n = 0;
967
968 /* take care subnormal number */
969 if (ix < 0x00100000) {
970 ax *= two53;
971 n -= 53;
972 GET_HIGH_WORD(ix, ax);
973 }
974
975 n += ((ix) >> 20) - 0x3ff;
976 j = ix & 0x000fffff;
977
978 /* determine interval */
979 ix = j | 0x3ff00000; /* normalize ix */
980 if (j <= 0x3988E) {
981 k = 0; /* |x|<sqrt(3/2) */
982 } else if (j < 0xBB67A) {
983 k = 1; /* |x|<sqrt(3) */
984 } else {
985 k = 0;
986 n += 1;
987 ix -= 0x00100000;
988 }
989 SET_HIGH_WORD(ax, ix);
990
991 /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
992 u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */
993 v = one / (ax + bp[k]);
994 ss = u * v;
995 s_h = ss;
996 SET_LOW_WORD(s_h, 0);
997 /* t_h=ax+bp[k] High */
998 t_h = zero;
999 SET_HIGH_WORD(t_h, ((ix >> 1) | 0x20000000) + 0x00080000 + (k << 18));
1000 t_l = ax - (t_h - bp[k]);
1001 s_l = v * ((u - s_h * t_h) - s_h * t_l);
1002 /* compute log(ax) */
1003 s2 = ss * ss;
1004 r = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6)))));
1005 r += s_l * (s_h + ss);
1006 s2 = s_h * s_h;
1007 t_h = 3.0 + s2 + r;
1008 SET_LOW_WORD(t_h, 0);
1009 t_l = r - ((t_h - 3.0) - s2);
1010 /* u+v = ss*(1+...) */
1011 u = s_h * t_h;
1012 v = s_l * t_h + t_l * ss;
1013 /* 2/(3log2)*(ss+...) */
1014 p_h = u + v;
1015 SET_LOW_WORD(p_h, 0);
1016 p_l = v - (p_h - u);
1017 z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3*log2) */
1018 z_l = cp_l * p_h + p_l * cp + dp_l[k];
1019 /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */
1020 t = static_cast<double>(n);
1021 t1 = (((z_h + z_l) + dp_h[k]) + t);
1022 SET_LOW_WORD(t1, 0);
1023 t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
1024
1025 // t1 + t2 = log2(ax), sum up because we do not care about extra precision.
1026 return t1 + t2;
1027 }
1028
1029 /*
1030 * Return the base 10 logarithm of x. See e_log.c and k_log.h for most
1031 * comments.
1032 *
1033 * log10(x) = (f - 0.5*f*f + k_log1p(f)) / ln10 + k * log10(2)
1034 * in not-quite-routine extra precision.
1035 */
1036 double log10Old(double x) {
1037 static const double
1038 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
1039 ivln10hi = 4.34294481878168880939e-01, /* 0x3fdbcb7b, 0x15200000 */
1040 ivln10lo = 2.50829467116452752298e-11, /* 0x3dbb9438, 0xca9aadd5 */
1041 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */
1042 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */
1043 930
1044 static const double zero = 0.0; 931 static const double zero = 0.0;
1045 static volatile double vzero = 0.0; 932 static volatile double vzero = 0.0;
1046 933
1047 double f, hfsq, hi, lo, r, val_hi, val_lo, w, y, y2; 934 double f, hfsq, hi, lo, r, val_hi, val_lo, w, y;
1048 int32_t i, k, hx; 935 int32_t i, k, hx;
1049 u_int32_t lx; 936 u_int32_t lx;
1050 937
1051 EXTRACT_WORDS(hx, lx, x); 938 EXTRACT_WORDS(hx, lx, x);
1052 939
1053 k = 0; 940 k = 0;
1054 if (hx < 0x00100000) { /* x < 2**-1022 */ 941 if (hx < 0x00100000) { /* x < 2**-1022 */
1055 if (((hx & 0x7fffffff) | lx) == 0) 942 if (((hx & 0x7fffffff) | lx) == 0)
1056 return -two54 / vzero; /* log(+-0)=-inf */ 943 return -two54 / vzero; /* log(+-0)=-inf */
1057 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */ 944 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
1058 k -= 54; 945 k -= 54;
1059 x *= two54; /* subnormal number, scale up x */ 946 x *= two54; /* subnormal number, scale up x */
1060 GET_HIGH_WORD(hx, x); 947 GET_HIGH_WORD(hx, x);
1061 } 948 }
1062 if (hx >= 0x7ff00000) return x + x; 949 if (hx >= 0x7ff00000) return x + x;
1063 if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */ 950 if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */
1064 k += (hx >> 20) - 1023; 951 k += (hx >> 20) - 1023;
1065 hx &= 0x000fffff; 952 hx &= 0x000fffff;
1066 i = (hx + 0x95f64) & 0x100000; 953 i = (hx + 0x95f64) & 0x100000;
1067 SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */ 954 SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */
1068 k += (i >> 20); 955 k += (i >> 20);
1069 y = static_cast<double>(k); 956 y = static_cast<double>(k);
1070 f = x - 1.0; 957 f = x - 1.0;
1071 hfsq = 0.5 * f * f; 958 hfsq = 0.5 * f * f;
1072 r = k_log1p(f); 959 r = k_log1p(f);
1073 960
1074 /* See e_log2.c for most details. */ 961 /*
962 * f-hfsq must (for args near 1) be evaluated in extra precision
963 * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2).
964 * This is fairly efficient since f-hfsq only depends on f, so can
965 * be evaluated in parallel with R. Not combining hfsq with R also
966 * keeps R small (though not as small as a true `lo' term would be),
967 * so that extra precision is not needed for terms involving R.
968 *
969 * Compiler bugs involving extra precision used to break Dekker's
970 * theorem for spitting f-hfsq as hi+lo, unless double_t was used
971 * or the multi-precision calculations were avoided when double_t
972 * has extra precision. These problems are now automatically
973 * avoided as a side effect of the optimization of combining the
974 * Dekker splitting step with the clear-low-bits step.
975 *
976 * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra
977 * precision to avoid a very large cancellation when x is very near
978 * these values. Unlike the above cancellations, this problem is
979 * specific to base 2. It is strange that adding +-1 is so much
980 * harder than adding +-ln2 or +-log10_2.
981 *
982 * This uses Dekker's theorem to normalize y+val_hi, so the
983 * compiler bugs are back in some configurations, sigh. And I
984 * don't want to used double_t to avoid them, since that gives a
985 * pessimization and the support for avoiding the pessimization
986 * is not yet available.
987 *
988 * The multi-precision calculations for the multiplications are
989 * routine.
990 */
1075 hi = f - hfsq; 991 hi = f - hfsq;
1076 SET_LOW_WORD(hi, 0); 992 SET_LOW_WORD(hi, 0);
1077 lo = (f - hi) - hfsq + r; 993 lo = (f - hi) - hfsq + r;
1078 val_hi = hi * ivln10hi; 994 val_hi = hi * ivln2hi;
1079 y2 = y * log10_2hi; 995 val_lo = (lo + hi) * ivln2lo + lo * ivln2hi;
1080 val_lo = y * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi;
1081 996
1082 /* 997 /* spadd(val_hi, val_lo, y), except for not using double_t: */
1083 * Extra precision in for adding y*log10_2hi is not strictly needed 998 w = y + val_hi;
1084 * since there is no very large cancellation near x = sqrt(2) or 999 val_lo += (y - w) + val_hi;
1085 * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs
1086 * with some parallelism and it reduces the error for many args.
1087 */
1088 w = y2 + val_hi;
1089 val_lo += (y2 - w) + val_hi;
1090 val_hi = w; 1000 val_hi = w;
1091 1001
1092 return val_lo + val_hi; 1002 return val_lo + val_hi;
1093 } 1003 }
1094 1004
1005 // ES6 draft 09-27-13, section 20.2.2.21.
Benedikt Meurer 2016/06/16 16:47:34 Nit: Remove the ES6 hint here. And use C-style /*
1006 // Return the base 10 logarithm of x
1007 //
1008 // Method :
1009 // Let log10_2hi = leading 40 bits of log10(2) and
1010 // log10_2lo = log10(2) - log10_2hi,
1011 // ivln10 = 1/log(10) rounded.
1012 // Then
1013 // n = ilogb(x),
1014 // if(n<0) n = n+1;
1015 // x = scalbn(x,-n);
1016 // log10(x) := n*log10_2hi + (n*log10_2lo + ivln10*log(x))
1017 //
1018 // Note 1:
1019 // To guarantee log10(10**n)=n, where 10**n is normal, the rounding
1020 // mode must set to Round-to-Nearest.
1021 // Note 2:
1022 // [1/log(10)] rounded to 53 bits has error .198 ulps;
1023 // log10 is monotonic at all binary break points.
1024 //
1025 // Special cases:
1026 // log10(x) is NaN if x < 0;
1027 // log10(+INF) is +INF; log10(0) is -INF;
1028 // log10(NaN) is that NaN;
1029 // log10(10**N) = N for N=0,1,...,22.
1095 double log10(double x) { 1030 double log10(double x) {
1096 static const double 1031 static const double
1097 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ 1032 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
1098 ivln10 = 4.34294481903251816668e-01, 1033 ivln10 = 4.34294481903251816668e-01,
1099 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */ 1034 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */
1100 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */ 1035 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */
1101 1036
1102 static const double zero = 0.0; 1037 static const double zero = 0.0;
1103 static volatile double vzero = 0.0; 1038 static volatile double vzero = 0.0;
1104 1039
(...skipping 23 matching lines...) Expand all
1128 SET_HIGH_WORD(x, hx); 1063 SET_HIGH_WORD(x, hx);
1129 SET_LOW_WORD(x, lx); 1064 SET_LOW_WORD(x, lx);
1130 1065
1131 double z = y * log10_2lo + ivln10 * log(x); 1066 double z = y * log10_2lo + ivln10 * log(x);
1132 return z + y * log10_2hi; 1067 return z + y * log10_2hi;
1133 } 1068 }
1134 1069
1135 } // namespace ieee754 1070 } // namespace ieee754
1136 } // namespace base 1071 } // namespace base
1137 } // namespace v8 1072 } // namespace v8
OLDNEW
« no previous file with comments | « no previous file | test/unittests/base/ieee754-unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698