Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm). | 1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm). |
| 2 // | 2 // |
| 3 // ==================================================== | 3 // ==================================================== |
| 4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. | 4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. |
| 5 // | 5 // |
| 6 // Developed at SunSoft, a Sun Microsystems, Inc. business. | 6 // Developed at SunSoft, a Sun Microsystems, Inc. business. |
| 7 // Permission to use, copy, modify, and distribute this | 7 // Permission to use, copy, modify, and distribute this |
| 8 // software is freely granted, provided that this notice | 8 // software is freely granted, provided that this notice |
| 9 // is preserved. | 9 // is preserved. |
| 10 // ==================================================== | 10 // ==================================================== |
| (...skipping 895 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 906 s = f / (2.0 + f); | 906 s = f / (2.0 + f); |
| 907 z = s * s; | 907 z = s * s; |
| 908 w = z * z; | 908 w = z * z; |
| 909 t1 = w * (Lg2 + w * (Lg4 + w * Lg6)); | 909 t1 = w * (Lg2 + w * (Lg4 + w * Lg6)); |
| 910 t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7))); | 910 t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7))); |
| 911 R = t2 + t1; | 911 R = t2 + t1; |
| 912 hfsq = 0.5 * f * f; | 912 hfsq = 0.5 * f * f; |
| 913 return s * (hfsq + R); | 913 return s * (hfsq + R); |
| 914 } | 914 } |
| 915 | 915 |
| 916 // ES6 draft 09-27-13, section 20.2.2.22. | 916 /* |
| 917 // Return the base 2 logarithm of x | 917 * Return the base 2 logarithm of x. See e_log.c and k_log.h for most |
| 918 // | 918 * comments. |
| 919 // fdlibm does not have an explicit log2 function, but fdlibm's pow | 919 * |
| 920 // function does implement an accurate log2 function as part of the | 920 * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel, |
| 921 // pow implementation. This extracts the core parts of that as a | 921 * then does the combining and scaling steps |
| 922 // separate log2 function. | 922 * log2(x) = (f - 0.5*f*f + k_log1p(f)) / ln2 + k |
| 923 // | 923 * in not-quite-routine extra precision. |
| 924 // Method: | 924 */ |
| 925 // Compute log2(x) in two pieces: | |
| 926 // log2(x) = w1 + w2 | |
| 927 // where w1 has 53-24 = 29 bits of trailing zeroes. | |
| 928 double log2(double x) { | 925 double log2(double x) { |
| 929 static const double | 926 static const double |
| 930 bp[] = {1.0, 1.5}, | 927 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ |
| 931 dp_h[] = {0.0, 5.84962487220764160156e-01}, /* 0x3FE2B803, 0x40000000 */ | 928 ivln2hi = 1.44269504072144627571e+00, /* 0x3ff71547, 0x65200000 */ |
| 932 dp_l[] = {0.0, 1.35003920212974897128e-08}, /* 0x3E4CFDEB, 0x43CFD006 */ | 929 ivln2lo = 1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */ |
| 933 zero = 0.0, one = 1.0, | |
| 934 // Polynomial coefficients for (3/2)*(log2(x) - 2*s - 2/3*s^3) | |
| 935 L1 = 5.99999999999994648725e-01, L2 = 4.28571428578550184252e-01, | |
| 936 L3 = 3.33333329818377432918e-01, L4 = 2.72728123808534006489e-01, | |
| 937 L5 = 2.30660745775561754067e-01, L6 = 2.06975017800338417784e-01, | |
| 938 // cp = 2/(3*ln(2)). Note that cp_h + cp_l is cp, but with more accuracy. | |
| 939 cp = 9.61796693925975554329e-01, cp_h = 9.61796700954437255859e-01, | |
| 940 cp_l = -7.02846165095275826516e-09, two53 = 9007199254740992, /* 2^53 */ | |
| 941 two54 = 1.80143985094819840000e+16; /* 0x43500000, 0x00000000 */ | |
| 942 | |
| 943 static volatile double vzero = 0.0; | |
| 944 double ax, z_h, z_l, p_h, p_l; | |
| 945 double t1, t2, r, t, u, v; | |
| 946 int32_t j, k, n; | |
| 947 int32_t ix, hx; | |
| 948 u_int32_t lx; | |
| 949 | |
| 950 EXTRACT_WORDS(hx, lx, x); | |
| 951 ix = hx & 0x7fffffff; | |
| 952 | |
| 953 // Handle special cases. | |
| 954 // log2(+/- 0) = -Infinity | |
| 955 if ((ix | lx) == 0) return -two54 / vzero; /* log(+-0)=-inf */ | |
| 956 | |
| 957 // log(x) = NaN, if x < 0 | |
| 958 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */ | |
| 959 | |
| 960 // log2(Infinity) = Infinity, log2(NaN) = NaN | |
| 961 if (ix >= 0x7ff00000) return x; | |
| 962 | |
| 963 ax = fabs(x); | |
| 964 | |
| 965 double ss, s2, s_h, s_l, t_h, t_l; | |
| 966 n = 0; | |
| 967 | |
| 968 /* take care subnormal number */ | |
| 969 if (ix < 0x00100000) { | |
| 970 ax *= two53; | |
| 971 n -= 53; | |
| 972 GET_HIGH_WORD(ix, ax); | |
| 973 } | |
| 974 | |
| 975 n += ((ix) >> 20) - 0x3ff; | |
| 976 j = ix & 0x000fffff; | |
| 977 | |
| 978 /* determine interval */ | |
| 979 ix = j | 0x3ff00000; /* normalize ix */ | |
| 980 if (j <= 0x3988E) { | |
| 981 k = 0; /* |x|<sqrt(3/2) */ | |
| 982 } else if (j < 0xBB67A) { | |
| 983 k = 1; /* |x|<sqrt(3) */ | |
| 984 } else { | |
| 985 k = 0; | |
| 986 n += 1; | |
| 987 ix -= 0x00100000; | |
| 988 } | |
| 989 SET_HIGH_WORD(ax, ix); | |
| 990 | |
| 991 /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ | |
| 992 u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */ | |
| 993 v = one / (ax + bp[k]); | |
| 994 ss = u * v; | |
| 995 s_h = ss; | |
| 996 SET_LOW_WORD(s_h, 0); | |
| 997 /* t_h=ax+bp[k] High */ | |
| 998 t_h = zero; | |
| 999 SET_HIGH_WORD(t_h, ((ix >> 1) | 0x20000000) + 0x00080000 + (k << 18)); | |
| 1000 t_l = ax - (t_h - bp[k]); | |
| 1001 s_l = v * ((u - s_h * t_h) - s_h * t_l); | |
| 1002 /* compute log(ax) */ | |
| 1003 s2 = ss * ss; | |
| 1004 r = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6))))); | |
| 1005 r += s_l * (s_h + ss); | |
| 1006 s2 = s_h * s_h; | |
| 1007 t_h = 3.0 + s2 + r; | |
| 1008 SET_LOW_WORD(t_h, 0); | |
| 1009 t_l = r - ((t_h - 3.0) - s2); | |
| 1010 /* u+v = ss*(1+...) */ | |
| 1011 u = s_h * t_h; | |
| 1012 v = s_l * t_h + t_l * ss; | |
| 1013 /* 2/(3log2)*(ss+...) */ | |
| 1014 p_h = u + v; | |
| 1015 SET_LOW_WORD(p_h, 0); | |
| 1016 p_l = v - (p_h - u); | |
| 1017 z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3*log2) */ | |
| 1018 z_l = cp_l * p_h + p_l * cp + dp_l[k]; | |
| 1019 /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */ | |
| 1020 t = static_cast<double>(n); | |
| 1021 t1 = (((z_h + z_l) + dp_h[k]) + t); | |
| 1022 SET_LOW_WORD(t1, 0); | |
| 1023 t2 = z_l - (((t1 - t) - dp_h[k]) - z_h); | |
| 1024 | |
| 1025 // t1 + t2 = log2(ax), sum up because we do not care about extra precision. | |
| 1026 return t1 + t2; | |
| 1027 } | |
| 1028 | |
| 1029 /* | |
| 1030 * Return the base 10 logarithm of x. See e_log.c and k_log.h for most | |
| 1031 * comments. | |
| 1032 * | |
| 1033 * log10(x) = (f - 0.5*f*f + k_log1p(f)) / ln10 + k * log10(2) | |
| 1034 * in not-quite-routine extra precision. | |
| 1035 */ | |
| 1036 double log10Old(double x) { | |
| 1037 static const double | |
| 1038 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ | |
| 1039 ivln10hi = 4.34294481878168880939e-01, /* 0x3fdbcb7b, 0x15200000 */ | |
| 1040 ivln10lo = 2.50829467116452752298e-11, /* 0x3dbb9438, 0xca9aadd5 */ | |
| 1041 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */ | |
| 1042 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */ | |
| 1043 | 930 |
| 1044 static const double zero = 0.0; | 931 static const double zero = 0.0; |
| 1045 static volatile double vzero = 0.0; | 932 static volatile double vzero = 0.0; |
| 1046 | 933 |
| 1047 double f, hfsq, hi, lo, r, val_hi, val_lo, w, y, y2; | 934 double f, hfsq, hi, lo, r, val_hi, val_lo, w, y; |
| 1048 int32_t i, k, hx; | 935 int32_t i, k, hx; |
| 1049 u_int32_t lx; | 936 u_int32_t lx; |
| 1050 | 937 |
| 1051 EXTRACT_WORDS(hx, lx, x); | 938 EXTRACT_WORDS(hx, lx, x); |
| 1052 | 939 |
| 1053 k = 0; | 940 k = 0; |
| 1054 if (hx < 0x00100000) { /* x < 2**-1022 */ | 941 if (hx < 0x00100000) { /* x < 2**-1022 */ |
| 1055 if (((hx & 0x7fffffff) | lx) == 0) | 942 if (((hx & 0x7fffffff) | lx) == 0) |
| 1056 return -two54 / vzero; /* log(+-0)=-inf */ | 943 return -two54 / vzero; /* log(+-0)=-inf */ |
| 1057 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */ | 944 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */ |
| 1058 k -= 54; | 945 k -= 54; |
| 1059 x *= two54; /* subnormal number, scale up x */ | 946 x *= two54; /* subnormal number, scale up x */ |
| 1060 GET_HIGH_WORD(hx, x); | 947 GET_HIGH_WORD(hx, x); |
| 1061 } | 948 } |
| 1062 if (hx >= 0x7ff00000) return x + x; | 949 if (hx >= 0x7ff00000) return x + x; |
| 1063 if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */ | 950 if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */ |
| 1064 k += (hx >> 20) - 1023; | 951 k += (hx >> 20) - 1023; |
| 1065 hx &= 0x000fffff; | 952 hx &= 0x000fffff; |
| 1066 i = (hx + 0x95f64) & 0x100000; | 953 i = (hx + 0x95f64) & 0x100000; |
| 1067 SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */ | 954 SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */ |
| 1068 k += (i >> 20); | 955 k += (i >> 20); |
| 1069 y = static_cast<double>(k); | 956 y = static_cast<double>(k); |
| 1070 f = x - 1.0; | 957 f = x - 1.0; |
| 1071 hfsq = 0.5 * f * f; | 958 hfsq = 0.5 * f * f; |
| 1072 r = k_log1p(f); | 959 r = k_log1p(f); |
| 1073 | 960 |
| 1074 /* See e_log2.c for most details. */ | 961 /* |
| 962 * f-hfsq must (for args near 1) be evaluated in extra precision | |
| 963 * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2). | |
| 964 * This is fairly efficient since f-hfsq only depends on f, so can | |
| 965 * be evaluated in parallel with R. Not combining hfsq with R also | |
| 966 * keeps R small (though not as small as a true `lo' term would be), | |
| 967 * so that extra precision is not needed for terms involving R. | |
| 968 * | |
| 969 * Compiler bugs involving extra precision used to break Dekker's | |
| 970 * theorem for spitting f-hfsq as hi+lo, unless double_t was used | |
| 971 * or the multi-precision calculations were avoided when double_t | |
| 972 * has extra precision. These problems are now automatically | |
| 973 * avoided as a side effect of the optimization of combining the | |
| 974 * Dekker splitting step with the clear-low-bits step. | |
| 975 * | |
| 976 * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra | |
| 977 * precision to avoid a very large cancellation when x is very near | |
| 978 * these values. Unlike the above cancellations, this problem is | |
| 979 * specific to base 2. It is strange that adding +-1 is so much | |
| 980 * harder than adding +-ln2 or +-log10_2. | |
| 981 * | |
| 982 * This uses Dekker's theorem to normalize y+val_hi, so the | |
| 983 * compiler bugs are back in some configurations, sigh. And I | |
| 984 * don't want to used double_t to avoid them, since that gives a | |
| 985 * pessimization and the support for avoiding the pessimization | |
| 986 * is not yet available. | |
| 987 * | |
| 988 * The multi-precision calculations for the multiplications are | |
| 989 * routine. | |
| 990 */ | |
| 1075 hi = f - hfsq; | 991 hi = f - hfsq; |
| 1076 SET_LOW_WORD(hi, 0); | 992 SET_LOW_WORD(hi, 0); |
| 1077 lo = (f - hi) - hfsq + r; | 993 lo = (f - hi) - hfsq + r; |
| 1078 val_hi = hi * ivln10hi; | 994 val_hi = hi * ivln2hi; |
| 1079 y2 = y * log10_2hi; | 995 val_lo = (lo + hi) * ivln2lo + lo * ivln2hi; |
| 1080 val_lo = y * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi; | |
| 1081 | 996 |
| 1082 /* | 997 /* spadd(val_hi, val_lo, y), except for not using double_t: */ |
| 1083 * Extra precision in for adding y*log10_2hi is not strictly needed | 998 w = y + val_hi; |
| 1084 * since there is no very large cancellation near x = sqrt(2) or | 999 val_lo += (y - w) + val_hi; |
| 1085 * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs | |
| 1086 * with some parallelism and it reduces the error for many args. | |
| 1087 */ | |
| 1088 w = y2 + val_hi; | |
| 1089 val_lo += (y2 - w) + val_hi; | |
| 1090 val_hi = w; | 1000 val_hi = w; |
| 1091 | 1001 |
| 1092 return val_lo + val_hi; | 1002 return val_lo + val_hi; |
| 1093 } | 1003 } |
| 1094 | 1004 |
| 1005 // ES6 draft 09-27-13, section 20.2.2.21. | |
|
Benedikt Meurer
2016/06/16 16:47:34
Nit: Remove the ES6 hint here. And use C-style /*
| |
| 1006 // Return the base 10 logarithm of x | |
| 1007 // | |
| 1008 // Method : | |
| 1009 // Let log10_2hi = leading 40 bits of log10(2) and | |
| 1010 // log10_2lo = log10(2) - log10_2hi, | |
| 1011 // ivln10 = 1/log(10) rounded. | |
| 1012 // Then | |
| 1013 // n = ilogb(x), | |
| 1014 // if(n<0) n = n+1; | |
| 1015 // x = scalbn(x,-n); | |
| 1016 // log10(x) := n*log10_2hi + (n*log10_2lo + ivln10*log(x)) | |
| 1017 // | |
| 1018 // Note 1: | |
| 1019 // To guarantee log10(10**n)=n, where 10**n is normal, the rounding | |
| 1020 // mode must set to Round-to-Nearest. | |
| 1021 // Note 2: | |
| 1022 // [1/log(10)] rounded to 53 bits has error .198 ulps; | |
| 1023 // log10 is monotonic at all binary break points. | |
| 1024 // | |
| 1025 // Special cases: | |
| 1026 // log10(x) is NaN if x < 0; | |
| 1027 // log10(+INF) is +INF; log10(0) is -INF; | |
| 1028 // log10(NaN) is that NaN; | |
| 1029 // log10(10**N) = N for N=0,1,...,22. | |
| 1095 double log10(double x) { | 1030 double log10(double x) { |
| 1096 static const double | 1031 static const double |
| 1097 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ | 1032 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ |
| 1098 ivln10 = 4.34294481903251816668e-01, | 1033 ivln10 = 4.34294481903251816668e-01, |
| 1099 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */ | 1034 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */ |
| 1100 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */ | 1035 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */ |
| 1101 | 1036 |
| 1102 static const double zero = 0.0; | 1037 static const double zero = 0.0; |
| 1103 static volatile double vzero = 0.0; | 1038 static volatile double vzero = 0.0; |
| 1104 | 1039 |
| (...skipping 23 matching lines...) Expand all Loading... | |
| 1128 SET_HIGH_WORD(x, hx); | 1063 SET_HIGH_WORD(x, hx); |
| 1129 SET_LOW_WORD(x, lx); | 1064 SET_LOW_WORD(x, lx); |
| 1130 | 1065 |
| 1131 double z = y * log10_2lo + ivln10 * log(x); | 1066 double z = y * log10_2lo + ivln10 * log(x); |
| 1132 return z + y * log10_2hi; | 1067 return z + y * log10_2hi; |
| 1133 } | 1068 } |
| 1134 | 1069 |
| 1135 } // namespace ieee754 | 1070 } // namespace ieee754 |
| 1136 } // namespace base | 1071 } // namespace base |
| 1137 } // namespace v8 | 1072 } // namespace v8 |
| OLD | NEW |