OLD | NEW |
---|---|
1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm). | 1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm). |
2 // | 2 // |
3 // ==================================================== | 3 // ==================================================== |
4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. | 4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. |
5 // | 5 // |
6 // Developed at SunSoft, a Sun Microsystems, Inc. business. | 6 // Developed at SunSoft, a Sun Microsystems, Inc. business. |
7 // Permission to use, copy, modify, and distribute this | 7 // Permission to use, copy, modify, and distribute this |
8 // software is freely granted, provided that this notice | 8 // software is freely granted, provided that this notice |
9 // is preserved. | 9 // is preserved. |
10 // ==================================================== | 10 // ==================================================== |
(...skipping 895 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
906 s = f / (2.0 + f); | 906 s = f / (2.0 + f); |
907 z = s * s; | 907 z = s * s; |
908 w = z * z; | 908 w = z * z; |
909 t1 = w * (Lg2 + w * (Lg4 + w * Lg6)); | 909 t1 = w * (Lg2 + w * (Lg4 + w * Lg6)); |
910 t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7))); | 910 t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7))); |
911 R = t2 + t1; | 911 R = t2 + t1; |
912 hfsq = 0.5 * f * f; | 912 hfsq = 0.5 * f * f; |
913 return s * (hfsq + R); | 913 return s * (hfsq + R); |
914 } | 914 } |
915 | 915 |
916 // ES6 draft 09-27-13, section 20.2.2.22. | 916 /* |
917 // Return the base 2 logarithm of x | 917 * Return the base 2 logarithm of x. See e_log.c and k_log.h for most |
918 // | 918 * comments. |
919 // fdlibm does not have an explicit log2 function, but fdlibm's pow | 919 * |
920 // function does implement an accurate log2 function as part of the | 920 * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel, |
921 // pow implementation. This extracts the core parts of that as a | 921 * then does the combining and scaling steps |
922 // separate log2 function. | 922 * log2(x) = (f - 0.5*f*f + k_log1p(f)) / ln2 + k |
923 // | 923 * in not-quite-routine extra precision. |
924 // Method: | 924 */ |
925 // Compute log2(x) in two pieces: | |
926 // log2(x) = w1 + w2 | |
927 // where w1 has 53-24 = 29 bits of trailing zeroes. | |
928 double log2(double x) { | 925 double log2(double x) { |
929 static const double | 926 static const double |
930 bp[] = {1.0, 1.5}, | 927 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ |
931 dp_h[] = {0.0, 5.84962487220764160156e-01}, /* 0x3FE2B803, 0x40000000 */ | 928 ivln2hi = 1.44269504072144627571e+00, /* 0x3ff71547, 0x65200000 */ |
932 dp_l[] = {0.0, 1.35003920212974897128e-08}, /* 0x3E4CFDEB, 0x43CFD006 */ | 929 ivln2lo = 1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */ |
933 zero = 0.0, one = 1.0, | |
934 // Polynomial coefficients for (3/2)*(log2(x) - 2*s - 2/3*s^3) | |
935 L1 = 5.99999999999994648725e-01, L2 = 4.28571428578550184252e-01, | |
936 L3 = 3.33333329818377432918e-01, L4 = 2.72728123808534006489e-01, | |
937 L5 = 2.30660745775561754067e-01, L6 = 2.06975017800338417784e-01, | |
938 // cp = 2/(3*ln(2)). Note that cp_h + cp_l is cp, but with more accuracy. | |
939 cp = 9.61796693925975554329e-01, cp_h = 9.61796700954437255859e-01, | |
940 cp_l = -7.02846165095275826516e-09, two53 = 9007199254740992, /* 2^53 */ | |
941 two54 = 1.80143985094819840000e+16; /* 0x43500000, 0x00000000 */ | |
942 | |
943 static volatile double vzero = 0.0; | |
944 double ax, z_h, z_l, p_h, p_l; | |
945 double t1, t2, r, t, u, v; | |
946 int32_t j, k, n; | |
947 int32_t ix, hx; | |
948 u_int32_t lx; | |
949 | |
950 EXTRACT_WORDS(hx, lx, x); | |
951 ix = hx & 0x7fffffff; | |
952 | |
953 // Handle special cases. | |
954 // log2(+/- 0) = -Infinity | |
955 if ((ix | lx) == 0) return -two54 / vzero; /* log(+-0)=-inf */ | |
956 | |
957 // log(x) = NaN, if x < 0 | |
958 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */ | |
959 | |
960 // log2(Infinity) = Infinity, log2(NaN) = NaN | |
961 if (ix >= 0x7ff00000) return x; | |
962 | |
963 ax = fabs(x); | |
964 | |
965 double ss, s2, s_h, s_l, t_h, t_l; | |
966 n = 0; | |
967 | |
968 /* take care subnormal number */ | |
969 if (ix < 0x00100000) { | |
970 ax *= two53; | |
971 n -= 53; | |
972 GET_HIGH_WORD(ix, ax); | |
973 } | |
974 | |
975 n += ((ix) >> 20) - 0x3ff; | |
976 j = ix & 0x000fffff; | |
977 | |
978 /* determine interval */ | |
979 ix = j | 0x3ff00000; /* normalize ix */ | |
980 if (j <= 0x3988E) { | |
981 k = 0; /* |x|<sqrt(3/2) */ | |
982 } else if (j < 0xBB67A) { | |
983 k = 1; /* |x|<sqrt(3) */ | |
984 } else { | |
985 k = 0; | |
986 n += 1; | |
987 ix -= 0x00100000; | |
988 } | |
989 SET_HIGH_WORD(ax, ix); | |
990 | |
991 /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ | |
992 u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */ | |
993 v = one / (ax + bp[k]); | |
994 ss = u * v; | |
995 s_h = ss; | |
996 SET_LOW_WORD(s_h, 0); | |
997 /* t_h=ax+bp[k] High */ | |
998 t_h = zero; | |
999 SET_HIGH_WORD(t_h, ((ix >> 1) | 0x20000000) + 0x00080000 + (k << 18)); | |
1000 t_l = ax - (t_h - bp[k]); | |
1001 s_l = v * ((u - s_h * t_h) - s_h * t_l); | |
1002 /* compute log(ax) */ | |
1003 s2 = ss * ss; | |
1004 r = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6))))); | |
1005 r += s_l * (s_h + ss); | |
1006 s2 = s_h * s_h; | |
1007 t_h = 3.0 + s2 + r; | |
1008 SET_LOW_WORD(t_h, 0); | |
1009 t_l = r - ((t_h - 3.0) - s2); | |
1010 /* u+v = ss*(1+...) */ | |
1011 u = s_h * t_h; | |
1012 v = s_l * t_h + t_l * ss; | |
1013 /* 2/(3log2)*(ss+...) */ | |
1014 p_h = u + v; | |
1015 SET_LOW_WORD(p_h, 0); | |
1016 p_l = v - (p_h - u); | |
1017 z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3*log2) */ | |
1018 z_l = cp_l * p_h + p_l * cp + dp_l[k]; | |
1019 /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */ | |
1020 t = static_cast<double>(n); | |
1021 t1 = (((z_h + z_l) + dp_h[k]) + t); | |
1022 SET_LOW_WORD(t1, 0); | |
1023 t2 = z_l - (((t1 - t) - dp_h[k]) - z_h); | |
1024 | |
1025 // t1 + t2 = log2(ax), sum up because we do not care about extra precision. | |
1026 return t1 + t2; | |
1027 } | |
1028 | |
1029 /* | |
1030 * Return the base 10 logarithm of x. See e_log.c and k_log.h for most | |
1031 * comments. | |
1032 * | |
1033 * log10(x) = (f - 0.5*f*f + k_log1p(f)) / ln10 + k * log10(2) | |
1034 * in not-quite-routine extra precision. | |
1035 */ | |
1036 double log10Old(double x) { | |
1037 static const double | |
1038 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ | |
1039 ivln10hi = 4.34294481878168880939e-01, /* 0x3fdbcb7b, 0x15200000 */ | |
1040 ivln10lo = 2.50829467116452752298e-11, /* 0x3dbb9438, 0xca9aadd5 */ | |
1041 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */ | |
1042 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */ | |
1043 | 930 |
1044 static const double zero = 0.0; | 931 static const double zero = 0.0; |
1045 static volatile double vzero = 0.0; | 932 static volatile double vzero = 0.0; |
1046 | 933 |
1047 double f, hfsq, hi, lo, r, val_hi, val_lo, w, y, y2; | 934 double f, hfsq, hi, lo, r, val_hi, val_lo, w, y; |
1048 int32_t i, k, hx; | 935 int32_t i, k, hx; |
1049 u_int32_t lx; | 936 u_int32_t lx; |
1050 | 937 |
1051 EXTRACT_WORDS(hx, lx, x); | 938 EXTRACT_WORDS(hx, lx, x); |
1052 | 939 |
1053 k = 0; | 940 k = 0; |
1054 if (hx < 0x00100000) { /* x < 2**-1022 */ | 941 if (hx < 0x00100000) { /* x < 2**-1022 */ |
1055 if (((hx & 0x7fffffff) | lx) == 0) | 942 if (((hx & 0x7fffffff) | lx) == 0) |
1056 return -two54 / vzero; /* log(+-0)=-inf */ | 943 return -two54 / vzero; /* log(+-0)=-inf */ |
1057 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */ | 944 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */ |
1058 k -= 54; | 945 k -= 54; |
1059 x *= two54; /* subnormal number, scale up x */ | 946 x *= two54; /* subnormal number, scale up x */ |
1060 GET_HIGH_WORD(hx, x); | 947 GET_HIGH_WORD(hx, x); |
1061 } | 948 } |
1062 if (hx >= 0x7ff00000) return x + x; | 949 if (hx >= 0x7ff00000) return x + x; |
1063 if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */ | 950 if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */ |
1064 k += (hx >> 20) - 1023; | 951 k += (hx >> 20) - 1023; |
1065 hx &= 0x000fffff; | 952 hx &= 0x000fffff; |
1066 i = (hx + 0x95f64) & 0x100000; | 953 i = (hx + 0x95f64) & 0x100000; |
1067 SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */ | 954 SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */ |
1068 k += (i >> 20); | 955 k += (i >> 20); |
1069 y = static_cast<double>(k); | 956 y = static_cast<double>(k); |
1070 f = x - 1.0; | 957 f = x - 1.0; |
1071 hfsq = 0.5 * f * f; | 958 hfsq = 0.5 * f * f; |
1072 r = k_log1p(f); | 959 r = k_log1p(f); |
1073 | 960 |
1074 /* See e_log2.c for most details. */ | 961 /* |
962 * f-hfsq must (for args near 1) be evaluated in extra precision | |
963 * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2). | |
964 * This is fairly efficient since f-hfsq only depends on f, so can | |
965 * be evaluated in parallel with R. Not combining hfsq with R also | |
966 * keeps R small (though not as small as a true `lo' term would be), | |
967 * so that extra precision is not needed for terms involving R. | |
968 * | |
969 * Compiler bugs involving extra precision used to break Dekker's | |
970 * theorem for spitting f-hfsq as hi+lo, unless double_t was used | |
971 * or the multi-precision calculations were avoided when double_t | |
972 * has extra precision. These problems are now automatically | |
973 * avoided as a side effect of the optimization of combining the | |
974 * Dekker splitting step with the clear-low-bits step. | |
975 * | |
976 * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra | |
977 * precision to avoid a very large cancellation when x is very near | |
978 * these values. Unlike the above cancellations, this problem is | |
979 * specific to base 2. It is strange that adding +-1 is so much | |
980 * harder than adding +-ln2 or +-log10_2. | |
981 * | |
982 * This uses Dekker's theorem to normalize y+val_hi, so the | |
983 * compiler bugs are back in some configurations, sigh. And I | |
984 * don't want to used double_t to avoid them, since that gives a | |
985 * pessimization and the support for avoiding the pessimization | |
986 * is not yet available. | |
987 * | |
988 * The multi-precision calculations for the multiplications are | |
989 * routine. | |
990 */ | |
1075 hi = f - hfsq; | 991 hi = f - hfsq; |
1076 SET_LOW_WORD(hi, 0); | 992 SET_LOW_WORD(hi, 0); |
1077 lo = (f - hi) - hfsq + r; | 993 lo = (f - hi) - hfsq + r; |
1078 val_hi = hi * ivln10hi; | 994 val_hi = hi * ivln2hi; |
1079 y2 = y * log10_2hi; | 995 val_lo = (lo + hi) * ivln2lo + lo * ivln2hi; |
1080 val_lo = y * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi; | |
1081 | 996 |
1082 /* | 997 /* spadd(val_hi, val_lo, y), except for not using double_t: */ |
1083 * Extra precision in for adding y*log10_2hi is not strictly needed | 998 w = y + val_hi; |
1084 * since there is no very large cancellation near x = sqrt(2) or | 999 val_lo += (y - w) + val_hi; |
1085 * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs | |
1086 * with some parallelism and it reduces the error for many args. | |
1087 */ | |
1088 w = y2 + val_hi; | |
1089 val_lo += (y2 - w) + val_hi; | |
1090 val_hi = w; | 1000 val_hi = w; |
1091 | 1001 |
1092 return val_lo + val_hi; | 1002 return val_lo + val_hi; |
1093 } | 1003 } |
1094 | 1004 |
1005 // ES6 draft 09-27-13, section 20.2.2.21. | |
Benedikt Meurer
2016/06/16 16:47:34
Nit: Remove the ES6 hint here. And use C-style /*
| |
1006 // Return the base 10 logarithm of x | |
1007 // | |
1008 // Method : | |
1009 // Let log10_2hi = leading 40 bits of log10(2) and | |
1010 // log10_2lo = log10(2) - log10_2hi, | |
1011 // ivln10 = 1/log(10) rounded. | |
1012 // Then | |
1013 // n = ilogb(x), | |
1014 // if(n<0) n = n+1; | |
1015 // x = scalbn(x,-n); | |
1016 // log10(x) := n*log10_2hi + (n*log10_2lo + ivln10*log(x)) | |
1017 // | |
1018 // Note 1: | |
1019 // To guarantee log10(10**n)=n, where 10**n is normal, the rounding | |
1020 // mode must set to Round-to-Nearest. | |
1021 // Note 2: | |
1022 // [1/log(10)] rounded to 53 bits has error .198 ulps; | |
1023 // log10 is monotonic at all binary break points. | |
1024 // | |
1025 // Special cases: | |
1026 // log10(x) is NaN if x < 0; | |
1027 // log10(+INF) is +INF; log10(0) is -INF; | |
1028 // log10(NaN) is that NaN; | |
1029 // log10(10**N) = N for N=0,1,...,22. | |
1095 double log10(double x) { | 1030 double log10(double x) { |
1096 static const double | 1031 static const double |
1097 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ | 1032 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ |
1098 ivln10 = 4.34294481903251816668e-01, | 1033 ivln10 = 4.34294481903251816668e-01, |
1099 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */ | 1034 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */ |
1100 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */ | 1035 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */ |
1101 | 1036 |
1102 static const double zero = 0.0; | 1037 static const double zero = 0.0; |
1103 static volatile double vzero = 0.0; | 1038 static volatile double vzero = 0.0; |
1104 | 1039 |
(...skipping 23 matching lines...) Expand all Loading... | |
1128 SET_HIGH_WORD(x, hx); | 1063 SET_HIGH_WORD(x, hx); |
1129 SET_LOW_WORD(x, lx); | 1064 SET_LOW_WORD(x, lx); |
1130 | 1065 |
1131 double z = y * log10_2lo + ivln10 * log(x); | 1066 double z = y * log10_2lo + ivln10 * log(x); |
1132 return z + y * log10_2hi; | 1067 return z + y * log10_2hi; |
1133 } | 1068 } |
1134 | 1069 |
1135 } // namespace ieee754 | 1070 } // namespace ieee754 |
1136 } // namespace base | 1071 } // namespace base |
1137 } // namespace v8 | 1072 } // namespace v8 |
OLD | NEW |