OLD | NEW |
1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm). | 1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm). |
2 // | 2 // |
3 // ==================================================== | 3 // ==================================================== |
4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. | 4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. |
5 // | 5 // |
6 // Developed at SunSoft, a Sun Microsystems, Inc. business. | 6 // Developed at SunSoft, a Sun Microsystems, Inc. business. |
7 // Permission to use, copy, modify, and distribute this | 7 // Permission to use, copy, modify, and distribute this |
8 // software is freely granted, provided that this notice | 8 // software is freely granted, provided that this notice |
9 // is preserved. | 9 // is preserved. |
10 // ==================================================== | 10 // ==================================================== |
(...skipping 856 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
867 s = f / (2.0 + f); | 867 s = f / (2.0 + f); |
868 z = s * s; | 868 z = s * s; |
869 R = z * (Lp1 + | 869 R = z * (Lp1 + |
870 z * (Lp2 + z * (Lp3 + z * (Lp4 + z * (Lp5 + z * (Lp6 + z * Lp7)))))); | 870 z * (Lp2 + z * (Lp3 + z * (Lp4 + z * (Lp5 + z * (Lp6 + z * Lp7)))))); |
871 if (k == 0) | 871 if (k == 0) |
872 return f - (hfsq - s * (hfsq + R)); | 872 return f - (hfsq - s * (hfsq + R)); |
873 else | 873 else |
874 return k * ln2_hi - ((hfsq - (s * (hfsq + R) + (k * ln2_lo + c))) - f); | 874 return k * ln2_hi - ((hfsq - (s * (hfsq + R) + (k * ln2_lo + c))) - f); |
875 } | 875 } |
876 | 876 |
877 // ES6 draft 09-27-13, section 20.2.2.22. | 877 /* |
878 // Return the base 2 logarithm of x | 878 * k_log1p(f): |
879 // | 879 * Return log(1+f) - f for 1+f in ~[sqrt(2)/2, sqrt(2)]. |
880 // fdlibm does not have an explicit log2 function, but fdlibm's pow | 880 * |
881 // function does implement an accurate log2 function as part of the | 881 * The following describes the overall strategy for computing |
882 // pow implementation. This extracts the core parts of that as a | 882 * logarithms in base e. The argument reduction and adding the final |
883 // separate log2 function. | 883 * term of the polynomial are done by the caller for increased accuracy |
884 // | 884 * when different bases are used. |
885 // Method: | 885 * |
886 // Compute log2(x) in two pieces: | 886 * Method : |
887 // log2(x) = w1 + w2 | 887 * 1. Argument Reduction: find k and f such that |
888 // where w1 has 53-24 = 29 bits of trailing zeroes. | 888 * x = 2^k * (1+f), |
| 889 * where sqrt(2)/2 < 1+f < sqrt(2) . |
| 890 * |
| 891 * 2. Approximation of log(1+f). |
| 892 * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) |
| 893 * = 2s + 2/3 s**3 + 2/5 s**5 + ....., |
| 894 * = 2s + s*R |
| 895 * We use a special Reme algorithm on [0,0.1716] to generate |
| 896 * a polynomial of degree 14 to approximate R The maximum error |
| 897 * of this polynomial approximation is bounded by 2**-58.45. In |
| 898 * other words, |
| 899 * 2 4 6 8 10 12 14 |
| 900 * R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s |
| 901 * (the values of Lg1 to Lg7 are listed in the program) |
| 902 * and |
| 903 * | 2 14 | -58.45 |
| 904 * | Lg1*s +...+Lg7*s - R(z) | <= 2 |
| 905 * | | |
| 906 * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. |
| 907 * In order to guarantee error in log below 1ulp, we compute log |
| 908 * by |
| 909 * log(1+f) = f - s*(f - R) (if f is not too large) |
| 910 * log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy) |
| 911 * |
| 912 * 3. Finally, log(x) = k*ln2 + log(1+f). |
| 913 * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo))) |
| 914 * Here ln2 is split into two floating point number: |
| 915 * ln2_hi + ln2_lo, |
| 916 * where n*ln2_hi is always exact for |n| < 2000. |
| 917 * |
| 918 * Special cases: |
| 919 * log(x) is NaN with signal if x < 0 (including -INF) ; |
| 920 * log(+INF) is +INF; log(0) is -INF with signal; |
| 921 * log(NaN) is that NaN with no signal. |
| 922 * |
| 923 * Accuracy: |
| 924 * according to an error analysis, the error is always less than |
| 925 * 1 ulp (unit in the last place). |
| 926 * |
| 927 * Constants: |
| 928 * The hexadecimal values are the intended ones for the following |
| 929 * constants. The decimal values may be used, provided that the |
| 930 * compiler will convert from decimal to binary accurately enough |
| 931 * to produce the hexadecimal values shown. |
| 932 */ |
| 933 |
| 934 static const double Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */ |
| 935 Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ |
| 936 Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */ |
| 937 Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */ |
| 938 Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */ |
| 939 Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ |
| 940 Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ |
| 941 |
| 942 /* |
| 943 * We always inline k_log1p(), since doing so produces a |
| 944 * substantial performance improvement (~40% on amd64). |
| 945 */ |
| 946 static inline double k_log1p(double f) { |
| 947 double hfsq, s, z, R, w, t1, t2; |
| 948 |
| 949 s = f / (2.0 + f); |
| 950 z = s * s; |
| 951 w = z * z; |
| 952 t1 = w * (Lg2 + w * (Lg4 + w * Lg6)); |
| 953 t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7))); |
| 954 R = t2 + t1; |
| 955 hfsq = 0.5 * f * f; |
| 956 return s * (hfsq + R); |
| 957 } |
| 958 |
| 959 /* |
| 960 * Return the base 2 logarithm of x. See e_log.c and k_log.h for most |
| 961 * comments. |
| 962 * |
| 963 * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel, |
| 964 * then does the combining and scaling steps |
| 965 * log2(x) = (f - 0.5*f*f + k_log1p(f)) / ln2 + k |
| 966 * in not-quite-routine extra precision. |
| 967 */ |
889 double log2(double x) { | 968 double log2(double x) { |
890 static const double | 969 static const double |
891 bp[] = {1.0, 1.5}, | 970 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ |
892 dp_h[] = {0.0, 5.84962487220764160156e-01}, /* 0x3FE2B803, 0x40000000 */ | 971 ivln2hi = 1.44269504072144627571e+00, /* 0x3ff71547, 0x65200000 */ |
893 dp_l[] = {0.0, 1.35003920212974897128e-08}, /* 0x3E4CFDEB, 0x43CFD006 */ | 972 ivln2lo = 1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */ |
894 zero = 0.0, one = 1.0, | 973 |
895 // Polynomial coefficients for (3/2)*(log2(x) - 2*s - 2/3*s^3) | 974 static const double zero = 0.0; |
896 L1 = 5.99999999999994648725e-01, L2 = 4.28571428578550184252e-01, | |
897 L3 = 3.33333329818377432918e-01, L4 = 2.72728123808534006489e-01, | |
898 L5 = 2.30660745775561754067e-01, L6 = 2.06975017800338417784e-01, | |
899 // cp = 2/(3*ln(2)). Note that cp_h + cp_l is cp, but with more accuracy. | |
900 cp = 9.61796693925975554329e-01, cp_h = 9.61796700954437255859e-01, | |
901 cp_l = -7.02846165095275826516e-09, two53 = 9007199254740992, /* 2^53 */ | |
902 two54 = 1.80143985094819840000e+16; /* 0x43500000, 0x00000000 */ | |
903 | |
904 static volatile double vzero = 0.0; | 975 static volatile double vzero = 0.0; |
905 double ax, z_h, z_l, p_h, p_l; | 976 |
906 double t1, t2, r, t, u, v; | 977 double f, hfsq, hi, lo, r, val_hi, val_lo, w, y; |
907 int32_t j, k, n; | 978 int32_t i, k, hx; |
908 int32_t ix, hx; | |
909 u_int32_t lx; | 979 u_int32_t lx; |
910 | 980 |
911 EXTRACT_WORDS(hx, lx, x); | 981 EXTRACT_WORDS(hx, lx, x); |
912 ix = hx & 0x7fffffff; | 982 |
913 | 983 k = 0; |
914 // Handle special cases. | 984 if (hx < 0x00100000) { /* x < 2**-1022 */ |
915 // log2(+/- 0) = -Infinity | 985 if (((hx & 0x7fffffff) | lx) == 0) |
916 if ((ix | lx) == 0) return -two54 / vzero; /* log(+-0)=-inf */ | 986 return -two54 / vzero; /* log(+-0)=-inf */ |
917 | 987 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */ |
918 // log(x) = NaN, if x < 0 | 988 k -= 54; |
919 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */ | 989 x *= two54; /* subnormal number, scale up x */ |
920 | 990 GET_HIGH_WORD(hx, x); |
921 // log2(Infinity) = Infinity, log2(NaN) = NaN | |
922 if (ix >= 0x7ff00000) return x; | |
923 | |
924 ax = fabs(x); | |
925 | |
926 double ss, s2, s_h, s_l, t_h, t_l; | |
927 n = 0; | |
928 | |
929 /* take care subnormal number */ | |
930 if (ix < 0x00100000) { | |
931 ax *= two53; | |
932 n -= 53; | |
933 GET_HIGH_WORD(ix, ax); | |
934 } | 991 } |
935 | 992 if (hx >= 0x7ff00000) return x + x; |
936 n += ((ix) >> 20) - 0x3ff; | 993 if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */ |
937 j = ix & 0x000fffff; | 994 k += (hx >> 20) - 1023; |
938 | 995 hx &= 0x000fffff; |
939 /* determine interval */ | 996 i = (hx + 0x95f64) & 0x100000; |
940 ix = j | 0x3ff00000; /* normalize ix */ | 997 SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */ |
941 if (j <= 0x3988E) { | 998 k += (i >> 20); |
942 k = 0; /* |x|<sqrt(3/2) */ | 999 y = static_cast<double>(k); |
943 } else if (j < 0xBB67A) { | 1000 f = x - 1.0; |
944 k = 1; /* |x|<sqrt(3) */ | 1001 hfsq = 0.5 * f * f; |
945 } else { | 1002 r = k_log1p(f); |
946 k = 0; | 1003 |
947 n += 1; | 1004 /* |
948 ix -= 0x00100000; | 1005 * f-hfsq must (for args near 1) be evaluated in extra precision |
949 } | 1006 * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2). |
950 SET_HIGH_WORD(ax, ix); | 1007 * This is fairly efficient since f-hfsq only depends on f, so can |
951 | 1008 * be evaluated in parallel with R. Not combining hfsq with R also |
952 /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ | 1009 * keeps R small (though not as small as a true `lo' term would be), |
953 u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */ | 1010 * so that extra precision is not needed for terms involving R. |
954 v = one / (ax + bp[k]); | 1011 * |
955 ss = u * v; | 1012 * Compiler bugs involving extra precision used to break Dekker's |
956 s_h = ss; | 1013 * theorem for spitting f-hfsq as hi+lo, unless double_t was used |
957 SET_LOW_WORD(s_h, 0); | 1014 * or the multi-precision calculations were avoided when double_t |
958 /* t_h=ax+bp[k] High */ | 1015 * has extra precision. These problems are now automatically |
959 t_h = zero; | 1016 * avoided as a side effect of the optimization of combining the |
960 SET_HIGH_WORD(t_h, ((ix >> 1) | 0x20000000) + 0x00080000 + (k << 18)); | 1017 * Dekker splitting step with the clear-low-bits step. |
961 t_l = ax - (t_h - bp[k]); | 1018 * |
962 s_l = v * ((u - s_h * t_h) - s_h * t_l); | 1019 * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra |
963 /* compute log(ax) */ | 1020 * precision to avoid a very large cancellation when x is very near |
964 s2 = ss * ss; | 1021 * these values. Unlike the above cancellations, this problem is |
965 r = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6))))); | 1022 * specific to base 2. It is strange that adding +-1 is so much |
966 r += s_l * (s_h + ss); | 1023 * harder than adding +-ln2 or +-log10_2. |
967 s2 = s_h * s_h; | 1024 * |
968 t_h = 3.0 + s2 + r; | 1025 * This uses Dekker's theorem to normalize y+val_hi, so the |
969 SET_LOW_WORD(t_h, 0); | 1026 * compiler bugs are back in some configurations, sigh. And I |
970 t_l = r - ((t_h - 3.0) - s2); | 1027 * don't want to used double_t to avoid them, since that gives a |
971 /* u+v = ss*(1+...) */ | 1028 * pessimization and the support for avoiding the pessimization |
972 u = s_h * t_h; | 1029 * is not yet available. |
973 v = s_l * t_h + t_l * ss; | 1030 * |
974 /* 2/(3log2)*(ss+...) */ | 1031 * The multi-precision calculations for the multiplications are |
975 p_h = u + v; | 1032 * routine. |
976 SET_LOW_WORD(p_h, 0); | 1033 */ |
977 p_l = v - (p_h - u); | 1034 hi = f - hfsq; |
978 z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3*log2) */ | 1035 SET_LOW_WORD(hi, 0); |
979 z_l = cp_l * p_h + p_l * cp + dp_l[k]; | 1036 lo = (f - hi) - hfsq + r; |
980 /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */ | 1037 val_hi = hi * ivln2hi; |
981 t = static_cast<double>(n); | 1038 val_lo = (lo + hi) * ivln2lo + lo * ivln2hi; |
982 t1 = (((z_h + z_l) + dp_h[k]) + t); | 1039 |
983 SET_LOW_WORD(t1, 0); | 1040 /* spadd(val_hi, val_lo, y), except for not using double_t: */ |
984 t2 = z_l - (((t1 - t) - dp_h[k]) - z_h); | 1041 w = y + val_hi; |
985 | 1042 val_lo += (y - w) + val_hi; |
986 // t1 + t2 = log2(ax), sum up because we do not care about extra precision. | 1043 val_hi = w; |
987 return t1 + t2; | 1044 |
| 1045 return val_lo + val_hi; |
988 } | 1046 } |
989 | 1047 |
| 1048 /* |
| 1049 * Return the base 10 logarithm of x |
| 1050 * |
| 1051 * Method : |
| 1052 * Let log10_2hi = leading 40 bits of log10(2) and |
| 1053 * log10_2lo = log10(2) - log10_2hi, |
| 1054 * ivln10 = 1/log(10) rounded. |
| 1055 * Then |
| 1056 * n = ilogb(x), |
| 1057 * if(n<0) n = n+1; |
| 1058 * x = scalbn(x,-n); |
| 1059 * log10(x) := n*log10_2hi + (n*log10_2lo + ivln10*log(x)) |
| 1060 * |
| 1061 * Note 1: |
| 1062 * To guarantee log10(10**n)=n, where 10**n is normal, the rounding |
| 1063 * mode must set to Round-to-Nearest. |
| 1064 * Note 2: |
| 1065 * [1/log(10)] rounded to 53 bits has error .198 ulps; |
| 1066 * log10 is monotonic at all binary break points. |
| 1067 * |
| 1068 * Special cases: |
| 1069 * log10(x) is NaN if x < 0; |
| 1070 * log10(+INF) is +INF; log10(0) is -INF; |
| 1071 * log10(NaN) is that NaN; |
| 1072 * log10(10**N) = N for N=0,1,...,22. |
| 1073 */ |
990 double log10(double x) { | 1074 double log10(double x) { |
991 static const double | 1075 static const double |
992 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ | 1076 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ |
993 ivln10 = 4.34294481903251816668e-01, | 1077 ivln10 = 4.34294481903251816668e-01, |
994 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */ | 1078 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */ |
995 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */ | 1079 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */ |
996 | 1080 |
997 static const double zero = 0.0; | 1081 static const double zero = 0.0; |
998 static volatile double vzero = 0.0; | 1082 static volatile double vzero = 0.0; |
999 | 1083 |
(...skipping 322 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1322 w = t + t; /* t+t is exact */ | 1406 w = t + t; /* t+t is exact */ |
1323 r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */ | 1407 r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */ |
1324 t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */ | 1408 t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */ |
1325 | 1409 |
1326 return (t); | 1410 return (t); |
1327 } | 1411 } |
1328 | 1412 |
1329 } // namespace ieee754 | 1413 } // namespace ieee754 |
1330 } // namespace base | 1414 } // namespace base |
1331 } // namespace v8 | 1415 } // namespace v8 |
OLD | NEW |