OLD | NEW |
1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm). | 1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm). |
2 // | 2 // |
3 // ==================================================== | 3 // ==================================================== |
4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. | 4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. |
5 // | 5 // |
6 // Developed at SunSoft, a Sun Microsystems, Inc. business. | 6 // Developed at SunSoft, a Sun Microsystems, Inc. business. |
7 // Permission to use, copy, modify, and distribute this | 7 // Permission to use, copy, modify, and distribute this |
8 // software is freely granted, provided that this notice | 8 // software is freely granted, provided that this notice |
9 // is preserved. | 9 // is preserved. |
10 // ==================================================== | 10 // ==================================================== |
(...skipping 379 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
390 default: /* case 3 */ | 390 default: /* case 3 */ |
391 return (z - pi_lo) - pi; /* atan(-,-) */ | 391 return (z - pi_lo) - pi; /* atan(-,-) */ |
392 } | 392 } |
393 } | 393 } |
394 | 394 |
395 /* log(x) | 395 /* log(x) |
396 * Return the logrithm of x | 396 * Return the logrithm of x |
397 * | 397 * |
398 * Method : | 398 * Method : |
399 * 1. Argument Reduction: find k and f such that | 399 * 1. Argument Reduction: find k and f such that |
400 * x = 2^k * (1+f), | 400 * x = 2^k * (1+f), |
401 * where sqrt(2)/2 < 1+f < sqrt(2) . | 401 * where sqrt(2)/2 < 1+f < sqrt(2) . |
402 * | 402 * |
403 * 2. Approximation of log(1+f). | 403 * 2. Approximation of log(1+f). |
404 * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) | 404 * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) |
405 * = 2s + 2/3 s**3 + 2/5 s**5 + ....., | 405 * = 2s + 2/3 s**3 + 2/5 s**5 + ....., |
406 * = 2s + s*R | 406 * = 2s + s*R |
407 * We use a special Reme algorithm on [0,0.1716] to generate | 407 * We use a special Reme algorithm on [0,0.1716] to generate |
408 * a polynomial of degree 14 to approximate R The maximum error | 408 * a polynomial of degree 14 to approximate R The maximum error |
409 * of this polynomial approximation is bounded by 2**-58.45. In | 409 * of this polynomial approximation is bounded by 2**-58.45. In |
410 * other words, | 410 * other words, |
(...skipping 267 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
678 s = f / (2.0 + f); | 678 s = f / (2.0 + f); |
679 z = s * s; | 679 z = s * s; |
680 R = z * (Lp1 + | 680 R = z * (Lp1 + |
681 z * (Lp2 + z * (Lp3 + z * (Lp4 + z * (Lp5 + z * (Lp6 + z * Lp7)))))); | 681 z * (Lp2 + z * (Lp3 + z * (Lp4 + z * (Lp5 + z * (Lp6 + z * Lp7)))))); |
682 if (k == 0) | 682 if (k == 0) |
683 return f - (hfsq - s * (hfsq + R)); | 683 return f - (hfsq - s * (hfsq + R)); |
684 else | 684 else |
685 return k * ln2_hi - ((hfsq - (s * (hfsq + R) + (k * ln2_lo + c))) - f); | 685 return k * ln2_hi - ((hfsq - (s * (hfsq + R) + (k * ln2_lo + c))) - f); |
686 } | 686 } |
687 | 687 |
| 688 /* |
| 689 * k_log1p(f): |
| 690 * Return log(1+f) - f for 1+f in ~[sqrt(2)/2, sqrt(2)]. |
| 691 * |
| 692 * The following describes the overall strategy for computing |
| 693 * logarithms in base e. The argument reduction and adding the final |
| 694 * term of the polynomial are done by the caller for increased accuracy |
| 695 * when different bases are used. |
| 696 * |
| 697 * Method : |
| 698 * 1. Argument Reduction: find k and f such that |
| 699 * x = 2^k * (1+f), |
| 700 * where sqrt(2)/2 < 1+f < sqrt(2) . |
| 701 * |
| 702 * 2. Approximation of log(1+f). |
| 703 * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) |
| 704 * = 2s + 2/3 s**3 + 2/5 s**5 + ....., |
| 705 * = 2s + s*R |
| 706 * We use a special Reme algorithm on [0,0.1716] to generate |
| 707 * a polynomial of degree 14 to approximate R The maximum error |
| 708 * of this polynomial approximation is bounded by 2**-58.45. In |
| 709 * other words, |
| 710 * 2 4 6 8 10 12 14 |
| 711 * R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s |
| 712 * (the values of Lg1 to Lg7 are listed in the program) |
| 713 * and |
| 714 * | 2 14 | -58.45 |
| 715 * | Lg1*s +...+Lg7*s - R(z) | <= 2 |
| 716 * | | |
| 717 * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. |
| 718 * In order to guarantee error in log below 1ulp, we compute log |
| 719 * by |
| 720 * log(1+f) = f - s*(f - R) (if f is not too large) |
| 721 * log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy) |
| 722 * |
| 723 * 3. Finally, log(x) = k*ln2 + log(1+f). |
| 724 * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo))) |
| 725 * Here ln2 is split into two floating point number: |
| 726 * ln2_hi + ln2_lo, |
| 727 * where n*ln2_hi is always exact for |n| < 2000. |
| 728 * |
| 729 * Special cases: |
| 730 * log(x) is NaN with signal if x < 0 (including -INF) ; |
| 731 * log(+INF) is +INF; log(0) is -INF with signal; |
| 732 * log(NaN) is that NaN with no signal. |
| 733 * |
| 734 * Accuracy: |
| 735 * according to an error analysis, the error is always less than |
| 736 * 1 ulp (unit in the last place). |
| 737 * |
| 738 * Constants: |
| 739 * The hexadecimal values are the intended ones for the following |
| 740 * constants. The decimal values may be used, provided that the |
| 741 * compiler will convert from decimal to binary accurately enough |
| 742 * to produce the hexadecimal values shown. |
| 743 */ |
| 744 |
| 745 static const double Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */ |
| 746 Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ |
| 747 Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */ |
| 748 Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */ |
| 749 Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */ |
| 750 Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ |
| 751 Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ |
| 752 |
| 753 /* |
| 754 * We always inline k_log1p(), since doing so produces a |
| 755 * substantial performance improvement (~40% on amd64). |
| 756 */ |
| 757 static inline double k_log1p(double f) { |
| 758 double hfsq, s, z, R, w, t1, t2; |
| 759 |
| 760 s = f / (2.0 + f); |
| 761 z = s * s; |
| 762 w = z * z; |
| 763 t1 = w * (Lg2 + w * (Lg4 + w * Lg6)); |
| 764 t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7))); |
| 765 R = t2 + t1; |
| 766 hfsq = 0.5 * f * f; |
| 767 return s * (hfsq + R); |
| 768 } |
| 769 |
| 770 // ES6 draft 09-27-13, section 20.2.2.22. |
| 771 // Return the base 2 logarithm of x |
| 772 // |
| 773 // fdlibm does not have an explicit log2 function, but fdlibm's pow |
| 774 // function does implement an accurate log2 function as part of the |
| 775 // pow implementation. This extracts the core parts of that as a |
| 776 // separate log2 function. |
| 777 // |
| 778 // Method: |
| 779 // Compute log2(x) in two pieces: |
| 780 // log2(x) = w1 + w2 |
| 781 // where w1 has 53-24 = 29 bits of trailing zeroes. |
| 782 double log2(double x) { |
| 783 static const double |
| 784 bp[] = {1.0, 1.5}, |
| 785 dp_h[] = {0.0, 5.84962487220764160156e-01}, /* 0x3FE2B803, 0x40000000 */ |
| 786 dp_l[] = {0.0, 1.35003920212974897128e-08}, /* 0x3E4CFDEB, 0x43CFD006 */ |
| 787 zero = 0.0, one = 1.0, |
| 788 // Polynomial coefficients for (3/2)*(log2(x) - 2*s - 2/3*s^3) |
| 789 L1 = 5.99999999999994648725e-01, L2 = 4.28571428578550184252e-01, |
| 790 L3 = 3.33333329818377432918e-01, L4 = 2.72728123808534006489e-01, |
| 791 L5 = 2.30660745775561754067e-01, L6 = 2.06975017800338417784e-01, |
| 792 // cp = 2/(3*ln(2)). Note that cp_h + cp_l is cp, but with more accuracy. |
| 793 cp = 9.61796693925975554329e-01, cp_h = 9.61796700954437255859e-01, |
| 794 cp_l = -7.02846165095275826516e-09, two53 = 9007199254740992, /* 2^53 */ |
| 795 two54 = 1.80143985094819840000e+16; /* 0x43500000, 0x00000000 */ |
| 796 |
| 797 static volatile double vzero = 0.0; |
| 798 double ax, z_h, z_l, p_h, p_l; |
| 799 double t1, t2, r, t, u, v; |
| 800 int32_t j, k, n; |
| 801 int32_t ix, hx; |
| 802 u_int32_t lx; |
| 803 |
| 804 EXTRACT_WORDS(hx, lx, x); |
| 805 ix = hx & 0x7fffffff; |
| 806 |
| 807 // Handle special cases. |
| 808 // log2(+/- 0) = -Infinity |
| 809 if ((ix | lx) == 0) return -two54 / vzero; /* log(+-0)=-inf */ |
| 810 |
| 811 // log(x) = NaN, if x < 0 |
| 812 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */ |
| 813 |
| 814 // log2(Infinity) = Infinity, log2(NaN) = NaN |
| 815 if (ix >= 0x7ff00000) return x; |
| 816 |
| 817 ax = fabs(x); |
| 818 |
| 819 double ss, s2, s_h, s_l, t_h, t_l; |
| 820 n = 0; |
| 821 |
| 822 /* take care subnormal number */ |
| 823 if (ix < 0x00100000) { |
| 824 ax *= two53; |
| 825 n -= 53; |
| 826 GET_HIGH_WORD(ix, ax); |
| 827 } |
| 828 |
| 829 n += ((ix) >> 20) - 0x3ff; |
| 830 j = ix & 0x000fffff; |
| 831 |
| 832 /* determine interval */ |
| 833 ix = j | 0x3ff00000; /* normalize ix */ |
| 834 if (j <= 0x3988E) { |
| 835 k = 0; /* |x|<sqrt(3/2) */ |
| 836 } else if (j < 0xBB67A) { |
| 837 k = 1; /* |x|<sqrt(3) */ |
| 838 } else { |
| 839 k = 0; |
| 840 n += 1; |
| 841 ix -= 0x00100000; |
| 842 } |
| 843 SET_HIGH_WORD(ax, ix); |
| 844 |
| 845 /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ |
| 846 u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */ |
| 847 v = one / (ax + bp[k]); |
| 848 ss = u * v; |
| 849 s_h = ss; |
| 850 SET_LOW_WORD(s_h, 0); |
| 851 /* t_h=ax+bp[k] High */ |
| 852 t_h = zero; |
| 853 SET_HIGH_WORD(t_h, ((ix >> 1) | 0x20000000) + 0x00080000 + (k << 18)); |
| 854 t_l = ax - (t_h - bp[k]); |
| 855 s_l = v * ((u - s_h * t_h) - s_h * t_l); |
| 856 /* compute log(ax) */ |
| 857 s2 = ss * ss; |
| 858 r = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6))))); |
| 859 r += s_l * (s_h + ss); |
| 860 s2 = s_h * s_h; |
| 861 t_h = 3.0 + s2 + r; |
| 862 SET_LOW_WORD(t_h, 0); |
| 863 t_l = r - ((t_h - 3.0) - s2); |
| 864 /* u+v = ss*(1+...) */ |
| 865 u = s_h * t_h; |
| 866 v = s_l * t_h + t_l * ss; |
| 867 /* 2/(3log2)*(ss+...) */ |
| 868 p_h = u + v; |
| 869 SET_LOW_WORD(p_h, 0); |
| 870 p_l = v - (p_h - u); |
| 871 z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3*log2) */ |
| 872 z_l = cp_l * p_h + p_l * cp + dp_l[k]; |
| 873 /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */ |
| 874 t = static_cast<double>(n); |
| 875 t1 = (((z_h + z_l) + dp_h[k]) + t); |
| 876 SET_LOW_WORD(t1, 0); |
| 877 t2 = z_l - (((t1 - t) - dp_h[k]) - z_h); |
| 878 |
| 879 // t1 + t2 = log2(ax), sum up because we do not care about extra precision. |
| 880 return t1 + t2; |
| 881 } |
| 882 |
| 883 /* |
| 884 * Return the base 10 logarithm of x. See e_log.c and k_log.h for most |
| 885 * comments. |
| 886 * |
| 887 * log10(x) = (f - 0.5*f*f + k_log1p(f)) / ln10 + k * log10(2) |
| 888 * in not-quite-routine extra precision. |
| 889 */ |
| 890 double log10Old(double x) { |
| 891 static const double |
| 892 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ |
| 893 ivln10hi = 4.34294481878168880939e-01, /* 0x3fdbcb7b, 0x15200000 */ |
| 894 ivln10lo = 2.50829467116452752298e-11, /* 0x3dbb9438, 0xca9aadd5 */ |
| 895 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */ |
| 896 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */ |
| 897 |
| 898 static const double zero = 0.0; |
| 899 static volatile double vzero = 0.0; |
| 900 |
| 901 double f, hfsq, hi, lo, r, val_hi, val_lo, w, y, y2; |
| 902 int32_t i, k, hx; |
| 903 u_int32_t lx; |
| 904 |
| 905 EXTRACT_WORDS(hx, lx, x); |
| 906 |
| 907 k = 0; |
| 908 if (hx < 0x00100000) { /* x < 2**-1022 */ |
| 909 if (((hx & 0x7fffffff) | lx) == 0) |
| 910 return -two54 / vzero; /* log(+-0)=-inf */ |
| 911 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */ |
| 912 k -= 54; |
| 913 x *= two54; /* subnormal number, scale up x */ |
| 914 GET_HIGH_WORD(hx, x); |
| 915 } |
| 916 if (hx >= 0x7ff00000) return x + x; |
| 917 if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */ |
| 918 k += (hx >> 20) - 1023; |
| 919 hx &= 0x000fffff; |
| 920 i = (hx + 0x95f64) & 0x100000; |
| 921 SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */ |
| 922 k += (i >> 20); |
| 923 y = static_cast<double>(k); |
| 924 f = x - 1.0; |
| 925 hfsq = 0.5 * f * f; |
| 926 r = k_log1p(f); |
| 927 |
| 928 /* See e_log2.c for most details. */ |
| 929 hi = f - hfsq; |
| 930 SET_LOW_WORD(hi, 0); |
| 931 lo = (f - hi) - hfsq + r; |
| 932 val_hi = hi * ivln10hi; |
| 933 y2 = y * log10_2hi; |
| 934 val_lo = y * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi; |
| 935 |
| 936 /* |
| 937 * Extra precision in for adding y*log10_2hi is not strictly needed |
| 938 * since there is no very large cancellation near x = sqrt(2) or |
| 939 * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs |
| 940 * with some parallelism and it reduces the error for many args. |
| 941 */ |
| 942 w = y2 + val_hi; |
| 943 val_lo += (y2 - w) + val_hi; |
| 944 val_hi = w; |
| 945 |
| 946 return val_lo + val_hi; |
| 947 } |
| 948 |
| 949 double log10(double x) { |
| 950 static const double |
| 951 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ |
| 952 ivln10 = 4.34294481903251816668e-01, |
| 953 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */ |
| 954 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */ |
| 955 |
| 956 static const double zero = 0.0; |
| 957 static volatile double vzero = 0.0; |
| 958 |
| 959 double y; |
| 960 int32_t i, k, hx; |
| 961 u_int32_t lx; |
| 962 |
| 963 EXTRACT_WORDS(hx, lx, x); |
| 964 |
| 965 k = 0; |
| 966 if (hx < 0x00100000) { /* x < 2**-1022 */ |
| 967 if (((hx & 0x7fffffff) | lx) == 0) |
| 968 return -two54 / vzero; /* log(+-0)=-inf */ |
| 969 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */ |
| 970 k -= 54; |
| 971 x *= two54; /* subnormal number, scale up x */ |
| 972 GET_HIGH_WORD(hx, x); |
| 973 GET_LOW_WORD(lx, x); |
| 974 } |
| 975 if (hx >= 0x7ff00000) return x + x; |
| 976 if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */ |
| 977 k += (hx >> 20) - 1023; |
| 978 |
| 979 i = (k & 0x80000000) >> 31; |
| 980 hx = (hx & 0x000fffff) | ((0x3ff - i) << 20); |
| 981 y = k + i; |
| 982 SET_HIGH_WORD(x, hx); |
| 983 SET_LOW_WORD(x, lx); |
| 984 |
| 985 double z = y * log10_2lo + ivln10 * log(x); |
| 986 return z + y * log10_2hi; |
| 987 } |
| 988 |
688 } // namespace ieee754 | 989 } // namespace ieee754 |
689 } // namespace base | 990 } // namespace base |
690 } // namespace v8 | 991 } // namespace v8 |
OLD | NEW |