src/base/ieee754.cc - Issue 2068743002: [builtins] Unify Atanh, Cbrt and Expm1 as exports from flibm.

Side by Side Diff: src/base/ieee754.cc

Issue 2068743002: [builtins] Unify Atanh, Cbrt and Expm1 as exports from flibm. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Fixed type warning. Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm).	1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm).

2 //	2 //

3 // ====================================================	3 // ====================================================

4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.	4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.

5 //	5 //

6 // Developed at SunSoft, a Sun Microsystems, Inc. business.	6 // Developed at SunSoft, a Sun Microsystems, Inc. business.

7 // Permission to use, copy, modify, and distribute this	7 // Permission to use, copy, modify, and distribute this

8 // software is freely granted, provided that this notice	8 // software is freely granted, provided that this notice

9 // is preserved.	9 // is preserved.

10 // ====================================================	10 // ====================================================

(...skipping 520 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
531 y = one - ((lo - (x * c) / (2.0 - c)) - hi);	531 y = one - ((lo - (x * c) / (2.0 - c)) - hi);

532 }	532 }

533 if (k >= -1021) {	533 if (k >= -1021) {

534 if (k == 1024) return y * 2.0 * two1023;	534 if (k == 1024) return y * 2.0 * two1023;

535 return y * twopk;	535 return y * twopk;

536 } else {	536 } else {

537 return y * twopk * twom1000;	537 return y * twopk * twom1000;

538 }	538 }

539 }	539 }

540	540

	541 /*

	542 * Method :

	543 * 1.Reduced x to positive by atanh(-x) = -atanh(x)

	544 * 2.For x>=0.5

	545 *             1              2x                          x

	546 * atanh(x) = --- * log(1 + -------) = 0.5 * log1p(2 * --------)

	547 *             2             1 - x                      1 - x

	548 *

	549 * For x<0.5

	550 * atanh(x) = 0.5*log1p(2x+2x*x/(1-x))

	551 *

	552 * Special cases:

	553 * atanh(x) is NaN if \|x\| > 1 with signal;

	554 * atanh(NaN) is that NaN with no signal;

	555 * atanh(+-1) is +-INF with signal.

	556 *

	557 */

	558 double atanh(double x) {

	559 static const double one = 1.0, huge = 1e300;

	560 static const double zero = 0.0;

	561

	562 double t;

	563 int32_t hx, ix;

	564 u_int32_t lx;

	565 EXTRACT_WORDS(hx, lx, x);

	566 ix = hx & 0x7fffffff;

	567 if ((ix | ((lx | -static_cast<int32_t>(lx)) >> 31)) > 0x3ff00000) /* |x|>1 */

	568 return (x - x) / (x - x);

	569 if (ix == 0x3ff00000) return x / zero;

	570 if (ix < 0x3e300000 && (huge + x) > zero) return x; /* x<2**-28 */

	571 SET_HIGH_WORD(x, ix);

	572 if (ix < 0x3fe00000) { /* x < 0.5 */

	573 t = x + x;

	574 t = 0.5 * log1p(t + t * x / (one - x));

	575 } else {

	576 t = 0.5 * log1p((x + x) / (one - x));

	577 }

	578 if (hx >= 0)

	579 return t;

	580 else

	581 return -t;

	582 }

	583

541 /* log(x)	584 /* log(x)

542 * Return the logarithm of x	585 * Return the logarithm of x

543 *	586 *

544 * Method :	587 * Method :

545 * 1. Argument Reduction: find k and f such that	588 * 1. Argument Reduction: find k and f such that

546 * x = 2^k * (1+f),	589 * x = 2^k * (1+f),

547 * where sqrt(2)/2 < 1+f < sqrt(2) .	590 * where sqrt(2)/2 < 1+f < sqrt(2) .

548 *	591 *

549 * 2. Approximation of log(1+f).	592 * 2. Approximation of log(1+f).

550 * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)	593 * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)

(...skipping 273 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
824 s = f / (2.0 + f);	867 s = f / (2.0 + f);

825 z = s * s;	868 z = s * s;

826 R = z * (Lp1 +	869 R = z * (Lp1 +

827 z * (Lp2 + z * (Lp3 + z * (Lp4 + z * (Lp5 + z * (Lp6 + z * Lp7))))));	870 z * (Lp2 + z * (Lp3 + z * (Lp4 + z * (Lp5 + z * (Lp6 + z * Lp7))))));

828 if (k == 0)	871 if (k == 0)

829 return f - (hfsq - s * (hfsq + R));	872 return f - (hfsq - s * (hfsq + R));

830 else	873 else

831 return k * ln2_hi - ((hfsq - (s * (hfsq + R) + (k * ln2_lo + c))) - f);	874 return k * ln2_hi - ((hfsq - (s * (hfsq + R) + (k * ln2_lo + c))) - f);

832 }	875 }

833	876

834 /*

835 * k_log1p(f):

836 * Return log(1+f) - f for 1+f in ~[sqrt(2)/2, sqrt(2)].

837 *

838 * The following describes the overall strategy for computing

839 * logarithms in base e. The argument reduction and adding the final

840 * term of the polynomial are done by the caller for increased accuracy

841 * when different bases are used.

842 *

843 * Method :

844 * 1. Argument Reduction: find k and f such that

845 * x = 2^k * (1+f),

846 * where sqrt(2)/2 < 1+f < sqrt(2) .

847 *

848 * 2. Approximation of log(1+f).

849 * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)

850 * = 2s + 2/3 s**3 + 2/5 s**5 + .....,

851 * = 2s + s*R

852 * We use a special Reme algorithm on [0,0.1716] to generate

853 * a polynomial of degree 14 to approximate R The maximum error

854 * of this polynomial approximation is bounded by 2**-58.45. In

855 * other words,

856 *                 2      4      6      8      10      12      14

857 *     R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s  +Lg6*s  +Lg7*s

858 * (the values of Lg1 to Lg7 are listed in the program)

859 * and

860 *     |      2          14          |     -58.45

861 *     | Lg1*s +...+Lg7*s    -  R(z) | <= 2

862 *     |                             |

863 * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.

864 * In order to guarantee error in log below 1ulp, we compute log

865 * by

866 * log(1+f) = f - s*(f - R) (if f is not too large)

867 * log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)

868 *

869 * 3. Finally, log(x) = k*ln2 + log(1+f).

870 * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))

871 * Here ln2 is split into two floating point number:

872 * ln2_hi + ln2_lo,

873 * where n*ln2_hi is always exact for \|n\| < 2000.

874 *

875 * Special cases:

876 * log(x) is NaN with signal if x < 0 (including -INF) ;

877 * log(+INF) is +INF; log(0) is -INF with signal;

878 * log(NaN) is that NaN with no signal.

879 *

880 * Accuracy:

881 * according to an error analysis, the error is always less than

882 * 1 ulp (unit in the last place).

883 *

884 * Constants:

885 * The hexadecimal values are the intended ones for the following

886 * constants. The decimal values may be used, provided that the

887 * compiler will convert from decimal to binary accurately enough

888 * to produce the hexadecimal values shown.

889 */

890

891 static const double Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */

892 Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */

893 Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */

894 Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */

895 Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */

896 Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */

897 Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */

898

899 /*

900 * We always inline k_log1p(), since doing so produces a

901 * substantial performance improvement (~40% on amd64).

902 */

903 static inline double k_log1p(double f) {

904 double hfsq, s, z, R, w, t1, t2;

905

906 s = f / (2.0 + f);

907 z = s * s;

908 w = z * z;

909 t1 = w * (Lg2 + w * (Lg4 + w * Lg6));

910 t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));

911 R = t2 + t1;

912 hfsq = 0.5 * f * f;

913 return s * (hfsq + R);

914 }

915

916 // ES6 draft 09-27-13, section 20.2.2.22.	877 // ES6 draft 09-27-13, section 20.2.2.22.

917 // Return the base 2 logarithm of x	878 // Return the base 2 logarithm of x

918 //	879 //

919 // fdlibm does not have an explicit log2 function, but fdlibm's pow	880 // fdlibm does not have an explicit log2 function, but fdlibm's pow

920 // function does implement an accurate log2 function as part of the	881 // function does implement an accurate log2 function as part of the

921 // pow implementation. This extracts the core parts of that as a	882 // pow implementation. This extracts the core parts of that as a

922 // separate log2 function.	883 // separate log2 function.

923 //	884 //

924 // Method:	885 // Method:

925 // Compute log2(x) in two pieces:	886 // Compute log2(x) in two pieces:

(...skipping 93 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1019 /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */	980 /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */

1020 t = static_cast<double>(n);	981 t = static_cast<double>(n);

1021 t1 = (((z_h + z_l) + dp_h[k]) + t);	982 t1 = (((z_h + z_l) + dp_h[k]) + t);

1022 SET_LOW_WORD(t1, 0);	983 SET_LOW_WORD(t1, 0);

1023 t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);	984 t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);

1024	985

1025 // t1 + t2 = log2(ax), sum up because we do not care about extra precision.	986 // t1 + t2 = log2(ax), sum up because we do not care about extra precision.

1026 return t1 + t2;	987 return t1 + t2;

1027 }	988 }

1028	989

1029 /*

1030 * Return the base 10 logarithm of x. See e_log.c and k_log.h for most

1031 * comments.

1032 *

1033 * log10(x) = (f - 0.5*f*f + k_log1p(f)) / ln10 + k * log10(2)

1034 * in not-quite-routine extra precision.

1035 */

1036 double log10Old(double x) {

1037 static const double

1038 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */

1039 ivln10hi = 4.34294481878168880939e-01, /* 0x3fdbcb7b, 0x15200000 */

1040 ivln10lo = 2.50829467116452752298e-11, /* 0x3dbb9438, 0xca9aadd5 */

1041 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */

1042 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */

1043

1044 static const double zero = 0.0;

1045 static volatile double vzero = 0.0;

1046

1047 double f, hfsq, hi, lo, r, val_hi, val_lo, w, y, y2;

1048 int32_t i, k, hx;

1049 u_int32_t lx;

1050

1051 EXTRACT_WORDS(hx, lx, x);

1052

1053 k = 0;

1054 if (hx < 0x00100000) { /* x < 2**-1022 */

1055 if (((hx & 0x7fffffff) | lx) == 0)

1056 return -two54 / vzero; /* log(+-0)=-inf */

1057 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */

1058 k -= 54;

1059 x *= two54; /* subnormal number, scale up x */

1060 GET_HIGH_WORD(hx, x);

1061 }

1062 if (hx >= 0x7ff00000) return x + x;

1063 if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */

1064 k += (hx >> 20) - 1023;

1065 hx &= 0x000fffff;

1066 i = (hx + 0x95f64) & 0x100000;

1067 SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */

1068 k += (i >> 20);

1069 y = static_cast<double>(k);

1070 f = x - 1.0;

1071 hfsq = 0.5 * f * f;

1072 r = k_log1p(f);

1073

1074 /* See e_log2.c for most details. */

1075 hi = f - hfsq;

1076 SET_LOW_WORD(hi, 0);

1077 lo = (f - hi) - hfsq + r;

1078 val_hi = hi * ivln10hi;

1079 y2 = y * log10_2hi;

1080 val_lo = y * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi;

1081

1082 /*

1083 * Extra precision in for adding y*log10_2hi is not strictly needed

1084 * since there is no very large cancellation near x = sqrt(2) or

1085 * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs

1086 * with some parallelism and it reduces the error for many args.

1087 */

1088 w = y2 + val_hi;

1089 val_lo += (y2 - w) + val_hi;

1090 val_hi = w;

1091

1092 return val_lo + val_hi;

1093 }

1094

1095 double log10(double x) {	990 double log10(double x) {

1096 static const double	991 static const double

1097 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */	992 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */

1098 ivln10 = 4.34294481903251816668e-01,	993 ivln10 = 4.34294481903251816668e-01,

1099 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */	994 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */

1100 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */	995 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */

1101	996

1102 static const double zero = 0.0;	997 static const double zero = 0.0;

1103 static volatile double vzero = 0.0;	998 static volatile double vzero = 0.0;

1104	999

(...skipping 20 matching lines...) Expand all Loading...
1125 i = (k & 0x80000000) >> 31;	1020 i = (k & 0x80000000) >> 31;

1126 hx = (hx & 0x000fffff) | ((0x3ff - i) << 20);	1021 hx = (hx & 0x000fffff) | ((0x3ff - i) << 20);

1127 y = k + i;	1022 y = k + i;

1128 SET_HIGH_WORD(x, hx);	1023 SET_HIGH_WORD(x, hx);

1129 SET_LOW_WORD(x, lx);	1024 SET_LOW_WORD(x, lx);

1130	1025

1131 double z = y * log10_2lo + ivln10 * log(x);	1026 double z = y * log10_2lo + ivln10 * log(x);

1132 return z + y * log10_2hi;	1027 return z + y * log10_2hi;

1133 }	1028 }

1134	1029

	1030 /* expm1(x)

	1031 * Returns exp(x)-1, the exponential of x minus 1.

	1032 *

	1033 * Method

	1034 * 1. Argument reduction:

	1035 * Given x, find r and integer k such that

	1036 *

	1037 * x = k*ln2 + r, |r| <= 0.5*ln2 ~ 0.34658

	1038 *

	1039 * Here a correction term c will be computed to compensate

	1040 * the error in r when rounded to a floating-point number.

	1041 *

	1042 * 2. Approximating expm1(r) by a special rational function on

	1043 * the interval [0,0.34658]:

	1044 * Since

	1045 * r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 - r^4/360 + ...

	1046 * we define R1(r*r) by

	1047 * r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 * R1(r*r)

	1048 * That is,

	1049 * R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r)

	1050 * = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r))

	1051 * = 1 - r^2/60 + r^4/2520 - r^6/100800 + ...

	1052 * We use a special Reme algorithm on [0,0.347] to generate

	1053 * a polynomial of degree 5 in r*r to approximate R1. The

	1054 * maximum error of this polynomial approximation is bounded

	1055 * by 2**-61. In other words,

	1056 * R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5

	1057 * where Q1 = -1.6666666666666567384E-2,

	1058 * Q2 = 3.9682539681370365873E-4,

	1059 * Q3 = -9.9206344733435987357E-6,

	1060 * Q4 = 2.5051361420808517002E-7,

	1061 * Q5 = -6.2843505682382617102E-9;

	1062 * z = r*r,

	1063 * with error bounded by

	1064 *     |                  5           |     -61

	1065 *     | 1.0+Q1*z+...+Q5*z   -  R1(z) | <= 2

	1066 *     |                              |

	1067 *

	1068 * expm1(r) = exp(r)-1 is then computed by the following

	1069 * specific way which minimize the accumulation rounding error:

	1070 *                       2     3

	1071 *                      r     r    [ 3 - (R1 + R1*r/2)  ]

	1072 *      expm1(r) = r + --- + --- * [--------------------]

	1073 *                      2     2    [ 6 - r*(3 - R1*r/2) ]

	1074 *

	1075 * To compensate the error in the argument reduction, we use

	1076 * expm1(r+c) = expm1(r) + c + expm1(r)*c

	1077 * ~ expm1(r) + c + r*c

	1078 * Thus c+r*c will be added in as the correction terms for

	1079 * expm1(r+c). Now rearrange the term to avoid optimization

	1080 * screw up:

	1081 *                 (      2                                    2 )

	1082 *                 ({  ( r    [ R1 -  (3 - R1*r/2) ]  )  }    r  )

	1083 *  expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- )

	1084 *                 ({  ( 2    [ 6 - r*(3 - R1*r/2) ]  )  }    2  )

	1085 *                 (                                             )

	1086 *

	1087 * = r - E

	1088 * 3. Scale back to obtain expm1(x):

	1089 * From step 1, we have

	1090 * expm1(x) = either 2^k*[expm1(r)+1] - 1

	1091 * = or 2^k*[expm1(r) + (1-2^-k)]

	1092 * 4. Implementation notes:

	1093 * (A). To save one multiplication, we scale the coefficient Qi

	1094 * to Qi*2^i, and replace z by (x^2)/2.

	1095 * (B). To achieve maximum accuracy, we compute expm1(x) by

	1096 * (i) if x < -56*ln2, return -1.0, (raise inexact if x!=inf)

	1097 * (ii) if k=0, return r-E

	1098 * (iii) if k=-1, return 0.5*(r-E)-0.5

	1099 * (iv) if k=1 if r < -0.25, return 2*((r+0.5)- E)

	1100 * else return 1.0+2.0*(r-E);

	1101 * (v) if (k<-2||k>56) return 2^k(1-(E-r)) - 1 (or exp(x)-1)

	1102 * (vi) if k <= 20, return 2^k((1-2^-k)-(E-r)), else

	1103 * (vii) return 2^k(1-((E+2^-k)-r))

	1104 *

	1105 * Special cases:

	1106 * expm1(INF) is INF, expm1(NaN) is NaN;

	1107 * expm1(-INF) is -1, and

	1108 * for finite argument, only expm1(0)=0 is exact.

	1109 *

	1110 * Accuracy:

	1111 * according to an error analysis, the error is always less than

	1112 * 1 ulp (unit in the last place).

	1113 *

	1114 * Misc. info.

	1115 * For IEEE double

	1116 * if x > 7.09782712893383973096e+02 then expm1(x) overflow

	1117 *

	1118 * Constants:

	1119 * The hexadecimal values are the intended ones for the following

	1120 * constants. The decimal values may be used, provided that the

	1121 * compiler will convert from decimal to binary accurately enough

	1122 * to produce the hexadecimal values shown.

	1123 */

	1124 double expm1(double x) {

	1125 static const double

	1126 one = 1.0,

	1127 tiny = 1.0e-300,

	1128 o_threshold = 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */

	1129 ln2_hi = 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */

	1130 ln2_lo = 1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */

	1131 invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */

	1132 /* Scaled Q's: Qn_here = 2**n * Qn_above, for R(2*z) where z = hxs =

	1133 x*x/2: */

	1134 Q1 = -3.33333333333331316428e-02, /* BFA11111 111110F4 */

	1135 Q2 = 1.58730158725481460165e-03, /* 3F5A01A0 19FE5585 */

	1136 Q3 = -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */

	1137 Q4 = 4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */

	1138 Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */

	1139

	1140 static volatile double huge = 1.0e+300;

	1141

	1142 double y, hi, lo, c, t, e, hxs, hfx, r1, twopk;

	1143 int32_t k, xsb;

	1144 u_int32_t hx;

	1145

	1146 GET_HIGH_WORD(hx, x);

	1147 xsb = hx & 0x80000000; /* sign bit of x */

	1148 hx &= 0x7fffffff; /* high word of \|x\| */

	1149

	1150 /* filter out huge and non-finite argument */

	1151 if (hx >= 0x4043687A) { /* if |x|>=56*ln2 */

	1152 if (hx >= 0x40862E42) { /* if \|x\|>=709.78... */

	1153 if (hx >= 0x7ff00000) {

	1154 u_int32_t low;

	1155 GET_LOW_WORD(low, x);

	1156 if (((hx & 0xfffff) | low) != 0)

	1157 return x + x; /* NaN */

	1158 else

	1159 return (xsb == 0) ? x : -1.0; /* exp(+-inf)={inf,-1} */

	1160 }

	1161 if (x > o_threshold) return huge * huge; /* overflow */

	1162 }

	1163 if (xsb != 0) { /* x < -56*ln2, return -1.0 with inexact */

	1164 if (x + tiny < 0.0) /* raise inexact */

	1165 return tiny - one; /* return -1 */

	1166 }

	1167 }

	1168

	1169 /* argument reduction */

	1170 if (hx > 0x3fd62e42) { /* if \|x\| > 0.5 ln2 */

	1171 if (hx < 0x3FF0A2B2) { /* and \|x\| < 1.5 ln2 */

	1172 if (xsb == 0) {

	1173 hi = x - ln2_hi;

	1174 lo = ln2_lo;

	1175 k = 1;

	1176 } else {

	1177 hi = x + ln2_hi;

	1178 lo = -ln2_lo;

	1179 k = -1;

	1180 }

	1181 } else {

	1182 k = invln2 * x + ((xsb == 0) ? 0.5 : -0.5);

	1183 t = k;

	1184 hi = x - t * ln2_hi; /* t*ln2_hi is exact here */

	1185 lo = t * ln2_lo;

	1186 }

	1187 STRICT_ASSIGN(double, x, hi - lo);

	1188 c = (hi - x) - lo;

	1189 } else if (hx < 0x3c900000) { /* when |x|<2**-54, return x */

	1190 t = huge + x; /* return x with inexact flags when x!=0 */

	1191 return x - (t - (huge + x));

	1192 } else {

	1193 k = 0;

	1194 }

	1195

	1196 /* x is now in primary range */

	1197 hfx = 0.5 * x;

	1198 hxs = x * hfx;

	1199 r1 = one + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5))));

	1200 t = 3.0 - r1 * hfx;

	1201 e = hxs * ((r1 - t) / (6.0 - x * t));

	1202 if (k == 0) {

	1203 return x - (x * e - hxs); /* c is 0 */

	1204 } else {

	1205 INSERT_WORDS(twopk, 0x3ff00000 + (k << 20), 0); /* 2^k */

	1206 e = (x * (e - c) - c);

	1207 e -= hxs;

	1208 if (k == -1) return 0.5 * (x - e) - 0.5;

	1209 if (k == 1) {

	1210 if (x < -0.25)

	1211 return -2.0 * (e - (x + 0.5));

	1212 else

	1213 return one + 2.0 * (x - e);

	1214 }

	1215 if (k <= -2 || k > 56) { /* suffice to return exp(x)-1 */

	1216 y = one - (e - x);

	1217 // TODO(mvstanton): is this replacement for the hex float

	1218 // sufficient?

	1219 // if (k == 1024) y = y*2.0*0x1p1023;

	1220 if (k == 1024)

	1221 y = y * 2.0 * 8.98846567431158e+307;

	1222 else

	1223 y = y * twopk;

	1224 return y - one;

	1225 }

	1226 t = one;

	1227 if (k < 20) {

	1228 SET_HIGH_WORD(t, 0x3ff00000 - (0x200000 >> k)); /* t=1-2^-k */

	1229 y = t - (e - x);

	1230 y = y * twopk;

	1231 } else {

	1232 SET_HIGH_WORD(t, ((0x3ff - k) << 20)); /* 2^-k */

	1233 y = x - (e + t);

	1234 y += one;

	1235 y = y * twopk;

	1236 }

	1237 }

	1238 return y;

	1239 }

	1240

	1241 double cbrt(double x) {

	1242 static const u_int32_t

	1243 B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */

	1244 B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */

	1245

	1246 /* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */

	1247 static const double P0 = 1.87595182427177009643, /* 0x3ffe03e6, 0x0f61e692 */

	1248 P1 = -1.88497979543377169875, /* 0xbffe28e0, 0x92f02420 */

	1249 P2 = 1.621429720105354466140, /* 0x3ff9f160, 0x4a49d6c2 */

	1250 P3 = -0.758397934778766047437, /* 0xbfe844cb, 0xbee751d9 */

	1251 P4 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */

	1252

	1253 int32_t hx;

	1254 union {

	1255 double value;

	1256 uint64_t bits;

	1257 } u;

	1258 double r, s, t = 0.0, w;

	1259 u_int32_t sign;

	1260 u_int32_t high, low;

	1261

	1262 EXTRACT_WORDS(hx, low, x);

	1263 sign = hx & 0x80000000; /* sign= sign(x) */

	1264 hx ^= sign;

	1265 if (hx >= 0x7ff00000) return (x + x); /* cbrt(NaN,INF) is itself */

	1266

	1267 /*

	1268 * Rough cbrt to 5 bits:

	1269 * cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3)

	1270 * where e is integral and >= 0, m is real and in [0, 1), and "/" and

	1271 * "%" are integer division and modulus with rounding towards minus

	1272 * infinity. The RHS is always >= the LHS and has a maximum relative

	1273 * error of about 1 in 16. Adding a bias of -0.03306235651 to the

	1274 * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE

	1275 * floating point representation, for finite positive normal values,

	1276 * ordinary integer division of the value in bits magically gives

	1277 * almost exactly the RHS of the above provided we first subtract the

	1278 * exponent bias (1023 for doubles) and later add it back. We do the

	1279 * subtraction virtually to keep e >= 0 so that ordinary integer

	1280 * division rounds towards minus infinity; this is also efficient.

	1281 */

	1282 if (hx < 0x00100000) { /* zero or subnormal? */

	1283 if ((hx | low) == 0) return (x); /* cbrt(0) is itself */

	1284 SET_HIGH_WORD(t, 0x43500000); /* set t= 2**54 */

	1285 t *= x;

	1286 GET_HIGH_WORD(high, t);

	1287 INSERT_WORDS(t, sign | ((high & 0x7fffffff) / 3 + B2), 0);

	1288 } else {

	1289 INSERT_WORDS(t, sign | (hx / 3 + B1), 0);

	1290 }

	1291

	1292 /*

	1293 * New cbrt to 23 bits:

	1294 * cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x)

	1295 * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r)

	1296 * to within 2**-23.5 when \|r - 1\| < 1/10. The rough approximation

	1297 * has produced t such that |t/cbrt(x) - 1| ~< 1/32, and cubing this

	1298 * gives us bounds for r = t**3/x.

	1299 *

	1300 * Try to optimize for parallel evaluation as in k_tanf.c.

	1301 */

	1302 r = (t * t) * (t / x);

	1303 t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4));

	1304

	1305 /*

	1306 * Round t away from zero to 23 bits (sloppily except for ensuring that

	1307 * the result is larger in magnitude than cbrt(x) but not much more than

	1308 * 2 23-bit ulps larger). With rounding towards zero, the error bound

	1309 * would be ~5/6 instead of ~4/6. With a maximum error of 2 23-bit ulps

	1310 * in the rounded t, the infinite-precision error in the Newton

	1311 * approximation barely affects third digit in the final error

	1312 * 0.667; the error in the rounded t can be up to about 3 23-bit ulps

	1313 * before the final error is larger than 0.667 ulps.

	1314 */

	1315 u.value = t;

	1316 u.bits = (u.bits + 0x80000000) & 0xffffffffc0000000ULL;

	1317 t = u.value;

	1318

	1319 /* one step Newton iteration to 53 bits with error < 0.667 ulps */

	1320 s = t * t; /* t*t is exact */

	1321 r = x / s; /* error <= 0.5 ulps; \|r\| < \|t\| */

	1322 w = t + t; /* t+t is exact */

	1323 r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */

	1324 t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */

	1325

	1326 return (t);

	1327 }

	1328

1135 } // namespace ieee754	1329 } // namespace ieee754

1136 } // namespace base	1330 } // namespace base

1137 } // namespace v8	1331 } // namespace v8

OLD	NEW

« no previous file with comments | « src/base/ieee754.h ('k') | src/bootstrapper.cc » ('j') | no next file with comments »