Index: src/base/ieee754.cc |
diff --git a/src/base/ieee754.cc b/src/base/ieee754.cc |
index 4d11bdafe8b89e26a737a3c332f39fc7d6b45b0f..e642b6327a9b29360db3c09ebc0235b15b3c9f0e 100644 |
--- a/src/base/ieee754.cc |
+++ b/src/base/ieee754.cc |
@@ -392,6 +392,152 @@ double atan2(double y, double x) { |
} |
} |
+/* exp(x) |
+ * Returns the exponential of x. |
+ * |
+ * Method |
+ * 1. Argument reduction: |
+ * Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658. |
+ * Given x, find r and integer k such that |
+ * |
+ * x = k*ln2 + r, |r| <= 0.5*ln2. |
+ * |
+ * Here r will be represented as r = hi-lo for better |
+ * accuracy. |
+ * |
+ * 2. Approximation of exp(r) by a special rational function on |
+ * the interval [0,0.34658]: |
+ * Write |
+ * R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ... |
+ * We use a special Remes algorithm on [0,0.34658] to generate |
+ * a polynomial of degree 5 to approximate R. The maximum error |
+ * of this polynomial approximation is bounded by 2**-59. In |
+ * other words, |
+ * R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5 |
+ * (where z=r*r, and the values of P1 to P5 are listed below) |
+ * and |
+ * | 5 | -59 |
+ * | 2.0+P1*z+...+P5*z - R(z) | <= 2 |
+ * | | |
+ * The computation of exp(r) thus becomes |
+ * 2*r |
+ * exp(r) = 1 + ------- |
+ * R - r |
+ * r*R1(r) |
+ * = 1 + r + ----------- (for better accuracy) |
+ * 2 - R1(r) |
+ * where |
+ * 2 4 10 |
+ * R1(r) = r - (P1*r + P2*r + ... + P5*r ). |
+ * |
+ * 3. Scale back to obtain exp(x): |
+ * From step 1, we have |
+ * exp(x) = 2^k * exp(r) |
+ * |
+ * Special cases: |
+ * exp(INF) is INF, exp(NaN) is NaN; |
+ * exp(-INF) is 0, and |
+ * for finite argument, only exp(0)=1 is exact. |
+ * |
+ * Accuracy: |
+ * according to an error analysis, the error is always less than |
+ * 1 ulp (unit in the last place). |
+ * |
+ * Misc. info. |
+ * For IEEE double |
+ * if x > 7.09782712893383973096e+02 then exp(x) overflow |
+ * if x < -7.45133219101941108420e+02 then exp(x) underflow |
+ * |
+ * Constants: |
+ * The hexadecimal values are the intended ones for the following |
+ * constants. The decimal values may be used, provided that the |
+ * compiler will convert from decimal to binary accurately enough |
+ * to produce the hexadecimal values shown. |
+ */ |
+double exp(double x) { |
+ static const double |
+ one = 1.0, |
+ halF[2] = {0.5, -0.5}, |
+ o_threshold = 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */ |
+ u_threshold = -7.45133219101941108420e+02, /* 0xc0874910, 0xD52D3051 */ |
+ ln2HI[2] = {6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */ |
+ -6.93147180369123816490e-01}, /* 0xbfe62e42, 0xfee00000 */ |
+ ln2LO[2] = {1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */ |
+ -1.90821492927058770002e-10}, /* 0xbdea39ef, 0x35793c76 */ |
+ invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */ |
+ P1 = 1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */ |
+ P2 = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */ |
+ P3 = 6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */ |
+ P4 = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */ |
+ P5 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */ |
+ |
+ static volatile double |
+ huge = 1.0e+300, |
+ twom1000 = 9.33263618503218878990e-302, /* 2**-1000=0x01700000,0*/ |
+ two1023 = 8.988465674311579539e307; /* 0x1p1023 */ |
+ |
+ double y, hi = 0.0, lo = 0.0, c, t, twopk; |
+ int32_t k = 0, xsb; |
+ u_int32_t hx; |
+ |
+ GET_HIGH_WORD(hx, x); |
+ xsb = (hx >> 31) & 1; /* sign bit of x */ |
+ hx &= 0x7fffffff; /* high word of |x| */ |
+ |
+ /* filter out non-finite argument */ |
+ if (hx >= 0x40862E42) { /* if |x|>=709.78... */ |
+ if (hx >= 0x7ff00000) { |
+ u_int32_t lx; |
+ GET_LOW_WORD(lx, x); |
+ if (((hx & 0xfffff) | lx) != 0) |
+ return x + x; /* NaN */ |
+ else |
+ return (xsb == 0) ? x : 0.0; /* exp(+-inf)={inf,0} */ |
+ } |
+ if (x > o_threshold) return huge * huge; /* overflow */ |
+ if (x < u_threshold) return twom1000 * twom1000; /* underflow */ |
+ } |
+ |
+ /* argument reduction */ |
+ if (hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */ |
+ if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */ |
+ hi = x - ln2HI[xsb]; |
+ lo = ln2LO[xsb]; |
+ k = 1 - xsb - xsb; |
+ } else { |
+ k = static_cast<int>(invln2 * x + halF[xsb]); |
+ t = k; |
+ hi = x - t * ln2HI[0]; /* t*ln2HI is exact here */ |
+ lo = t * ln2LO[0]; |
+ } |
+ STRICT_ASSIGN(double, x, hi - lo); |
+ } else if (hx < 0x3e300000) { /* when |x|<2**-28 */ |
+ if (huge + x > one) return one + x; /* trigger inexact */ |
+ } else { |
+ k = 0; |
+ } |
+ |
+ /* x is now in primary range */ |
+ t = x * x; |
+ if (k >= -1021) { |
+ INSERT_WORDS(twopk, 0x3ff00000 + (k << 20), 0); |
+ } else { |
+ INSERT_WORDS(twopk, 0x3ff00000 + ((k + 1000) << 20), 0); |
+ } |
+ c = x - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5)))); |
+ if (k == 0) { |
+ return one - ((x * c) / (c - 2.0) - x); |
+ } else { |
+ y = one - ((lo - (x * c) / (2.0 - c)) - hi); |
+ } |
+ if (k >= -1021) { |
+ if (k == 1024) return y * 2.0 * two1023; |
+ return y * twopk; |
+ } else { |
+ return y * twopk * twom1000; |
+ } |
+} |
+ |
/* log(x) |
* Return the logrithm of x |
* |