| OLD | NEW |
| 1 /* origin: FreeBSD /usr/src/lib/msun/src/s_cbrtl.c */ | 1 /* origin: FreeBSD /usr/src/lib/msun/src/s_cbrtl.c */ |
| 2 /*- | 2 /*- |
| 3 * ==================================================== | 3 * ==================================================== |
| 4 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. | 4 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. |
| 5 * Copyright (c) 2009-2011, Bruce D. Evans, Steven G. Kargl, David Schultz. | 5 * Copyright (c) 2009-2011, Bruce D. Evans, Steven G. Kargl, David Schultz. |
| 6 * | 6 * |
| 7 * Developed at SunPro, a Sun Microsystems, Inc. business. | 7 * Developed at SunPro, a Sun Microsystems, Inc. business. |
| 8 * Permission to use, copy, modify, and distribute this | 8 * Permission to use, copy, modify, and distribute this |
| 9 * software is freely granted, provided that this notice | 9 * software is freely granted, provided that this notice |
| 10 * is preserved. | 10 * is preserved. |
| 11 * ==================================================== | 11 * ==================================================== |
| 12 * | 12 * |
| 13 * The argument reduction and testing for exceptional cases was | 13 * The argument reduction and testing for exceptional cases was |
| 14 * written by Steven G. Kargl with input from Bruce D. Evans | 14 * written by Steven G. Kargl with input from Bruce D. Evans |
| 15 * and David A. Schultz. | 15 * and David A. Schultz. |
| 16 */ | 16 */ |
| 17 | 17 |
| 18 #include "libm.h" | 18 #include "libm.h" |
| 19 | 19 |
| 20 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 | 20 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 |
| 21 long double cbrtl(long double x) | 21 long double cbrtl(long double x) { |
| 22 { | 22 return cbrt(x); |
| 23 » return cbrt(x); | |
| 24 } | 23 } |
| 25 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 | 24 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 |
| 26 static const unsigned B1 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23
*/ | 25 static const unsigned B1 = |
| 26 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */ |
| 27 | 27 |
| 28 long double cbrtl(long double x) | 28 long double cbrtl(long double x) { |
| 29 { | 29 union ldshape u = {x}, v; |
| 30 » union ldshape u = {x}, v; | 30 union { |
| 31 » union {float f; uint32_t i;} uft; | 31 float f; |
| 32 » long double r, s, t, w; | 32 uint32_t i; |
| 33 » double_t dr, dt, dx; | 33 } uft; |
| 34 » float_t ft; | 34 long double r, s, t, w; |
| 35 » int e = u.i.se & 0x7fff; | 35 double_t dr, dt, dx; |
| 36 » int sign = u.i.se & 0x8000; | 36 float_t ft; |
| 37 int e = u.i.se & 0x7fff; |
| 38 int sign = u.i.se & 0x8000; |
| 37 | 39 |
| 38 » /* | 40 /* |
| 39 » * If x = +-Inf, then cbrt(x) = +-Inf. | 41 * If x = +-Inf, then cbrt(x) = +-Inf. |
| 40 » * If x = NaN, then cbrt(x) = NaN. | 42 * If x = NaN, then cbrt(x) = NaN. |
| 41 » */ | 43 */ |
| 42 » if (e == 0x7fff) | 44 if (e == 0x7fff) |
| 43 » » return x + x; | 45 return x + x; |
| 44 » if (e == 0) { | 46 if (e == 0) { |
| 45 » » /* Adjust subnormal numbers. */ | 47 /* Adjust subnormal numbers. */ |
| 46 » » u.f *= 0x1p120; | 48 u.f *= 0x1p120; |
| 47 » » e = u.i.se & 0x7fff; | 49 e = u.i.se & 0x7fff; |
| 48 » » /* If x = +-0, then cbrt(x) = +-0. */ | 50 /* If x = +-0, then cbrt(x) = +-0. */ |
| 49 » » if (e == 0) | 51 if (e == 0) |
| 50 » » » return x; | 52 return x; |
| 51 » » e -= 120; | 53 e -= 120; |
| 52 » } | 54 } |
| 53 » e -= 0x3fff; | 55 e -= 0x3fff; |
| 54 » u.i.se = 0x3fff; | 56 u.i.se = 0x3fff; |
| 55 » x = u.f; | 57 x = u.f; |
| 56 » switch (e % 3) { | 58 switch (e % 3) { |
| 57 » case 1: | 59 case 1: |
| 58 » case -2: | 60 case -2: |
| 59 » » x *= 2; | 61 x *= 2; |
| 60 » » e--; | 62 e--; |
| 61 » » break; | 63 break; |
| 62 » case 2: | 64 case 2: |
| 63 » case -1: | 65 case -1: |
| 64 » » x *= 4; | 66 x *= 4; |
| 65 » » e -= 2; | 67 e -= 2; |
| 66 » » break; | 68 break; |
| 67 » } | 69 } |
| 68 » v.f = 1.0; | 70 v.f = 1.0; |
| 69 » v.i.se = sign | (0x3fff + e/3); | 71 v.i.se = sign | (0x3fff + e / 3); |
| 70 | 72 |
| 71 » /* | 73 /* |
| 72 » * The following is the guts of s_cbrtf, with the handling of | 74 * The following is the guts of s_cbrtf, with the handling of |
| 73 » * special values removed and extra care for accuracy not taken, | 75 * special values removed and extra care for accuracy not taken, |
| 74 » * but with most of the extra accuracy not discarded. | 76 * but with most of the extra accuracy not discarded. |
| 75 » */ | 77 */ |
| 76 | 78 |
| 77 » /* ~5-bit estimate: */ | 79 /* ~5-bit estimate: */ |
| 78 » uft.f = x; | 80 uft.f = x; |
| 79 » uft.i = (uft.i & 0x7fffffff)/3 + B1; | 81 uft.i = (uft.i & 0x7fffffff) / 3 + B1; |
| 80 » ft = uft.f; | 82 ft = uft.f; |
| 81 | 83 |
| 82 » /* ~16-bit estimate: */ | 84 /* ~16-bit estimate: */ |
| 83 » dx = x; | 85 dx = x; |
| 84 » dt = ft; | 86 dt = ft; |
| 85 » dr = dt * dt * dt; | 87 dr = dt * dt * dt; |
| 86 » dt = dt * (dx + dx + dr) / (dx + dr + dr); | 88 dt = dt * (dx + dx + dr) / (dx + dr + dr); |
| 87 | 89 |
| 88 » /* ~47-bit estimate: */ | 90 /* ~47-bit estimate: */ |
| 89 » dr = dt * dt * dt; | 91 dr = dt * dt * dt; |
| 90 » dt = dt * (dx + dx + dr) / (dx + dr + dr); | 92 dt = dt * (dx + dx + dr) / (dx + dr + dr); |
| 91 | 93 |
| 92 #if LDBL_MANT_DIG == 64 | 94 #if LDBL_MANT_DIG == 64 |
| 93 » /* | 95 /* |
| 94 » * dt is cbrtl(x) to ~47 bits (after x has been reduced to 1 <= x < 8). | 96 * dt is cbrtl(x) to ~47 bits (after x has been reduced to 1 <= x < 8). |
| 95 » * Round it away from zero to 32 bits (32 so that t*t is exact, and | 97 * Round it away from zero to 32 bits (32 so that t*t is exact, and |
| 96 » * away from zero for technical reasons). | 98 * away from zero for technical reasons). |
| 97 » */ | 99 */ |
| 98 » t = dt + (0x1.0p32L + 0x1.0p-31L) - 0x1.0p32; | 100 t = dt + (0x1.0p32L + 0x1.0p-31L) - 0x1.0p32; |
| 99 #elif LDBL_MANT_DIG == 113 | 101 #elif LDBL_MANT_DIG == 113 |
| 100 » /* | 102 /* |
| 101 » * Round dt away from zero to 47 bits. Since we don't trust the 47, | 103 * Round dt away from zero to 47 bits. Since we don't trust the 47, |
| 102 » * add 2 47-bit ulps instead of 1 to round up. Rounding is slow and | 104 * add 2 47-bit ulps instead of 1 to round up. Rounding is slow and |
| 103 » * might be avoidable in this case, since on most machines dt will | 105 * might be avoidable in this case, since on most machines dt will |
| 104 » * have been evaluated in 53-bit precision and the technical reasons | 106 * have been evaluated in 53-bit precision and the technical reasons |
| 105 » * for rounding up might not apply to either case in cbrtl() since | 107 * for rounding up might not apply to either case in cbrtl() since |
| 106 » * dt is much more accurate than needed. | 108 * dt is much more accurate than needed. |
| 107 » */ | 109 */ |
| 108 » t = dt + 0x2.0p-46 + 0x1.0p60L - 0x1.0p60; | 110 t = dt + 0x2.0p-46 + 0x1.0p60L - 0x1.0p60; |
| 109 #endif | 111 #endif |
| 110 | 112 |
| 111 » /* | 113 /* |
| 112 » * Final step Newton iteration to 64 or 113 bits with | 114 * Final step Newton iteration to 64 or 113 bits with |
| 113 » * error < 0.667 ulps | 115 * error < 0.667 ulps |
| 114 » */ | 116 */ |
| 115 » s = t*t; /* t*t is exact */ | 117 s = t * t; /* t*t is exact */ |
| 116 » r = x/s; /* error <= 0.5 ulps; |r| < |t| */ | 118 r = x / s; /* error <= 0.5 ulps; |r| < |t| */ |
| 117 » w = t+t; /* t+t is exact */ | 119 w = t + t; /* t+t is exact */ |
| 118 » r = (r-t)/(w+r); /* r-t is exact; w+r ~= 3*t */ | 120 r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */ |
| 119 » t = t+t*r; /* error <= 0.5 + 0.5/3 + epsilon */ | 121 t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */ |
| 120 | 122 |
| 121 » t *= v.f; | 123 t *= v.f; |
| 122 » return t; | 124 return t; |
| 123 } | 125 } |
| 124 #endif | 126 #endif |
| OLD | NEW |