OLD | NEW |
1 /* origin: FreeBSD /usr/src/lib/msun/src/s_cbrtl.c */ | 1 /* origin: FreeBSD /usr/src/lib/msun/src/s_cbrtl.c */ |
2 /*- | 2 /*- |
3 * ==================================================== | 3 * ==================================================== |
4 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. | 4 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. |
5 * Copyright (c) 2009-2011, Bruce D. Evans, Steven G. Kargl, David Schultz. | 5 * Copyright (c) 2009-2011, Bruce D. Evans, Steven G. Kargl, David Schultz. |
6 * | 6 * |
7 * Developed at SunPro, a Sun Microsystems, Inc. business. | 7 * Developed at SunPro, a Sun Microsystems, Inc. business. |
8 * Permission to use, copy, modify, and distribute this | 8 * Permission to use, copy, modify, and distribute this |
9 * software is freely granted, provided that this notice | 9 * software is freely granted, provided that this notice |
10 * is preserved. | 10 * is preserved. |
11 * ==================================================== | 11 * ==================================================== |
12 * | 12 * |
13 * The argument reduction and testing for exceptional cases was | 13 * The argument reduction and testing for exceptional cases was |
14 * written by Steven G. Kargl with input from Bruce D. Evans | 14 * written by Steven G. Kargl with input from Bruce D. Evans |
15 * and David A. Schultz. | 15 * and David A. Schultz. |
16 */ | 16 */ |
17 | 17 |
18 #include "libm.h" | 18 #include "libm.h" |
19 | 19 |
20 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 | 20 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 |
21 long double cbrtl(long double x) | 21 long double cbrtl(long double x) { |
22 { | 22 return cbrt(x); |
23 » return cbrt(x); | |
24 } | 23 } |
25 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 | 24 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 |
26 static const unsigned B1 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23
*/ | 25 static const unsigned B1 = |
| 26 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */ |
27 | 27 |
28 long double cbrtl(long double x) | 28 long double cbrtl(long double x) { |
29 { | 29 union ldshape u = {x}, v; |
30 » union ldshape u = {x}, v; | 30 union { |
31 » union {float f; uint32_t i;} uft; | 31 float f; |
32 » long double r, s, t, w; | 32 uint32_t i; |
33 » double_t dr, dt, dx; | 33 } uft; |
34 » float_t ft; | 34 long double r, s, t, w; |
35 » int e = u.i.se & 0x7fff; | 35 double_t dr, dt, dx; |
36 » int sign = u.i.se & 0x8000; | 36 float_t ft; |
| 37 int e = u.i.se & 0x7fff; |
| 38 int sign = u.i.se & 0x8000; |
37 | 39 |
38 » /* | 40 /* |
39 » * If x = +-Inf, then cbrt(x) = +-Inf. | 41 * If x = +-Inf, then cbrt(x) = +-Inf. |
40 » * If x = NaN, then cbrt(x) = NaN. | 42 * If x = NaN, then cbrt(x) = NaN. |
41 » */ | 43 */ |
42 » if (e == 0x7fff) | 44 if (e == 0x7fff) |
43 » » return x + x; | 45 return x + x; |
44 » if (e == 0) { | 46 if (e == 0) { |
45 » » /* Adjust subnormal numbers. */ | 47 /* Adjust subnormal numbers. */ |
46 » » u.f *= 0x1p120; | 48 u.f *= 0x1p120; |
47 » » e = u.i.se & 0x7fff; | 49 e = u.i.se & 0x7fff; |
48 » » /* If x = +-0, then cbrt(x) = +-0. */ | 50 /* If x = +-0, then cbrt(x) = +-0. */ |
49 » » if (e == 0) | 51 if (e == 0) |
50 » » » return x; | 52 return x; |
51 » » e -= 120; | 53 e -= 120; |
52 » } | 54 } |
53 » e -= 0x3fff; | 55 e -= 0x3fff; |
54 » u.i.se = 0x3fff; | 56 u.i.se = 0x3fff; |
55 » x = u.f; | 57 x = u.f; |
56 » switch (e % 3) { | 58 switch (e % 3) { |
57 » case 1: | 59 case 1: |
58 » case -2: | 60 case -2: |
59 » » x *= 2; | 61 x *= 2; |
60 » » e--; | 62 e--; |
61 » » break; | 63 break; |
62 » case 2: | 64 case 2: |
63 » case -1: | 65 case -1: |
64 » » x *= 4; | 66 x *= 4; |
65 » » e -= 2; | 67 e -= 2; |
66 » » break; | 68 break; |
67 » } | 69 } |
68 » v.f = 1.0; | 70 v.f = 1.0; |
69 » v.i.se = sign | (0x3fff + e/3); | 71 v.i.se = sign | (0x3fff + e / 3); |
70 | 72 |
71 » /* | 73 /* |
72 » * The following is the guts of s_cbrtf, with the handling of | 74 * The following is the guts of s_cbrtf, with the handling of |
73 » * special values removed and extra care for accuracy not taken, | 75 * special values removed and extra care for accuracy not taken, |
74 » * but with most of the extra accuracy not discarded. | 76 * but with most of the extra accuracy not discarded. |
75 » */ | 77 */ |
76 | 78 |
77 » /* ~5-bit estimate: */ | 79 /* ~5-bit estimate: */ |
78 » uft.f = x; | 80 uft.f = x; |
79 » uft.i = (uft.i & 0x7fffffff)/3 + B1; | 81 uft.i = (uft.i & 0x7fffffff) / 3 + B1; |
80 » ft = uft.f; | 82 ft = uft.f; |
81 | 83 |
82 » /* ~16-bit estimate: */ | 84 /* ~16-bit estimate: */ |
83 » dx = x; | 85 dx = x; |
84 » dt = ft; | 86 dt = ft; |
85 » dr = dt * dt * dt; | 87 dr = dt * dt * dt; |
86 » dt = dt * (dx + dx + dr) / (dx + dr + dr); | 88 dt = dt * (dx + dx + dr) / (dx + dr + dr); |
87 | 89 |
88 » /* ~47-bit estimate: */ | 90 /* ~47-bit estimate: */ |
89 » dr = dt * dt * dt; | 91 dr = dt * dt * dt; |
90 » dt = dt * (dx + dx + dr) / (dx + dr + dr); | 92 dt = dt * (dx + dx + dr) / (dx + dr + dr); |
91 | 93 |
92 #if LDBL_MANT_DIG == 64 | 94 #if LDBL_MANT_DIG == 64 |
93 » /* | 95 /* |
94 » * dt is cbrtl(x) to ~47 bits (after x has been reduced to 1 <= x < 8). | 96 * dt is cbrtl(x) to ~47 bits (after x has been reduced to 1 <= x < 8). |
95 » * Round it away from zero to 32 bits (32 so that t*t is exact, and | 97 * Round it away from zero to 32 bits (32 so that t*t is exact, and |
96 » * away from zero for technical reasons). | 98 * away from zero for technical reasons). |
97 » */ | 99 */ |
98 » t = dt + (0x1.0p32L + 0x1.0p-31L) - 0x1.0p32; | 100 t = dt + (0x1.0p32L + 0x1.0p-31L) - 0x1.0p32; |
99 #elif LDBL_MANT_DIG == 113 | 101 #elif LDBL_MANT_DIG == 113 |
100 » /* | 102 /* |
101 » * Round dt away from zero to 47 bits. Since we don't trust the 47, | 103 * Round dt away from zero to 47 bits. Since we don't trust the 47, |
102 » * add 2 47-bit ulps instead of 1 to round up. Rounding is slow and | 104 * add 2 47-bit ulps instead of 1 to round up. Rounding is slow and |
103 » * might be avoidable in this case, since on most machines dt will | 105 * might be avoidable in this case, since on most machines dt will |
104 » * have been evaluated in 53-bit precision and the technical reasons | 106 * have been evaluated in 53-bit precision and the technical reasons |
105 » * for rounding up might not apply to either case in cbrtl() since | 107 * for rounding up might not apply to either case in cbrtl() since |
106 » * dt is much more accurate than needed. | 108 * dt is much more accurate than needed. |
107 » */ | 109 */ |
108 » t = dt + 0x2.0p-46 + 0x1.0p60L - 0x1.0p60; | 110 t = dt + 0x2.0p-46 + 0x1.0p60L - 0x1.0p60; |
109 #endif | 111 #endif |
110 | 112 |
111 » /* | 113 /* |
112 » * Final step Newton iteration to 64 or 113 bits with | 114 * Final step Newton iteration to 64 or 113 bits with |
113 » * error < 0.667 ulps | 115 * error < 0.667 ulps |
114 » */ | 116 */ |
115 » s = t*t; /* t*t is exact */ | 117 s = t * t; /* t*t is exact */ |
116 » r = x/s; /* error <= 0.5 ulps; |r| < |t| */ | 118 r = x / s; /* error <= 0.5 ulps; |r| < |t| */ |
117 » w = t+t; /* t+t is exact */ | 119 w = t + t; /* t+t is exact */ |
118 » r = (r-t)/(w+r); /* r-t is exact; w+r ~= 3*t */ | 120 r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */ |
119 » t = t+t*r; /* error <= 0.5 + 0.5/3 + epsilon */ | 121 t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */ |
120 | 122 |
121 » t *= v.f; | 123 t *= v.f; |
122 » return t; | 124 return t; |
123 } | 125 } |
124 #endif | 126 #endif |
OLD | NEW |