OLD | NEW |
1 /* origin: FreeBSD /usr/src/lib/msun/ld80/e_rem_pio2.c */ | 1 /* origin: FreeBSD /usr/src/lib/msun/ld80/e_rem_pio2.c */ |
2 /* | 2 /* |
3 * ==================================================== | 3 * ==================================================== |
4 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. | 4 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. |
5 * Copyright (c) 2008 Steven G. Kargl, David Schultz, Bruce D. Evans. | 5 * Copyright (c) 2008 Steven G. Kargl, David Schultz, Bruce D. Evans. |
6 * | 6 * |
7 * Developed at SunSoft, a Sun Microsystems, Inc. business. | 7 * Developed at SunSoft, a Sun Microsystems, Inc. business. |
8 * Permission to use, copy, modify, and distribute this | 8 * Permission to use, copy, modify, and distribute this |
9 * software is freely granted, provided that this notice | 9 * software is freely granted, provided that this notice |
10 * is preserved. | 10 * is preserved. |
11 * ==================================================== | 11 * ==================================================== |
12 * | 12 * |
13 * Optimized by Bruce D. Evans. | 13 * Optimized by Bruce D. Evans. |
14 */ | 14 */ |
15 #include "libm.h" | 15 #include "libm.h" |
16 #if (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 | 16 #if (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 |
17 /* ld80 and ld128 version of __rem_pio2(x,y) | 17 /* ld80 and ld128 version of __rem_pio2(x,y) |
18 * | 18 * |
19 * return the remainder of x rem pi/2 in y[0]+y[1] | 19 * return the remainder of x rem pi/2 in y[0]+y[1] |
20 * use __rem_pio2_large() for large x | 20 * use __rem_pio2_large() for large x |
21 */ | 21 */ |
22 | 22 |
23 static const long double toint = 1.5/LDBL_EPSILON; | 23 static const long double toint = 1.5 / LDBL_EPSILON; |
24 | 24 |
25 #if LDBL_MANT_DIG == 64 | 25 #if LDBL_MANT_DIG == 64 |
26 /* u ~< 0x1p25*pi/2 */ | 26 /* u ~< 0x1p25*pi/2 */ |
27 #define SMALL(u) (((u.i.se & 0x7fffU)<<16 | u.i.m>>48) < ((0x3fff + 25)<<16 | 0x
921f>>1 | 0x8000)) | 27 #define SMALL(u) \ |
| 28 (((u.i.se & 0x7fffU) << 16 | u.i.m >> 48) < \ |
| 29 ((0x3fff + 25) << 16 | 0x921f >> 1 | 0x8000)) |
28 #define QUOBITS(x) ((uint32_t)(int32_t)x & 0x7fffffff) | 30 #define QUOBITS(x) ((uint32_t)(int32_t)x & 0x7fffffff) |
29 #define ROUND1 22 | 31 #define ROUND1 22 |
30 #define ROUND2 61 | 32 #define ROUND2 61 |
31 #define NX 3 | 33 #define NX 3 |
32 #define NY 2 | 34 #define NY 2 |
33 /* | 35 /* |
34 * invpio2: 64 bits of 2/pi | 36 * invpio2: 64 bits of 2/pi |
35 * pio2_1: first 39 bits of pi/2 | 37 * pio2_1: first 39 bits of pi/2 |
36 * pio2_1t: pi/2 - pio2_1 | 38 * pio2_1t: pi/2 - pio2_1 |
37 * pio2_2: second 39 bits of pi/2 | 39 * pio2_2: second 39 bits of pi/2 |
38 * pio2_2t: pi/2 - (pio2_1+pio2_2) | 40 * pio2_2t: pi/2 - (pio2_1+pio2_2) |
39 * pio2_3: third 39 bits of pi/2 | 41 * pio2_3: third 39 bits of pi/2 |
40 * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3) | 42 * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3) |
41 */ | 43 */ |
42 static const double | 44 static const double pio2_1 = |
43 pio2_1 = 1.57079632679597125389e+00, /* 0x3FF921FB, 0x54444000 */ | 45 1.57079632679597125389e+00, /* 0x3FF921FB, 0x54444000 */ |
44 pio2_2 = -1.07463465549783099519e-12, /* -0x12e7b967674000.0p-92 */ | 46 pio2_2 = -1.07463465549783099519e-12, /* -0x12e7b967674000.0p-92 */ |
45 pio2_3 = 6.36831716351370313614e-25; /* 0x18a2e037074000.0p-133 */ | 47 pio2_3 = 6.36831716351370313614e-25; /* 0x18a2e037074000.0p-133 */ |
46 static const long double | 48 static const long double |
47 invpio2 = 6.36619772367581343076e-01L, /* 0xa2f9836e4e44152a.0p-64 */ | 49 invpio2 = 6.36619772367581343076e-01L, /* 0xa2f9836e4e44152a.0p-64 */ |
48 pio2_1t = -1.07463465549719416346e-12L, /* -0x973dcb3b399d747f.0p-103 */ | 50 pio2_1t = -1.07463465549719416346e-12L, /* -0x973dcb3b399d747f.0p-103 */ |
49 pio2_2t = 6.36831716351095013979e-25L, /* 0xc51701b839a25205.0p-144 */ | 51 pio2_2t = 6.36831716351095013979e-25L, /* 0xc51701b839a25205.0p-144 */ |
50 pio2_3t = -2.75299651904407171810e-37L; /* -0xbb5bf6c7ddd660ce.0p-185 */ | 52 pio2_3t = -2.75299651904407171810e-37L; /* -0xbb5bf6c7ddd660ce.0p-185 */ |
51 #elif LDBL_MANT_DIG == 113 | 53 #elif LDBL_MANT_DIG == 113 |
52 /* u ~< 0x1p45*pi/2 */ | 54 /* u ~< 0x1p45*pi/2 */ |
53 #define SMALL(u) (((u.i.se & 0x7fffU)<<16 | u.i.top) < ((0x3fff + 45)<<16 | 0x92
1f)) | 55 #define SMALL(u) \ |
| 56 (((u.i.se & 0x7fffU) << 16 | u.i.top) < ((0x3fff + 45) << 16 | 0x921f)) |
54 #define QUOBITS(x) ((uint32_t)(int64_t)x & 0x7fffffff) | 57 #define QUOBITS(x) ((uint32_t)(int64_t)x & 0x7fffffff) |
55 #define ROUND1 51 | 58 #define ROUND1 51 |
56 #define ROUND2 119 | 59 #define ROUND2 119 |
57 #define NX 5 | 60 #define NX 5 |
58 #define NY 3 | 61 #define NY 3 |
59 static const long double | 62 static const long double |
60 invpio2 = 6.3661977236758134307553505349005747e-01L,» /* 0x145f306dc9c882a53f
84eafa3ea6a.0p-113 */ | 63 invpio2 = |
61 pio2_1 = 1.5707963267948966192292994253909555e+00L,» /* 0x1921fb54442d184698
00000000000.0p-112 */ | 64 6.3661977236758134307553505349005747e-01L, /* 0x145f306dc9c882a53f84eaf
a3ea6a.0p-113 |
62 pio2_1t = 2.0222662487959507323996846200947577e-21L,» /* 0x13198a2e03707344a4
093822299f3.0p-181 */ | 65 */ |
63 pio2_2 = 2.0222662487959507323994779168837751e-21L,» /* 0x13198a2e03707344a4
00000000000.0p-181 */ | 66 pio2_1 = |
64 pio2_2t = 2.0670321098263988236496903051604844e-43L,» /* 0x127044533e63a0105d
f531d89cd91.0p-254 */ | 67 1.5707963267948966192292994253909555e+00L, /* 0x1921fb54442d18469800000
000000.0p-112 |
65 pio2_3 = 2.0670321098263988236499468110329591e-43L,» /* 0x127044533e63a0105e
00000000000.0p-254 */ | 68 */ |
66 pio2_3t = -2.5650587247459238361625433492959285e-65L;» /* -0x159c4ec64ddaeb5f78
671cbfb2210.0p-327 */ | 69 pio2_1t = |
| 70 2.0222662487959507323996846200947577e-21L, /* 0x13198a2e03707344a409382
2299f3.0p-181 |
| 71 */ |
| 72 pio2_2 = |
| 73 2.0222662487959507323994779168837751e-21L, /* 0x13198a2e03707344a400000
000000.0p-181 |
| 74 */ |
| 75 pio2_2t = |
| 76 2.0670321098263988236496903051604844e-43L, /* 0x127044533e63a0105df531d
89cd91.0p-254 |
| 77 */ |
| 78 pio2_3 = |
| 79 2.0670321098263988236499468110329591e-43L, /* 0x127044533e63a0105e00000
000000.0p-254 |
| 80 */ |
| 81 pio2_3t = |
| 82 -2.5650587247459238361625433492959285e-65L; /* -0x159c4ec64ddaeb5f78671c
bfb2210.0p-327 |
| 83 */ |
67 #endif | 84 #endif |
68 | 85 |
69 int __rem_pio2l(long double x, long double *y) | 86 int __rem_pio2l(long double x, long double* y) { |
70 { | 87 union ldshape u, uz; |
71 » union ldshape u,uz; | 88 long double z, w, t, r, fn; |
72 » long double z,w,t,r,fn; | 89 double tx[NX], ty[NY]; |
73 » double tx[NX],ty[NY]; | 90 int ex, ey, n, i; |
74 » int ex,ey,n,i; | |
75 | 91 |
76 » u.f = x; | 92 u.f = x; |
77 » ex = u.i.se & 0x7fff; | 93 ex = u.i.se & 0x7fff; |
78 » if (SMALL(u)) { | 94 if (SMALL(u)) { |
79 » » /* rint(x/(pi/2)), Assume round-to-nearest. */ | 95 /* rint(x/(pi/2)), Assume round-to-nearest. */ |
80 » » fn = x*invpio2 + toint - toint; | 96 fn = x * invpio2 + toint - toint; |
81 » » n = QUOBITS(fn); | 97 n = QUOBITS(fn); |
82 » » r = x-fn*pio2_1; | 98 r = x - fn * pio2_1; |
83 » » w = fn*pio2_1t; /* 1st round good to 102/180 bits (ld80/ld128)
*/ | 99 w = fn * pio2_1t; /* 1st round good to 102/180 bits (ld80/ld128) */ |
84 » » y[0] = r-w; | 100 y[0] = r - w; |
85 » » u.f = y[0]; | 101 u.f = y[0]; |
86 » » ey = u.i.se & 0x7fff; | 102 ey = u.i.se & 0x7fff; |
87 » » if (ex - ey > ROUND1) { /* 2nd iteration needed, good to 141/24
8 (ld80/ld128) */ | 103 if (ex - ey > |
88 » » » t = r; | 104 ROUND1) { /* 2nd iteration needed, good to 141/248 (ld80/ld128) */ |
89 » » » w = fn*pio2_2; | 105 t = r; |
90 » » » r = t-w; | 106 w = fn * pio2_2; |
91 » » » w = fn*pio2_2t-((t-r)-w); | 107 r = t - w; |
92 » » » y[0] = r-w; | 108 w = fn * pio2_2t - ((t - r) - w); |
93 » » » u.f = y[0]; | 109 y[0] = r - w; |
94 » » » ey = u.i.se & 0x7fff; | 110 u.f = y[0]; |
95 » » » if (ex - ey > ROUND2) { /* 3rd iteration, good to 180/3
16 bits */ | 111 ey = u.i.se & 0x7fff; |
96 » » » » t = r; /* will cover all possible cases (not ver
ified for ld128) */ | 112 if (ex - ey > ROUND2) { /* 3rd iteration, good to 180/316 bits */ |
97 » » » » w = fn*pio2_3; | 113 t = r; /* will cover all possible cases (not verified for ld128) */ |
98 » » » » r = t-w; | 114 w = fn * pio2_3; |
99 » » » » w = fn*pio2_3t-((t-r)-w); | 115 r = t - w; |
100 » » » » y[0] = r-w; | 116 w = fn * pio2_3t - ((t - r) - w); |
101 » » » } | 117 y[0] = r - w; |
102 » » } | 118 } |
103 » » y[1] = (r - y[0]) - w; | 119 } |
104 » » return n; | 120 y[1] = (r - y[0]) - w; |
105 » } | 121 return n; |
106 » /* | 122 } |
107 » * all other (large) arguments | 123 /* |
108 » */ | 124 * all other (large) arguments |
109 » if (ex == 0x7fff) { /* x is inf or NaN */ | 125 */ |
110 » » y[0] = y[1] = x - x; | 126 if (ex == 0x7fff) { /* x is inf or NaN */ |
111 » » return 0; | 127 y[0] = y[1] = x - x; |
112 » } | 128 return 0; |
113 » /* set z = scalbn(|x|,-ilogb(x)+23) */ | 129 } |
114 » uz.f = x; | 130 /* set z = scalbn(|x|,-ilogb(x)+23) */ |
115 » uz.i.se = 0x3fff + 23; | 131 uz.f = x; |
116 » z = uz.f; | 132 uz.i.se = 0x3fff + 23; |
117 » for (i=0; i < NX - 1; i++) { | 133 z = uz.f; |
118 » » tx[i] = (double)(int32_t)z; | 134 for (i = 0; i < NX - 1; i++) { |
119 » » z = (z-tx[i])*0x1p24; | 135 tx[i] = (double)(int32_t)z; |
120 » } | 136 z = (z - tx[i]) * 0x1p24; |
121 » tx[i] = z; | 137 } |
122 » while (tx[i] == 0) | 138 tx[i] = z; |
123 » » i--; | 139 while (tx[i] == 0) |
124 » n = __rem_pio2_large(tx, ty, ex-0x3fff-23, i+1, NY); | 140 i--; |
125 » w = ty[1]; | 141 n = __rem_pio2_large(tx, ty, ex - 0x3fff - 23, i + 1, NY); |
126 » if (NY == 3) | 142 w = ty[1]; |
127 » » w += ty[2]; | 143 if (NY == 3) |
128 » r = ty[0] + w; | 144 w += ty[2]; |
129 » /* TODO: for ld128 this does not follow the recommendation of the | 145 r = ty[0] + w; |
130 » comments of __rem_pio2_large which seem wrong if |ty[0]| > |ty[1]+ty[2]|
*/ | 146 /* TODO: for ld128 this does not follow the recommendation of the |
131 » w -= r - ty[0]; | 147 comments of __rem_pio2_large which seem wrong if |ty[0]| > |ty[1]+ty[2]| */ |
132 » if (u.i.se >> 15) { | 148 w -= r - ty[0]; |
133 » » y[0] = -r; | 149 if (u.i.se >> 15) { |
134 » » y[1] = -w; | 150 y[0] = -r; |
135 » » return -n; | 151 y[1] = -w; |
136 » } | 152 return -n; |
137 » y[0] = r; | 153 } |
138 » y[1] = w; | 154 y[0] = r; |
139 » return n; | 155 y[1] = w; |
| 156 return n; |
140 } | 157 } |
141 #endif | 158 #endif |
OLD | NEW |