Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(938)

Side by Side Diff: fusl/src/math/fma.c

Issue 1714623002: [fusl] clang-format fusl (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: headers too Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 #include <fenv.h> 1 #include <fenv.h>
2 #include "libm.h" 2 #include "libm.h"
3 3
4 #if LDBL_MANT_DIG==64 && LDBL_MAX_EXP==16384 4 #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
/* Exact addition (Fast2Sum): stores the rounded sum x+y in *hi and the
 * exact rounding error in *lo, so *hi + *lo == x + y exactly.
 * Requires exponent(x) >= exponent(y). */
static void add(long double* hi,
                long double* lo,
                long double x,
                long double y) {
  long double sum = x + y;
  *hi = sum;
  /* (sum - x) recovers the part of y absorbed into the rounded sum;
   * the remainder of y is the exact error term. */
  *lo = y - (sum - x);
}
15 17
/* Exact multiplication (Dekker): stores the rounded product x*y in *hi
 * and the exact rounding error in *lo.  Assumes no overflow/underflow. */
static void mul(long double* hi,
                long double* lo,
                long double x,
                long double y) {
  /* Splitting constant 2^32 + 1 halves the 64-bit ld80 mantissa. */
  static const long double split = 1.0 + 0x1p32L;
  long double tx = split * x;
  long double x_hi = (x - tx) + tx;
  long double x_lo = x - x_hi;
  long double ty = split * y;
  long double y_hi = (y - ty) + ty;
  long double y_lo = y - y_hi;
  *hi = x * y;
  /* Sum the exactly-representable partial products to get the error. */
  *lo = (x_hi * y_hi - *hi) + x_hi * y_lo + x_lo * y_hi + x_lo * y_lo;
}
31 35
/*
assume (long double)(hi+lo) == hi
return an adjusted hi so that rounding it to double (or less) precision is
correct
*/
static long double adjust(long double hi, long double lo) {
  union ldshape uhi, ulo;

  /* lo == 0 means hi is exact; nothing to fix up. */
  if (lo == 0)
    return hi;
  uhi.f = hi;
  /* If any of the low 10 mantissa bits are set, rounding hi to double
     (or narrower) precision cannot land on a tie that lo would need to
     break, so hi can be returned unchanged. */
  if (uhi.i.m & 0x3ff)
    return hi;
  ulo.f = lo;
  /* Compare sign bits (0x8000 is the sign bit of the se field). Same
     sign: nudge the mantissa up one ulp so the discarded lo is visible
     to the final rounding. */
  if ((uhi.i.se & 0x8000) == (ulo.i.se & 0x8000))
    uhi.i.m++;
  else {
    /* handle underflow and take care of ld80 implicit msb */
    if (uhi.i.m << 1 == 0) {
      uhi.i.m = 0;
      uhi.i.se--;
    }
    uhi.i.m--;
  }
  return uhi.f;
}
58 62
/* Add x and y, then tweak the low bits of the sum so that a later
 * rounding to double (or less) precision is correct. */
static long double dadd(long double a, long double b) {
  add(&a, &b, a, b); /* a = rounded sum, b = exact error */
  return adjust(a, b);
}
65 69
/* Multiply x and y, then tweak the low bits of the product so that a
 * later rounding to double (or less) precision is correct. */
static long double dmul(long double a, long double b) {
  mul(&a, &b, a, b); /* a = rounded product, b = exact error */
  return adjust(a, b);
}
72 76
73 static int getexp(long double x) 77 static int getexp(long double x) {
74 { 78 union ldshape u;
75 » union ldshape u; 79 u.f = x;
76 » u.f = x; 80 return u.i.se & 0x7fff;
77 » return u.i.se & 0x7fff;
78 } 81 }
79 82
/* fma(x, y, z): compute x * y + z with a single rounding.  ld80 path:
   long double has a 64-bit mantissa, so the double-precision result can
   be assembled from the exact extended-precision helpers above. */
double fma(double x, double y, double z) {
  PRAGMA_STDC_FENV_ACCESS_ON
  long double hi, lo1, lo2, xy;
  int round, ez, exy;

  /* handle +-inf,nan */
  if (!isfinite(x) || !isfinite(y))
    return x * y + z;
  if (!isfinite(z))
    return z;
  /* handle +-0 */
  if (x == 0.0 || y == 0.0)
    return x * y + z;
  round = fegetround();
  if (z == 0.0) {
    if (round == FE_TONEAREST)
      return dmul(x, y);
    return x * y;
  }

  /* exact mul and add require nearest rounding */
  /* spurious inexact exceptions may be raised */
  fesetround(FE_TONEAREST);
  mul(&xy, &lo1, x, y);
  exy = getexp(xy);
  ez = getexp(z);
  if (ez > exy) {
    /* z has the larger exponent, so it goes first into the exact add. */
    add(&hi, &lo2, z, xy);
  } else if (ez > exy - 12) {
    add(&hi, &lo2, xy, z);
    if (hi == 0) {
      /*
      xy + z is 0, but it should be calculated with the
      original rounding mode so the sign is correct, if the
      compiler does not support FENV_ACCESS ON it does not
      know about the changed rounding mode and eliminates
      the xy + z below without the volatile memory access
      */
      volatile double z_;
      fesetround(round);
      z_ = z;
      return (xy + z_) + lo1;
    }
  } else {
    /*
    ez <= exy - 12
    the 12 extra bits (1guard, 11round+sticky) are needed so with
            lo = dadd(lo1, lo2)
    elo <= ehi - 11, and we use the last 10 bits in adjust so
            dadd(hi, lo)
    gives correct result when rounded to double
    */
    hi = xy;
    lo2 = z;
  }
  /*
  the result is stored before return for correct precision and exceptions

  one corner case is when the underflow flag should be raised because
  the precise result is an inexact subnormal double, but the calculated
  long double result is an exact subnormal double
  (so rounding to double does not raise exceptions)

  in nearest rounding mode dadd takes care of this: the last bit of the
  result is adjusted so rounding sees an inexact value when it should

  in non-nearest rounding mode fenv is used for the workaround
  */
  fesetround(round);
  if (round == FE_TONEAREST)
    z = dadd(hi, dadd(lo1, lo2));
  else {
#if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
    int e = fetestexcept(FE_INEXACT);
    feclearexcept(FE_INEXACT);
#endif
    z = hi + (lo1 + lo2);
#if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
    /* 0x3fff - 1022 is the ld80 biased exponent of the smallest normal
       double, i.e. the result is subnormal in double precision. */
    if (getexp(z) < 0x3fff - 1022 && fetestexcept(FE_INEXACT))
      feraiseexcept(FE_UNDERFLOW);
    else if (e)
      feraiseexcept(FE_INEXACT);
#endif
  }
  return z;
}
167 #else 169 #else
168 /* origin: FreeBSD /usr/src/lib/msun/src/s_fma.c */ 170 /* origin: FreeBSD /usr/src/lib/msun/src/s_fma.c */
169 /*- 171 /*-
170 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG> 172 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
171 * All rights reserved. 173 * All rights reserved.
172 * 174 *
173 * Redistribution and use in source and binary forms, with or without 175 * Redistribution and use in source and binary forms, with or without
174 * modification, are permitted provided that the following conditions 176 * modification, are permitted provided that the following conditions
175 * are met: 177 * are met:
(...skipping 15 matching lines...) Expand all
191 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 193 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
192 * SUCH DAMAGE. 194 * SUCH DAMAGE.
193 */ 195 */
194 196
195 /* 197 /*
196 * A struct dd represents a floating-point number with twice the precision 198 * A struct dd represents a floating-point number with twice the precision
197 * of a double. We maintain the invariant that "hi" stores the 53 high-order 199 * of a double. We maintain the invariant that "hi" stores the 53 high-order
198 * bits of the result. 200 * bits of the result.
199 */ 201 */
200 struct dd { 202 struct dd {
201 » double hi; 203 double hi;
202 » double lo; 204 double lo;
203 }; 205 };
204 206
205 /* 207 /*
206 * Compute a+b exactly, returning the exact result in a struct dd. We assume 208 * Compute a+b exactly, returning the exact result in a struct dd. We assume
207 * that both a and b are finite, but make no assumptions about their relative 209 * that both a and b are finite, but make no assumptions about their relative
208 * magnitudes. 210 * magnitudes.
209 */ 211 */
210 static inline struct dd dd_add(double a, double b) 212 static inline struct dd dd_add(double a, double b) {
211 { 213 struct dd ret;
212 » struct dd ret; 214 double s;
213 » double s;
214 215
215 » ret.hi = a + b; 216 ret.hi = a + b;
216 » s = ret.hi - a; 217 s = ret.hi - a;
217 » ret.lo = (a - (ret.hi - s)) + (b - s); 218 ret.lo = (a - (ret.hi - s)) + (b - s);
218 » return (ret); 219 return (ret);
219 } 220 }
220 221
/*
 * Compute a+b, with a small tweak:  The least significant bit of the
 * result is adjusted into a sticky bit summarizing all the bits that
 * were lost to rounding.  This adjustment negates the effects of double
 * rounding when the result is added to another number with a higher
 * exponent.  For an explanation of round and sticky bits, see any reference
 * on FPU design, e.g.,
 *
 *     J. Coonen.  An Implementation Guide to a Proposed Standard for
 *     Floating-Point Arithmetic.  Computer, vol. 13, no. 1, Jan 1980.
 */
static inline double add_adjusted(double a, double b) {
  struct dd sum;
  union {
    double f;
    uint64_t i;
  } uhi, ulo;

  sum = dd_add(a, b);
  if (sum.lo != 0) {
    uhi.f = sum.hi;
    /* Only adjust when the low bit of hi is clear: setting it turns it
       into a sticky bit recording that low-order bits were lost. */
    if ((uhi.i & 1) == 0) {
      /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
      /* NOTE(review): the >> 62 trick assumes the shifted XOR of the two
         bit patterns is 0 (same sign) or 2 (opposite sign); this relies
         on the operands' exponents staying in the same half-range, which
         the scaling in fma appears to guarantee — confirm against the
         FreeBSD original before changing. */
      ulo.f = sum.lo;
      uhi.i += 1 - ((uhi.i ^ ulo.i) >> 62);
      sum.hi = uhi.f;
    }
  }
  return (sum.hi);
}
249 252
/*
 * Compute ldexp(a+b, scale) with a single rounding error. It is assumed
 * that the result will be subnormal, and care is taken to ensure that
 * double rounding does not occur.
 */
static inline double add_and_denormalize(double a, double b, int scale) {
  struct dd sum;
  union {
    double f;
    uint64_t i;
  } uhi, ulo;
  int bits_lost;

  sum = dd_add(a, b);

  /*
   * If we are losing at least two bits of accuracy to denormalization,
   * then the first lost bit becomes a round bit, and we adjust the
   * lowest bit of sum.hi to make it a sticky bit summarizing all the
   * bits in sum.lo. With the sticky bit adjusted, the hardware will
   * break any ties in the correct direction.
   *
   * If we are losing only one bit to denormalization, however, we must
   * break the ties manually.
   */
  if (sum.lo != 0) {
    uhi.f = sum.hi;
    /* bits_lost = 1 - (biased_exponent(sum.hi) + scale): the number of
       low-order bits that will fall off when the scaled result becomes
       subnormal. */
    bits_lost = -((int)(uhi.i >> 52) & 0x7ff) - scale + 1;
    /* Adjust unless exactly one bit is lost AND the low bit is already
       set (the XOR folds the two cases of the comment above together). */
    if ((bits_lost != 1) ^ (int)(uhi.i & 1)) {
      /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
      ulo.f = sum.lo;
      uhi.i += 1 - (((uhi.i ^ ulo.i) >> 62) & 2);
      sum.hi = uhi.f;
    }
  }
  return scalbn(sum.hi, scale);
}
285 290
286 /* 291 /*
287 * Compute a*b exactly, returning the exact result in a struct dd. We assume 292 * Compute a*b exactly, returning the exact result in a struct dd. We assume
288 * that both a and b are normalized, so no underflow or overflow will occur. 293 * that both a and b are normalized, so no underflow or overflow will occur.
289 * The current rounding mode must be round-to-nearest. 294 * The current rounding mode must be round-to-nearest.
290 */ 295 */
291 static inline struct dd dd_mul(double a, double b) 296 static inline struct dd dd_mul(double a, double b) {
292 { 297 static const double split = 0x1p27 + 1.0;
293 » static const double split = 0x1p27 + 1.0; 298 struct dd ret;
294 » struct dd ret; 299 double ha, hb, la, lb, p, q;
295 » double ha, hb, la, lb, p, q;
296 300
297 » p = a * split; 301 p = a * split;
298 » ha = a - p; 302 ha = a - p;
299 » ha += p; 303 ha += p;
300 » la = a - ha; 304 la = a - ha;
301 305
302 » p = b * split; 306 p = b * split;
303 » hb = b - p; 307 hb = b - p;
304 » hb += p; 308 hb += p;
305 » lb = b - hb; 309 lb = b - hb;
306 310
307 » p = ha * hb; 311 p = ha * hb;
308 » q = ha * lb + la * hb; 312 q = ha * lb + la * hb;
309 313
310 » ret.hi = p + q; 314 ret.hi = p + q;
311 » ret.lo = p - ret.hi + q + la * lb; 315 ret.lo = p - ret.hi + q + la * lb;
312 » return (ret); 316 return (ret);
313 } 317 }
314 318
/*
 * Fused multiply-add: Compute x * y + z with a single rounding error.
 *
 * We use scaling to avoid overflow/underflow, along with the
 * canonical precision-doubling technique adapted from:
 *
 *      Dekker, T.  A Floating-Point Technique for Extending the
 *      Available Precision.  Numer. Math. 18, 224-242 (1971).
 *
 * This algorithm is sensitive to the rounding precision.  FPUs such
 * as the i387 must be set in double-precision mode if variables are
 * to be stored in FP registers in order to avoid incorrect results.
 * This is the default on FreeBSD, but not on many other systems.
 *
 * Hardware instructions should be used on architectures that support it,
 * since this implementation will likely be several times slower.
 */
double fma(double x, double y, double z) {
#pragma STDC FENV_ACCESS ON
  double xs, ys, zs, adj;
  struct dd xy, r;
  int oround;
  int ex, ey, ez;
  int spread;

  /*
   * Handle special cases. The order of operations and the particular
   * return values here are crucial in handling special cases involving
   * infinities, NaNs, overflows, and signed zeroes correctly.
   */
  if (!isfinite(x) || !isfinite(y))
    return (x * y + z);
  if (!isfinite(z))
    return (z);
  if (x == 0.0 || y == 0.0)
    return (x * y + z);
  if (z == 0.0)
    return (x * y);

  /* Scale all operands into [0.5, 1) and work with the exponents
     separately, to keep the core computation away from over/underflow. */
  xs = frexp(x, &ex);
  ys = frexp(y, &ey);
  zs = frexp(z, &ez);
  oround = fegetround();
  spread = ex + ey - ez;

  /*
   * If x * y and z are many orders of magnitude apart, the scaling
   * will overflow, so we handle these cases specially.  Rounding
   * modes other than FE_TONEAREST are painful.
   */
  if (spread < -DBL_MANT_DIG) {
#ifdef FE_INEXACT
    feraiseexcept(FE_INEXACT);
#endif
#ifdef FE_UNDERFLOW
    if (!isnormal(z))
      feraiseexcept(FE_UNDERFLOW);
#endif
    switch (oround) {
      default: /* FE_TONEAREST */
        return (z);
#ifdef FE_TOWARDZERO
      case FE_TOWARDZERO:
        /* Condition is true iff x*y and z have the same sign (all three
           are nonzero here); integer ^ on the comparison results. */
        if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
          return (z);
        else
          return (nextafter(z, 0));
#endif
#ifdef FE_DOWNWARD
      case FE_DOWNWARD:
        /* True iff the product x*y is positive. */
        if (x > 0.0 ^ y < 0.0)
          return (z);
        else
          return (nextafter(z, -INFINITY));
#endif
#ifdef FE_UPWARD
      case FE_UPWARD:
        if (x > 0.0 ^ y < 0.0)
          return (nextafter(z, INFINITY));
        else
          return (z);
#endif
    }
  }
  if (spread <= DBL_MANT_DIG * 2)
    zs = scalbn(zs, -spread);
  else
    zs = copysign(DBL_MIN, zs); /* z only matters as a sticky bit here */

  fesetround(FE_TONEAREST);

  /*
   * Basic approach for round-to-nearest:
   *
   *     (xy.hi, xy.lo) = x * y           (exact)
   *     (r.hi, r.lo)   = xy.hi + z       (exact)
   *     adj = xy.lo + r.lo               (inexact; low bit is sticky)
   *     result = r.hi + adj              (correctly rounded)
   */
  xy = dd_mul(xs, ys);
  r = dd_add(xy.hi, zs);

  spread = ex + ey;

  if (r.hi == 0.0) {
    /*
     * When the addends cancel to 0, ensure that the result has
     * the correct sign.
     */
    fesetround(oround);
    volatile double vzs = zs; /* XXX gcc CSE bug workaround */
    return xy.hi + vzs + scalbn(xy.lo, spread);
  }

  if (oround != FE_TONEAREST) {
    /*
     * There is no need to worry about double rounding in directed
     * rounding modes.
     * But underflow may not be raised properly, example in downward rounding:
     * fma(0x1.000000001p-1000, 0x1.000000001p-30, -0x1p-1066)
     */
    double ret;
#if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
    int e = fetestexcept(FE_INEXACT);
    feclearexcept(FE_INEXACT);
#endif
    fesetround(oround);
    adj = r.lo + xy.lo;
    ret = scalbn(r.hi + adj, spread);
#if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
    /* Result is subnormal and inexact: the underflow flag must be set
       even though the scaled computation above may not have raised it. */
    if (ilogb(ret) < -1022 && fetestexcept(FE_INEXACT))
      feraiseexcept(FE_UNDERFLOW);
    else if (e)
      feraiseexcept(FE_INEXACT);
#endif
    return ret;
  }

  adj = add_adjusted(r.lo, xy.lo);
  if (spread + ilogb(r.hi) > -1023)
    return scalbn(r.hi + adj, spread);
  else
    return add_and_denormalize(r.hi, adj, spread);
}
460 #endif 463 #endif
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698