| OLD | NEW |
| 1 #include <fenv.h> | 1 #include <fenv.h> |
| 2 #include "libm.h" | 2 #include "libm.h" |
| 3 | 3 |
| 4 #if LDBL_MANT_DIG==64 && LDBL_MAX_EXP==16384 | 4 #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 |
| 5 /* exact add, assumes exponent_x >= exponent_y */ | 5 /* exact add, assumes exponent_x >= exponent_y */ |
| 6 static void add(long double *hi, long double *lo, long double x, long double y) | 6 static void add(long double* hi, |
| 7 { | 7 long double* lo, |
| 8 » long double r; | 8 long double x, |
| 9 long double y) { |
| 10 long double r; |
| 9 | 11 |
| 10 » r = x + y; | 12 r = x + y; |
| 11 » *hi = r; | 13 *hi = r; |
| 12 » r -= x; | 14 r -= x; |
| 13 » *lo = y - r; | 15 *lo = y - r; |
| 14 } | 16 } |
| 15 | 17 |
| 16 /* exact mul, assumes no over/underflow */ | 18 /* exact mul, assumes no over/underflow */ |
| 17 static void mul(long double *hi, long double *lo, long double x, long double y) | 19 static void mul(long double* hi, |
| 18 { | 20 long double* lo, |
| 19 » static const long double c = 1.0 + 0x1p32L; | 21 long double x, |
| 20 » long double cx, xh, xl, cy, yh, yl; | 22 long double y) { |
| 23 static const long double c = 1.0 + 0x1p32L; |
| 24 long double cx, xh, xl, cy, yh, yl; |
| 21 | 25 |
| 22 » cx = c*x; | 26 cx = c * x; |
| 23 » xh = (x - cx) + cx; | 27 xh = (x - cx) + cx; |
| 24 » xl = x - xh; | 28 xl = x - xh; |
| 25 » cy = c*y; | 29 cy = c * y; |
| 26 » yh = (y - cy) + cy; | 30 yh = (y - cy) + cy; |
| 27 » yl = y - yh; | 31 yl = y - yh; |
| 28 » *hi = x*y; | 32 *hi = x * y; |
| 29 » *lo = (xh*yh - *hi) + xh*yl + xl*yh + xl*yl; | 33 *lo = (xh * yh - *hi) + xh * yl + xl * yh + xl * yl; |
| 30 } | 34 } |
| 31 | 35 |
| 32 /* | 36 /* |
| 33 assume (long double)(hi+lo) == hi | 37 assume (long double)(hi+lo) == hi |
| 34 return an adjusted hi so that rounding it to double (or less) precision is corre
ct | 38 return an adjusted hi so that rounding it to double (or less) precision is |
| 39 correct |
| 35 */ | 40 */ |
| 36 static long double adjust(long double hi, long double lo) | 41 static long double adjust(long double hi, long double lo) { |
| 37 { | 42 union ldshape uhi, ulo; |
| 38 » union ldshape uhi, ulo; | |
| 39 | 43 |
| 40 » if (lo == 0) | 44 if (lo == 0) |
| 41 » » return hi; | 45 return hi; |
| 42 » uhi.f = hi; | 46 uhi.f = hi; |
| 43 » if (uhi.i.m & 0x3ff) | 47 if (uhi.i.m & 0x3ff) |
| 44 » » return hi; | 48 return hi; |
| 45 » ulo.f = lo; | 49 ulo.f = lo; |
| 46 » if ((uhi.i.se & 0x8000) == (ulo.i.se & 0x8000)) | 50 if ((uhi.i.se & 0x8000) == (ulo.i.se & 0x8000)) |
| 47 » » uhi.i.m++; | 51 uhi.i.m++; |
| 48 » else { | 52 else { |
| 49 » » /* handle underflow and take care of ld80 implicit msb */ | 53 /* handle underflow and take care of ld80 implicit msb */ |
| 50 » » if (uhi.i.m << 1 == 0) { | 54 if (uhi.i.m << 1 == 0) { |
| 51 » » » uhi.i.m = 0; | 55 uhi.i.m = 0; |
| 52 » » » uhi.i.se--; | 56 uhi.i.se--; |
| 53 » » } | 57 } |
| 54 » » uhi.i.m--; | 58 uhi.i.m--; |
| 55 » } | 59 } |
| 56 » return uhi.f; | 60 return uhi.f; |
| 57 } | 61 } |
| 58 | 62 |
| 59 /* adjusted add so the result is correct when rounded to double (or less) precis
ion */ | 63 /* adjusted add so the result is correct when rounded to double (or less) |
| 60 static long double dadd(long double x, long double y) | 64 * precision */ |
| 61 { | 65 static long double dadd(long double x, long double y) { |
| 62 » add(&x, &y, x, y); | 66 add(&x, &y, x, y); |
| 63 » return adjust(x, y); | 67 return adjust(x, y); |
| 64 } | 68 } |
| 65 | 69 |
| 66 /* adjusted mul so the result is correct when rounded to double (or less) precis
ion */ | 70 /* adjusted mul so the result is correct when rounded to double (or less) |
| 67 static long double dmul(long double x, long double y) | 71 * precision */ |
| 68 { | 72 static long double dmul(long double x, long double y) { |
| 69 » mul(&x, &y, x, y); | 73 mul(&x, &y, x, y); |
| 70 » return adjust(x, y); | 74 return adjust(x, y); |
| 71 } | 75 } |
| 72 | 76 |
| 73 static int getexp(long double x) | 77 static int getexp(long double x) { |
| 74 { | 78 union ldshape u; |
| 75 » union ldshape u; | 79 u.f = x; |
| 76 » u.f = x; | 80 return u.i.se & 0x7fff; |
| 77 » return u.i.se & 0x7fff; | |
| 78 } | 81 } |
| 79 | 82 |
| 80 double fma(double x, double y, double z) | 83 double fma(double x, double y, double z) { |
| 81 { | 84 PRAGMA_STDC_FENV_ACCESS_ON |
| 82 » PRAGMA_STDC_FENV_ACCESS_ON | 85 long double hi, lo1, lo2, xy; |
| 83 » long double hi, lo1, lo2, xy; | 86 int round, ez, exy; |
| 84 » int round, ez, exy; | |
| 85 | 87 |
| 86 » /* handle +-inf,nan */ | 88 /* handle +-inf,nan */ |
| 87 » if (!isfinite(x) || !isfinite(y)) | 89 if (!isfinite(x) || !isfinite(y)) |
| 88 » » return x*y + z; | 90 return x * y + z; |
| 89 » if (!isfinite(z)) | 91 if (!isfinite(z)) |
| 90 » » return z; | 92 return z; |
| 91 » /* handle +-0 */ | 93 /* handle +-0 */ |
| 92 » if (x == 0.0 || y == 0.0) | 94 if (x == 0.0 || y == 0.0) |
| 93 » » return x*y + z; | 95 return x * y + z; |
| 94 » round = fegetround(); | 96 round = fegetround(); |
| 95 » if (z == 0.0) { | 97 if (z == 0.0) { |
| 96 » » if (round == FE_TONEAREST) | 98 if (round == FE_TONEAREST) |
| 97 » » » return dmul(x, y); | 99 return dmul(x, y); |
| 98 » » return x*y; | 100 return x * y; |
| 99 » } | 101 } |
| 100 | 102 |
| 101 » /* exact mul and add require nearest rounding */ | 103 /* exact mul and add require nearest rounding */ |
| 102 » /* spurious inexact exceptions may be raised */ | 104 /* spurious inexact exceptions may be raised */ |
| 103 » fesetround(FE_TONEAREST); | 105 fesetround(FE_TONEAREST); |
| 104 » mul(&xy, &lo1, x, y); | 106 mul(&xy, &lo1, x, y); |
| 105 » exy = getexp(xy); | 107 exy = getexp(xy); |
| 106 » ez = getexp(z); | 108 ez = getexp(z); |
| 107 » if (ez > exy) { | 109 if (ez > exy) { |
| 108 » » add(&hi, &lo2, z, xy); | 110 add(&hi, &lo2, z, xy); |
| 109 » } else if (ez > exy - 12) { | 111 } else if (ez > exy - 12) { |
| 110 » » add(&hi, &lo2, xy, z); | 112 add(&hi, &lo2, xy, z); |
| 111 » » if (hi == 0) { | 113 if (hi == 0) { |
| 112 » » » /* | 114 /* |
| 113 » » » xy + z is 0, but it should be calculated with the | 115 xy + z is 0, but it should be calculated with the |
| 114 » » » original rounding mode so the sign is correct, if the | 116 original rounding mode so the sign is correct, if the |
| 115 » » » compiler does not support FENV_ACCESS ON it does not | 117 compiler does not support FENV_ACCESS ON it does not |
| 116 » » » know about the changed rounding mode and eliminates | 118 know about the changed rounding mode and eliminates |
| 117 » » » the xy + z below without the volatile memory access | 119 the xy + z below without the volatile memory access |
| 118 » » » */ | 120 */ |
| 119 » » » volatile double z_; | 121 volatile double z_; |
| 120 » » » fesetround(round); | 122 fesetround(round); |
| 121 » » » z_ = z; | 123 z_ = z; |
| 122 » » » return (xy + z_) + lo1; | 124 return (xy + z_) + lo1; |
| 123 » » } | 125 } |
| 124 » } else { | 126 } else { |
| 125 » » /* | 127 /* |
| 126 » » ez <= exy - 12 | 128 ez <= exy - 12 |
| 127 » » the 12 extra bits (1guard, 11round+sticky) are needed so with | 129 the 12 extra bits (1guard, 11round+sticky) are needed so with |
| 128 » » » lo = dadd(lo1, lo2) | 130 lo = dadd(lo1, lo2) |
| 129 » » elo <= ehi - 11, and we use the last 10 bits in adjust so | 131 elo <= ehi - 11, and we use the last 10 bits in adjust so |
| 130 » » » dadd(hi, lo) | 132 dadd(hi, lo) |
| 131 » » gives correct result when rounded to double | 133 gives correct result when rounded to double |
| 132 » » */ | 134 */ |
| 133 » » hi = xy; | 135 hi = xy; |
| 134 » » lo2 = z; | 136 lo2 = z; |
| 135 » } | 137 } |
| 136 » /* | 138 /* |
| 137 » the result is stored before return for correct precision and exceptions | 139 the result is stored before return for correct precision and exceptions |
| 138 | 140 |
| 139 » one corner case is when the underflow flag should be raised because | 141 one corner case is when the underflow flag should be raised because |
| 140 » the precise result is an inexact subnormal double, but the calculated | 142 the precise result is an inexact subnormal double, but the calculated |
| 141 » long double result is an exact subnormal double | 143 long double result is an exact subnormal double |
| 142 » (so rounding to double does not raise exceptions) | 144 (so rounding to double does not raise exceptions) |
| 143 | 145 |
| 144 » in nearest rounding mode dadd takes care of this: the last bit of the | 146 in nearest rounding mode dadd takes care of this: the last bit of the |
| 145 » result is adjusted so rounding sees an inexact value when it should | 147 result is adjusted so rounding sees an inexact value when it should |
| 146 | 148 |
| 147 » in non-nearest rounding mode fenv is used for the workaround | 149 in non-nearest rounding mode fenv is used for the workaround |
| 148 » */ | 150 */ |
| 149 » fesetround(round); | 151 fesetround(round); |
| 150 » if (round == FE_TONEAREST) | 152 if (round == FE_TONEAREST) |
| 151 » » z = dadd(hi, dadd(lo1, lo2)); | 153 z = dadd(hi, dadd(lo1, lo2)); |
| 152 » else { | 154 else { |
| 153 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW) | 155 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW) |
| 154 » » int e = fetestexcept(FE_INEXACT); | 156 int e = fetestexcept(FE_INEXACT); |
| 155 » » feclearexcept(FE_INEXACT); | 157 feclearexcept(FE_INEXACT); |
| 156 #endif | 158 #endif |
| 157 » » z = hi + (lo1 + lo2); | 159 z = hi + (lo1 + lo2); |
| 158 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW) | 160 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW) |
| 159 » » if (getexp(z) < 0x3fff-1022 && fetestexcept(FE_INEXACT)) | 161 if (getexp(z) < 0x3fff - 1022 && fetestexcept(FE_INEXACT)) |
| 160 » » » feraiseexcept(FE_UNDERFLOW); | 162 feraiseexcept(FE_UNDERFLOW); |
| 161 » » else if (e) | 163 else if (e) |
| 162 » » » feraiseexcept(FE_INEXACT); | 164 feraiseexcept(FE_INEXACT); |
| 163 #endif | 165 #endif |
| 164 » } | 166 } |
| 165 » return z; | 167 return z; |
| 166 } | 168 } |
| 167 #else | 169 #else |
| 168 /* origin: FreeBSD /usr/src/lib/msun/src/s_fma.c */ | 170 /* origin: FreeBSD /usr/src/lib/msun/src/s_fma.c */ |
| 169 /*- | 171 /*- |
| 170 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG> | 172 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG> |
| 171 * All rights reserved. | 173 * All rights reserved. |
| 172 * | 174 * |
| 173 * Redistribution and use in source and binary forms, with or without | 175 * Redistribution and use in source and binary forms, with or without |
| 174 * modification, are permitted provided that the following conditions | 176 * modification, are permitted provided that the following conditions |
| 175 * are met: | 177 * are met: |
| (...skipping 15 matching lines...) Expand all Loading... |
| 191 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 193 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 192 * SUCH DAMAGE. | 194 * SUCH DAMAGE. |
| 193 */ | 195 */ |
| 194 | 196 |
| 195 /* | 197 /* |
| 196 * A struct dd represents a floating-point number with twice the precision | 198 * A struct dd represents a floating-point number with twice the precision |
| 197 * of a double. We maintain the invariant that "hi" stores the 53 high-order | 199 * of a double. We maintain the invariant that "hi" stores the 53 high-order |
| 198 * bits of the result. | 200 * bits of the result. |
| 199 */ | 201 */ |
| 200 struct dd { | 202 struct dd { |
| 201 » double hi; | 203 double hi; |
| 202 » double lo; | 204 double lo; |
| 203 }; | 205 }; |
| 204 | 206 |
| 205 /* | 207 /* |
| 206 * Compute a+b exactly, returning the exact result in a struct dd. We assume | 208 * Compute a+b exactly, returning the exact result in a struct dd. We assume |
| 207 * that both a and b are finite, but make no assumptions about their relative | 209 * that both a and b are finite, but make no assumptions about their relative |
| 208 * magnitudes. | 210 * magnitudes. |
| 209 */ | 211 */ |
| 210 static inline struct dd dd_add(double a, double b) | 212 static inline struct dd dd_add(double a, double b) { |
| 211 { | 213 struct dd ret; |
| 212 » struct dd ret; | 214 double s; |
| 213 » double s; | |
| 214 | 215 |
| 215 » ret.hi = a + b; | 216 ret.hi = a + b; |
| 216 » s = ret.hi - a; | 217 s = ret.hi - a; |
| 217 » ret.lo = (a - (ret.hi - s)) + (b - s); | 218 ret.lo = (a - (ret.hi - s)) + (b - s); |
| 218 » return (ret); | 219 return (ret); |
| 219 } | 220 } |
| 220 | 221 |
| 221 /* | 222 /* |
| 222 * Compute a+b, with a small tweak: The least significant bit of the | 223 * Compute a+b, with a small tweak: The least significant bit of the |
| 223 * result is adjusted into a sticky bit summarizing all the bits that | 224 * result is adjusted into a sticky bit summarizing all the bits that |
| 224 * were lost to rounding. This adjustment negates the effects of double | 225 * were lost to rounding. This adjustment negates the effects of double |
| 225 * rounding when the result is added to another number with a higher | 226 * rounding when the result is added to another number with a higher |
| 226 * exponent. For an explanation of round and sticky bits, see any reference | 227 * exponent. For an explanation of round and sticky bits, see any reference |
| 227 * on FPU design, e.g., | 228 * on FPU design, e.g., |
| 228 * | 229 * |
| 229 * J. Coonen. An Implementation Guide to a Proposed Standard for | 230 * J. Coonen. An Implementation Guide to a Proposed Standard for |
| 230 * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980. | 231 * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980. |
| 231 */ | 232 */ |
| 232 static inline double add_adjusted(double a, double b) | 233 static inline double add_adjusted(double a, double b) { |
| 233 { | 234 struct dd sum; |
| 234 » struct dd sum; | 235 union { |
| 235 » union {double f; uint64_t i;} uhi, ulo; | 236 double f; |
| 237 uint64_t i; |
| 238 } uhi, ulo; |
| 236 | 239 |
| 237 » sum = dd_add(a, b); | 240 sum = dd_add(a, b); |
| 238 » if (sum.lo != 0) { | 241 if (sum.lo != 0) { |
| 239 » » uhi.f = sum.hi; | 242 uhi.f = sum.hi; |
| 240 » » if ((uhi.i & 1) == 0) { | 243 if ((uhi.i & 1) == 0) { |
| 241 » » » /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */ | 244 /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */ |
| 242 » » » ulo.f = sum.lo; | 245 ulo.f = sum.lo; |
| 243 » » » uhi.i += 1 - ((uhi.i ^ ulo.i) >> 62); | 246 uhi.i += 1 - ((uhi.i ^ ulo.i) >> 62); |
| 244 » » » sum.hi = uhi.f; | 247 sum.hi = uhi.f; |
| 245 » » } | 248 } |
| 246 » } | 249 } |
| 247 » return (sum.hi); | 250 return (sum.hi); |
| 248 } | 251 } |
| 249 | 252 |
| 250 /* | 253 /* |
| 251 * Compute ldexp(a+b, scale) with a single rounding error. It is assumed | 254 * Compute ldexp(a+b, scale) with a single rounding error. It is assumed |
| 252 * that the result will be subnormal, and care is taken to ensure that | 255 * that the result will be subnormal, and care is taken to ensure that |
| 253 * double rounding does not occur. | 256 * double rounding does not occur. |
| 254 */ | 257 */ |
| 255 static inline double add_and_denormalize(double a, double b, int scale) | 258 static inline double add_and_denormalize(double a, double b, int scale) { |
| 256 { | 259 struct dd sum; |
| 257 » struct dd sum; | 260 union { |
| 258 » union {double f; uint64_t i;} uhi, ulo; | 261 double f; |
| 259 » int bits_lost; | 262 uint64_t i; |
| 263 } uhi, ulo; |
| 264 int bits_lost; |
| 260 | 265 |
| 261 » sum = dd_add(a, b); | 266 sum = dd_add(a, b); |
| 262 | 267 |
| 263 » /* | 268 /* |
| 264 » * If we are losing at least two bits of accuracy to denormalization, | 269 * If we are losing at least two bits of accuracy to denormalization, |
| 265 » * then the first lost bit becomes a round bit, and we adjust the | 270 * then the first lost bit becomes a round bit, and we adjust the |
| 266 » * lowest bit of sum.hi to make it a sticky bit summarizing all the | 271 * lowest bit of sum.hi to make it a sticky bit summarizing all the |
| 267 » * bits in sum.lo. With the sticky bit adjusted, the hardware will | 272 * bits in sum.lo. With the sticky bit adjusted, the hardware will |
| 268 » * break any ties in the correct direction. | 273 * break any ties in the correct direction. |
| 269 » * | 274 * |
| 270 » * If we are losing only one bit to denormalization, however, we must | 275 * If we are losing only one bit to denormalization, however, we must |
| 271 » * break the ties manually. | 276 * break the ties manually. |
| 272 » */ | 277 */ |
| 273 » if (sum.lo != 0) { | 278 if (sum.lo != 0) { |
| 274 » » uhi.f = sum.hi; | 279 uhi.f = sum.hi; |
| 275 » » bits_lost = -((int)(uhi.i >> 52) & 0x7ff) - scale + 1; | 280 bits_lost = -((int)(uhi.i >> 52) & 0x7ff) - scale + 1; |
| 276 » » if ((bits_lost != 1) ^ (int)(uhi.i & 1)) { | 281 if ((bits_lost != 1) ^ (int)(uhi.i & 1)) { |
| 277 » » » /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */ | 282 /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */ |
| 278 » » » ulo.f = sum.lo; | 283 ulo.f = sum.lo; |
| 279 » » » uhi.i += 1 - (((uhi.i ^ ulo.i) >> 62) & 2); | 284 uhi.i += 1 - (((uhi.i ^ ulo.i) >> 62) & 2); |
| 280 » » » sum.hi = uhi.f; | 285 sum.hi = uhi.f; |
| 281 » » } | 286 } |
| 282 » } | 287 } |
| 283 » return scalbn(sum.hi, scale); | 288 return scalbn(sum.hi, scale); |
| 284 } | 289 } |
| 285 | 290 |
| 286 /* | 291 /* |
| 287 * Compute a*b exactly, returning the exact result in a struct dd. We assume | 292 * Compute a*b exactly, returning the exact result in a struct dd. We assume |
| 288 * that both a and b are normalized, so no underflow or overflow will occur. | 293 * that both a and b are normalized, so no underflow or overflow will occur. |
| 289 * The current rounding mode must be round-to-nearest. | 294 * The current rounding mode must be round-to-nearest. |
| 290 */ | 295 */ |
| 291 static inline struct dd dd_mul(double a, double b) | 296 static inline struct dd dd_mul(double a, double b) { |
| 292 { | 297 static const double split = 0x1p27 + 1.0; |
| 293 » static const double split = 0x1p27 + 1.0; | 298 struct dd ret; |
| 294 » struct dd ret; | 299 double ha, hb, la, lb, p, q; |
| 295 » double ha, hb, la, lb, p, q; | |
| 296 | 300 |
| 297 » p = a * split; | 301 p = a * split; |
| 298 » ha = a - p; | 302 ha = a - p; |
| 299 » ha += p; | 303 ha += p; |
| 300 » la = a - ha; | 304 la = a - ha; |
| 301 | 305 |
| 302 » p = b * split; | 306 p = b * split; |
| 303 » hb = b - p; | 307 hb = b - p; |
| 304 » hb += p; | 308 hb += p; |
| 305 » lb = b - hb; | 309 lb = b - hb; |
| 306 | 310 |
| 307 » p = ha * hb; | 311 p = ha * hb; |
| 308 » q = ha * lb + la * hb; | 312 q = ha * lb + la * hb; |
| 309 | 313 |
| 310 » ret.hi = p + q; | 314 ret.hi = p + q; |
| 311 » ret.lo = p - ret.hi + q + la * lb; | 315 ret.lo = p - ret.hi + q + la * lb; |
| 312 » return (ret); | 316 return (ret); |
| 313 } | 317 } |
| 314 | 318 |
| 315 /* | 319 /* |
| 316 * Fused multiply-add: Compute x * y + z with a single rounding error. | 320 * Fused multiply-add: Compute x * y + z with a single rounding error. |
| 317 * | 321 * |
| 318 * We use scaling to avoid overflow/underflow, along with the | 322 * We use scaling to avoid overflow/underflow, along with the |
| 319 * canonical precision-doubling technique adapted from: | 323 * canonical precision-doubling technique adapted from: |
| 320 * | 324 * |
| 321 * Dekker, T. A Floating-Point Technique for Extending the | 325 * Dekker, T. A Floating-Point Technique for Extending the |
| 322 * Available Precision. Numer. Math. 18, 224-242 (1971). | 326 * Available Precision. Numer. Math. 18, 224-242 (1971). |
| 323 * | 327 * |
| 324 * This algorithm is sensitive to the rounding precision. FPUs such | 328 * This algorithm is sensitive to the rounding precision. FPUs such |
| 325 * as the i387 must be set in double-precision mode if variables are | 329 * as the i387 must be set in double-precision mode if variables are |
| 326 * to be stored in FP registers in order to avoid incorrect results. | 330 * to be stored in FP registers in order to avoid incorrect results. |
| 327 * This is the default on FreeBSD, but not on many other systems. | 331 * This is the default on FreeBSD, but not on many other systems. |
| 328 * | 332 * |
| 329 * Hardware instructions should be used on architectures that support it, | 333 * Hardware instructions should be used on architectures that support it, |
| 330 * since this implementation will likely be several times slower. | 334 * since this implementation will likely be several times slower. |
| 331 */ | 335 */ |
| 332 double fma(double x, double y, double z) | 336 double fma(double x, double y, double z) { |
| 333 { | 337 #pragma STDC FENV_ACCESS ON |
| 334 » #pragma STDC FENV_ACCESS ON | 338 double xs, ys, zs, adj; |
| 335 » double xs, ys, zs, adj; | 339 struct dd xy, r; |
| 336 » struct dd xy, r; | 340 int oround; |
| 337 » int oround; | 341 int ex, ey, ez; |
| 338 » int ex, ey, ez; | 342 int spread; |
| 339 » int spread; | |
| 340 | 343 |
| 341 » /* | 344 /* |
| 342 » * Handle special cases. The order of operations and the particular | 345 * Handle special cases. The order of operations and the particular |
| 343 » * return values here are crucial in handling special cases involving | 346 * return values here are crucial in handling special cases involving |
| 344 » * infinities, NaNs, overflows, and signed zeroes correctly. | 347 * infinities, NaNs, overflows, and signed zeroes correctly. |
| 345 » */ | 348 */ |
| 346 » if (!isfinite(x) || !isfinite(y)) | 349 if (!isfinite(x) || !isfinite(y)) |
| 347 » » return (x * y + z); | 350 return (x * y + z); |
| 348 » if (!isfinite(z)) | 351 if (!isfinite(z)) |
| 349 » » return (z); | 352 return (z); |
| 350 » if (x == 0.0 || y == 0.0) | 353 if (x == 0.0 || y == 0.0) |
| 351 » » return (x * y + z); | 354 return (x * y + z); |
| 352 » if (z == 0.0) | 355 if (z == 0.0) |
| 353 » » return (x * y); | 356 return (x * y); |
| 354 | 357 |
| 355 » xs = frexp(x, &ex); | 358 xs = frexp(x, &ex); |
| 356 » ys = frexp(y, &ey); | 359 ys = frexp(y, &ey); |
| 357 » zs = frexp(z, &ez); | 360 zs = frexp(z, &ez); |
| 358 » oround = fegetround(); | 361 oround = fegetround(); |
| 359 » spread = ex + ey - ez; | 362 spread = ex + ey - ez; |
| 360 | 363 |
| 361 » /* | 364 /* |
| 362 » * If x * y and z are many orders of magnitude apart, the scaling | 365 * If x * y and z are many orders of magnitude apart, the scaling |
| 363 » * will overflow, so we handle these cases specially. Rounding | 366 * will overflow, so we handle these cases specially. Rounding |
| 364 » * modes other than FE_TONEAREST are painful. | 367 * modes other than FE_TONEAREST are painful. |
| 365 » */ | 368 */ |
| 366 » if (spread < -DBL_MANT_DIG) { | 369 if (spread < -DBL_MANT_DIG) { |
| 367 #ifdef FE_INEXACT | 370 #ifdef FE_INEXACT |
| 368 » » feraiseexcept(FE_INEXACT); | 371 feraiseexcept(FE_INEXACT); |
| 369 #endif | 372 #endif |
| 370 #ifdef FE_UNDERFLOW | 373 #ifdef FE_UNDERFLOW |
| 371 » » if (!isnormal(z)) | 374 if (!isnormal(z)) |
| 372 » » » feraiseexcept(FE_UNDERFLOW); | 375 feraiseexcept(FE_UNDERFLOW); |
| 373 #endif | 376 #endif |
| 374 » » switch (oround) { | 377 switch (oround) { |
| 375 » » default: /* FE_TONEAREST */ | 378 default: /* FE_TONEAREST */ |
| 376 » » » return (z); | 379 return (z); |
| 377 #ifdef FE_TOWARDZERO | 380 #ifdef FE_TOWARDZERO |
| 378 » » case FE_TOWARDZERO: | 381 case FE_TOWARDZERO: |
| 379 » » » if (x > 0.0 ^ y < 0.0 ^ z < 0.0) | 382 if (x > 0.0 ^ y < 0.0 ^ z < 0.0) |
| 380 » » » » return (z); | 383 return (z); |
| 381 » » » else | 384 else |
| 382 » » » » return (nextafter(z, 0)); | 385 return (nextafter(z, 0)); |
| 383 #endif | 386 #endif |
| 384 #ifdef FE_DOWNWARD | 387 #ifdef FE_DOWNWARD |
| 385 » » case FE_DOWNWARD: | 388 case FE_DOWNWARD: |
| 386 » » » if (x > 0.0 ^ y < 0.0) | 389 if (x > 0.0 ^ y < 0.0) |
| 387 » » » » return (z); | 390 return (z); |
| 388 » » » else | 391 else |
| 389 » » » » return (nextafter(z, -INFINITY)); | 392 return (nextafter(z, -INFINITY)); |
| 390 #endif | 393 #endif |
| 391 #ifdef FE_UPWARD | 394 #ifdef FE_UPWARD |
| 392 » » case FE_UPWARD: | 395 case FE_UPWARD: |
| 393 » » » if (x > 0.0 ^ y < 0.0) | 396 if (x > 0.0 ^ y < 0.0) |
| 394 » » » » return (nextafter(z, INFINITY)); | 397 return (nextafter(z, INFINITY)); |
| 395 » » » else | 398 else |
| 396 » » » » return (z); | 399 return (z); |
| 397 #endif | 400 #endif |
| 398 » » } | 401 } |
| 399 » } | 402 } |
| 400 » if (spread <= DBL_MANT_DIG * 2) | 403 if (spread <= DBL_MANT_DIG * 2) |
| 401 » » zs = scalbn(zs, -spread); | 404 zs = scalbn(zs, -spread); |
| 402 » else | 405 else |
| 403 » » zs = copysign(DBL_MIN, zs); | 406 zs = copysign(DBL_MIN, zs); |
| 404 | 407 |
| 405 » fesetround(FE_TONEAREST); | 408 fesetround(FE_TONEAREST); |
| 406 | 409 |
| 407 » /* | 410 /* |
| 408 » * Basic approach for round-to-nearest: | 411 * Basic approach for round-to-nearest: |
| 409 » * | 412 * |
| 410 » * (xy.hi, xy.lo) = x * y (exact) | 413 * (xy.hi, xy.lo) = x * y (exact) |
| 411 » * (r.hi, r.lo) = xy.hi + z (exact) | 414 * (r.hi, r.lo) = xy.hi + z (exact) |
| 412 » * adj = xy.lo + r.lo (inexact; low bit is sticky) | 415 * adj = xy.lo + r.lo (inexact; low bit is sticky) |
| 413 » * result = r.hi + adj (correctly rounded) | 416 * result = r.hi + adj (correctly rounded) |
| 414 » */ | 417 */ |
| 415 » xy = dd_mul(xs, ys); | 418 xy = dd_mul(xs, ys); |
| 416 » r = dd_add(xy.hi, zs); | 419 r = dd_add(xy.hi, zs); |
| 417 | 420 |
| 418 » spread = ex + ey; | 421 spread = ex + ey; |
| 419 | 422 |
| 420 » if (r.hi == 0.0) { | 423 if (r.hi == 0.0) { |
| 421 » » /* | 424 /* |
| 422 » » * When the addends cancel to 0, ensure that the result has | 425 * When the addends cancel to 0, ensure that the result has |
| 423 » » * the correct sign. | 426 * the correct sign. |
| 424 » » */ | 427 */ |
| 425 » » fesetround(oround); | 428 fesetround(oround); |
| 426 » » volatile double vzs = zs; /* XXX gcc CSE bug workaround */ | 429 volatile double vzs = zs; /* XXX gcc CSE bug workaround */ |
| 427 » » return xy.hi + vzs + scalbn(xy.lo, spread); | 430 return xy.hi + vzs + scalbn(xy.lo, spread); |
| 428 » } | 431 } |
| 429 | 432 |
| 430 » if (oround != FE_TONEAREST) { | 433 if (oround != FE_TONEAREST) { |
| 431 » » /* | 434 /* |
| 432 » » * There is no need to worry about double rounding in directed | 435 * There is no need to worry about double rounding in directed |
| 433 » » * rounding modes. | 436 * rounding modes. |
| 434 » » * But underflow may not be raised properly, example in downward
rounding: | 437 * But underflow may not be raised properly, example in downward rounding: |
| 435 » » * fma(0x1.000000001p-1000, 0x1.000000001p-30, -0x1p-1066) | 438 * fma(0x1.000000001p-1000, 0x1.000000001p-30, -0x1p-1066) |
| 436 » » */ | 439 */ |
| 437 » » double ret; | 440 double ret; |
| 438 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW) | 441 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW) |
| 439 » » int e = fetestexcept(FE_INEXACT); | 442 int e = fetestexcept(FE_INEXACT); |
| 440 » » feclearexcept(FE_INEXACT); | 443 feclearexcept(FE_INEXACT); |
| 441 #endif | 444 #endif |
| 442 » » fesetround(oround); | 445 fesetround(oround); |
| 443 » » adj = r.lo + xy.lo; | 446 adj = r.lo + xy.lo; |
| 444 » » ret = scalbn(r.hi + adj, spread); | 447 ret = scalbn(r.hi + adj, spread); |
| 445 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW) | 448 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW) |
| 446 » » if (ilogb(ret) < -1022 && fetestexcept(FE_INEXACT)) | 449 if (ilogb(ret) < -1022 && fetestexcept(FE_INEXACT)) |
| 447 » » » feraiseexcept(FE_UNDERFLOW); | 450 feraiseexcept(FE_UNDERFLOW); |
| 448 » » else if (e) | 451 else if (e) |
| 449 » » » feraiseexcept(FE_INEXACT); | 452 feraiseexcept(FE_INEXACT); |
| 450 #endif | 453 #endif |
| 451 » » return ret; | 454 return ret; |
| 452 » } | 455 } |
| 453 | 456 |
| 454 » adj = add_adjusted(r.lo, xy.lo); | 457 adj = add_adjusted(r.lo, xy.lo); |
| 455 » if (spread + ilogb(r.hi) > -1023) | 458 if (spread + ilogb(r.hi) > -1023) |
| 456 » » return scalbn(r.hi + adj, spread); | 459 return scalbn(r.hi + adj, spread); |
| 457 » else | 460 else |
| 458 » » return add_and_denormalize(r.hi, adj, spread); | 461 return add_and_denormalize(r.hi, adj, spread); |
| 459 } | 462 } |
| 460 #endif | 463 #endif |
| OLD | NEW |