OLD | NEW |
(Empty) | |
| 1 #include <fenv.h> |
| 2 #include "libm.h" |
| 3 |
| 4 #if LDBL_MANT_DIG==64 && LDBL_MAX_EXP==16384 |
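| /* note: this branch targets the x87-style 80-bit extended long double, whose
|    64-bit mantissa provides 11 bits beyond double precision */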
| 5 /* exact add, assumes exponent_x >= exponent_y */ |
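| /* note: this is the Fast2Sum step: given exponent_x >= exponent_y, lo recovers
|    the rounding error of x + y exactly, so hi + lo == x + y */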
| 6 static void add(long double *hi, long double *lo, long double x, long double y) |
| 7 { |
| 8 long double r; |
| 9 |
| 10 r = x + y; |
| 11 *hi = r; |
| 12 r -= x; |
| 13 *lo = y - r; |
| 14 } |
| 15 |
| 16 /* exact mul, assumes no over/underflow */ |
| 17 static void mul(long double *hi, long double *lo, long double x, long double y) |
| 18 { |
| 19 static const long double c = 1.0 + 0x1p32L; |
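| /* note: c = 2^32 + 1 is the Veltkamp splitting constant for a 64-bit mantissa;
|    xh, xl, yh, yl each fit in 32 bits, so the partial products below are exact */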
| 20 long double cx, xh, xl, cy, yh, yl; |
| 21 |
| 22 cx = c*x; |
| 23 xh = (x - cx) + cx; |
| 24 xl = x - xh; |
| 25 cy = c*y; |
| 26 yh = (y - cy) + cy; |
| 27 yl = y - yh; |
| 28 *hi = x*y; |
| 29 *lo = (xh*yh - *hi) + xh*yl + xl*yh + xl*yl; |
| 30 } |
| 31 |
| 32 /* |
| 33 assume (long double)(hi+lo) == hi |
| 34 return an adjusted hi so that rounding it to double (or less) precision is correct
| 35 */ |
| 36 static long double adjust(long double hi, long double lo) |
| 37 { |
| 38 union ldshape uhi, ulo; |
| 39 |
| 40 if (lo == 0) |
| 41 return hi; |
| 42 uhi.f = hi; |
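| /* note: the 11 extra ld80 bits act as round+sticky for double; if any of the low 10
|    are set, hi is at least one ld80 ulp from every double rounding boundary, so the
|    tiny lo (at most half an ld80 ulp) cannot change the correctly rounded double */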
| 43 if (uhi.i.m & 0x3ff) |
| 44 return hi; |
| 45 ulo.f = lo; |
| 46 if ((uhi.i.se & 0x8000) == (ulo.i.se & 0x8000)) |
| 47 uhi.i.m++; |
| 48 else { |
| 49 /* handle underflow and take care of ld80 implicit msb */ |
| 50 if (uhi.i.m << 1 == 0) { |
| 51 uhi.i.m = 0; |
| 52 uhi.i.se--; |
| 53 } |
| 54 uhi.i.m--; |
| 55 } |
| 56 return uhi.f; |
| 57 } |
| 58 |
| 59 /* adjusted add so the result is correct when rounded to double (or less) precision */
| 60 static long double dadd(long double x, long double y) |
| 61 { |
| 62 add(&x, &y, x, y); |
| 63 return adjust(x, y); |
| 64 } |
| 65 |
| 66 /* adjusted mul so the result is correct when rounded to double (or less) precision */
| 67 static long double dmul(long double x, long double y) |
| 68 { |
| 69 mul(&x, &y, x, y); |
| 70 return adjust(x, y); |
| 71 } |
| 72 |
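| /* note: biased ld80 exponent (bias 0x3fff) with the sign bit masked off */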
| 73 static int getexp(long double x) |
| 74 { |
| 75 union ldshape u; |
| 76 u.f = x; |
| 77 return u.i.se & 0x7fff; |
| 78 } |
| 79 |
| 80 double fma(double x, double y, double z) |
| 81 { |
| 82 #pragma STDC FENV_ACCESS ON |
| 83 long double hi, lo1, lo2, xy; |
| 84 int round, ez, exy; |
| 85 |
| 86 /* handle +-inf,nan */ |
| 87 if (!isfinite(x) || !isfinite(y)) |
| 88 return x*y + z; |
| 89 if (!isfinite(z)) |
| 90 return z; |
| 91 /* handle +-0 */ |
| 92 if (x == 0.0 || y == 0.0) |
| 93 return x*y + z; |
| 94 round = fegetround(); |
| 95 if (z == 0.0) { |
| 96 if (round == FE_TONEAREST) |
| 97 return dmul(x, y); |
| 98 return x*y; |
| 99 } |
| 100 |
| 101 /* exact mul and add require nearest rounding */ |
| 102 /* spurious inexact exceptions may be raised */ |
| 103 fesetround(FE_TONEAREST); |
| 104 mul(&xy, &lo1, x, y); |
| 105 exy = getexp(xy); |
| 106 ez = getexp(z); |
| 107 if (ez > exy) { |
| 108 add(&hi, &lo2, z, xy); |
| 109 } else if (ez > exy - 12) { |
| 110 add(&hi, &lo2, xy, z); |
| 111 if (hi == 0) { |
| 112 /* |
| 113 xy + z is 0, but it should be calculated with the |
| 114 original rounding mode so the sign is correct; if the
| 115 compiler does not support FENV_ACCESS ON it does not |
| 116 know about the changed rounding mode and eliminates |
| 117 the xy + z below without the volatile memory access |
| 118 */ |
| 119 volatile double z_; |
| 120 fesetround(round); |
| 121 z_ = z; |
| 122 return (xy + z_) + lo1; |
| 123 } |
| 124 } else { |
| 125 /* |
| 126 ez <= exy - 12 |
| 127 the 12 extra bits (1 guard, 11 round+sticky) are needed so with
| 128 lo = dadd(lo1, lo2) |
| 129 elo <= ehi - 11, and we use the last 10 bits in adjust so |
| 130 dadd(hi, lo) |
| 131 gives correct result when rounded to double |
| 132 */ |
| 133 hi = xy; |
| 134 lo2 = z; |
| 135 } |
| 136 /* |
| 137 the result is stored before return for correct precision and exceptions |
| 138 |
| 139 one corner case is when the underflow flag should be raised because |
| 140 the precise result is an inexact subnormal double, but the calculated |
| 141 long double result is an exact subnormal double |
| 142 (so rounding to double does not raise exceptions) |
| 143 |
| 144 in nearest rounding mode dadd takes care of this: the last bit of the |
| 145 result is adjusted so rounding sees an inexact value when it should |
| 146 |
| 147 in non-nearest rounding mode fenv is used for the workaround |
| 148 */ |
| 149 fesetround(round); |
| 150 if (round == FE_TONEAREST) |
| 151 z = dadd(hi, dadd(lo1, lo2)); |
| 152 else { |
| 153 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW) |
| 154 int e = fetestexcept(FE_INEXACT); |
| 155 feclearexcept(FE_INEXACT); |
| 156 #endif |
| 157 z = hi + (lo1 + lo2); |
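| /* note: if the result is inexact and smaller than 0x1p-1022 (below the double
|    normal range), IEEE 754 requires the underflow flag, which the long double
|    addition above may not have raised */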
| 158 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW) |
| 159 if (getexp(z) < 0x3fff-1022 && fetestexcept(FE_INEXACT)) |
| 160 feraiseexcept(FE_UNDERFLOW); |
| 161 else if (e) |
| 162 feraiseexcept(FE_INEXACT); |
| 163 #endif |
| 164 } |
| 165 return z; |
| 166 } |
| 167 #else |
| 168 /* origin: FreeBSD /usr/src/lib/msun/src/s_fma.c */ |
| 169 /*- |
| 170 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG> |
| 171 * All rights reserved. |
| 172 * |
| 173 * Redistribution and use in source and binary forms, with or without |
| 174 * modification, are permitted provided that the following conditions |
| 175 * are met: |
| 176 * 1. Redistributions of source code must retain the above copyright |
| 177 * notice, this list of conditions and the following disclaimer. |
| 178 * 2. Redistributions in binary form must reproduce the above copyright |
| 179 * notice, this list of conditions and the following disclaimer in the |
| 180 * documentation and/or other materials provided with the distribution. |
| 181 * |
| 182 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
| 183 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 184 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 185 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
| 186 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 187 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 188 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 189 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 190 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 191 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 192 * SUCH DAMAGE. |
| 193 */ |
| 194 |
| 195 /* |
| 196 * A struct dd represents a floating-point number with twice the precision |
| 197 * of a double. We maintain the invariant that "hi" stores the 53 high-order |
| 198 * bits of the result. |
| 199 */ |
| 200 struct dd { |
| 201 double hi; |
| 202 double lo; |
| 203 }; |
| 204 |
| 205 /* |
| 206 * Compute a+b exactly, returning the exact result in a struct dd. We assume |
| 207 * that both a and b are finite, but make no assumptions about their relative |
| 208 * magnitudes. |
| 209 */ |
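| /* note: this is the branch-free TwoSum of Knuth/Moller: in round-to-nearest,
|    ret.hi + ret.lo == a + b exactly with no assumption on relative magnitudes */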
| 210 static inline struct dd dd_add(double a, double b) |
| 211 { |
| 212 struct dd ret; |
| 213 double s; |
| 214 |
| 215 ret.hi = a + b; |
| 216 s = ret.hi - a; |
| 217 ret.lo = (a - (ret.hi - s)) + (b - s); |
| 218 return (ret); |
| 219 } |
| 220 |
| 221 /* |
| 222 * Compute a+b, with a small tweak: The least significant bit of the |
| 223 * result is adjusted into a sticky bit summarizing all the bits that |
| 224 * were lost to rounding. This adjustment negates the effects of double |
| 225 * rounding when the result is added to another number with a higher |
| 226 * exponent. For an explanation of round and sticky bits, see any reference |
| 227 * on FPU design, e.g., |
| 228 * |
| 229 * J. Coonen. An Implementation Guide to a Proposed Standard for |
| 230 * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980. |
| 231 */ |
| 232 static inline double add_adjusted(double a, double b) |
| 233 { |
| 234 struct dd sum; |
| 235 union {double f; uint64_t i;} uhi, ulo; |
| 236 |
| 237 sum = dd_add(a, b); |
| 238 if (sum.lo != 0) { |
| 239 uhi.f = sum.hi; |
| 240 if ((uhi.i & 1) == 0) { |
| 241 /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */ |
| 242 ulo.f = sum.lo; |
| 243 uhi.i += 1 - ((uhi.i ^ ulo.i) >> 62); |
| 244 sum.hi = uhi.f; |
| 245 } |
| 246 } |
| 247 return (sum.hi); |
| 248 } |
| 249 |
| 250 /* |
| 251 * Compute ldexp(a+b, scale) with a single rounding error. It is assumed |
| 252 * that the result will be subnormal, and care is taken to ensure that |
| 253 * double rounding does not occur. |
| 254 */ |
| 255 static inline double add_and_denormalize(double a, double b, int scale) |
| 256 { |
| 257 struct dd sum; |
| 258 union {double f; uint64_t i;} uhi, ulo; |
| 259 int bits_lost; |
| 260 |
| 261 sum = dd_add(a, b); |
| 262 |
| 263 /* |
| 264 * If we are losing at least two bits of accuracy to denormalization, |
| 265 * then the first lost bit becomes a round bit, and we adjust the |
| 266 * lowest bit of sum.hi to make it a sticky bit summarizing all the |
| 267 * bits in sum.lo. With the sticky bit adjusted, the hardware will |
| 268 * break any ties in the correct direction. |
| 269 * |
| 270 * If we are losing only one bit to denormalization, however, we must |
| 271 * break the ties manually. |
| 272 */ |
| 273 if (sum.lo != 0) { |
| 274 uhi.f = sum.hi; |
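| /* note: bits_lost counts the low-order bits of sum.hi that will be shifted out when
|    the result is scaled into the subnormal range (final exponent is biased exp - 1023 + scale) */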
| 275 bits_lost = -((int)(uhi.i >> 52) & 0x7ff) - scale + 1; |
| 276 if ((bits_lost != 1) ^ (int)(uhi.i & 1)) { |
| 277 /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */ |
| 278 ulo.f = sum.lo; |
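| /* note: ((uhi.i ^ ulo.i) >> 62) & 2 is 0 when sum.hi and sum.lo share a sign and
|    2 when they differ, implementing the copysign adjustment described above */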
| 279 uhi.i += 1 - (((uhi.i ^ ulo.i) >> 62) & 2); |
| 280 sum.hi = uhi.f; |
| 281 } |
| 282 } |
| 283 return scalbn(sum.hi, scale); |
| 284 } |
| 285 |
| 286 /* |
| 287 * Compute a*b exactly, returning the exact result in a struct dd. We assume |
| 288 * that both a and b are normalized, so no underflow or overflow will occur. |
| 289 * The current rounding mode must be round-to-nearest. |
| 290 */ |
| 291 static inline struct dd dd_mul(double a, double b) |
| 292 { |
| 293 static const double split = 0x1p27 + 1.0; |
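| /* note: split = 2^27 + 1 is the Veltkamp constant for 53-bit doubles; each factor is
|    split into two halves of at most 26 bits (plus sign), so the partial products are exact */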
| 294 struct dd ret; |
| 295 double ha, hb, la, lb, p, q; |
| 296 |
| 297 p = a * split; |
| 298 ha = a - p; |
| 299 ha += p; |
| 300 la = a - ha; |
| 301 |
| 302 p = b * split; |
| 303 hb = b - p; |
| 304 hb += p; |
| 305 lb = b - hb; |
| 306 |
| 307 p = ha * hb; |
| 308 q = ha * lb + la * hb; |
| 309 |
| 310 ret.hi = p + q; |
| 311 ret.lo = p - ret.hi + q + la * lb; |
| 312 return (ret); |
| 313 } |
| 314 |
| 315 /* |
| 316 * Fused multiply-add: Compute x * y + z with a single rounding error. |
| 317 * |
| 318 * We use scaling to avoid overflow/underflow, along with the |
| 319 * canonical precision-doubling technique adapted from: |
| 320 * |
| 321 * Dekker, T. A Floating-Point Technique for Extending the |
| 322 * Available Precision. Numer. Math. 18, 224-242 (1971). |
| 323 * |
| 324 * This algorithm is sensitive to the rounding precision. FPUs such |
| 325 * as the i387 must be set in double-precision mode if variables are |
| 326 * to be stored in FP registers in order to avoid incorrect results. |
| 327 * This is the default on FreeBSD, but not on many other systems. |
| 328 * |
| 329 * Hardware instructions should be used on architectures that support it, |
| 330 * since this implementation will likely be several times slower. |
| 331 */ |
| 332 double fma(double x, double y, double z) |
| 333 { |
| 334 #pragma STDC FENV_ACCESS ON |
| 335 double xs, ys, zs, adj; |
| 336 struct dd xy, r; |
| 337 int oround; |
| 338 int ex, ey, ez; |
| 339 int spread; |
| 340 |
| 341 /* |
| 342 * Handle special cases. The order of operations and the particular |
| 343 * return values here are crucial in handling special cases involving |
| 344 * infinities, NaNs, overflows, and signed zeroes correctly. |
| 345 */ |
| 346 if (!isfinite(x) || !isfinite(y)) |
| 347 return (x * y + z); |
| 348 if (!isfinite(z)) |
| 349 return (z); |
| 350 if (x == 0.0 || y == 0.0) |
| 351 return (x * y + z); |
| 352 if (z == 0.0) |
| 353 return (x * y); |
| 354 |
| 355 xs = frexp(x, &ex); |
| 356 ys = frexp(y, &ey); |
| 357 zs = frexp(z, &ez); |
| 358 oround = fegetround(); |
| 359 spread = ex + ey - ez; |
| 360 |
| 361 /* |
| 362 * If x * y and z are many orders of magnitude apart, the scaling |
| 363 * will overflow, so we handle these cases specially. Rounding |
| 364 * modes other than FE_TONEAREST are painful. |
| 365 */ |
| 366 if (spread < -DBL_MANT_DIG) { |
| 367 #ifdef FE_INEXACT |
| 368 feraiseexcept(FE_INEXACT); |
| 369 #endif |
| 370 #ifdef FE_UNDERFLOW |
| 371 if (!isnormal(z)) |
| 372 feraiseexcept(FE_UNDERFLOW); |
| 373 #endif |
| 374 switch (oround) { |
| 375 default: /* FE_TONEAREST */ |
| 376 return (z); |
| 377 #ifdef FE_TOWARDZERO |
| 378 case FE_TOWARDZERO: |
| 379 if (x > 0.0 ^ y < 0.0 ^ z < 0.0) |
| 380 return (z); |
| 381 else |
| 382 return (nextafter(z, 0)); |
| 383 #endif |
| 384 #ifdef FE_DOWNWARD |
| 385 case FE_DOWNWARD: |
| 386 if (x > 0.0 ^ y < 0.0) |
| 387 return (z); |
| 388 else |
| 389 return (nextafter(z, -INFINITY)); |
| 390 #endif |
| 391 #ifdef FE_UPWARD |
| 392 case FE_UPWARD: |
| 393 if (x > 0.0 ^ y < 0.0) |
| 394 return (nextafter(z, INFINITY)); |
| 395 else |
| 396 return (z); |
| 397 #endif |
| 398 } |
| 399 } |
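| /* note: when z is more than 2*DBL_MANT_DIG bits below x*y, it can only affect the
|    result through its sign and stickiness, so a same-signed DBL_MIN stand-in suffices */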
| 400 if (spread <= DBL_MANT_DIG * 2) |
| 401 zs = scalbn(zs, -spread); |
| 402 else |
| 403 zs = copysign(DBL_MIN, zs); |
| 404 |
| 405 fesetround(FE_TONEAREST); |
| 406 |
| 407 /* |
| 408 * Basic approach for round-to-nearest: |
| 409 * |
| 410 * (xy.hi, xy.lo) = x * y (exact) |
| 411 * (r.hi, r.lo) = xy.hi + z (exact) |
| 412 * adj = xy.lo + r.lo (inexact; low bit is sticky) |
| 413 * result = r.hi + adj (correctly rounded) |
| 414 */ |
| 415 xy = dd_mul(xs, ys); |
| 416 r = dd_add(xy.hi, zs); |
| 417 |
| 418 spread = ex + ey; |
| 419 |
| 420 if (r.hi == 0.0) { |
| 421 /* |
| 422 * When the addends cancel to 0, ensure that the result has |
| 423 * the correct sign. |
| 424 */ |
| 425 fesetround(oround); |
| 426 volatile double vzs = zs; /* XXX gcc CSE bug workaround */ |
| 427 return xy.hi + vzs + scalbn(xy.lo, spread); |
| 428 } |
| 429 |
| 430 if (oround != FE_TONEAREST) { |
| 431 /* |
| 432 * There is no need to worry about double rounding in directed |
| 433 * rounding modes. |
| 434  * But underflow may not be raised properly; for example, in downward rounding:
| 435 * fma(0x1.000000001p-1000, 0x1.000000001p-30, -0x1p-1066) |
| 436 */ |
| 437 double ret; |
| 438 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW) |
| 439 int e = fetestexcept(FE_INEXACT); |
| 440 feclearexcept(FE_INEXACT); |
| 441 #endif |
| 442 fesetround(oround); |
| 443 adj = r.lo + xy.lo; |
| 444 ret = scalbn(r.hi + adj, spread); |
| 445 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW) |
| 446 if (ilogb(ret) < -1022 && fetestexcept(FE_INEXACT)) |
| 447 feraiseexcept(FE_UNDERFLOW); |
| 448 else if (e) |
| 449 feraiseexcept(FE_INEXACT); |
| 450 #endif |
| 451 return ret; |
| 452 } |
| 453 |
| 454 adj = add_adjusted(r.lo, xy.lo); |
| 455 if (spread + ilogb(r.hi) > -1023) |
| 456 return scalbn(r.hi + adj, spread); |
| 457 else |
| 458 return add_and_denormalize(r.hi, adj, spread); |
| 459 } |
| 460 #endif |