Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(86)

Side by Side Diff: fusl/src/math/fma.c

Issue 1573973002: Add a "fork" of musl as //fusl. (Closed) Base URL: https://github.com/domokit/mojo.git@master
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « fusl/src/math/floorl.c ('k') | fusl/src/math/fmaf.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #include <fenv.h>
2 #include "libm.h"
3
4 #if LDBL_MANT_DIG==64 && LDBL_MAX_EXP==16384
5 /* exact add, assumes exponent_x >= exponent_y */
6 static void add(long double *hi, long double *lo, long double x, long double y)
7 {
8 long double r;
9
10 r = x + y;
11 *hi = r;
12 r -= x;
13 *lo = y - r;
14 }
15
16 /* exact mul, assumes no over/underflow */
17 static void mul(long double *hi, long double *lo, long double x, long double y)
18 {
19 static const long double c = 1.0 + 0x1p32L;
20 long double cx, xh, xl, cy, yh, yl;
21
22 cx = c*x;
23 xh = (x - cx) + cx;
24 xl = x - xh;
25 cy = c*y;
26 yh = (y - cy) + cy;
27 yl = y - yh;
28 *hi = x*y;
29 *lo = (xh*yh - *hi) + xh*yl + xl*yh + xl*yl;
30 }
31
32 /*
33 assume (long double)(hi+lo) == hi
34 return an adjusted hi so that rounding it to double (or less) precision is corre ct
35 */
36 static long double adjust(long double hi, long double lo)
37 {
38 union ldshape uhi, ulo;
39
40 if (lo == 0)
41 return hi;
42 uhi.f = hi;
43 if (uhi.i.m & 0x3ff)
44 return hi;
45 ulo.f = lo;
46 if ((uhi.i.se & 0x8000) == (ulo.i.se & 0x8000))
47 uhi.i.m++;
48 else {
49 /* handle underflow and take care of ld80 implicit msb */
50 if (uhi.i.m << 1 == 0) {
51 uhi.i.m = 0;
52 uhi.i.se--;
53 }
54 uhi.i.m--;
55 }
56 return uhi.f;
57 }
58
59 /* adjusted add so the result is correct when rounded to double (or less) precis ion */
60 static long double dadd(long double x, long double y)
61 {
62 add(&x, &y, x, y);
63 return adjust(x, y);
64 }
65
66 /* adjusted mul so the result is correct when rounded to double (or less) precis ion */
67 static long double dmul(long double x, long double y)
68 {
69 mul(&x, &y, x, y);
70 return adjust(x, y);
71 }
72
73 static int getexp(long double x)
74 {
75 union ldshape u;
76 u.f = x;
77 return u.i.se & 0x7fff;
78 }
79
80 double fma(double x, double y, double z)
81 {
82 #pragma STDC FENV_ACCESS ON
83 long double hi, lo1, lo2, xy;
84 int round, ez, exy;
85
86 /* handle +-inf,nan */
87 if (!isfinite(x) || !isfinite(y))
88 return x*y + z;
89 if (!isfinite(z))
90 return z;
91 /* handle +-0 */
92 if (x == 0.0 || y == 0.0)
93 return x*y + z;
94 round = fegetround();
95 if (z == 0.0) {
96 if (round == FE_TONEAREST)
97 return dmul(x, y);
98 return x*y;
99 }
100
101 /* exact mul and add require nearest rounding */
102 /* spurious inexact exceptions may be raised */
103 fesetround(FE_TONEAREST);
104 mul(&xy, &lo1, x, y);
105 exy = getexp(xy);
106 ez = getexp(z);
107 if (ez > exy) {
108 add(&hi, &lo2, z, xy);
109 } else if (ez > exy - 12) {
110 add(&hi, &lo2, xy, z);
111 if (hi == 0) {
112 /*
113 xy + z is 0, but it should be calculated with the
114 original rounding mode so the sign is correct, if the
115 compiler does not support FENV_ACCESS ON it does not
116 know about the changed rounding mode and eliminates
117 the xy + z below without the volatile memory access
118 */
119 volatile double z_;
120 fesetround(round);
121 z_ = z;
122 return (xy + z_) + lo1;
123 }
124 } else {
125 /*
126 ez <= exy - 12
127 the 12 extra bits (1guard, 11round+sticky) are needed so with
128 lo = dadd(lo1, lo2)
129 elo <= ehi - 11, and we use the last 10 bits in adjust so
130 dadd(hi, lo)
131 gives correct result when rounded to double
132 */
133 hi = xy;
134 lo2 = z;
135 }
136 /*
137 the result is stored before return for correct precision and exceptions
138
139 one corner case is when the underflow flag should be raised because
140 the precise result is an inexact subnormal double, but the calculated
141 long double result is an exact subnormal double
142 (so rounding to double does not raise exceptions)
143
144 in nearest rounding mode dadd takes care of this: the last bit of the
145 result is adjusted so rounding sees an inexact value when it should
146
147 in non-nearest rounding mode fenv is used for the workaround
148 */
149 fesetround(round);
150 if (round == FE_TONEAREST)
151 z = dadd(hi, dadd(lo1, lo2));
152 else {
153 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
154 int e = fetestexcept(FE_INEXACT);
155 feclearexcept(FE_INEXACT);
156 #endif
157 z = hi + (lo1 + lo2);
158 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
159 if (getexp(z) < 0x3fff-1022 && fetestexcept(FE_INEXACT))
160 feraiseexcept(FE_UNDERFLOW);
161 else if (e)
162 feraiseexcept(FE_INEXACT);
163 #endif
164 }
165 return z;
166 }
167 #else
168 /* origin: FreeBSD /usr/src/lib/msun/src/s_fma.c */
169 /*-
170 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
171 * All rights reserved.
172 *
173 * Redistribution and use in source and binary forms, with or without
174 * modification, are permitted provided that the following conditions
175 * are met:
176 * 1. Redistributions of source code must retain the above copyright
177 * notice, this list of conditions and the following disclaimer.
178 * 2. Redistributions in binary form must reproduce the above copyright
179 * notice, this list of conditions and the following disclaimer in the
180 * documentation and/or other materials provided with the distribution.
181 *
182 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
183 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
184 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
185 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
186 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
187 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
188 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
189 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
190 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
191 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
192 * SUCH DAMAGE.
193 */
194
195 /*
196 * A struct dd represents a floating-point number with twice the precision
197 * of a double. We maintain the invariant that "hi" stores the 53 high-order
198 * bits of the result.
199 */
200 struct dd {
201 double hi;
202 double lo;
203 };
204
205 /*
206 * Compute a+b exactly, returning the exact result in a struct dd. We assume
207 * that both a and b are finite, but make no assumptions about their relative
208 * magnitudes.
209 */
210 static inline struct dd dd_add(double a, double b)
211 {
212 struct dd ret;
213 double s;
214
215 ret.hi = a + b;
216 s = ret.hi - a;
217 ret.lo = (a - (ret.hi - s)) + (b - s);
218 return (ret);
219 }
220
221 /*
222 * Compute a+b, with a small tweak: The least significant bit of the
223 * result is adjusted into a sticky bit summarizing all the bits that
224 * were lost to rounding. This adjustment negates the effects of double
225 * rounding when the result is added to another number with a higher
226 * exponent. For an explanation of round and sticky bits, see any reference
227 * on FPU design, e.g.,
228 *
229 * J. Coonen. An Implementation Guide to a Proposed Standard for
230 * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980.
231 */
232 static inline double add_adjusted(double a, double b)
233 {
234 struct dd sum;
235 union {double f; uint64_t i;} uhi, ulo;
236
237 sum = dd_add(a, b);
238 if (sum.lo != 0) {
239 uhi.f = sum.hi;
240 if ((uhi.i & 1) == 0) {
241 /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
242 ulo.f = sum.lo;
243 uhi.i += 1 - ((uhi.i ^ ulo.i) >> 62);
244 sum.hi = uhi.f;
245 }
246 }
247 return (sum.hi);
248 }
249
250 /*
251 * Compute ldexp(a+b, scale) with a single rounding error. It is assumed
252 * that the result will be subnormal, and care is taken to ensure that
253 * double rounding does not occur.
254 */
255 static inline double add_and_denormalize(double a, double b, int scale)
256 {
257 struct dd sum;
258 union {double f; uint64_t i;} uhi, ulo;
259 int bits_lost;
260
261 sum = dd_add(a, b);
262
263 /*
264 * If we are losing at least two bits of accuracy to denormalization,
265 * then the first lost bit becomes a round bit, and we adjust the
266 * lowest bit of sum.hi to make it a sticky bit summarizing all the
267 * bits in sum.lo. With the sticky bit adjusted, the hardware will
268 * break any ties in the correct direction.
269 *
270 * If we are losing only one bit to denormalization, however, we must
271 * break the ties manually.
272 */
273 if (sum.lo != 0) {
274 uhi.f = sum.hi;
275 bits_lost = -((int)(uhi.i >> 52) & 0x7ff) - scale + 1;
276 if ((bits_lost != 1) ^ (int)(uhi.i & 1)) {
277 /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
278 ulo.f = sum.lo;
279 uhi.i += 1 - (((uhi.i ^ ulo.i) >> 62) & 2);
280 sum.hi = uhi.f;
281 }
282 }
283 return scalbn(sum.hi, scale);
284 }
285
286 /*
287 * Compute a*b exactly, returning the exact result in a struct dd. We assume
288 * that both a and b are normalized, so no underflow or overflow will occur.
289 * The current rounding mode must be round-to-nearest.
290 */
291 static inline struct dd dd_mul(double a, double b)
292 {
293 static const double split = 0x1p27 + 1.0;
294 struct dd ret;
295 double ha, hb, la, lb, p, q;
296
297 p = a * split;
298 ha = a - p;
299 ha += p;
300 la = a - ha;
301
302 p = b * split;
303 hb = b - p;
304 hb += p;
305 lb = b - hb;
306
307 p = ha * hb;
308 q = ha * lb + la * hb;
309
310 ret.hi = p + q;
311 ret.lo = p - ret.hi + q + la * lb;
312 return (ret);
313 }
314
315 /*
316 * Fused multiply-add: Compute x * y + z with a single rounding error.
317 *
318 * We use scaling to avoid overflow/underflow, along with the
319 * canonical precision-doubling technique adapted from:
320 *
321 * Dekker, T. A Floating-Point Technique for Extending the
322 * Available Precision. Numer. Math. 18, 224-242 (1971).
323 *
324 * This algorithm is sensitive to the rounding precision. FPUs such
325 * as the i387 must be set in double-precision mode if variables are
326 * to be stored in FP registers in order to avoid incorrect results.
327 * This is the default on FreeBSD, but not on many other systems.
328 *
329 * Hardware instructions should be used on architectures that support it,
330 * since this implementation will likely be several times slower.
331 */
332 double fma(double x, double y, double z)
333 {
334 #pragma STDC FENV_ACCESS ON
335 double xs, ys, zs, adj;
336 struct dd xy, r;
337 int oround;
338 int ex, ey, ez;
339 int spread;
340
341 /*
342 * Handle special cases. The order of operations and the particular
343 * return values here are crucial in handling special cases involving
344 * infinities, NaNs, overflows, and signed zeroes correctly.
345 */
346 if (!isfinite(x) || !isfinite(y))
347 return (x * y + z);
348 if (!isfinite(z))
349 return (z);
350 if (x == 0.0 || y == 0.0)
351 return (x * y + z);
352 if (z == 0.0)
353 return (x * y);
354
355 xs = frexp(x, &ex);
356 ys = frexp(y, &ey);
357 zs = frexp(z, &ez);
358 oround = fegetround();
359 spread = ex + ey - ez;
360
361 /*
362 * If x * y and z are many orders of magnitude apart, the scaling
363 * will overflow, so we handle these cases specially. Rounding
364 * modes other than FE_TONEAREST are painful.
365 */
366 if (spread < -DBL_MANT_DIG) {
367 #ifdef FE_INEXACT
368 feraiseexcept(FE_INEXACT);
369 #endif
370 #ifdef FE_UNDERFLOW
371 if (!isnormal(z))
372 feraiseexcept(FE_UNDERFLOW);
373 #endif
374 switch (oround) {
375 default: /* FE_TONEAREST */
376 return (z);
377 #ifdef FE_TOWARDZERO
378 case FE_TOWARDZERO:
379 if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
380 return (z);
381 else
382 return (nextafter(z, 0));
383 #endif
384 #ifdef FE_DOWNWARD
385 case FE_DOWNWARD:
386 if (x > 0.0 ^ y < 0.0)
387 return (z);
388 else
389 return (nextafter(z, -INFINITY));
390 #endif
391 #ifdef FE_UPWARD
392 case FE_UPWARD:
393 if (x > 0.0 ^ y < 0.0)
394 return (nextafter(z, INFINITY));
395 else
396 return (z);
397 #endif
398 }
399 }
400 if (spread <= DBL_MANT_DIG * 2)
401 zs = scalbn(zs, -spread);
402 else
403 zs = copysign(DBL_MIN, zs);
404
405 fesetround(FE_TONEAREST);
406
407 /*
408 * Basic approach for round-to-nearest:
409 *
410 * (xy.hi, xy.lo) = x * y (exact)
411 * (r.hi, r.lo) = xy.hi + z (exact)
412 * adj = xy.lo + r.lo (inexact; low bit is sticky)
413 * result = r.hi + adj (correctly rounded)
414 */
415 xy = dd_mul(xs, ys);
416 r = dd_add(xy.hi, zs);
417
418 spread = ex + ey;
419
420 if (r.hi == 0.0) {
421 /*
422 * When the addends cancel to 0, ensure that the result has
423 * the correct sign.
424 */
425 fesetround(oround);
426 volatile double vzs = zs; /* XXX gcc CSE bug workaround */
427 return xy.hi + vzs + scalbn(xy.lo, spread);
428 }
429
430 if (oround != FE_TONEAREST) {
431 /*
432 * There is no need to worry about double rounding in directed
433 * rounding modes.
434 * But underflow may not be raised properly, example in downward rounding:
435 * fma(0x1.000000001p-1000, 0x1.000000001p-30, -0x1p-1066)
436 */
437 double ret;
438 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
439 int e = fetestexcept(FE_INEXACT);
440 feclearexcept(FE_INEXACT);
441 #endif
442 fesetround(oround);
443 adj = r.lo + xy.lo;
444 ret = scalbn(r.hi + adj, spread);
445 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
446 if (ilogb(ret) < -1022 && fetestexcept(FE_INEXACT))
447 feraiseexcept(FE_UNDERFLOW);
448 else if (e)
449 feraiseexcept(FE_INEXACT);
450 #endif
451 return ret;
452 }
453
454 adj = add_adjusted(r.lo, xy.lo);
455 if (spread + ilogb(r.hi) > -1023)
456 return scalbn(r.hi + adj, spread);
457 else
458 return add_and_denormalize(r.hi, adj, spread);
459 }
460 #endif
OLDNEW
« no previous file with comments | « fusl/src/math/floorl.c ('k') | fusl/src/math/fmaf.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698