fusl/src/math/fma.c - Issue 1714623002: [fusl] clang-format fusl

Side by Side Diff: fusl/src/math/fma.c

Issue 1714623002: [fusl] clang-format fusl (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: headers too Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 #include <fenv.h>	1 #include <fenv.h>

2 #include "libm.h"	2 #include "libm.h"

3	3

4 #if LDBL_MANT_DIG==64 && LDBL_MAX_EXP==16384	4 #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384

5 /* exact add, assumes exponent_x >= exponent_y */	5 /* exact add, assumes exponent_x >= exponent_y */

6 static void add(long double hi, long double lo, long double x, long double y)	6 static void add(long double* hi,

7 {	7 long double* lo,

8 » long double r;	8 long double x,

	9 long double y) {

	10 long double r;

9	11

10 » r = x + y;	12 r = x + y;

11 » *hi = r;	13 *hi = r;

12 » r -= x;	14 r -= x;

13 » *lo = y - r;	15 *lo = y - r;

14 }	16 }

15	17

16 /* exact mul, assumes no over/underflow */	18 /* exact mul, assumes no over/underflow */

17 static void mul(long double hi, long double lo, long double x, long double y)	19 static void mul(long double* hi,

18 {	20 long double* lo,

19 » static const long double c = 1.0 + 0x1p32L;	21 long double x,

20 » long double cx, xh, xl, cy, yh, yl;	22 long double y) {

	23 static const long double c = 1.0 + 0x1p32L;

	24 long double cx, xh, xl, cy, yh, yl;

21	25

22 » cx = c*x;	26 cx = c * x;

23 » xh = (x - cx) + cx;	27 xh = (x - cx) + cx;

24 » xl = x - xh;	28 xl = x - xh;

25 » cy = c*y;	29 cy = c * y;

26 » yh = (y - cy) + cy;	30 yh = (y - cy) + cy;

27 » yl = y - yh;	31 yl = y - yh;

28 » hi = xy;	32 hi = x y;

29 » lo = (xhyh - hi) + xhyl + xlyh + xlyl;	33 lo = (xh yh - hi) + xh yl + xl * yh + xl * yl;

30 }	34 }

31	35

32 /*	36 /*

33 assume (long double)(hi+lo) == hi	37 assume (long double)(hi+lo) == hi

34 return an adjusted hi so that rounding it to double (or less) precision is corre ct	38 return an adjusted hi so that rounding it to double (or less) precision is

	39 correct

35 */	40 */

36 static long double adjust(long double hi, long double lo)	41 static long double adjust(long double hi, long double lo) {

37 {	42 union ldshape uhi, ulo;

38 » union ldshape uhi, ulo;

39	43

40 » if (lo == 0)	44 if (lo == 0)

41 » » return hi;	45 return hi;

42 » uhi.f = hi;	46 uhi.f = hi;

43 » if (uhi.i.m & 0x3ff)	47 if (uhi.i.m & 0x3ff)

44 » » return hi;	48 return hi;

45 » ulo.f = lo;	49 ulo.f = lo;

46 » if ((uhi.i.se & 0x8000) == (ulo.i.se & 0x8000))	50 if ((uhi.i.se & 0x8000) == (ulo.i.se & 0x8000))

47 » » uhi.i.m++;	51 uhi.i.m++;

48 » else {	52 else {

49 » » /* handle underflow and take care of ld80 implicit msb */	53 /* handle underflow and take care of ld80 implicit msb */

50 » » if (uhi.i.m << 1 == 0) {	54 if (uhi.i.m << 1 == 0) {

51 » » » uhi.i.m = 0;	55 uhi.i.m = 0;

52 » » » uhi.i.se--;	56 uhi.i.se--;

53 » » }	57 }

54 » » uhi.i.m--;	58 uhi.i.m--;

55 » }	59 }

56 » return uhi.f;	60 return uhi.f;

57 }	61 }

58	62

59 /* adjusted add so the result is correct when rounded to double (or less) precis ion */	63 /* adjusted add so the result is correct when rounded to double (or less)

60 static long double dadd(long double x, long double y)	64 * precision */

61 {	65 static long double dadd(long double x, long double y) {

62 » add(&x, &y, x, y);	66 add(&x, &y, x, y);

63 » return adjust(x, y);	67 return adjust(x, y);

64 }	68 }

65	69

66 /* adjusted mul so the result is correct when rounded to double (or less) precis ion */	70 /* adjusted mul so the result is correct when rounded to double (or less)

67 static long double dmul(long double x, long double y)	71 * precision */

68 {	72 static long double dmul(long double x, long double y) {

69 » mul(&x, &y, x, y);	73 mul(&x, &y, x, y);

70 » return adjust(x, y);	74 return adjust(x, y);

71 }	75 }

72	76

73 static int getexp(long double x)	77 static int getexp(long double x) {

74 {	78 union ldshape u;

75 » union ldshape u;	79 u.f = x;

76 » u.f = x;	80 return u.i.se & 0x7fff;

77 » return u.i.se & 0x7fff;

78 }	81 }

79	82

80 double fma(double x, double y, double z)	83 double fma(double x, double y, double z) {

81 {	84 PRAGMA_STDC_FENV_ACCESS_ON

82 » PRAGMA_STDC_FENV_ACCESS_ON	85 long double hi, lo1, lo2, xy;

83 » long double hi, lo1, lo2, xy;	86 int round, ez, exy;

84 » int round, ez, exy;

85	87

86 » /* handle +-inf,nan */	88 /* handle +-inf,nan */

87 » if (!isfinite(x) \|\| !isfinite(y))	89 if (!isfinite(x) \|\| !isfinite(y))

88 » » return x*y + z;	90 return x * y + z;

89 » if (!isfinite(z))	91 if (!isfinite(z))

90 » » return z;	92 return z;

91 » /* handle +-0 */	93 /* handle +-0 */

92 » if (x == 0.0 \|\| y == 0.0)	94 if (x == 0.0 \|\| y == 0.0)

93 » » return x*y + z;	95 return x * y + z;

94 » round = fegetround();	96 round = fegetround();

95 » if (z == 0.0) {	97 if (z == 0.0) {

96 » » if (round == FE_TONEAREST)	98 if (round == FE_TONEAREST)

97 » » » return dmul(x, y);	99 return dmul(x, y);

98 » » return x*y;	100 return x * y;

99 » }	101 }

100	102

101 » /* exact mul and add require nearest rounding */	103 /* exact mul and add require nearest rounding */

102 » /* spurious inexact exceptions may be raised */	104 /* spurious inexact exceptions may be raised */

103 » fesetround(FE_TONEAREST);	105 fesetround(FE_TONEAREST);

104 » mul(&xy, &lo1, x, y);	106 mul(&xy, &lo1, x, y);

105 » exy = getexp(xy);	107 exy = getexp(xy);

106 » ez = getexp(z);	108 ez = getexp(z);

107 » if (ez > exy) {	109 if (ez > exy) {

108 » » add(&hi, &lo2, z, xy);	110 add(&hi, &lo2, z, xy);

109 » } else if (ez > exy - 12) {	111 } else if (ez > exy - 12) {

110 » » add(&hi, &lo2, xy, z);	112 add(&hi, &lo2, xy, z);

111 » » if (hi == 0) {	113 if (hi == 0) {

112 » » » /*	114 /*

113 » » » xy + z is 0, but it should be calculated with the	115 xy + z is 0, but it should be calculated with the

114 » » » original rounding mode so the sign is correct, if the	116 original rounding mode so the sign is correct, if the

115 » » » compiler does not support FENV_ACCESS ON it does not	117 compiler does not support FENV_ACCESS ON it does not

116 » » » know about the changed rounding mode and eliminates	118 know about the changed rounding mode and eliminates

117 » » » the xy + z below without the volatile memory access	119 the xy + z below without the volatile memory access

118 » » » */	120 */

119 » » » volatile double z_;	121 volatile double z_;

120 » » » fesetround(round);	122 fesetround(round);

121 » » » z_ = z;	123 z_ = z;

122 » » » return (xy + z_) + lo1;	124 return (xy + z_) + lo1;

123 » » }	125 }

124 » } else {	126 } else {

125 » » /*	127 /*

126 » » ez <= exy - 12	128 ez <= exy - 12

127 » » the 12 extra bits (1guard, 11round+sticky) are needed so with	129 the 12 extra bits (1guard, 11round+sticky) are needed so with

128 » » » lo = dadd(lo1, lo2)	130 lo = dadd(lo1, lo2)

129 » » elo <= ehi - 11, and we use the last 10 bits in adjust so	131 elo <= ehi - 11, and we use the last 10 bits in adjust so

130 » » » dadd(hi, lo)	132 dadd(hi, lo)

131 » » gives correct result when rounded to double	133 gives correct result when rounded to double

132 » » */	134 */

133 » » hi = xy;	135 hi = xy;

134 » » lo2 = z;	136 lo2 = z;

135 » }	137 }

136 » /*	138 /*

137 » the result is stored before return for correct precision and exceptions	139 the result is stored before return for correct precision and exceptions

138	140

139 » one corner case is when the underflow flag should be raised because	141 one corner case is when the underflow flag should be raised because

140 » the precise result is an inexact subnormal double, but the calculated	142 the precise result is an inexact subnormal double, but the calculated

141 » long double result is an exact subnormal double	143 long double result is an exact subnormal double

142 » (so rounding to double does not raise exceptions)	144 (so rounding to double does not raise exceptions)

143	145

144 » in nearest rounding mode dadd takes care of this: the last bit of the	146 in nearest rounding mode dadd takes care of this: the last bit of the

145 » result is adjusted so rounding sees an inexact value when it should	147 result is adjusted so rounding sees an inexact value when it should

146	148

147 » in non-nearest rounding mode fenv is used for the workaround	149 in non-nearest rounding mode fenv is used for the workaround

148 » */	150 */

149 » fesetround(round);	151 fesetround(round);

150 » if (round == FE_TONEAREST)	152 if (round == FE_TONEAREST)

151 » » z = dadd(hi, dadd(lo1, lo2));	153 z = dadd(hi, dadd(lo1, lo2));

152 » else {	154 else {

153 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW)	155 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW)

154 » » int e = fetestexcept(FE_INEXACT);	156 int e = fetestexcept(FE_INEXACT);

155 » » feclearexcept(FE_INEXACT);	157 feclearexcept(FE_INEXACT);

156 #endif	158 #endif

157 » » z = hi + (lo1 + lo2);	159 z = hi + (lo1 + lo2);

158 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW)	160 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW)

159 » » if (getexp(z) < 0x3fff-1022 && fetestexcept(FE_INEXACT))	161 if (getexp(z) < 0x3fff - 1022 && fetestexcept(FE_INEXACT))

160 » » » feraiseexcept(FE_UNDERFLOW);	162 feraiseexcept(FE_UNDERFLOW);

161 » » else if (e)	163 else if (e)

162 » » » feraiseexcept(FE_INEXACT);	164 feraiseexcept(FE_INEXACT);

163 #endif	165 #endif

164 » }	166 }

165 » return z;	167 return z;

166 }	168 }

167 #else	169 #else

168 /* origin: FreeBSD /usr/src/lib/msun/src/s_fma.c */	170 /* origin: FreeBSD /usr/src/lib/msun/src/s_fma.c */

169 /*-	171 /*-

170 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>	172 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>

171 * All rights reserved.	173 * All rights reserved.

172 *	174 *

173 * Redistribution and use in source and binary forms, with or without	175 * Redistribution and use in source and binary forms, with or without

174 * modification, are permitted provided that the following conditions	176 * modification, are permitted provided that the following conditions

175 * are met:	177 * are met:

(...skipping 15 matching lines...) Expand all Loading...
191 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF	193 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

192 * SUCH DAMAGE.	194 * SUCH DAMAGE.

193 */	195 */

194	196

195 /*	197 /*

196 * A struct dd represents a floating-point number with twice the precision	198 * A struct dd represents a floating-point number with twice the precision

197 * of a double. We maintain the invariant that "hi" stores the 53 high-order	199 * of a double. We maintain the invariant that "hi" stores the 53 high-order

198 * bits of the result.	200 * bits of the result.

199 */	201 */

200 struct dd {	202 struct dd {

201 » double hi;	203 double hi;

202 » double lo;	204 double lo;

203 };	205 };

204	206

205 /*	207 /*

206 * Compute a+b exactly, returning the exact result in a struct dd. We assume	208 * Compute a+b exactly, returning the exact result in a struct dd. We assume

207 * that both a and b are finite, but make no assumptions about their relative	209 * that both a and b are finite, but make no assumptions about their relative

208 * magnitudes.	210 * magnitudes.

209 */	211 */

210 static inline struct dd dd_add(double a, double b)	212 static inline struct dd dd_add(double a, double b) {

211 {	213 struct dd ret;

212 » struct dd ret;	214 double s;

213 » double s;

214	215

215 » ret.hi = a + b;	216 ret.hi = a + b;

216 » s = ret.hi - a;	217 s = ret.hi - a;

217 » ret.lo = (a - (ret.hi - s)) + (b - s);	218 ret.lo = (a - (ret.hi - s)) + (b - s);

218 » return (ret);	219 return (ret);

219 }	220 }

220	221

221 /*	222 /*

222 * Compute a+b, with a small tweak: The least significant bit of the	223 * Compute a+b, with a small tweak: The least significant bit of the

223 * result is adjusted into a sticky bit summarizing all the bits that	224 * result is adjusted into a sticky bit summarizing all the bits that

224 * were lost to rounding. This adjustment negates the effects of double	225 * were lost to rounding. This adjustment negates the effects of double

225 * rounding when the result is added to another number with a higher	226 * rounding when the result is added to another number with a higher

226 * exponent. For an explanation of round and sticky bits, see any reference	227 * exponent. For an explanation of round and sticky bits, see any reference

227 * on FPU design, e.g.,	228 * on FPU design, e.g.,

228 *	229 *

229 * J. Coonen. An Implementation Guide to a Proposed Standard for	230 * J. Coonen. An Implementation Guide to a Proposed Standard for

230 * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980.	231 * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980.

231 */	232 */

232 static inline double add_adjusted(double a, double b)	233 static inline double add_adjusted(double a, double b) {

233 {	234 struct dd sum;

234 » struct dd sum;	235 union {

235 » union {double f; uint64_t i;} uhi, ulo;	236 double f;

	237 uint64_t i;

	238 } uhi, ulo;

236	239

237 » sum = dd_add(a, b);	240 sum = dd_add(a, b);

238 » if (sum.lo != 0) {	241 if (sum.lo != 0) {

239 » » uhi.f = sum.hi;	242 uhi.f = sum.hi;

240 » » if ((uhi.i & 1) == 0) {	243 if ((uhi.i & 1) == 0) {

241 » » » /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */	244 /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */

242 » » » ulo.f = sum.lo;	245 ulo.f = sum.lo;

243 » » » uhi.i += 1 - ((uhi.i ^ ulo.i) >> 62);	246 uhi.i += 1 - ((uhi.i ^ ulo.i) >> 62);

244 » » » sum.hi = uhi.f;	247 sum.hi = uhi.f;

245 » » }	248 }

246 » }	249 }

247 » return (sum.hi);	250 return (sum.hi);

248 }	251 }

249	252

250 /*	253 /*

251 * Compute ldexp(a+b, scale) with a single rounding error. It is assumed	254 * Compute ldexp(a+b, scale) with a single rounding error. It is assumed

252 * that the result will be subnormal, and care is taken to ensure that	255 * that the result will be subnormal, and care is taken to ensure that

253 * double rounding does not occur.	256 * double rounding does not occur.

254 */	257 */

255 static inline double add_and_denormalize(double a, double b, int scale)	258 static inline double add_and_denormalize(double a, double b, int scale) {

256 {	259 struct dd sum;

257 » struct dd sum;	260 union {

258 » union {double f; uint64_t i;} uhi, ulo;	261 double f;

259 » int bits_lost;	262 uint64_t i;

	263 } uhi, ulo;

	264 int bits_lost;

260	265

261 » sum = dd_add(a, b);	266 sum = dd_add(a, b);

262	267

263 » /*	268 /*

264 » * If we are losing at least two bits of accuracy to denormalization,	269 * If we are losing at least two bits of accuracy to denormalization,

265 » * then the first lost bit becomes a round bit, and we adjust the	270 * then the first lost bit becomes a round bit, and we adjust the

266 » * lowest bit of sum.hi to make it a sticky bit summarizing all the	271 * lowest bit of sum.hi to make it a sticky bit summarizing all the

267 » * bits in sum.lo. With the sticky bit adjusted, the hardware will	272 * bits in sum.lo. With the sticky bit adjusted, the hardware will

268 » * break any ties in the correct direction.	273 * break any ties in the correct direction.

269 » *	274 *

270 » * If we are losing only one bit to denormalization, however, we must	275 * If we are losing only one bit to denormalization, however, we must

271 » * break the ties manually.	276 * break the ties manually.

272 » */	277 */

273 » if (sum.lo != 0) {	278 if (sum.lo != 0) {

274 » » uhi.f = sum.hi;	279 uhi.f = sum.hi;

275 » » bits_lost = -((int)(uhi.i >> 52) & 0x7ff) - scale + 1;	280 bits_lost = -((int)(uhi.i >> 52) & 0x7ff) - scale + 1;

276 » » if ((bits_lost != 1) ^ (int)(uhi.i & 1)) {	281 if ((bits_lost != 1) ^ (int)(uhi.i & 1)) {

277 » » » /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */	282 /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */

278 » » » ulo.f = sum.lo;	283 ulo.f = sum.lo;

279 » » » uhi.i += 1 - (((uhi.i ^ ulo.i) >> 62) & 2);	284 uhi.i += 1 - (((uhi.i ^ ulo.i) >> 62) & 2);

280 » » » sum.hi = uhi.f;	285 sum.hi = uhi.f;

281 » » }	286 }

282 » }	287 }

283 » return scalbn(sum.hi, scale);	288 return scalbn(sum.hi, scale);

284 }	289 }

285	290

286 /*	291 /*

287 * Compute a*b exactly, returning the exact result in a struct dd. We assume	292 * Compute a*b exactly, returning the exact result in a struct dd. We assume

288 * that both a and b are normalized, so no underflow or overflow will occur.	293 * that both a and b are normalized, so no underflow or overflow will occur.

289 * The current rounding mode must be round-to-nearest.	294 * The current rounding mode must be round-to-nearest.

290 */	295 */

291 static inline struct dd dd_mul(double a, double b)	296 static inline struct dd dd_mul(double a, double b) {

292 {	297 static const double split = 0x1p27 + 1.0;

293 » static const double split = 0x1p27 + 1.0;	298 struct dd ret;

294 » struct dd ret;	299 double ha, hb, la, lb, p, q;

295 » double ha, hb, la, lb, p, q;

296	300

297 » p = a * split;	301 p = a * split;

298 » ha = a - p;	302 ha = a - p;

299 » ha += p;	303 ha += p;

300 » la = a - ha;	304 la = a - ha;

301	305

302 » p = b * split;	306 p = b * split;

303 » hb = b - p;	307 hb = b - p;

304 » hb += p;	308 hb += p;

305 » lb = b - hb;	309 lb = b - hb;

306	310

307 » p = ha * hb;	311 p = ha * hb;

308 » q = ha * lb + la * hb;	312 q = ha * lb + la * hb;

309	313

310 » ret.hi = p + q;	314 ret.hi = p + q;

311 » ret.lo = p - ret.hi + q + la * lb;	315 ret.lo = p - ret.hi + q + la * lb;

312 » return (ret);	316 return (ret);

313 }	317 }

314	318

315 /*	319 /*

316 * Fused multiply-add: Compute x * y + z with a single rounding error.	320 * Fused multiply-add: Compute x * y + z with a single rounding error.

317 *	321 *

318 * We use scaling to avoid overflow/underflow, along with the	322 * We use scaling to avoid overflow/underflow, along with the

319 * canonical precision-doubling technique adapted from:	323 * canonical precision-doubling technique adapted from:

320 *	324 *

321 * Dekker, T. A Floating-Point Technique for Extending the	325 * Dekker, T. A Floating-Point Technique for Extending the

322 * Available Precision. Numer. Math. 18, 224-242 (1971).	326 * Available Precision. Numer. Math. 18, 224-242 (1971).

323 *	327 *

324 * This algorithm is sensitive to the rounding precision. FPUs such	328 * This algorithm is sensitive to the rounding precision. FPUs such

325 * as the i387 must be set in double-precision mode if variables are	329 * as the i387 must be set in double-precision mode if variables are

326 * to be stored in FP registers in order to avoid incorrect results.	330 * to be stored in FP registers in order to avoid incorrect results.

327 * This is the default on FreeBSD, but not on many other systems.	331 * This is the default on FreeBSD, but not on many other systems.

328 *	332 *

329 * Hardware instructions should be used on architectures that support it,	333 * Hardware instructions should be used on architectures that support it,

330 * since this implementation will likely be several times slower.	334 * since this implementation will likely be several times slower.

331 */	335 */

332 double fma(double x, double y, double z)	336 double fma(double x, double y, double z) {

333 {	337 #pragma STDC FENV_ACCESS ON

334 » #pragma STDC FENV_ACCESS ON	338 double xs, ys, zs, adj;

335 » double xs, ys, zs, adj;	339 struct dd xy, r;

336 » struct dd xy, r;	340 int oround;

337 » int oround;	341 int ex, ey, ez;

338 » int ex, ey, ez;	342 int spread;

339 » int spread;

340	343

341 » /*	344 /*

342 » * Handle special cases. The order of operations and the particular	345 * Handle special cases. The order of operations and the particular

343 » * return values here are crucial in handling special cases involving	346 * return values here are crucial in handling special cases involving

344 » * infinities, NaNs, overflows, and signed zeroes correctly.	347 * infinities, NaNs, overflows, and signed zeroes correctly.

345 » */	348 */

346 » if (!isfinite(x) \|\| !isfinite(y))	349 if (!isfinite(x) \|\| !isfinite(y))

347 » » return (x * y + z);	350 return (x * y + z);

348 » if (!isfinite(z))	351 if (!isfinite(z))

349 » » return (z);	352 return (z);

350 » if (x == 0.0 \|\| y == 0.0)	353 if (x == 0.0 \|\| y == 0.0)

351 » » return (x * y + z);	354 return (x * y + z);

352 » if (z == 0.0)	355 if (z == 0.0)

353 » » return (x * y);	356 return (x * y);

354	357

355 » xs = frexp(x, &ex);	358 xs = frexp(x, &ex);

356 » ys = frexp(y, &ey);	359 ys = frexp(y, &ey);

357 » zs = frexp(z, &ez);	360 zs = frexp(z, &ez);

358 » oround = fegetround();	361 oround = fegetround();

359 » spread = ex + ey - ez;	362 spread = ex + ey - ez;

360	363

361 » /*	364 /*

362 » * If x * y and z are many orders of magnitude apart, the scaling	365 * If x * y and z are many orders of magnitude apart, the scaling

363 » * will overflow, so we handle these cases specially. Rounding	366 * will overflow, so we handle these cases specially. Rounding

364 » * modes other than FE_TONEAREST are painful.	367 * modes other than FE_TONEAREST are painful.

365 » */	368 */

366 » if (spread < -DBL_MANT_DIG) {	369 if (spread < -DBL_MANT_DIG) {

367 #ifdef FE_INEXACT	370 #ifdef FE_INEXACT

368 » » feraiseexcept(FE_INEXACT);	371 feraiseexcept(FE_INEXACT);

369 #endif	372 #endif

370 #ifdef FE_UNDERFLOW	373 #ifdef FE_UNDERFLOW

371 » » if (!isnormal(z))	374 if (!isnormal(z))

372 » » » feraiseexcept(FE_UNDERFLOW);	375 feraiseexcept(FE_UNDERFLOW);

373 #endif	376 #endif

374 » » switch (oround) {	377 switch (oround) {

375 » » default: /* FE_TONEAREST */	378 default: /* FE_TONEAREST */

376 » » » return (z);	379 return (z);

377 #ifdef FE_TOWARDZERO	380 #ifdef FE_TOWARDZERO

378 » » case FE_TOWARDZERO:	381 case FE_TOWARDZERO:

379 » » » if (x > 0.0 ^ y < 0.0 ^ z < 0.0)	382 if (x > 0.0 ^ y < 0.0 ^ z < 0.0)

380 » » » » return (z);	383 return (z);

381 » » » else	384 else

382 » » » » return (nextafter(z, 0));	385 return (nextafter(z, 0));

383 #endif	386 #endif

384 #ifdef FE_DOWNWARD	387 #ifdef FE_DOWNWARD

385 » » case FE_DOWNWARD:	388 case FE_DOWNWARD:

386 » » » if (x > 0.0 ^ y < 0.0)	389 if (x > 0.0 ^ y < 0.0)

387 » » » » return (z);	390 return (z);

388 » » » else	391 else

389 » » » » return (nextafter(z, -INFINITY));	392 return (nextafter(z, -INFINITY));

390 #endif	393 #endif

391 #ifdef FE_UPWARD	394 #ifdef FE_UPWARD

392 » » case FE_UPWARD:	395 case FE_UPWARD:

393 » » » if (x > 0.0 ^ y < 0.0)	396 if (x > 0.0 ^ y < 0.0)

394 » » » » return (nextafter(z, INFINITY));	397 return (nextafter(z, INFINITY));

395 » » » else	398 else

396 » » » » return (z);	399 return (z);

397 #endif	400 #endif

398 » » }	401 }

399 » }	402 }

400 » if (spread <= DBL_MANT_DIG * 2)	403 if (spread <= DBL_MANT_DIG * 2)

401 » » zs = scalbn(zs, -spread);	404 zs = scalbn(zs, -spread);

402 » else	405 else

403 » » zs = copysign(DBL_MIN, zs);	406 zs = copysign(DBL_MIN, zs);

404	407

405 » fesetround(FE_TONEAREST);	408 fesetround(FE_TONEAREST);

406	409

407 » /*	410 /*

408 » * Basic approach for round-to-nearest:	411 * Basic approach for round-to-nearest:

409 » *	412 *

410 » * (xy.hi, xy.lo) = x * y (exact)	413 * (xy.hi, xy.lo) = x * y (exact)

411 » * (r.hi, r.lo) = xy.hi + z (exact)	414 * (r.hi, r.lo) = xy.hi + z (exact)

412 » * adj = xy.lo + r.lo (inexact; low bit is sticky)	415 * adj = xy.lo + r.lo (inexact; low bit is sticky)

413 » * result = r.hi + adj (correctly rounded)	416 * result = r.hi + adj (correctly rounded)

414 » */	417 */

415 » xy = dd_mul(xs, ys);	418 xy = dd_mul(xs, ys);

416 » r = dd_add(xy.hi, zs);	419 r = dd_add(xy.hi, zs);

417	420

418 » spread = ex + ey;	421 spread = ex + ey;

419	422

420 » if (r.hi == 0.0) {	423 if (r.hi == 0.0) {

421 » » /*	424 /*

422 » » * When the addends cancel to 0, ensure that the result has	425 * When the addends cancel to 0, ensure that the result has

423 » » * the correct sign.	426 * the correct sign.

424 » » */	427 */

425 » » fesetround(oround);	428 fesetround(oround);

426 » » volatile double vzs = zs; /* XXX gcc CSE bug workaround */	429 volatile double vzs = zs; /* XXX gcc CSE bug workaround */

427 » » return xy.hi + vzs + scalbn(xy.lo, spread);	430 return xy.hi + vzs + scalbn(xy.lo, spread);

428 » }	431 }

429	432

430 » if (oround != FE_TONEAREST) {	433 if (oround != FE_TONEAREST) {

431 » » /*	434 /*

432 » » * There is no need to worry about double rounding in directed	435 * There is no need to worry about double rounding in directed

433 » » * rounding modes.	436 * rounding modes.

434 » » * But underflow may not be raised properly, example in downward rounding:	437 * But underflow may not be raised properly, example in downward rounding:

435 » » * fma(0x1.000000001p-1000, 0x1.000000001p-30, -0x1p-1066)	438 * fma(0x1.000000001p-1000, 0x1.000000001p-30, -0x1p-1066)

436 » » */	439 */

437 » » double ret;	440 double ret;

438 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW)	441 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW)

439 » » int e = fetestexcept(FE_INEXACT);	442 int e = fetestexcept(FE_INEXACT);

440 » » feclearexcept(FE_INEXACT);	443 feclearexcept(FE_INEXACT);

441 #endif	444 #endif

442 » » fesetround(oround);	445 fesetround(oround);

443 » » adj = r.lo + xy.lo;	446 adj = r.lo + xy.lo;

444 » » ret = scalbn(r.hi + adj, spread);	447 ret = scalbn(r.hi + adj, spread);

445 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW)	448 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW)

446 » » if (ilogb(ret) < -1022 && fetestexcept(FE_INEXACT))	449 if (ilogb(ret) < -1022 && fetestexcept(FE_INEXACT))

447 » » » feraiseexcept(FE_UNDERFLOW);	450 feraiseexcept(FE_UNDERFLOW);

448 » » else if (e)	451 else if (e)

449 » » » feraiseexcept(FE_INEXACT);	452 feraiseexcept(FE_INEXACT);

450 #endif	453 #endif

451 » » return ret;	454 return ret;

452 » }	455 }

453	456

454 » adj = add_adjusted(r.lo, xy.lo);	457 adj = add_adjusted(r.lo, xy.lo);

455 » if (spread + ilogb(r.hi) > -1023)	458 if (spread + ilogb(r.hi) > -1023)

456 » » return scalbn(r.hi + adj, spread);	459 return scalbn(r.hi + adj, spread);

457 » else	460 else

458 » » return add_and_denormalize(r.hi, adj, spread);	461 return add_and_denormalize(r.hi, adj, spread);

459 }	462 }

460 #endif	463 #endif

OLD	NEW

« fusl/arch/aarch64/atomic_arch.h ('K') | « fusl/src/math/floorl.c ('k') | fusl/src/math/fmaf.c » ('j') | no next file with comments »