OLD | NEW |
1 /* origin: FreeBSD /usr/src/lib/msun/src/s_fmal.c */ | 1 /* origin: FreeBSD /usr/src/lib/msun/src/s_fmal.c */ |
2 /*- | 2 /*- |
3 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG> | 3 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG> |
4 * All rights reserved. | 4 * All rights reserved. |
5 * | 5 * |
6 * Redistribution and use in source and binary forms, with or without | 6 * Redistribution and use in source and binary forms, with or without |
7 * modification, are permitted provided that the following conditions | 7 * modification, are permitted provided that the following conditions |
8 * are met: | 8 * are met: |
9 * 1. Redistributions of source code must retain the above copyright | 9 * 1. Redistributions of source code must retain the above copyright |
10 * notice, this list of conditions and the following disclaimer. | 10 * notice, this list of conditions and the following disclaimer. |
11 * 2. Redistributions in binary form must reproduce the above copyright | 11 * 2. Redistributions in binary form must reproduce the above copyright |
12 * notice, this list of conditions and the following disclaimer in the | 12 * notice, this list of conditions and the following disclaimer in the |
13 * documentation and/or other materials provided with the distribution. | 13 * documentation and/or other materials provided with the distribution. |
14 * | 14 * |
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
25 * SUCH DAMAGE. | 25 * SUCH DAMAGE. |
26 */ | 26 */ |
27 | 27 |
28 | |
29 #include "libm.h" | 28 #include "libm.h" |
30 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 | 29 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 |
31 long double fmal(long double x, long double y, long double z) | 30 long double fmal(long double x, long double y, long double z) { |
32 { | 31 return fma(x, y, z); |
33 » return fma(x, y, z); | |
34 } | 32 } |
35 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 | 33 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 |
36 #include <fenv.h> | 34 #include <fenv.h> |
37 #if LDBL_MANT_DIG == 64 | 35 #if LDBL_MANT_DIG == 64 |
38 #define LASTBIT(u) (u.i.m & 1) | 36 #define LASTBIT(u) (u.i.m & 1) |
39 #define SPLIT (0x1p32L + 1) | 37 #define SPLIT (0x1p32L + 1) |
40 #elif LDBL_MANT_DIG == 113 | 38 #elif LDBL_MANT_DIG == 113 |
41 #define LASTBIT(u) (u.i.lo & 1) | 39 #define LASTBIT(u) (u.i.lo & 1) |
42 #define SPLIT (0x1p57L + 1) | 40 #define SPLIT (0x1p57L + 1) |
43 #endif | 41 #endif |
44 | 42 |
/*
 * A struct dd represents a floating-point number with twice the precision
 * of a long double.  We maintain the invariant that "hi" stores the
 * high-order bits of the result, so the represented value is hi + lo.
 */
struct dd {
  long double hi; /* high-order bits (a correctly rounded approximation) */
  long double lo; /* low-order bits (the rounding error of hi) */
};
54 | 52 |
55 /* | 53 /* |
56 * Compute a+b exactly, returning the exact result in a struct dd. We assume | 54 * Compute a+b exactly, returning the exact result in a struct dd. We assume |
57 * that both a and b are finite, but make no assumptions about their relative | 55 * that both a and b are finite, but make no assumptions about their relative |
58 * magnitudes. | 56 * magnitudes. |
59 */ | 57 */ |
60 static inline struct dd dd_add(long double a, long double b) | 58 static inline struct dd dd_add(long double a, long double b) { |
61 { | 59 struct dd ret; |
62 » struct dd ret; | 60 long double s; |
63 » long double s; | |
64 | 61 |
65 » ret.hi = a + b; | 62 ret.hi = a + b; |
66 » s = ret.hi - a; | 63 s = ret.hi - a; |
67 » ret.lo = (a - (ret.hi - s)) + (b - s); | 64 ret.lo = (a - (ret.hi - s)) + (b - s); |
68 » return (ret); | 65 return (ret); |
69 } | 66 } |
70 | 67 |
71 /* | 68 /* |
72 * Compute a+b, with a small tweak: The least significant bit of the | 69 * Compute a+b, with a small tweak: The least significant bit of the |
73 * result is adjusted into a sticky bit summarizing all the bits that | 70 * result is adjusted into a sticky bit summarizing all the bits that |
74 * were lost to rounding. This adjustment negates the effects of double | 71 * were lost to rounding. This adjustment negates the effects of double |
75 * rounding when the result is added to another number with a higher | 72 * rounding when the result is added to another number with a higher |
76 * exponent. For an explanation of round and sticky bits, see any reference | 73 * exponent. For an explanation of round and sticky bits, see any reference |
77 * on FPU design, e.g., | 74 * on FPU design, e.g., |
78 * | 75 * |
79 * J. Coonen. An Implementation Guide to a Proposed Standard for | 76 * J. Coonen. An Implementation Guide to a Proposed Standard for |
80 * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980. | 77 * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980. |
81 */ | 78 */ |
82 static inline long double add_adjusted(long double a, long double b) | 79 static inline long double add_adjusted(long double a, long double b) { |
83 { | 80 struct dd sum; |
84 » struct dd sum; | 81 union ldshape u; |
85 » union ldshape u; | |
86 | 82 |
87 » sum = dd_add(a, b); | 83 sum = dd_add(a, b); |
88 » if (sum.lo != 0) { | 84 if (sum.lo != 0) { |
89 » » u.f = sum.hi; | 85 u.f = sum.hi; |
90 » » if (!LASTBIT(u)) | 86 if (!LASTBIT(u)) |
91 » » » sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); | 87 sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); |
92 » } | 88 } |
93 » return (sum.hi); | 89 return (sum.hi); |
94 } | 90 } |
95 | 91 |
96 /* | 92 /* |
97 * Compute ldexp(a+b, scale) with a single rounding error. It is assumed | 93 * Compute ldexp(a+b, scale) with a single rounding error. It is assumed |
98 * that the result will be subnormal, and care is taken to ensure that | 94 * that the result will be subnormal, and care is taken to ensure that |
99 * double rounding does not occur. | 95 * double rounding does not occur. |
100 */ | 96 */ |
101 static inline long double add_and_denormalize(long double a, long double b, int
scale) | 97 static inline long double add_and_denormalize(long double a, |
102 { | 98 long double b, |
103 » struct dd sum; | 99 int scale) { |
104 » int bits_lost; | 100 struct dd sum; |
105 » union ldshape u; | 101 int bits_lost; |
| 102 union ldshape u; |
106 | 103 |
107 » sum = dd_add(a, b); | 104 sum = dd_add(a, b); |
108 | 105 |
109 » /* | 106 /* |
110 » * If we are losing at least two bits of accuracy to denormalization, | 107 * If we are losing at least two bits of accuracy to denormalization, |
111 » * then the first lost bit becomes a round bit, and we adjust the | 108 * then the first lost bit becomes a round bit, and we adjust the |
112 » * lowest bit of sum.hi to make it a sticky bit summarizing all the | 109 * lowest bit of sum.hi to make it a sticky bit summarizing all the |
113 » * bits in sum.lo. With the sticky bit adjusted, the hardware will | 110 * bits in sum.lo. With the sticky bit adjusted, the hardware will |
114 » * break any ties in the correct direction. | 111 * break any ties in the correct direction. |
115 » * | 112 * |
116 » * If we are losing only one bit to denormalization, however, we must | 113 * If we are losing only one bit to denormalization, however, we must |
117 » * break the ties manually. | 114 * break the ties manually. |
118 » */ | 115 */ |
119 » if (sum.lo != 0) { | 116 if (sum.lo != 0) { |
120 » » u.f = sum.hi; | 117 u.f = sum.hi; |
121 » » bits_lost = -u.i.se - scale + 1; | 118 bits_lost = -u.i.se - scale + 1; |
122 » » if ((bits_lost != 1) ^ LASTBIT(u)) | 119 if ((bits_lost != 1) ^ LASTBIT(u)) |
123 » » » sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); | 120 sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); |
124 » } | 121 } |
125 » return scalbnl(sum.hi, scale); | 122 return scalbnl(sum.hi, scale); |
126 } | 123 } |
127 | 124 |
128 /* | 125 /* |
129 * Compute a*b exactly, returning the exact result in a struct dd. We assume | 126 * Compute a*b exactly, returning the exact result in a struct dd. We assume |
130 * that both a and b are normalized, so no underflow or overflow will occur. | 127 * that both a and b are normalized, so no underflow or overflow will occur. |
131 * The current rounding mode must be round-to-nearest. | 128 * The current rounding mode must be round-to-nearest. |
132 */ | 129 */ |
133 static inline struct dd dd_mul(long double a, long double b) | 130 static inline struct dd dd_mul(long double a, long double b) { |
134 { | 131 struct dd ret; |
135 » struct dd ret; | 132 long double ha, hb, la, lb, p, q; |
136 » long double ha, hb, la, lb, p, q; | |
137 | 133 |
138 » p = a * SPLIT; | 134 p = a * SPLIT; |
139 » ha = a - p; | 135 ha = a - p; |
140 » ha += p; | 136 ha += p; |
141 » la = a - ha; | 137 la = a - ha; |
142 | 138 |
143 » p = b * SPLIT; | 139 p = b * SPLIT; |
144 » hb = b - p; | 140 hb = b - p; |
145 » hb += p; | 141 hb += p; |
146 » lb = b - hb; | 142 lb = b - hb; |
147 | 143 |
148 » p = ha * hb; | 144 p = ha * hb; |
149 » q = ha * lb + la * hb; | 145 q = ha * lb + la * hb; |
150 | 146 |
151 » ret.hi = p + q; | 147 ret.hi = p + q; |
152 » ret.lo = p - ret.hi + q + la * lb; | 148 ret.lo = p - ret.hi + q + la * lb; |
153 » return (ret); | 149 return (ret); |
154 } | 150 } |
155 | 151 |
156 /* | 152 /* |
157 * Fused multiply-add: Compute x * y + z with a single rounding error. | 153 * Fused multiply-add: Compute x * y + z with a single rounding error. |
158 * | 154 * |
159 * We use scaling to avoid overflow/underflow, along with the | 155 * We use scaling to avoid overflow/underflow, along with the |
160 * canonical precision-doubling technique adapted from: | 156 * canonical precision-doubling technique adapted from: |
161 * | 157 * |
162 * Dekker, T. A Floating-Point Technique for Extending the | 158 * Dekker, T. A Floating-Point Technique for Extending the |
163 * Available Precision. Numer. Math. 18, 224-242 (1971). | 159 * Available Precision. Numer. Math. 18, 224-242 (1971). |
164 */ | 160 */ |
165 long double fmal(long double x, long double y, long double z) | 161 long double fmal(long double x, long double y, long double z) { |
166 { | 162 PRAGMA_STDC_FENV_ACCESS_ON |
167 » PRAGMA_STDC_FENV_ACCESS_ON | 163 long double xs, ys, zs, adj; |
168 » long double xs, ys, zs, adj; | 164 struct dd xy, r; |
169 » struct dd xy, r; | 165 int oround; |
170 » int oround; | 166 int ex, ey, ez; |
171 » int ex, ey, ez; | 167 int spread; |
172 » int spread; | |
173 | 168 |
174 » /* | 169 /* |
175 » * Handle special cases. The order of operations and the particular | 170 * Handle special cases. The order of operations and the particular |
176 » * return values here are crucial in handling special cases involving | 171 * return values here are crucial in handling special cases involving |
177 » * infinities, NaNs, overflows, and signed zeroes correctly. | 172 * infinities, NaNs, overflows, and signed zeroes correctly. |
178 » */ | 173 */ |
179 » if (!isfinite(x) || !isfinite(y)) | 174 if (!isfinite(x) || !isfinite(y)) |
180 » » return (x * y + z); | 175 return (x * y + z); |
181 » if (!isfinite(z)) | 176 if (!isfinite(z)) |
182 » » return (z); | 177 return (z); |
183 » if (x == 0.0 || y == 0.0) | 178 if (x == 0.0 || y == 0.0) |
184 » » return (x * y + z); | 179 return (x * y + z); |
185 » if (z == 0.0) | 180 if (z == 0.0) |
186 » » return (x * y); | 181 return (x * y); |
187 | 182 |
188 » xs = frexpl(x, &ex); | 183 xs = frexpl(x, &ex); |
189 » ys = frexpl(y, &ey); | 184 ys = frexpl(y, &ey); |
190 » zs = frexpl(z, &ez); | 185 zs = frexpl(z, &ez); |
191 » oround = fegetround(); | 186 oround = fegetround(); |
192 » spread = ex + ey - ez; | 187 spread = ex + ey - ez; |
193 | 188 |
194 » /* | 189 /* |
195 » * If x * y and z are many orders of magnitude apart, the scaling | 190 * If x * y and z are many orders of magnitude apart, the scaling |
196 » * will overflow, so we handle these cases specially. Rounding | 191 * will overflow, so we handle these cases specially. Rounding |
197 » * modes other than FE_TONEAREST are painful. | 192 * modes other than FE_TONEAREST are painful. |
198 » */ | 193 */ |
199 » if (spread < -LDBL_MANT_DIG) { | 194 if (spread < -LDBL_MANT_DIG) { |
200 #ifdef FE_INEXACT | 195 #ifdef FE_INEXACT |
201 » » feraiseexcept(FE_INEXACT); | 196 feraiseexcept(FE_INEXACT); |
202 #endif | 197 #endif |
203 #ifdef FE_UNDERFLOW | 198 #ifdef FE_UNDERFLOW |
204 » » if (!isnormal(z)) | 199 if (!isnormal(z)) |
205 » » » feraiseexcept(FE_UNDERFLOW); | 200 feraiseexcept(FE_UNDERFLOW); |
206 #endif | 201 #endif |
207 » » switch (oround) { | 202 switch (oround) { |
208 » » default: /* FE_TONEAREST */ | 203 default: /* FE_TONEAREST */ |
209 » » » return (z); | 204 return (z); |
210 #ifdef FE_TOWARDZERO | 205 #ifdef FE_TOWARDZERO |
211 » » case FE_TOWARDZERO: | 206 case FE_TOWARDZERO: |
212 » » » if (x > 0.0 ^ y < 0.0 ^ z < 0.0) | 207 if (x > 0.0 ^ y < 0.0 ^ z < 0.0) |
213 » » » » return (z); | 208 return (z); |
214 » » » else | 209 else |
215 » » » » return (nextafterl(z, 0)); | 210 return (nextafterl(z, 0)); |
216 #endif | 211 #endif |
217 #ifdef FE_DOWNWARD | 212 #ifdef FE_DOWNWARD |
218 » » case FE_DOWNWARD: | 213 case FE_DOWNWARD: |
219 » » » if (x > 0.0 ^ y < 0.0) | 214 if (x > 0.0 ^ y < 0.0) |
220 » » » » return (z); | 215 return (z); |
221 » » » else | 216 else |
222 » » » » return (nextafterl(z, -INFINITY)); | 217 return (nextafterl(z, -INFINITY)); |
223 #endif | 218 #endif |
224 #ifdef FE_UPWARD | 219 #ifdef FE_UPWARD |
225 » » case FE_UPWARD: | 220 case FE_UPWARD: |
226 » » » if (x > 0.0 ^ y < 0.0) | 221 if (x > 0.0 ^ y < 0.0) |
227 » » » » return (nextafterl(z, INFINITY)); | 222 return (nextafterl(z, INFINITY)); |
228 » » » else | 223 else |
229 » » » » return (z); | 224 return (z); |
230 #endif | 225 #endif |
231 » » } | 226 } |
232 » } | 227 } |
233 » if (spread <= LDBL_MANT_DIG * 2) | 228 if (spread <= LDBL_MANT_DIG * 2) |
234 » » zs = scalbnl(zs, -spread); | 229 zs = scalbnl(zs, -spread); |
235 » else | 230 else |
236 » » zs = copysignl(LDBL_MIN, zs); | 231 zs = copysignl(LDBL_MIN, zs); |
237 | 232 |
238 » fesetround(FE_TONEAREST); | 233 fesetround(FE_TONEAREST); |
239 | 234 |
240 » /* | 235 /* |
241 » * Basic approach for round-to-nearest: | 236 * Basic approach for round-to-nearest: |
242 » * | 237 * |
243 » * (xy.hi, xy.lo) = x * y (exact) | 238 * (xy.hi, xy.lo) = x * y (exact) |
244 » * (r.hi, r.lo) = xy.hi + z (exact) | 239 * (r.hi, r.lo) = xy.hi + z (exact) |
245 » * adj = xy.lo + r.lo (inexact; low bit is sticky) | 240 * adj = xy.lo + r.lo (inexact; low bit is sticky) |
246 » * result = r.hi + adj (correctly rounded) | 241 * result = r.hi + adj (correctly rounded) |
247 » */ | 242 */ |
248 » xy = dd_mul(xs, ys); | 243 xy = dd_mul(xs, ys); |
249 » r = dd_add(xy.hi, zs); | 244 r = dd_add(xy.hi, zs); |
250 | 245 |
251 » spread = ex + ey; | 246 spread = ex + ey; |
252 | 247 |
253 » if (r.hi == 0.0) { | 248 if (r.hi == 0.0) { |
254 » » /* | 249 /* |
255 » » * When the addends cancel to 0, ensure that the result has | 250 * When the addends cancel to 0, ensure that the result has |
256 » » * the correct sign. | 251 * the correct sign. |
257 » » */ | 252 */ |
258 » » fesetround(oround); | 253 fesetround(oround); |
259 » » volatile long double vzs = zs; /* XXX gcc CSE bug workaround */ | 254 volatile long double vzs = zs; /* XXX gcc CSE bug workaround */ |
260 » » return xy.hi + vzs + scalbnl(xy.lo, spread); | 255 return xy.hi + vzs + scalbnl(xy.lo, spread); |
261 » } | 256 } |
262 | 257 |
263 » if (oround != FE_TONEAREST) { | 258 if (oround != FE_TONEAREST) { |
264 » » /* | 259 /* |
265 » » * There is no need to worry about double rounding in directed | 260 * There is no need to worry about double rounding in directed |
266 » » * rounding modes. | 261 * rounding modes. |
267 » » * But underflow may not be raised correctly, example in downwar
d rounding: | 262 * But underflow may not be raised correctly, example in downward rounding: |
268 » » * fmal(0x1.0000000001p-16000L, 0x1.0000000001p-400L, -0x1p-1644
0L) | 263 * fmal(0x1.0000000001p-16000L, 0x1.0000000001p-400L, -0x1p-16440L) |
269 » » */ | 264 */ |
270 » » long double ret; | 265 long double ret; |
271 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW) | 266 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW) |
272 » » int e = fetestexcept(FE_INEXACT); | 267 int e = fetestexcept(FE_INEXACT); |
273 » » feclearexcept(FE_INEXACT); | 268 feclearexcept(FE_INEXACT); |
274 #endif | 269 #endif |
275 » » fesetround(oround); | 270 fesetround(oround); |
276 » » adj = r.lo + xy.lo; | 271 adj = r.lo + xy.lo; |
277 » » ret = scalbnl(r.hi + adj, spread); | 272 ret = scalbnl(r.hi + adj, spread); |
278 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW) | 273 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW) |
279 » » if (ilogbl(ret) < -16382 && fetestexcept(FE_INEXACT)) | 274 if (ilogbl(ret) < -16382 && fetestexcept(FE_INEXACT)) |
280 » » » feraiseexcept(FE_UNDERFLOW); | 275 feraiseexcept(FE_UNDERFLOW); |
281 » » else if (e) | 276 else if (e) |
282 » » » feraiseexcept(FE_INEXACT); | 277 feraiseexcept(FE_INEXACT); |
283 #endif | 278 #endif |
284 » » return ret; | 279 return ret; |
285 » } | 280 } |
286 | 281 |
287 » adj = add_adjusted(r.lo, xy.lo); | 282 adj = add_adjusted(r.lo, xy.lo); |
288 » if (spread + ilogbl(r.hi) > -16383) | 283 if (spread + ilogbl(r.hi) > -16383) |
289 » » return scalbnl(r.hi + adj, spread); | 284 return scalbnl(r.hi + adj, spread); |
290 » else | 285 else |
291 » » return add_and_denormalize(r.hi, adj, spread); | 286 return add_and_denormalize(r.hi, adj, spread); |
292 } | 287 } |
293 #endif | 288 #endif |
OLD | NEW |