Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(810)

Side by Side Diff: fusl/src/math/fmal.c

Issue 1714623002: [fusl] clang-format fusl (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: headers too Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
/* origin: FreeBSD /usr/src/lib/msun/src/s_fmal.c */
/*-
 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
28
#include "libm.h"
/*
 * When long double has the same representation as IEEE double
 * (53-bit mantissa, 11-bit exponent — e.g. ABIs where
 * long double == double), fmal is exactly fma.
 */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double fmal(long double x, long double y, long double z) {
  return fma(x, y, z);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
#include <fenv.h>
#if LDBL_MANT_DIG == 64
/* LASTBIT reads the least significant mantissa bit of a long double viewed
 * through union ldshape (declared in libm.h); used below for sticky-bit
 * rounding adjustments.  NOTE(review): field names (i.m / i.lo / i.se) come
 * from libm.h's per-format layout — confirm against that header. */
#define LASTBIT(u) (u.i.m & 1)
/* Dekker splitting constant 2^ceil(p/2) + 1 for p = 64 mantissa bits. */
#define SPLIT (0x1p32L + 1)
#elif LDBL_MANT_DIG == 113
#define LASTBIT(u) (u.i.lo & 1)
/* 2^ceil(p/2) + 1 for p = 113 mantissa bits (IEEE binary128). */
#define SPLIT (0x1p57L + 1)
#endif

/*
 * A struct dd represents a floating-point number with twice the precision
 * of a long double.  We maintain the invariant that "hi" stores the
 * high-order bits of the result.
 */
struct dd {
  long double hi; /* high-order part: the correctly rounded value */
  long double lo; /* low-order part: the exact rounding error of hi */
};
54 52
55 /* 53 /*
56 * Compute a+b exactly, returning the exact result in a struct dd. We assume 54 * Compute a+b exactly, returning the exact result in a struct dd. We assume
57 * that both a and b are finite, but make no assumptions about their relative 55 * that both a and b are finite, but make no assumptions about their relative
58 * magnitudes. 56 * magnitudes.
59 */ 57 */
60 static inline struct dd dd_add(long double a, long double b) 58 static inline struct dd dd_add(long double a, long double b) {
61 { 59 struct dd ret;
62 » struct dd ret; 60 long double s;
63 » long double s;
64 61
65 » ret.hi = a + b; 62 ret.hi = a + b;
66 » s = ret.hi - a; 63 s = ret.hi - a;
67 » ret.lo = (a - (ret.hi - s)) + (b - s); 64 ret.lo = (a - (ret.hi - s)) + (b - s);
68 » return (ret); 65 return (ret);
69 } 66 }
70 67
/*
 * Compute a+b, with a small tweak: The least significant bit of the
 * result is adjusted into a sticky bit summarizing all the bits that
 * were lost to rounding.  This adjustment negates the effects of double
 * rounding when the result is added to another number with a higher
 * exponent.  For an explanation of round and sticky bits, see any reference
 * on FPU design, e.g.,
 *
 *     J. Coonen.  An Implementation Guide to a Proposed Standard for
 *     Floating-Point Arithmetic.  Computer, vol. 13, no. 1, Jan 1980.
 */
static inline long double add_adjusted(long double a, long double b) {
  struct dd sum;
  union ldshape u;

  sum = dd_add(a, b);
  if (sum.lo != 0) {
    u.f = sum.hi;
    /* If the low bit of sum.hi is already set it serves as the sticky bit;
     * otherwise nudge sum.hi one ulp toward sum.lo to set it.  The
     * expression INFINITY * sum.lo only supplies the direction (sign). */
    if (!LASTBIT(u))
      sum.hi = nextafterl(sum.hi, INFINITY * sum.lo);
  }
  return (sum.hi);
}
95 91
/*
 * Compute ldexp(a+b, scale) with a single rounding error.  It is assumed
 * that the result will be subnormal, and care is taken to ensure that
 * double rounding does not occur.
 */
static inline long double add_and_denormalize(long double a,
                                              long double b,
                                              int scale) {
  struct dd sum;
  int bits_lost;
  union ldshape u;

  sum = dd_add(a, b);

  /*
   * If we are losing at least two bits of accuracy to denormalization,
   * then the first lost bit becomes a round bit, and we adjust the
   * lowest bit of sum.hi to make it a sticky bit summarizing all the
   * bits in sum.lo.  With the sticky bit adjusted, the hardware will
   * break any ties in the correct direction.
   *
   * If we are losing only one bit to denormalization, however, we must
   * break the ties manually.
   */
  if (sum.lo != 0) {
    u.f = sum.hi;
    /* u.i.se is presumably the sign+biased-exponent field of union ldshape
     * (see libm.h); bits_lost counts how many low-order bits the
     * denormalization by `scale` will drop. */
    bits_lost = -u.i.se - scale + 1;
    if ((bits_lost != 1) ^ LASTBIT(u))
      sum.hi = nextafterl(sum.hi, INFINITY * sum.lo);
  }
  return scalbnl(sum.hi, scale);
}
127 124
128 /* 125 /*
129 * Compute a*b exactly, returning the exact result in a struct dd. We assume 126 * Compute a*b exactly, returning the exact result in a struct dd. We assume
130 * that both a and b are normalized, so no underflow or overflow will occur. 127 * that both a and b are normalized, so no underflow or overflow will occur.
131 * The current rounding mode must be round-to-nearest. 128 * The current rounding mode must be round-to-nearest.
132 */ 129 */
133 static inline struct dd dd_mul(long double a, long double b) 130 static inline struct dd dd_mul(long double a, long double b) {
134 { 131 struct dd ret;
135 » struct dd ret; 132 long double ha, hb, la, lb, p, q;
136 » long double ha, hb, la, lb, p, q;
137 133
138 » p = a * SPLIT; 134 p = a * SPLIT;
139 » ha = a - p; 135 ha = a - p;
140 » ha += p; 136 ha += p;
141 » la = a - ha; 137 la = a - ha;
142 138
143 » p = b * SPLIT; 139 p = b * SPLIT;
144 » hb = b - p; 140 hb = b - p;
145 » hb += p; 141 hb += p;
146 » lb = b - hb; 142 lb = b - hb;
147 143
148 » p = ha * hb; 144 p = ha * hb;
149 » q = ha * lb + la * hb; 145 q = ha * lb + la * hb;
150 146
151 » ret.hi = p + q; 147 ret.hi = p + q;
152 » ret.lo = p - ret.hi + q + la * lb; 148 ret.lo = p - ret.hi + q + la * lb;
153 » return (ret); 149 return (ret);
154 } 150 }
155 151
156 /* 152 /*
157 * Fused multiply-add: Compute x * y + z with a single rounding error. 153 * Fused multiply-add: Compute x * y + z with a single rounding error.
158 * 154 *
159 * We use scaling to avoid overflow/underflow, along with the 155 * We use scaling to avoid overflow/underflow, along with the
160 * canonical precision-doubling technique adapted from: 156 * canonical precision-doubling technique adapted from:
161 * 157 *
162 * Dekker, T. A Floating-Point Technique for Extending the 158 * Dekker, T. A Floating-Point Technique for Extending the
163 * Available Precision. Numer. Math. 18, 224-242 (1971). 159 * Available Precision. Numer. Math. 18, 224-242 (1971).
164 */ 160 */
165 long double fmal(long double x, long double y, long double z) 161 long double fmal(long double x, long double y, long double z) {
166 { 162 PRAGMA_STDC_FENV_ACCESS_ON
167 » PRAGMA_STDC_FENV_ACCESS_ON 163 long double xs, ys, zs, adj;
168 » long double xs, ys, zs, adj; 164 struct dd xy, r;
169 » struct dd xy, r; 165 int oround;
170 » int oround; 166 int ex, ey, ez;
171 » int ex, ey, ez; 167 int spread;
172 » int spread;
173 168
174 » /* 169 /*
175 » * Handle special cases. The order of operations and the particular 170 * Handle special cases. The order of operations and the particular
176 » * return values here are crucial in handling special cases involving 171 * return values here are crucial in handling special cases involving
177 » * infinities, NaNs, overflows, and signed zeroes correctly. 172 * infinities, NaNs, overflows, and signed zeroes correctly.
178 » */ 173 */
179 » if (!isfinite(x) || !isfinite(y)) 174 if (!isfinite(x) || !isfinite(y))
180 » » return (x * y + z); 175 return (x * y + z);
181 » if (!isfinite(z)) 176 if (!isfinite(z))
182 » » return (z); 177 return (z);
183 » if (x == 0.0 || y == 0.0) 178 if (x == 0.0 || y == 0.0)
184 » » return (x * y + z); 179 return (x * y + z);
185 » if (z == 0.0) 180 if (z == 0.0)
186 » » return (x * y); 181 return (x * y);
187 182
188 » xs = frexpl(x, &ex); 183 xs = frexpl(x, &ex);
189 » ys = frexpl(y, &ey); 184 ys = frexpl(y, &ey);
190 » zs = frexpl(z, &ez); 185 zs = frexpl(z, &ez);
191 » oround = fegetround(); 186 oround = fegetround();
192 » spread = ex + ey - ez; 187 spread = ex + ey - ez;
193 188
194 » /* 189 /*
195 » * If x * y and z are many orders of magnitude apart, the scaling 190 * If x * y and z are many orders of magnitude apart, the scaling
196 » * will overflow, so we handle these cases specially. Rounding 191 * will overflow, so we handle these cases specially. Rounding
197 » * modes other than FE_TONEAREST are painful. 192 * modes other than FE_TONEAREST are painful.
198 » */ 193 */
199 » if (spread < -LDBL_MANT_DIG) { 194 if (spread < -LDBL_MANT_DIG) {
200 #ifdef FE_INEXACT 195 #ifdef FE_INEXACT
201 » » feraiseexcept(FE_INEXACT); 196 feraiseexcept(FE_INEXACT);
202 #endif 197 #endif
203 #ifdef FE_UNDERFLOW 198 #ifdef FE_UNDERFLOW
204 » » if (!isnormal(z)) 199 if (!isnormal(z))
205 » » » feraiseexcept(FE_UNDERFLOW); 200 feraiseexcept(FE_UNDERFLOW);
206 #endif 201 #endif
207 » » switch (oround) { 202 switch (oround) {
208 » » default: /* FE_TONEAREST */ 203 default: /* FE_TONEAREST */
209 » » » return (z); 204 return (z);
210 #ifdef FE_TOWARDZERO 205 #ifdef FE_TOWARDZERO
211 » » case FE_TOWARDZERO: 206 case FE_TOWARDZERO:
212 » » » if (x > 0.0 ^ y < 0.0 ^ z < 0.0) 207 if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
213 » » » » return (z); 208 return (z);
214 » » » else 209 else
215 » » » » return (nextafterl(z, 0)); 210 return (nextafterl(z, 0));
216 #endif 211 #endif
217 #ifdef FE_DOWNWARD 212 #ifdef FE_DOWNWARD
218 » » case FE_DOWNWARD: 213 case FE_DOWNWARD:
219 » » » if (x > 0.0 ^ y < 0.0) 214 if (x > 0.0 ^ y < 0.0)
220 » » » » return (z); 215 return (z);
221 » » » else 216 else
222 » » » » return (nextafterl(z, -INFINITY)); 217 return (nextafterl(z, -INFINITY));
223 #endif 218 #endif
224 #ifdef FE_UPWARD 219 #ifdef FE_UPWARD
225 » » case FE_UPWARD: 220 case FE_UPWARD:
226 » » » if (x > 0.0 ^ y < 0.0) 221 if (x > 0.0 ^ y < 0.0)
227 » » » » return (nextafterl(z, INFINITY)); 222 return (nextafterl(z, INFINITY));
228 » » » else 223 else
229 » » » » return (z); 224 return (z);
230 #endif 225 #endif
231 » » } 226 }
232 » } 227 }
233 » if (spread <= LDBL_MANT_DIG * 2) 228 if (spread <= LDBL_MANT_DIG * 2)
234 » » zs = scalbnl(zs, -spread); 229 zs = scalbnl(zs, -spread);
235 » else 230 else
236 » » zs = copysignl(LDBL_MIN, zs); 231 zs = copysignl(LDBL_MIN, zs);
237 232
238 » fesetround(FE_TONEAREST); 233 fesetround(FE_TONEAREST);
239 234
240 » /* 235 /*
241 » * Basic approach for round-to-nearest: 236 * Basic approach for round-to-nearest:
242 » * 237 *
243 » * (xy.hi, xy.lo) = x * y (exact) 238 * (xy.hi, xy.lo) = x * y (exact)
244 » * (r.hi, r.lo) = xy.hi + z (exact) 239 * (r.hi, r.lo) = xy.hi + z (exact)
245 » * adj = xy.lo + r.lo (inexact; low bit is sticky) 240 * adj = xy.lo + r.lo (inexact; low bit is sticky)
246 » * result = r.hi + adj (correctly rounded) 241 * result = r.hi + adj (correctly rounded)
247 » */ 242 */
248 » xy = dd_mul(xs, ys); 243 xy = dd_mul(xs, ys);
249 » r = dd_add(xy.hi, zs); 244 r = dd_add(xy.hi, zs);
250 245
251 » spread = ex + ey; 246 spread = ex + ey;
252 247
253 » if (r.hi == 0.0) { 248 if (r.hi == 0.0) {
254 » » /* 249 /*
255 » » * When the addends cancel to 0, ensure that the result has 250 * When the addends cancel to 0, ensure that the result has
256 » » * the correct sign. 251 * the correct sign.
257 » » */ 252 */
258 » » fesetround(oround); 253 fesetround(oround);
259 » » volatile long double vzs = zs; /* XXX gcc CSE bug workaround */ 254 volatile long double vzs = zs; /* XXX gcc CSE bug workaround */
260 » » return xy.hi + vzs + scalbnl(xy.lo, spread); 255 return xy.hi + vzs + scalbnl(xy.lo, spread);
261 » } 256 }
262 257
263 » if (oround != FE_TONEAREST) { 258 if (oround != FE_TONEAREST) {
264 » » /* 259 /*
265 » » * There is no need to worry about double rounding in directed 260 * There is no need to worry about double rounding in directed
266 » » * rounding modes. 261 * rounding modes.
267 » » * But underflow may not be raised correctly, example in downwar d rounding: 262 * But underflow may not be raised correctly, example in downward rounding:
268 » » * fmal(0x1.0000000001p-16000L, 0x1.0000000001p-400L, -0x1p-1644 0L) 263 * fmal(0x1.0000000001p-16000L, 0x1.0000000001p-400L, -0x1p-16440L)
269 » » */ 264 */
270 » » long double ret; 265 long double ret;
271 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW) 266 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
272 » » int e = fetestexcept(FE_INEXACT); 267 int e = fetestexcept(FE_INEXACT);
273 » » feclearexcept(FE_INEXACT); 268 feclearexcept(FE_INEXACT);
274 #endif 269 #endif
275 » » fesetround(oround); 270 fesetround(oround);
276 » » adj = r.lo + xy.lo; 271 adj = r.lo + xy.lo;
277 » » ret = scalbnl(r.hi + adj, spread); 272 ret = scalbnl(r.hi + adj, spread);
278 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW) 273 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
279 » » if (ilogbl(ret) < -16382 && fetestexcept(FE_INEXACT)) 274 if (ilogbl(ret) < -16382 && fetestexcept(FE_INEXACT))
280 » » » feraiseexcept(FE_UNDERFLOW); 275 feraiseexcept(FE_UNDERFLOW);
281 » » else if (e) 276 else if (e)
282 » » » feraiseexcept(FE_INEXACT); 277 feraiseexcept(FE_INEXACT);
283 #endif 278 #endif
284 » » return ret; 279 return ret;
285 » } 280 }
286 281
287 » adj = add_adjusted(r.lo, xy.lo); 282 adj = add_adjusted(r.lo, xy.lo);
288 » if (spread + ilogbl(r.hi) > -16383) 283 if (spread + ilogbl(r.hi) > -16383)
289 » » return scalbnl(r.hi + adj, spread); 284 return scalbnl(r.hi + adj, spread);
290 » else 285 else
291 » » return add_and_denormalize(r.hi, adj, spread); 286 return add_and_denormalize(r.hi, adj, spread);
292 } 287 }
293 #endif 288 #endif
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698