OLD | NEW |
1 /* origin: FreeBSD /usr/src/lib/msun/src/s_fmaf.c */ | 1 /* origin: FreeBSD /usr/src/lib/msun/src/s_fmaf.c */ |
2 /*- | 2 /*- |
3 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG> | 3 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG> |
4 * All rights reserved. | 4 * All rights reserved. |
5 * | 5 * |
6 * Redistribution and use in source and binary forms, with or without | 6 * Redistribution and use in source and binary forms, with or without |
7 * modification, are permitted provided that the following conditions | 7 * modification, are permitted provided that the following conditions |
8 * are met: | 8 * are met: |
9 * 1. Redistributions of source code must retain the above copyright | 9 * 1. Redistributions of source code must retain the above copyright |
10 * notice, this list of conditions and the following disclaimer. | 10 * notice, this list of conditions and the following disclaimer. |
(...skipping 19 matching lines...) Expand all Loading... |
30 #include <stdint.h> | 30 #include <stdint.h> |
31 #include "libm.h" | 31 #include "libm.h" |
32 | 32 |
33 /* | 33 /* |
34 * Fused multiply-add: Compute x * y + z with a single rounding error. | 34 * Fused multiply-add: Compute x * y + z with a single rounding error. |
35 * | 35 * |
36 * A double has more than twice as much precision than a float, so | 36 * A double has more than twice as much precision than a float, so |
37 * direct double-precision arithmetic suffices, except where double | 37 * direct double-precision arithmetic suffices, except where double |
38 * rounding occurs. | 38 * rounding occurs. |
39 */ | 39 */ |
40 float fmaf(float x, float y, float z) | 40 float fmaf(float x, float y, float z) { |
41 { | 41 PRAGMA_STDC_FENV_ACCESS_ON |
42 » PRAGMA_STDC_FENV_ACCESS_ON | 42 double xy, result; |
43 » double xy, result; | 43 union { |
44 » union {double f; uint64_t i;} u; | 44 double f; |
45 » int e; | 45 uint64_t i; |
| 46 } u; |
| 47 int e; |
46 | 48 |
47 » xy = (double)x * y; | 49 xy = (double)x * y; |
48 » result = xy + z; | 50 result = xy + z; |
49 » u.f = result; | 51 u.f = result; |
50 » e = u.i>>52 & 0x7ff; | 52 e = u.i >> 52 & 0x7ff; |
51 » /* Common case: The double precision result is fine. */ | 53 /* Common case: The double precision result is fine. */ |
52 » if ((u.i & 0x1fffffff) != 0x10000000 || /* not a halfway case */ | 54 if ((u.i & 0x1fffffff) != 0x10000000 || /* not a halfway case */ |
53 » » e == 0x7ff || /* NaN */ | 55 e == 0x7ff || /* NaN */ |
54 » » result - xy == z || /* exact */ | 56 result - xy == z || /* exact */ |
55 » » fegetround() != FE_TONEAREST) /* not round-to-nearest */ | 57 fegetround() != FE_TONEAREST) /* not round-to-nearest */ |
56 » { | 58 { |
57 » » /* | 59 /* |
58 » » underflow may not be raised correctly, example: | 60 underflow may not be raised correctly, example: |
59 » » fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) | 61 fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) |
60 » » */ | 62 */ |
61 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW) | 63 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW) |
62 » » if (e < 0x3ff-126 && e >= 0x3ff-149 && fetestexcept(FE_INEXACT))
{ | 64 if (e < 0x3ff - 126 && e >= 0x3ff - 149 && fetestexcept(FE_INEXACT)) { |
63 » » » feclearexcept(FE_INEXACT); | 65 feclearexcept(FE_INEXACT); |
64 » » » /* TODO: gcc and clang bug workaround */ | 66 /* TODO: gcc and clang bug workaround */ |
65 » » » volatile float vz = z; | 67 volatile float vz = z; |
66 » » » result = xy + vz; | 68 result = xy + vz; |
67 » » » if (fetestexcept(FE_INEXACT)) | 69 if (fetestexcept(FE_INEXACT)) |
68 » » » » feraiseexcept(FE_UNDERFLOW); | 70 feraiseexcept(FE_UNDERFLOW); |
69 » » » else | 71 else |
70 » » » » feraiseexcept(FE_INEXACT); | 72 feraiseexcept(FE_INEXACT); |
71 » » } | 73 } |
72 #endif | 74 #endif |
73 » » z = result; | 75 z = result; |
74 » » return z; | 76 return z; |
75 » } | 77 } |
76 | 78 |
77 » /* | 79 /* |
78 » * If result is inexact, and exactly halfway between two float values, | 80 * If result is inexact, and exactly halfway between two float values, |
79 » * we need to adjust the low-order bit in the direction of the error. | 81 * we need to adjust the low-order bit in the direction of the error. |
80 » */ | 82 */ |
81 #ifdef FE_TOWARDZERO | 83 #ifdef FE_TOWARDZERO |
82 » fesetround(FE_TOWARDZERO); | 84 fesetround(FE_TOWARDZERO); |
83 #endif | 85 #endif |
84 » volatile double vxy = xy; /* XXX work around gcc CSE bug */ | 86 volatile double vxy = xy; /* XXX work around gcc CSE bug */ |
85 » double adjusted_result = vxy + z; | 87 double adjusted_result = vxy + z; |
86 » fesetround(FE_TONEAREST); | 88 fesetround(FE_TONEAREST); |
87 » if (result == adjusted_result) { | 89 if (result == adjusted_result) { |
88 » » u.f = adjusted_result; | 90 u.f = adjusted_result; |
89 » » u.i++; | 91 u.i++; |
90 » » adjusted_result = u.f; | 92 adjusted_result = u.f; |
91 » } | 93 } |
92 » z = adjusted_result; | 94 z = adjusted_result; |
93 » return z; | 95 return z; |
94 } | 96 } |
OLD | NEW |