| OLD | NEW |
| 1 /* origin: FreeBSD /usr/src/lib/msun/src/s_fmaf.c */ | 1 /* origin: FreeBSD /usr/src/lib/msun/src/s_fmaf.c */ |
| 2 /*- | 2 /*- |
| 3 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG> | 3 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG> |
| 4 * All rights reserved. | 4 * All rights reserved. |
| 5 * | 5 * |
| 6 * Redistribution and use in source and binary forms, with or without | 6 * Redistribution and use in source and binary forms, with or without |
| 7 * modification, are permitted provided that the following conditions | 7 * modification, are permitted provided that the following conditions |
| 8 * are met: | 8 * are met: |
| 9 * 1. Redistributions of source code must retain the above copyright | 9 * 1. Redistributions of source code must retain the above copyright |
| 10 * notice, this list of conditions and the following disclaimer. | 10 * notice, this list of conditions and the following disclaimer. |
| (...skipping 19 matching lines...) Expand all Loading... |
| 30 #include <stdint.h> | 30 #include <stdint.h> |
| 31 #include "libm.h" | 31 #include "libm.h" |
| 32 | 32 |
| 33 /* | 33 /* |
| 34 * Fused multiply-add: Compute x * y + z with a single rounding error. | 34 * Fused multiply-add: Compute x * y + z with a single rounding error. |
| 35 * | 35 * |
| 36 * A double has more than twice as much precision than a float, so | 36 * A double has more than twice as much precision than a float, so |
| 37 * direct double-precision arithmetic suffices, except where double | 37 * direct double-precision arithmetic suffices, except where double |
| 38 * rounding occurs. | 38 * rounding occurs. |
| 39 */ | 39 */ |
| 40 float fmaf(float x, float y, float z) | 40 float fmaf(float x, float y, float z) { |
| 41 { | 41 PRAGMA_STDC_FENV_ACCESS_ON |
| 42 » PRAGMA_STDC_FENV_ACCESS_ON | 42 double xy, result; |
| 43 » double xy, result; | 43 union { |
| 44 » union {double f; uint64_t i;} u; | 44 double f; |
| 45 » int e; | 45 uint64_t i; |
| 46 } u; |
| 47 int e; |
| 46 | 48 |
| 47 » xy = (double)x * y; | 49 xy = (double)x * y; |
| 48 » result = xy + z; | 50 result = xy + z; |
| 49 » u.f = result; | 51 u.f = result; |
| 50 » e = u.i>>52 & 0x7ff; | 52 e = u.i >> 52 & 0x7ff; |
| 51 » /* Common case: The double precision result is fine. */ | 53 /* Common case: The double precision result is fine. */ |
| 52 » if ((u.i & 0x1fffffff) != 0x10000000 || /* not a halfway case */ | 54 if ((u.i & 0x1fffffff) != 0x10000000 || /* not a halfway case */ |
| 53 » » e == 0x7ff || /* NaN */ | 55 e == 0x7ff || /* NaN */ |
| 54 » » result - xy == z || /* exact */ | 56 result - xy == z || /* exact */ |
| 55 » » fegetround() != FE_TONEAREST) /* not round-to-nearest */ | 57 fegetround() != FE_TONEAREST) /* not round-to-nearest */ |
| 56 » { | 58 { |
| 57 » » /* | 59 /* |
| 58 » » underflow may not be raised correctly, example: | 60 underflow may not be raised correctly, example: |
| 59 » » fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) | 61 fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) |
| 60 » » */ | 62 */ |
| 61 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW) | 63 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW) |
| 62 » » if (e < 0x3ff-126 && e >= 0x3ff-149 && fetestexcept(FE_INEXACT))
{ | 64 if (e < 0x3ff - 126 && e >= 0x3ff - 149 && fetestexcept(FE_INEXACT)) { |
| 63 » » » feclearexcept(FE_INEXACT); | 65 feclearexcept(FE_INEXACT); |
| 64 » » » /* TODO: gcc and clang bug workaround */ | 66 /* TODO: gcc and clang bug workaround */ |
| 65 » » » volatile float vz = z; | 67 volatile float vz = z; |
| 66 » » » result = xy + vz; | 68 result = xy + vz; |
| 67 » » » if (fetestexcept(FE_INEXACT)) | 69 if (fetestexcept(FE_INEXACT)) |
| 68 » » » » feraiseexcept(FE_UNDERFLOW); | 70 feraiseexcept(FE_UNDERFLOW); |
| 69 » » » else | 71 else |
| 70 » » » » feraiseexcept(FE_INEXACT); | 72 feraiseexcept(FE_INEXACT); |
| 71 » » } | 73 } |
| 72 #endif | 74 #endif |
| 73 » » z = result; | 75 z = result; |
| 74 » » return z; | 76 return z; |
| 75 » } | 77 } |
| 76 | 78 |
| 77 » /* | 79 /* |
| 78 » * If result is inexact, and exactly halfway between two float values, | 80 * If result is inexact, and exactly halfway between two float values, |
| 79 » * we need to adjust the low-order bit in the direction of the error. | 81 * we need to adjust the low-order bit in the direction of the error. |
| 80 » */ | 82 */ |
| 81 #ifdef FE_TOWARDZERO | 83 #ifdef FE_TOWARDZERO |
| 82 » fesetround(FE_TOWARDZERO); | 84 fesetround(FE_TOWARDZERO); |
| 83 #endif | 85 #endif |
| 84 » volatile double vxy = xy; /* XXX work around gcc CSE bug */ | 86 volatile double vxy = xy; /* XXX work around gcc CSE bug */ |
| 85 » double adjusted_result = vxy + z; | 87 double adjusted_result = vxy + z; |
| 86 » fesetround(FE_TONEAREST); | 88 fesetround(FE_TONEAREST); |
| 87 » if (result == adjusted_result) { | 89 if (result == adjusted_result) { |
| 88 » » u.f = adjusted_result; | 90 u.f = adjusted_result; |
| 89 » » u.i++; | 91 u.i++; |
| 90 » » adjusted_result = u.f; | 92 adjusted_result = u.f; |
| 91 » } | 93 } |
| 92 » z = adjusted_result; | 94 z = adjusted_result; |
| 93 » return z; | 95 return z; |
| 94 } | 96 } |
| OLD | NEW |