fusl/src/math/cbrt.c - Issue 1714623002: [fusl] clang-format fusl

Side by Side Diff: fusl/src/math/cbrt.c

Issue 1714623002: [fusl] clang-format fusl (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: headers too Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /* origin: FreeBSD /usr/src/lib/msun/src/s_cbrt.c */	1 /* origin: FreeBSD /usr/src/lib/msun/src/s_cbrt.c */

2 /*	2 /*

3 * ====================================================	3 * ====================================================

4 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.	4 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.

5 *	5 *

6 * Developed at SunPro, a Sun Microsystems, Inc. business.	6 * Developed at SunPro, a Sun Microsystems, Inc. business.

7 * Permission to use, copy, modify, and distribute this	7 * Permission to use, copy, modify, and distribute this

8 * software is freely granted, provided that this notice	8 * software is freely granted, provided that this notice

9 * is preserved.	9 * is preserved.

10 * ====================================================	10 * ====================================================

11 *	11 *

12 * Optimized by Bruce D. Evans.	12 * Optimized by Bruce D. Evans.

13 */	13 */

14 /* cbrt(x)	14 /* cbrt(x)

15 * Return cube root of x	15 * Return cube root of x

16 */	16 */

17	17

18 #include <math.h>	18 #include <math.h>

19 #include <stdint.h>	19 #include <stdint.h>

20	20

21 static const uint32_t	21 static const uint32_t

22 B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)220 /	22 B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)220 /

23 B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)220 /	23 B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)220 /

24	24

25 /* \|1/cbrt(x) - p(x)\| < 2*-23.5 (~[-7.93e-8, 7.929e-8]). /	25 /* \|1/cbrt(x) - p(x)\| < 2*-23.5 (~[-7.93e-8, 7.929e-8]). /

26 static const double	26 static const double P0 = 1.87595182427177009643, /* 0x3ffe03e6, 0x0f61e692 */

27 P0 = 1.87595182427177009643, /* 0x3ffe03e6, 0x0f61e692 */	27 P1 = -1.88497979543377169875, /* 0xbffe28e0, 0x92f02420 */

28 P1 = -1.88497979543377169875, /* 0xbffe28e0, 0x92f02420 */	28 P2 = 1.621429720105354466140, /* 0x3ff9f160, 0x4a49d6c2 */

29 P2 = 1.621429720105354466140, /* 0x3ff9f160, 0x4a49d6c2 */	29 P3 = -0.758397934778766047437, /* 0xbfe844cb, 0xbee751d9 */

30 P3 = -0.758397934778766047437, /* 0xbfe844cb, 0xbee751d9 */	30 P4 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */

31 P4 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */

32	31

33 double cbrt(double x)	32 double cbrt(double x) {

34 {	33 union {

35 » union {double f; uint64_t i;} u = {x};	34 double f;

36 » double_t r,s,t,w;	35 uint64_t i;

37 » uint32_t hx = u.i>>32 & 0x7fffffff;	36 } u = {x};

	37 double_t r, s, t, w;

	38 uint32_t hx = u.i >> 32 & 0x7fffffff;

38	39

39 » if (hx >= 0x7ff00000) /* cbrt(NaN,INF) is itself */	40 if (hx >= 0x7ff00000) /* cbrt(NaN,INF) is itself */

40 » » return x+x;	41 return x + x;

41	42

42 » /*	43 /*

43 » * Rough cbrt to 5 bits:	44 * Rough cbrt to 5 bits:

44 » * cbrt(2*e(1+m) ~= 2*(e/3)(1+(e%3+m)/3)	45 * cbrt(2*e(1+m) ~= 2*(e/3)(1+(e%3+m)/3)

45 » * where e is integral and >= 0, m is real and in [0, 1), and "/" and	46 * where e is integral and >= 0, m is real and in [0, 1), and "/" and

46 » * "%" are integer division and modulus with rounding towards minus	47 * "%" are integer division and modulus with rounding towards minus

47 » * infinity. The RHS is always >= the LHS and has a maximum relative	48 * infinity. The RHS is always >= the LHS and has a maximum relative

48 » * error of about 1 in 16. Adding a bias of -0.03306235651 to the	49 * error of about 1 in 16. Adding a bias of -0.03306235651 to the

49 » * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE	50 * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE

50 » * floating point representation, for finite positive normal values,	51 * floating point representation, for finite positive normal values,

51 » * ordinary integer divison of the value in bits magically gives	52 * ordinary integer divison of the value in bits magically gives

52 » * almost exactly the RHS of the above provided we first subtract the	53 * almost exactly the RHS of the above provided we first subtract the

53 » * exponent bias (1023 for doubles) and later add it back. We do the	54 * exponent bias (1023 for doubles) and later add it back. We do the

54 » * subtraction virtually to keep e >= 0 so that ordinary integer	55 * subtraction virtually to keep e >= 0 so that ordinary integer

55 » * division rounds towards minus infinity; this is also efficient.	56 * division rounds towards minus infinity; this is also efficient.

56 » */	57 */

57 » if (hx < 0x00100000) { /* zero or subnormal? */	58 if (hx < 0x00100000) { /* zero or subnormal? */

58 » » u.f = x*0x1p54;	59 u.f = x * 0x1p54;

59 » » hx = u.i>>32 & 0x7fffffff;	60 hx = u.i >> 32 & 0x7fffffff;

60 » » if (hx == 0)	61 if (hx == 0)

61 » » » return x; /* cbrt(0) is itself */	62 return x; /* cbrt(0) is itself */

62 » » hx = hx/3 + B2;	63 hx = hx / 3 + B2;

63 » } else	64 } else

64 » » hx = hx/3 + B1;	65 hx = hx / 3 + B1;

65 » u.i &= 1ULL<<63;	66 u.i &= 1ULL << 63;

66 » u.i \|= (uint64_t)hx << 32;	67 u.i \|= (uint64_t)hx << 32;

67 » t = u.f;	68 t = u.f;

68	69

69 » /*	70 /*

70 » * New cbrt to 23 bits:	71 * New cbrt to 23 bits:

71 » * cbrt(x) = tcbrt(x/t3) ~= tP(t**3/x)	72 * cbrt(x) = tcbrt(x/t3) ~= tP(t**3/x)

72 » * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r)	73 * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r)

73 » * to within 2**-23.5 when \|r - 1\| < 1/10. The rough approximation	74 * to within 2**-23.5 when \|r - 1\| < 1/10. The rough approximation

74 » * has produced t such than \|t/cbrt(x) - 1\| ~< 1/32, and cubing this	75 * has produced t such than \|t/cbrt(x) - 1\| ~< 1/32, and cubing this

75 » * gives us bounds for r = t**3/x.	76 * gives us bounds for r = t**3/x.

76 » *	77 *

77 » * Try to optimize for parallel evaluation as in __tanf.c.	78 * Try to optimize for parallel evaluation as in __tanf.c.

78 » */	79 */

79 » r = (tt)(t/x);	80 r = (t * t) * (t / x);

80 » t = t((P0+r(P1+rP2))+((rr)r)(P3+r*P4));	81 t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4));

81	82

82 » /*	83 /*

83 » * Round t away from zero to 23 bits (sloppily except for ensuring that	84 * Round t away from zero to 23 bits (sloppily except for ensuring that

84 » * the result is larger in magnitude than cbrt(x) but not much more than	85 * the result is larger in magnitude than cbrt(x) but not much more than

85 » * 2 23-bit ulps larger). With rounding towards zero, the error bound	86 * 2 23-bit ulps larger). With rounding towards zero, the error bound

86 » * would be ~5/6 instead of ~4/6. With a maximum error of 2 23-bit ulps	87 * would be ~5/6 instead of ~4/6. With a maximum error of 2 23-bit ulps

87 » * in the rounded t, the infinite-precision error in the Newton	88 * in the rounded t, the infinite-precision error in the Newton

88 » * approximation barely affects third digit in the final error	89 * approximation barely affects third digit in the final error

89 » * 0.667; the error in the rounded t can be up to about 3 23-bit ulps	90 * 0.667; the error in the rounded t can be up to about 3 23-bit ulps

90 » * before the final error is larger than 0.667 ulps.	91 * before the final error is larger than 0.667 ulps.

91 » */	92 */

92 » u.f = t;	93 u.f = t;

93 » u.i = (u.i + 0x80000000) & 0xffffffffc0000000ULL;	94 u.i = (u.i + 0x80000000) & 0xffffffffc0000000ULL;

94 » t = u.f;	95 t = u.f;

95	96

96 » /* one step Newton iteration to 53 bits with error < 0.667 ulps */	97 /* one step Newton iteration to 53 bits with error < 0.667 ulps */

97 » s = tt; / tt is exact /	98 s = t * t; /* tt is exact /

98 » r = x/s; /* error <= 0.5 ulps; \|r\| < \|t\| */	99 r = x / s; /* error <= 0.5 ulps; \|r\| < \|t\| */

99 » w = t+t; /* t+t is exact */	100 w = t + t; /* t+t is exact */

100 » r = (r-t)/(w+r); /* r-t is exact; w+r ~= 3t /	101 r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3t /

101 » t = t+tr; / error <= 0.5 + 0.5/3 + epsilon */	102 t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */

102 » return t;	103 return t;

103 }	104 }

OLD	NEW

« fusl/arch/aarch64/atomic_arch.h ('K') | « fusl/src/math/atanl.c ('k') | fusl/src/math/cbrtf.c » ('j') | no next file with comments »