OLD | NEW |
| (Empty) |
1 # exp(x) = 2^hi + 2^hi (2^lo - 1) | |
2 # where hi+lo = log2e*x with 128bit precision | |
3 # exact log2e*x calculation depends on nearest rounding mode | |
4 # using the exact multiplication method of Dekker and Veltkamp | |
5 | |
6 .global expl | |
7 .type expl,@function | |
8 expl: | |
9 fldt 4(%esp) | |
10 | |
11 # interesting case: 0x1p-32 <= |x| < 16384 | |
12 # check if (exponent|0x8000) is in [0xbfff-32, 0xbfff+13] | |
13 mov 12(%esp), %ax | |
14 or $0x8000, %ax | |
15 sub $0xbfdf, %ax | |
16 cmp $45, %ax | |
17 jbe 2f | |
18 test %ax, %ax | |
19 fld1 | |
20 js 1f | |
21 # if |x|>=0x1p14 or nan return 2^trunc(x) | |
22 fscale | |
23 fstp %st(1) | |
24 ret | |
25 # if |x|<0x1p-32 return 1+x | |
26 1: faddp | |
27 ret | |
28 | |
29 # should be 0x1.71547652b82fe178p0L == 0x3fff b8aa3b29 5c17f0bc | |
30 # it will be wrong on non-nearest rounding mode | |
31 2: fldl2e | |
32 subl $44, %esp | |
33 # hi = log2e_hi*x | |
34 # 2^hi = exp2l(hi) | |
35 fmul %st(1),%st | |
36 fld %st(0) | |
37 fstpt (%esp) | |
38 fstpt 16(%esp) | |
39 fstpt 32(%esp) | |
40 .hidden __exp2l | |
41 call __exp2l | |
42 # if 2^hi == inf return 2^hi | |
43 fld %st(0) | |
44 fstpt (%esp) | |
45 cmpw $0x7fff, 8(%esp) | |
46 je 1f | |
47 fldt 32(%esp) | |
48 fldt 16(%esp) | |
49 # fpu stack: 2^hi x hi | |
50 # exact mult: x*log2e | |
51 fld %st(1) | |
52 # c = 0x1p32+1 | |
53 pushl $0x41f00000 | |
54 pushl $0x00100000 | |
55 fldl (%esp) | |
56 # xh = x - c*x + c*x | |
57 # xl = x - xh | |
58 fmulp | |
59 fld %st(2) | |
60 fsub %st(1), %st | |
61 faddp | |
62 fld %st(2) | |
63 fsub %st(1), %st | |
64 # yh = log2e_hi - c*log2e_hi + c*log2e_hi | |
65 pushl $0x3ff71547 | |
66 pushl $0x65200000 | |
67 fldl (%esp) | |
68 # fpu stack: 2^hi x hi xh xl yh | |
69 # lo = hi - xh*yh + xl*yh | |
70 fld %st(2) | |
71 fmul %st(1), %st | |
72 fsubp %st, %st(4) | |
73 fmul %st(1), %st | |
74 faddp %st, %st(3) | |
75 # yl = log2e_hi - yh | |
76 pushl $0x3de705fc | |
77 pushl $0x2f000000 | |
78 fldl (%esp) | |
79 # fpu stack: 2^hi x lo xh xl yl | |
80 # lo += xh*yl + xl*yl | |
81 fmul %st, %st(2) | |
82 fmulp %st, %st(1) | |
83 fxch %st(2) | |
84 faddp | |
85 faddp | |
86 # log2e_lo | |
87 pushl $0xbfbe | |
88 pushl $0x82f0025f | |
89 pushl $0x2dc582ee | |
90 fldt (%esp) | |
91 addl $36,%esp | |
92 # fpu stack: 2^hi x lo log2e_lo | |
93 # lo += log2e_lo*x | |
94 # return 2^hi + 2^hi (2^lo - 1) | |
95 fmulp %st, %st(2) | |
96 faddp | |
97 f2xm1 | |
98 fmul %st(1), %st | |
99 faddp | |
100 1: addl $44, %esp | |
101 ret | |
OLD | NEW |