OLD | NEW |
(Empty) | |
| 1 # exp(x) = 2^hi + 2^hi (2^lo - 1) |
| 2 # where hi+lo = log2e*x with 128bit precision |
| 3 # exact log2e*x calculation depends on nearest rounding mode |
| 4 # using the exact multiplication method of Dekker and Veltkamp |
| 5 |
| 6 .global expl |
| 7 .type expl,@function |
| 8 expl: |
| 9 fldt 4(%esp) |
| 10 |
| 11 # interesting case: 0x1p-32 <= |x| < 16384 |
| 12 # check if (exponent|0x8000) is in [0xbfff-32, 0xbfff+13] |
| 13 mov 12(%esp), %ax |
| 14 or $0x8000, %ax |
| 15 sub $0xbfdf, %ax |
| 16 cmp $45, %ax |
| 17 jbe 2f |
| 18 test %ax, %ax |
| 19 fld1 |
| 20 js 1f |
| 21 # if |x|>=0x1p14 or nan return 2^trunc(x) |
| 22 fscale |
| 23 fstp %st(1) |
| 24 ret |
| 25 # if |x|<0x1p-32 return 1+x |
| 26 1: faddp |
| 27 ret |
| 28 |
| 29 # should be 0x1.71547652b82fe178p0L == 0x3fff b8aa3b29 5c17f0bc |
| 30 # it will be wrong on non-nearest rounding mode |
| 31 2: fldl2e |
| 32 subl $44, %esp |
| 33 # hi = log2e_hi*x |
| 34 # 2^hi = exp2l(hi) |
| 35 fmul %st(1),%st |
| 36 fld %st(0) |
| 37 fstpt (%esp) |
| 38 fstpt 16(%esp) |
| 39 fstpt 32(%esp) |
| 40 .hidden __exp2l |
| 41 call __exp2l |
| 42 # if 2^hi == inf return 2^hi |
| 43 fld %st(0) |
| 44 fstpt (%esp) |
| 45 cmpw $0x7fff, 8(%esp) |
| 46 je 1f |
| 47 fldt 32(%esp) |
| 48 fldt 16(%esp) |
| 49 # fpu stack: 2^hi x hi |
| 50 # exact mult: x*log2e |
| 51 fld %st(1) |
| 52 # c = 0x1p32+1 |
| 53 pushl $0x41f00000 |
| 54 pushl $0x00100000 |
| 55 fldl (%esp) |
| 56 # xh = x - c*x + c*x |
| 57 # xl = x - xh |
| 58 fmulp |
| 59 fld %st(2) |
| 60 fsub %st(1), %st |
| 61 faddp |
| 62 fld %st(2) |
| 63 fsub %st(1), %st |
| 64 # yh = log2e_hi - c*log2e_hi + c*log2e_hi |
| 65 pushl $0x3ff71547 |
| 66 pushl $0x65200000 |
| 67 fldl (%esp) |
| 68 # fpu stack: 2^hi x hi xh xl yh |
| 69 # lo = hi - xh*yh + xl*yh |
| 70 fld %st(2) |
| 71 fmul %st(1), %st |
| 72 fsubp %st, %st(4) |
| 73 fmul %st(1), %st |
| 74 faddp %st, %st(3) |
| 75 # yl = log2e_hi - yh |
| 76 pushl $0x3de705fc |
| 77 pushl $0x2f000000 |
| 78 fldl (%esp) |
| 79 # fpu stack: 2^hi x lo xh xl yl |
| 80 # lo += xh*yl + xl*yl |
| 81 fmul %st, %st(2) |
| 82 fmulp %st, %st(1) |
| 83 fxch %st(2) |
| 84 faddp |
| 85 faddp |
| 86 # log2e_lo |
| 87 pushl $0xbfbe |
| 88 pushl $0x82f0025f |
| 89 pushl $0x2dc582ee |
| 90 fldt (%esp) |
| 91 addl $36,%esp |
| 92 # fpu stack: 2^hi x lo log2e_lo |
| 93 # lo += log2e_lo*x |
| 94 # return 2^hi + 2^hi (2^lo - 1) |
| 95 fmulp %st, %st(2) |
| 96 faddp |
| 97 f2xm1 |
| 98 fmul %st(1), %st |
| 99 faddp |
| 100 1: addl $44, %esp |
| 101 ret |
OLD | NEW |