OLD | NEW |
(Empty) | |
| 1 # exp(x) = 2^hi + 2^hi (2^lo - 1) |
| 2 # where hi+lo = log2e*x with 128bit precision |
| 3 # exact log2e*x calculation depends on nearest rounding mode |
| 4 # using the exact multiplication method of Dekker and Veltkamp |
| 5 |
| 6 .global expl |
| 7 .type expl,@function |
| 8 expl: |
| 9 fldt 8(%rsp) |
| 10 |
| 11 # interesting case: 0x1p-32 <= |x| < 16384 |
| 12 # check if (exponent|0x8000) is in [0xbfff-32, 0xbfff+13] |
| 13 mov 16(%rsp), %ax |
| 14 or $0x8000, %ax |
| 15 sub $0xbfdf, %ax |
| 16 cmp $45, %ax |
| 17 jbe 2f |
| 18 test %ax, %ax |
| 19 fld1 |
| 20 js 1f |
| 21 # if |x|>=0x1p14 or nan return 2^trunc(x) |
| 22 fscale |
| 23 fstp %st(1) |
| 24 ret |
| 25 # if |x|<0x1p-32 return 1+x |
| 26 1: faddp |
| 27 ret |
| 28 |
| 29 # should be 0x1.71547652b82fe178p0L == 0x3fff b8aa3b29 5c17f0bc |
| 30 # it will be wrong on non-nearest rounding mode |
| 31 2: fldl2e |
| 32 subq $48, %rsp |
| 33 # hi = log2e_hi*x |
| 34 # 2^hi = exp2l(hi) |
| 35 fmul %st(1),%st |
| 36 fld %st(0) |
| 37 fstpt (%rsp) |
| 38 fstpt 16(%rsp) |
| 39 fstpt 32(%rsp) |
| 40 call exp2l@PLT |
| 41 # if 2^hi == inf return 2^hi |
| 42 fld %st(0) |
| 43 fstpt (%rsp) |
| 44 cmpw $0x7fff, 8(%rsp) |
| 45 je 1f |
| 46 fldt 32(%rsp) |
| 47 fldt 16(%rsp) |
| 48 # fpu stack: 2^hi x hi |
| 49 # exact mult: x*log2e |
| 50 fld %st(1) |
| 51 # c = 0x1p32+1 |
| 52 movq $0x41f0000000100000,%rax |
| 53 pushq %rax |
| 54 fldl (%rsp) |
| 55 # xh = x - c*x + c*x |
| 56 # xl = x - xh |
| 57 fmulp |
| 58 fld %st(2) |
| 59 fsub %st(1), %st |
| 60 faddp |
| 61 fld %st(2) |
| 62 fsub %st(1), %st |
| 63 # yh = log2e_hi - c*log2e_hi + c*log2e_hi |
| 64 movq $0x3ff7154765200000,%rax |
| 65 pushq %rax |
| 66 fldl (%rsp) |
| 67 # fpu stack: 2^hi x hi xh xl yh |
| 68 # lo = hi - xh*yh + xl*yh |
| 69 fld %st(2) |
| 70 fmul %st(1), %st |
| 71 fsubp %st, %st(4) |
| 72 fmul %st(1), %st |
| 73 faddp %st, %st(3) |
| 74 # yl = log2e_hi - yh |
| 75 movq $0x3de705fc2f000000,%rax |
| 76 pushq %rax |
| 77 fldl (%rsp) |
| 78 # fpu stack: 2^hi x lo xh xl yl |
| 79 # lo += xh*yl + xl*yl |
| 80 fmul %st, %st(2) |
| 81 fmulp %st, %st(1) |
| 82 fxch %st(2) |
| 83 faddp |
| 84 faddp |
| 85 # log2e_lo |
| 86 movq $0xbfbe,%rax |
| 87 pushq %rax |
| 88 movq $0x82f0025f2dc582ee,%rax |
| 89 pushq %rax |
| 90 fldt (%rsp) |
| 91 addq $40,%rsp |
| 92 # fpu stack: 2^hi x lo log2e_lo |
| 93 # lo += log2e_lo*x |
| 94 # return 2^hi + 2^hi (2^lo - 1) |
| 95 fmulp %st, %st(2) |
| 96 faddp |
| 97 f2xm1 |
| 98 fmul %st(1), %st |
| 99 faddp |
| 100 1: addq $48, %rsp |
| 101 ret |
OLD | NEW |