src/base/ieee754.cc - Issue 2073123002: [builtins] Introduce proper Float64Cos and Float64Sin.

Side by Side Diff: src/base/ieee754.cc

Issue 2073123002: [builtins] Introduce proper Float64Cos and Float64Sin. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Fix missing breaks Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm).	1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm).

2 //	2 //

3 // ====================================================	3 // ====================================================

4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.	4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.

5 //	5 //

6 // Developed at SunSoft, a Sun Microsystems, Inc. business.	6 // Developed at SunSoft, a Sun Microsystems, Inc. business.

7 // Permission to use, copy, modify, and distribute this	7 // Permission to use, copy, modify, and distribute this

8 // software is freely granted, provided that this notice	8 // software is freely granted, provided that this notice

9 // is preserved.	9 // is preserved.

10 // ====================================================	10 // ====================================================

(...skipping 150 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
161 ieee_double_shape_type sl_u; \	161 ieee_double_shape_type sl_u; \

162 sl_u.value = (d); \	162 sl_u.value = (d); \

163 sl_u.parts.lsw = (v); \	163 sl_u.parts.lsw = (v); \

164 (d) = sl_u.value; \	164 (d) = sl_u.value; \

165 } while (0)	165 } while (0)

166	166

167 /* Support macro. */	167 /* Support macro. */

168	168

169 #define STRICT_ASSIGN(type, lval, rval) ((lval) = (rval))	169 #define STRICT_ASSIGN(type, lval, rval) ((lval) = (rval))

170	170

	171 int32_t __ieee754_rem_pio2(double x, double *y) WARN_UNUSED_RESULT;

	172 double __kernel_cos(double x, double y) WARN_UNUSED_RESULT;

	173 int __kernel_rem_pio2(double x, double y, int e0, int nx, int prec,

	174 const int32_t *ipio2) WARN_UNUSED_RESULT;

	175 double __kernel_sin(double x, double y, int iy) WARN_UNUSED_RESULT;

	176

	177 /* __ieee754_rem_pio2(x,y)

	178 *

	179 * return the remainder of x rem pi/2 in y[0]+y[1]

	180 * use __kernel_rem_pio2()

	181 */

	182 int32_t __ieee754_rem_pio2(double x, double *y) {

	183 /*

	184 * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi

	185 */

	186 static const int32_t two_over_pi[] = {

	187 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C,

	188 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649,

	189 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, 0xA73EE8, 0x8235F5, 0x2EBB44,

	190 0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C, 0x845F8B,

	191 0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D,

	192 0x367ECF, 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,

	193 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330,

	194 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, 0x91615E, 0xE61B08,

	195 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA,

	196 0x73A8C9, 0x60E27B, 0xC08C6B,

	197 };

	198

	199 static const int32_t npio2_hw[] = {

	200 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,

	201 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,

	202 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,

	203 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,

	204 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,

	205 0x404858EB, 0x404921FB,

	206 };

	207

	208 /*

	209 * invpio2: 53 bits of 2/pi

	210 * pio2_1: first 33 bit of pi/2

	211 * pio2_1t: pi/2 - pio2_1

	212 * pio2_2: second 33 bit of pi/2

	213 * pio2_2t: pi/2 - (pio2_1+pio2_2)

	214 * pio2_3: third 33 bit of pi/2

	215 * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)

	216 */

	217

	218 static const double

	219 zero = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */

	220 half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */

	221 two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */

	222 invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */

	223 pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */

	224 pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */

	225 pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */

	226 pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */

	227 pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */

	228 pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */

	229

	230 double z, w, t, r, fn;

	231 double tx[3];

	232 int32_t e0, i, j, nx, n, ix, hx;

	233 u_int32_t low;

	234

	235 z = 0;

	236 GET_HIGH_WORD(hx, x); /* high word of x */

	237 ix = hx & 0x7fffffff;

	238 if (ix <= 0x3fe921fb) { /* \|x\| ~<= pi/4 , no need for reduction */

	239 y[0] = x;

	240 y[1] = 0;

	241 return 0;

	242 }

	243 if (ix < 0x4002d97c) { /* \|x\| < 3pi/4, special case with n=+-1 */

	244 if (hx > 0) {

	245 z = x - pio2_1;

	246 if (ix != 0x3ff921fb) { /* 33+53 bit pi is good enough */

	247 y[0] = z - pio2_1t;

	248 y[1] = (z - y[0]) - pio2_1t;

	249 } else { /* near pi/2, use 33+33+53 bit pi */

	250 z -= pio2_2;

	251 y[0] = z - pio2_2t;

	252 y[1] = (z - y[0]) - pio2_2t;

	253 }

	254 return 1;

	255 } else { /* negative x */

	256 z = x + pio2_1;

	257 if (ix != 0x3ff921fb) { /* 33+53 bit pi is good enough */

	258 y[0] = z + pio2_1t;

	259 y[1] = (z - y[0]) + pio2_1t;

	260 } else { /* near pi/2, use 33+33+53 bit pi */

	261 z += pio2_2;

	262 y[0] = z + pio2_2t;

	263 y[1] = (z - y[0]) + pio2_2t;

	264 }

	265 return -1;

	266 }

	267 }

	268 if (ix <= 0x413921fb) { /* \|x\| ~<= 2^19(pi/2), medium size /

	269 t = fabs(x);

	270 n = static_cast<int32_t>(t * invpio2 + half);

	271 fn = static_cast<double>(n);

	272 r = t - fn * pio2_1;

	273 w = fn * pio2_1t; /* 1st round good to 85 bit */

	274 if (n < 32 && ix != npio2_hw[n - 1]) {

	275 y[0] = r - w; /* quick check no cancellation */

	276 } else {

	277 u_int32_t high;

	278 j = ix >> 20;

	279 y[0] = r - w;

	280 GET_HIGH_WORD(high, y[0]);

	281 i = j - ((high >> 20) & 0x7ff);

	282 if (i > 16) { /* 2nd iteration needed, good to 118 */

	283 t = r;

	284 w = fn * pio2_2;

	285 r = t - w;

	286 w = fn * pio2_2t - ((t - r) - w);

	287 y[0] = r - w;

	288 GET_HIGH_WORD(high, y[0]);

	289 i = j - ((high >> 20) & 0x7ff);

	290 if (i > 49) { /* 3rd iteration need, 151 bits acc */

	291 t = r; /* will cover all possible cases */

	292 w = fn * pio2_3;

	293 r = t - w;

	294 w = fn * pio2_3t - ((t - r) - w);

	295 y[0] = r - w;

	296 }

	297 }

	298 }

	299 y[1] = (r - y[0]) - w;

	300 if (hx < 0) {

	301 y[0] = -y[0];

	302 y[1] = -y[1];

	303 return -n;

	304 } else {

	305 return n;

	306 }

	307 }

	308 /*

	309 * all other (large) arguments

	310 */

	311 if (ix >= 0x7ff00000) { /* x is inf or NaN */

	312 y[0] = y[1] = x - x;

	313 return 0;

	314 }

	315 /* set z = scalbn(\|x\|,ilogb(x)-23) */

	316 GET_LOW_WORD(low, x);

	317 SET_LOW_WORD(z, low);

	318 e0 = (ix >> 20) - 1046; /* e0 = ilogb(z)-23; */

	319 SET_HIGH_WORD(z, ix - static_cast<int32_t>(e0 << 20));

	320 for (i = 0; i < 2; i++) {

	321 tx[i] = static_cast<double>(static_cast<int32_t>(z));

	322 z = (z - tx[i]) * two24;

	323 }

	324 tx[2] = z;

	325 nx = 3;

	326 while (tx[nx - 1] == zero) nx--; /* skip zero term */

	327 n = __kernel_rem_pio2(tx, y, e0, nx, 2, two_over_pi);

	328 if (hx < 0) {

	329 y[0] = -y[0];

	330 y[1] = -y[1];

	331 return -n;

	332 }

	333 return n;

	334 }

	335

	336 /* __kernel_cos( x, y )

	337 * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164

	338 * Input x is assumed to be bounded by ~pi/4 in magnitude.

	339 * Input y is the tail of x.

	340 *

	341 * Algorithm

	342 * 1. Since cos(-x) = cos(x), we need only to consider positive x.

	343 * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.

	344 * 3. cos(x) is approximated by a polynomial of degree 14 on

	345 * [0,pi/4]

	346 * 4 14

	347 * cos(x) ~ 1 - xx/2 + C1x + ... + C6*x

	348 * where the remez error is

	349 *

	350 * \| 2 4 6 8 10 12 14 \| -58

	351 * \|cos(x)-(1-.5x +C1x +C2x +C3x +C4x +C5x +C6*x )\| <= 2

	352 * \| \|

	353 *

	354 * 4 6 8 10 12 14

	355 * 4. let r = C1x +C2x +C3x +C4x +C5x +C6x , then

	356 * cos(x) = 1 - x*x/2 + r

	357 * since cos(x+y) ~ cos(x) - sin(x)*y

	358 * ~ cos(x) - x*y,

	359 * a correction term is necessary in cos(x) and hence

	360 * cos(x+y) = 1 - (xx/2 - (r - xy))

	361 * For better accuracy when x > 0.3, let qx = \|x\|/4 with

	362 * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.

	363 * Then

	364 * cos(x+y) = (1-qx) - ((xx/2-qx) - (r-xy)).

	365 * Note that 1-qx and (x*x/2-qx) is EXACT here, and the

	366 * magnitude of the latter is at least a quarter of x*x/2,

	367 * thus, reducing the rounding error in the subtraction.

	368 */

	369 V8_INLINE double __kernel_cos(double x, double y) {

	370 static const double

	371 one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */

	372 C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */

	373 C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */

	374 C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */

	375 C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */

	376 C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */

	377 C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */

	378

	379 double a, hz, z, r, qx;

	380 int32_t ix;

	381 GET_HIGH_WORD(ix, x);

	382 ix &= 0x7fffffff; /* ix = \|x\|'s high word*/

	383 if (ix < 0x3e400000) { /* if x < 2*27 /

	384 if (static_cast<int>(x) == 0) return one; /* generate inexact */

	385 }

	386 z = x * x;

	387 r = z * (C1 + z * (C2 + z * (C3 + z * (C4 + z * (C5 + z * C6)))));

	388 if (ix < 0x3FD33333) { /* if \|x\| < 0.3 */

	389 return one - (0.5 * z - (z * r - x * y));

	390 } else {

	391 if (ix > 0x3fe90000) { /* x > 0.78125 */

	392 qx = 0.28125;

	393 } else {

	394 INSERT_WORDS(qx, ix - 0x00200000, 0); /* x/4 */

	395 }

	396 hz = 0.5 * z - qx;

	397 a = one - qx;

	398 return a - (hz - (z * r - x * y));

	399 }

	400 }

	401

	402 /* __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)

	403 * double x[],y[]; int e0,nx,prec; int ipio2[];

	404 *

	405 * __kernel_rem_pio2 return the last three digits of N with

	406 * y = x - N*pi/2

	407 * so that \|y\| < pi/2.

	408 *

	409 * The method is to compute the integer (mod 8) and fraction parts of

	410 * (2/pi)*x without doing the full multiplication. In general we

	411 * skip the part of the product that are known to be a huge integer (

	412 * more accurately, = 0 mod 8 ). Thus the number of operations are

	413 * independent of the exponent of the input.

	414 *

	415 * (2/pi) is represented by an array of 24-bit integers in ipio2[].

	416 *

	417 * Input parameters:

	418 * x[] The input value (must be positive) is broken into nx

	419 * pieces of 24-bit integers in double precision format.

	420 * x[i] will be the i-th 24 bit of x. The scaled exponent

	421 * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0

	422 * match x's up to 24 bits.

	423 *

	424 * Example of breaking a double positive z into x[0]+x[1]+x[2]:

	425 * e0 = ilogb(z)-23

	426 * z = scalbn(z,-e0)

	427 * for i = 0,1,2

	428 * x[i] = floor(z)

	429 * z = (z-x[i])2*24

	430 *

	431 *

	432 * y[] output result in an array of double precision numbers.

	433 * The dimension of y[] is:

	434 * 24-bit precision 1

	435 * 53-bit precision 2

	436 * 64-bit precision 2

	437 * 113-bit precision 3

	438 * The actual value is the sum of them. Thus for 113-bit

	439 * precison, one may have to do something like:

	440 *

	441 * long double t,w,r_head, r_tail;

	442 * t = (long double)y[2] + (long double)y[1];

	443 * w = (long double)y[0];

	444 * r_head = t+w;

	445 * r_tail = w - (r_head - t);

	446 *

	447 * e0 The exponent of x[0]

	448 *

	449 * nx dimension of x[]

	450 *

	451 * prec an integer indicating the precision:

	452 * 0 24 bits (single)

	453 * 1 53 bits (double)

	454 * 2 64 bits (extended)

	455 * 3 113 bits (quad)

	456 *

	457 * ipio2[]

	458 * integer array, contains the (24i)-th to (24i+23)-th

	459 * bit of 2/pi after binary point. The corresponding

	460 * floating value is

	461 *

	462 * ipio2[i] * 2^(-24(i+1)).

	463 *

	464 * External function:

	465 * double scalbn(), floor();

	466 *

	467 *

	468 * Here is the description of some local variables:

	469 *

	470 * jk jk+1 is the initial number of terms of ipio2[] needed

	471 * in the computation. The recommended value is 2,3,4,

	472 * 6 for single, double, extended,and quad.

	473 *

	474 * jz local integer variable indicating the number of

	475 * terms of ipio2[] used.

	476 *

	477 * jx nx - 1

	478 *

	479 * jv index for pointing to the suitable ipio2[] for the

	480 * computation. In general, we want

	481 * ( 2^e0x[0] ipio2[jv-1]*2^(-24jv) )/8

	482 * is an integer. Thus

	483 * e0-3-24*jv >= 0 or (e0-3)/24 >= jv

	484 * Hence jv = max(0,(e0-3)/24).

	485 *

	486 * jp jp+1 is the number of terms in PIo2[] needed, jp = jk.

	487 *

	488 * q[] double array with integral value, representing the

	489 * 24-bits chunk of the product of x and 2/pi.

	490 *

	491 * q0 the corresponding exponent of q[0]. Note that the

	492 * exponent for q[i] would be q0-24*i.

	493 *

	494 * PIo2[] double precision array, obtained by cutting pi/2

	495 * into 24 bits chunks.

	496 *

	497 * f[] ipio2[] in floating point

	498 *

	499 * iq[] integer array by breaking up q[] in 24-bits chunk.

	500 *

	501 * fq[] final product of x*(2/pi) in fq[0],..,fq[jk]

	502 *

	503 * ih integer. If >0 it indicates q[] is >= 0.5, hence

	504 * it also indicates the sign of the result.

	505 *

	506 */

	507 int __kernel_rem_pio2(double x, double y, int e0, int nx, int prec,

	508 const int32_t *ipio2) {

	509 /* Constants:

	510 * The hexadecimal values are the intended ones for the following

	511 * constants. The decimal values may be used, provided that the

	512 * compiler will convert from decimal to binary accurately enough

	513 * to produce the hexadecimal values shown.

	514 */

	515 static const int init_jk[] = {2, 3, 4, 6}; /* initial value for jk */

	516

	517 static const double PIo2[] = {

	518 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */

	519 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */

	520 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */

	521 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */

	522 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */

	523 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */

	524 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */

	525 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */

	526 };

	527

	528 static const double

	529 zero = 0.0,

	530 one = 1.0,

	531 two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */

	532 twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */

	533

	534 int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;

	535 double z, fw, f[20], fq[20], q[20];

	536

	537 /* initialize jk*/

	538 jk = init_jk[prec];

	539 jp = jk;

	540

	541 /* determine jx,jv,q0, note that 3>q0 */

	542 jx = nx - 1;

	543 jv = (e0 - 3) / 24;

	544 if (jv < 0) jv = 0;

	545 q0 = e0 - 24 * (jv + 1);

	546

	547 /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */

	548 j = jv - jx;

	549 m = jx + jk;

	550 for (i = 0; i <= m; i++, j++) {

	551 f[i] = (j < 0) ? zero : static_cast<double>(ipio2[j]);

	552 }

	553

	554 /* compute q[0],q[1],...q[jk] */

	555 for (i = 0; i <= jk; i++) {

	556 for (j = 0, fw = 0.0; j <= jx; j++) fw += x[j] * f[jx + i - j];

	557 q[i] = fw;

	558 }

	559

	560 jz = jk;

	561 recompute:

	562 /* distill q[] into iq[] reversingly */

	563 for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) {

	564 fw = static_cast<double>(static_cast<int32_t>(twon24 * z));

	565 iq[i] = static_cast<int32_t>(z - two24 * fw);

	566 z = q[j - 1] + fw;

	567 }

	568

	569 /* compute n */

	570 z = scalbn(z, q0); /* actual value of z */

	571 z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */

	572 n = static_cast<int32_t>(z);

	573 z -= static_cast<double>(n);

	574 ih = 0;

	575 if (q0 > 0) { /* need iq[jz-1] to determine n */

	576 i = (iq[jz - 1] >> (24 - q0));

	577 n += i;

	578 iq[jz - 1] -= i << (24 - q0);

	579 ih = iq[jz - 1] >> (23 - q0);

	580 } else if (q0 == 0) {

	581 ih = iq[jz - 1] >> 23;

	582 } else if (z >= 0.5) {

	583 ih = 2;

	584 }

	585

	586 if (ih > 0) { /* q > 0.5 */

	587 n += 1;

	588 carry = 0;

	589 for (i = 0; i < jz; i++) { /* compute 1-q */

	590 j = iq[i];

	591 if (carry == 0) {

	592 if (j != 0) {

	593 carry = 1;

	594 iq[i] = 0x1000000 - j;

	595 }

	596 } else {

	597 iq[i] = 0xffffff - j;

	598 }

	599 }

	600 if (q0 > 0) { /* rare case: chance is 1 in 12 */

	601 switch (q0) {

	602 case 1:

	603 iq[jz - 1] &= 0x7fffff;

	604 break;

	605 case 2:

	606 iq[jz - 1] &= 0x3fffff;

	607 break;

	608 }

	609 }

	610 if (ih == 2) {

	611 z = one - z;

	612 if (carry != 0) z -= scalbn(one, q0);

	613 }

	614 }

	615

	616 /* check if recomputation is needed */

	617 if (z == zero) {

	618 j = 0;

	619 for (i = jz - 1; i >= jk; i--) j \|= iq[i];

	620 if (j == 0) { /* need recomputation */

	621 for (k = 1; iq[jk - k] == 0; k++) {

	622 /* k = no. of terms needed */

	623 }

	624

	625 for (i = jz + 1; i <= jz + k; i++) { /* add q[jz+1] to q[jz+k] */

	626 f[jx + i] = ipio2[jv + i];

	627 for (j = 0, fw = 0.0; j <= jx; j++) fw += x[j] * f[jx + i - j];

	628 q[i] = fw;

	629 }

	630 jz += k;

	631 goto recompute;

	632 }

	633 }

	634

	635 /* chop off zero terms */

	636 if (z == 0.0) {

	637 jz -= 1;

	638 q0 -= 24;

	639 while (iq[jz] == 0) {

	640 jz--;

	641 q0 -= 24;

	642 }

	643 } else { /* break z into 24-bit if necessary */

	644 z = scalbn(z, -q0);

	645 if (z >= two24) {

	646 fw = static_cast<double>(static_cast<int32_t>(twon24 * z));

	647 iq[jz] = z - two24 * fw;

	648 jz += 1;

	649 q0 += 24;

	650 iq[jz] = fw;

	651 } else {

	652 iq[jz] = z;

	653 }

	654 }

	655

	656 /* convert integer "bit" chunk to floating-point value */

	657 fw = scalbn(one, q0);

	658 for (i = jz; i >= 0; i--) {

	659 q[i] = fw * iq[i];

	660 fw *= twon24;

	661 }

	662

	663 /* compute PIo2[0,...,jp]q[jz,...,0] /

	664 for (i = jz; i >= 0; i--) {

	665 for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++) fw += PIo2[k] * q[i + k];

	666 fq[jz - i] = fw;

	667 }

	668

	669 /* compress fq[] into y[] */

	670 switch (prec) {

	671 case 0:

	672 fw = 0.0;

	673 for (i = jz; i >= 0; i--) fw += fq[i];

	674 y[0] = (ih == 0) ? fw : -fw;

	675 break;

	676 case 1:

	677 case 2:

	678 fw = 0.0;

	679 for (i = jz; i >= 0; i--) fw += fq[i];

	680 y[0] = (ih == 0) ? fw : -fw;

	681 fw = fq[0] - fw;

	682 for (i = 1; i <= jz; i++) fw += fq[i];

	683 y[1] = (ih == 0) ? fw : -fw;

	684 break;

	685 case 3: /* painful */

	686 for (i = jz; i > 0; i--) {

	687 fw = fq[i - 1] + fq[i];

	688 fq[i] += fq[i - 1] - fw;

	689 fq[i - 1] = fw;

	690 }

	691 for (i = jz; i > 1; i--) {

	692 fw = fq[i - 1] + fq[i];

	693 fq[i] += fq[i - 1] - fw;

	694 fq[i - 1] = fw;

	695 }

	696 for (fw = 0.0, i = jz; i >= 2; i--) fw += fq[i];

	697 if (ih == 0) {

	698 y[0] = fq[0];

	699 y[1] = fq[1];

	700 y[2] = fw;

	701 } else {

	702 y[0] = -fq[0];

	703 y[1] = -fq[1];

	704 y[2] = -fw;

	705 }

	706 }

	707 return n & 7;

	708 }

	709

	710 /* __kernel_sin( x, y, iy)

	711 * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854

	712 * Input x is assumed to be bounded by ~pi/4 in magnitude.

	713 * Input y is the tail of x.

	714 * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).

	715 *

	716 * Algorithm

	717 * 1. Since sin(-x) = -sin(x), we need only to consider positive x.

	718 * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.

	719 * 3. sin(x) is approximated by a polynomial of degree 13 on

	720 * [0,pi/4]

	721 * 3 13

	722 * sin(x) ~ x + S1x + ... + S6x

	723 * where

	724 *

	725 * \|sin(x) 2 4 6 8 10 12 \| -58

	726 * \|----- - (1+S1x +S2x +S3x +S4x +S5x +S6x )\| <= 2

	727 * \| x \|

	728 *

	729 * 4. sin(x+y) = sin(x) + sin'(x')*y

	730 * ~ sin(x) + (1-xx/2)y

	731 * For better accuracy, let

	732 * 3 2 2 2 2

	733 * r = x (S2+x (S3+x (S4+x (S5+x *S6))))

	734 * then 3 2

	735 * sin(x) = x + (S1x + (x (r-y/2)+y))

	736 */

	737 V8_INLINE double __kernel_sin(double x, double y, int iy) {

	738 static const double

	739 half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */

	740 S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */

	741 S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */

	742 S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */

	743 S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */

	744 S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */

	745 S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */

	746

	747 double z, r, v;

	748 int32_t ix;

	749 GET_HIGH_WORD(ix, x);

	750 ix &= 0x7fffffff; /* high word of x */

	751 if (ix < 0x3e400000) { /* \|x\| < 2*-27 /

	752 if (static_cast<int>(x) == 0) return x;

	753 } /* generate inexact */

	754 z = x * x;

	755 v = z * x;

	756 r = S2 + z * (S3 + z * (S4 + z * (S5 + z * S6)));

	757 if (iy == 0) {

	758 return x + v * (S1 + z * r);

	759 } else {

	760 return x - ((z * (half * y - v * r) - y) - v * S1);

	761 }

	762 }

	763

171 } // namespace	764 } // namespace

172	765

173 /* atan(x)	766 /* atan(x)

174 * Method	767 * Method

175 * 1. Reduce x to positive by atan(x) = -atan(-x).	768 * 1. Reduce x to positive by atan(x) = -atan(-x).

176 * 2. According to the integer k=4t+0.25 chopped, t=x, the argument	769 * 2. According to the integer k=4t+0.25 chopped, t=x, the argument

177 * is further reduced to one of the following intervals and the	770 * is further reduced to one of the following intervals and the

178 * arctangent of t is evaluated by the corresponding formula:	771 * arctangent of t is evaluated by the corresponding formula:

179 *	772 *

180 * [0,7/16] atan(x) = t-t^3(a1+t^2(a2+...(a10+t^2*a11)...)	773 * [0,7/16] atan(x) = t-t^3(a1+t^2(a2+...(a10+t^2*a11)...)

(...skipping 204 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
385 return z; /* atan(+,+) */	978 return z; /* atan(+,+) */

386 case 1:	979 case 1:

387 return -z; /* atan(-,+) */	980 return -z; /* atan(-,+) */

388 case 2:	981 case 2:

389 return pi - (z - pi_lo); /* atan(+,-) */	982 return pi - (z - pi_lo); /* atan(+,-) */

390 default: /* case 3 */	983 default: /* case 3 */

391 return (z - pi_lo) - pi; /* atan(-,-) */	984 return (z - pi_lo) - pi; /* atan(-,-) */

392 }	985 }

393 }	986 }

394	987

	988 /* cos(x)

	989 * Return cosine function of x.

	990 *

	991 * kernel function:

	992 * __kernel_sin ... sine function on [-pi/4,pi/4]

	993 * __kernel_cos ... cosine function on [-pi/4,pi/4]

	994 * __ieee754_rem_pio2 ... argument reduction routine

	995 *

	996 * Method.

	997 * Let S,C and T denote the sin, cos and tan respectively on

	998 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2

	999 * in [-pi/4 , +pi/4], and let n = k mod 4.

	1000 * We have

	1001 *

	1002 * n sin(x) cos(x) tan(x)

	1003 * ----------------------------------------------------------

	1004 * 0 S C T

	1005 * 1 C -S -1/T

	1006 * 2 -S -C T

	1007 * 3 -C S -1/T

	1008 * ----------------------------------------------------------

	1009 *

	1010 * Special cases:

	1011 * Let trig be any of sin, cos, or tan.

	1012 * trig(+-INF) is NaN, with signals;

	1013 * trig(NaN) is that NaN;

	1014 *

	1015 * Accuracy:

	1016 * TRIG(x) returns trig(x) nearly rounded

	1017 */

	1018 double cos(double x) {

	1019 double y[2], z = 0.0;

	1020 int32_t n, ix;

	1021

	1022 /* High word of x. */

	1023 GET_HIGH_WORD(ix, x);

	1024

	1025 /* \|x\| ~< pi/4 */

	1026 ix &= 0x7fffffff;

	1027 if (ix <= 0x3fe921fb) {

	1028 return __kernel_cos(x, z);

	1029 } else if (ix >= 0x7ff00000) {

	1030 /* cos(Inf or NaN) is NaN */

	1031 return x - x;

	1032 } else {

	1033 /* argument reduction needed */

	1034 n = __ieee754_rem_pio2(x, y);

	1035 switch (n & 3) {

	1036 case 0:

	1037 return __kernel_cos(y[0], y[1]);

	1038 case 1:

	1039 return -__kernel_sin(y[0], y[1], 1);

	1040 case 2:

	1041 return -__kernel_cos(y[0], y[1]);

	1042 default:

	1043 return __kernel_sin(y[0], y[1], 1);

	1044 }

	1045 }

	1046 }

	1047

395 /* exp(x)	1048 /* exp(x)

396 * Returns the exponential of x.	1049 * Returns the exponential of x.

397 *	1050 *

398 * Method	1051 * Method

399 * 1. Argument reduction:	1052 * 1. Argument reduction:

400 * Reduce x to an r so that \|r\| <= 0.5*ln2 ~ 0.34658.	1053 * Reduce x to an r so that \|r\| <= 0.5*ln2 ~ 0.34658.

401 * Given x, find r and integer k such that	1054 * Given x, find r and integer k such that

402 *	1055 *

403 * x = kln2 + r, \|r\| <= 0.5ln2.	1056 * x = kln2 + r, \|r\| <= 0.5ln2.

404 *	1057 *

(...skipping 998 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1403 /* one step Newton iteration to 53 bits with error < 0.667 ulps */	2056 /* one step Newton iteration to 53 bits with error < 0.667 ulps */

1404 s = t * t; /* tt is exact /	2057 s = t * t; /* tt is exact /

1405 r = x / s; /* error <= 0.5 ulps; \|r\| < \|t\| */	2058 r = x / s; /* error <= 0.5 ulps; \|r\| < \|t\| */

1406 w = t + t; /* t+t is exact */	2059 w = t + t; /* t+t is exact */

1407 r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3t /	2060 r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3t /

1408 t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */	2061 t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */

1409	2062

1410 return (t);	2063 return (t);

1411 }	2064 }

1412	2065

	2066 /* sin(x)

	2067 * Return sine function of x.

	2068 *

	2069 * kernel function:

	2070 * __kernel_sin ... sine function on [-pi/4,pi/4]

	2071 * __kernel_cos ... cose function on [-pi/4,pi/4]

	2072 * __ieee754_rem_pio2 ... argument reduction routine

	2073 *

	2074 * Method.

	2075 * Let S,C and T denote the sin, cos and tan respectively on

	2076 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2

	2077 * in [-pi/4 , +pi/4], and let n = k mod 4.

	2078 * We have

	2079 *

	2080 * n sin(x) cos(x) tan(x)

	2081 * ----------------------------------------------------------

	2082 * 0 S C T

	2083 * 1 C -S -1/T

	2084 * 2 -S -C T

	2085 * 3 -C S -1/T

	2086 * ----------------------------------------------------------

	2087 *

	2088 * Special cases:

	2089 * Let trig be any of sin, cos, or tan.

	2090 * trig(+-INF) is NaN, with signals;

	2091 * trig(NaN) is that NaN;

	2092 *

	2093 * Accuracy:

	2094 * TRIG(x) returns trig(x) nearly rounded

	2095 */

	2096 double sin(double x) {

	2097 double y[2], z = 0.0;

	2098 int32_t n, ix;

	2099

	2100 /* High word of x. */

	2101 GET_HIGH_WORD(ix, x);

	2102

	2103 /* \|x\| ~< pi/4 */

	2104 ix &= 0x7fffffff;

	2105 if (ix <= 0x3fe921fb) {

	2106 return __kernel_sin(x, z, 0);

	2107 } else if (ix >= 0x7ff00000) {

	2108 /* sin(Inf or NaN) is NaN */

	2109 return x - x;

	2110 } else {

	2111 /* argument reduction needed */

	2112 n = __ieee754_rem_pio2(x, y);

	2113 switch (n & 3) {

	2114 case 0:

	2115 return __kernel_sin(y[0], y[1], 1);

	2116 case 1:

	2117 return __kernel_cos(y[0], y[1]);

	2118 case 2:

	2119 return -__kernel_sin(y[0], y[1], 1);

	2120 default:

	2121 return -__kernel_cos(y[0], y[1]);

	2122 }

	2123 }

	2124 }

	2125

1413 } // namespace ieee754	2126 } // namespace ieee754

1414 } // namespace base	2127 } // namespace base

1415 } // namespace v8	2128 } // namespace v8

OLD	NEW

« no previous file with comments | « src/base/ieee754.h ('k') | src/bootstrapper.cc » ('j') | no next file with comments »