Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(247)

Side by Side Diff: src/base/ieee754.cc

Issue 2073123002: [builtins] Introduce proper Float64Cos and Float64Sin. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Fix missing breaks Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/base/ieee754.h ('k') | src/bootstrapper.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm). 1 // The following is adapted from fdlibm (http://www.netlib.org/fdlibm).
2 // 2 //
3 // ==================================================== 3 // ====================================================
4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 4 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5 // 5 //
6 // Developed at SunSoft, a Sun Microsystems, Inc. business. 6 // Developed at SunSoft, a Sun Microsystems, Inc. business.
7 // Permission to use, copy, modify, and distribute this 7 // Permission to use, copy, modify, and distribute this
8 // software is freely granted, provided that this notice 8 // software is freely granted, provided that this notice
9 // is preserved. 9 // is preserved.
10 // ==================================================== 10 // ====================================================
(...skipping 150 matching lines...) Expand 10 before | Expand all | Expand 10 after
161 ieee_double_shape_type sl_u; \ 161 ieee_double_shape_type sl_u; \
162 sl_u.value = (d); \ 162 sl_u.value = (d); \
163 sl_u.parts.lsw = (v); \ 163 sl_u.parts.lsw = (v); \
164 (d) = sl_u.value; \ 164 (d) = sl_u.value; \
165 } while (0) 165 } while (0)
166 166
167 /* Support macro. */ 167 /* Support macro. */
168 168
169 #define STRICT_ASSIGN(type, lval, rval) ((lval) = (rval)) 169 #define STRICT_ASSIGN(type, lval, rval) ((lval) = (rval))
170 170
171 int32_t __ieee754_rem_pio2(double x, double *y) WARN_UNUSED_RESULT;
172 double __kernel_cos(double x, double y) WARN_UNUSED_RESULT;
173 int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec,
174 const int32_t *ipio2) WARN_UNUSED_RESULT;
175 double __kernel_sin(double x, double y, int iy) WARN_UNUSED_RESULT;
176
177 /* __ieee754_rem_pio2(x,y)
178 *
179 * return the remainder of x rem pi/2 in y[0]+y[1]
180 * use __kernel_rem_pio2()
181 */
182 int32_t __ieee754_rem_pio2(double x, double *y) {
183 /*
184 * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
185 */
186 static const int32_t two_over_pi[] = {
187 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C,
188 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649,
189 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, 0xA73EE8, 0x8235F5, 0x2EBB44,
190 0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C, 0x845F8B,
191 0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D,
192 0x367ECF, 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
193 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330,
194 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, 0x91615E, 0xE61B08,
195 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA,
196 0x73A8C9, 0x60E27B, 0xC08C6B,
197 };
198
199 static const int32_t npio2_hw[] = {
200 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
201 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
202 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
203 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
204 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
205 0x404858EB, 0x404921FB,
206 };
207
208 /*
209 * invpio2: 53 bits of 2/pi
210 * pio2_1: first 33 bit of pi/2
211 * pio2_1t: pi/2 - pio2_1
212 * pio2_2: second 33 bit of pi/2
213 * pio2_2t: pi/2 - (pio2_1+pio2_2)
214 * pio2_3: third 33 bit of pi/2
215 * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
216 */
217
218 static const double
219 zero = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
220 half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
221 two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
222 invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
223 pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
224 pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
225 pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
226 pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
227 pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
228 pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
229
230 double z, w, t, r, fn;
231 double tx[3];
232 int32_t e0, i, j, nx, n, ix, hx;
233 u_int32_t low;
234
235 z = 0;
236 GET_HIGH_WORD(hx, x); /* high word of x */
237 ix = hx & 0x7fffffff;
238 if (ix <= 0x3fe921fb) { /* |x| ~<= pi/4 , no need for reduction */
239 y[0] = x;
240 y[1] = 0;
241 return 0;
242 }
243 if (ix < 0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */
244 if (hx > 0) {
245 z = x - pio2_1;
246 if (ix != 0x3ff921fb) { /* 33+53 bit pi is good enough */
247 y[0] = z - pio2_1t;
248 y[1] = (z - y[0]) - pio2_1t;
249 } else { /* near pi/2, use 33+33+53 bit pi */
250 z -= pio2_2;
251 y[0] = z - pio2_2t;
252 y[1] = (z - y[0]) - pio2_2t;
253 }
254 return 1;
255 } else { /* negative x */
256 z = x + pio2_1;
257 if (ix != 0x3ff921fb) { /* 33+53 bit pi is good enough */
258 y[0] = z + pio2_1t;
259 y[1] = (z - y[0]) + pio2_1t;
260 } else { /* near pi/2, use 33+33+53 bit pi */
261 z += pio2_2;
262 y[0] = z + pio2_2t;
263 y[1] = (z - y[0]) + pio2_2t;
264 }
265 return -1;
266 }
267 }
268 if (ix <= 0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
269 t = fabs(x);
270 n = static_cast<int32_t>(t * invpio2 + half);
271 fn = static_cast<double>(n);
272 r = t - fn * pio2_1;
273 w = fn * pio2_1t; /* 1st round good to 85 bit */
274 if (n < 32 && ix != npio2_hw[n - 1]) {
275 y[0] = r - w; /* quick check no cancellation */
276 } else {
277 u_int32_t high;
278 j = ix >> 20;
279 y[0] = r - w;
280 GET_HIGH_WORD(high, y[0]);
281 i = j - ((high >> 20) & 0x7ff);
282 if (i > 16) { /* 2nd iteration needed, good to 118 */
283 t = r;
284 w = fn * pio2_2;
285 r = t - w;
286 w = fn * pio2_2t - ((t - r) - w);
287 y[0] = r - w;
288 GET_HIGH_WORD(high, y[0]);
289 i = j - ((high >> 20) & 0x7ff);
290 if (i > 49) { /* 3rd iteration need, 151 bits acc */
291 t = r; /* will cover all possible cases */
292 w = fn * pio2_3;
293 r = t - w;
294 w = fn * pio2_3t - ((t - r) - w);
295 y[0] = r - w;
296 }
297 }
298 }
299 y[1] = (r - y[0]) - w;
300 if (hx < 0) {
301 y[0] = -y[0];
302 y[1] = -y[1];
303 return -n;
304 } else {
305 return n;
306 }
307 }
308 /*
309 * all other (large) arguments
310 */
311 if (ix >= 0x7ff00000) { /* x is inf or NaN */
312 y[0] = y[1] = x - x;
313 return 0;
314 }
315 /* set z = scalbn(|x|,ilogb(x)-23) */
316 GET_LOW_WORD(low, x);
317 SET_LOW_WORD(z, low);
318 e0 = (ix >> 20) - 1046; /* e0 = ilogb(z)-23; */
319 SET_HIGH_WORD(z, ix - static_cast<int32_t>(e0 << 20));
320 for (i = 0; i < 2; i++) {
321 tx[i] = static_cast<double>(static_cast<int32_t>(z));
322 z = (z - tx[i]) * two24;
323 }
324 tx[2] = z;
325 nx = 3;
326 while (tx[nx - 1] == zero) nx--; /* skip zero term */
327 n = __kernel_rem_pio2(tx, y, e0, nx, 2, two_over_pi);
328 if (hx < 0) {
329 y[0] = -y[0];
330 y[1] = -y[1];
331 return -n;
332 }
333 return n;
334 }
335
336 /* __kernel_cos( x, y )
337 * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
338 * Input x is assumed to be bounded by ~pi/4 in magnitude.
339 * Input y is the tail of x.
340 *
341 * Algorithm
342 * 1. Since cos(-x) = cos(x), we need only to consider positive x.
343 * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
344 * 3. cos(x) is approximated by a polynomial of degree 14 on
345 * [0,pi/4]
346 * 4 14
347 * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
348 * where the remez error is
349 *
350 * | 2 4 6 8 10 12 14 | -58
351 * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2
352 * | |
353 *
354 * 4 6 8 10 12 14
355 * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then
356 * cos(x) = 1 - x*x/2 + r
357 * since cos(x+y) ~ cos(x) - sin(x)*y
358 * ~ cos(x) - x*y,
359 * a correction term is necessary in cos(x) and hence
360 * cos(x+y) = 1 - (x*x/2 - (r - x*y))
361 * For better accuracy when x > 0.3, let qx = |x|/4 with
362 * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
363 * Then
364 * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
365 * Note that 1-qx and (x*x/2-qx) is EXACT here, and the
366 * magnitude of the latter is at least a quarter of x*x/2,
367 * thus, reducing the rounding error in the subtraction.
368 */
369 V8_INLINE double __kernel_cos(double x, double y) {
370 static const double
371 one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
372 C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
373 C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
374 C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
375 C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
376 C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
377 C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
378
379 double a, hz, z, r, qx;
380 int32_t ix;
381 GET_HIGH_WORD(ix, x);
382 ix &= 0x7fffffff; /* ix = |x|'s high word*/
383 if (ix < 0x3e400000) { /* if x < 2**27 */
384 if (static_cast<int>(x) == 0) return one; /* generate inexact */
385 }
386 z = x * x;
387 r = z * (C1 + z * (C2 + z * (C3 + z * (C4 + z * (C5 + z * C6)))));
388 if (ix < 0x3FD33333) { /* if |x| < 0.3 */
389 return one - (0.5 * z - (z * r - x * y));
390 } else {
391 if (ix > 0x3fe90000) { /* x > 0.78125 */
392 qx = 0.28125;
393 } else {
394 INSERT_WORDS(qx, ix - 0x00200000, 0); /* x/4 */
395 }
396 hz = 0.5 * z - qx;
397 a = one - qx;
398 return a - (hz - (z * r - x * y));
399 }
400 }
401
402 /* __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
403 * double x[],y[]; int e0,nx,prec; int ipio2[];
404 *
405 * __kernel_rem_pio2 return the last three digits of N with
406 * y = x - N*pi/2
407 * so that |y| < pi/2.
408 *
409 * The method is to compute the integer (mod 8) and fraction parts of
410 * (2/pi)*x without doing the full multiplication. In general we
411 * skip the part of the product that are known to be a huge integer (
412 * more accurately, = 0 mod 8 ). Thus the number of operations are
413 * independent of the exponent of the input.
414 *
415 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
416 *
417 * Input parameters:
418 * x[] The input value (must be positive) is broken into nx
419 * pieces of 24-bit integers in double precision format.
420 * x[i] will be the i-th 24 bit of x. The scaled exponent
421 * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
422 * match x's up to 24 bits.
423 *
424 * Example of breaking a double positive z into x[0]+x[1]+x[2]:
425 * e0 = ilogb(z)-23
426 * z = scalbn(z,-e0)
427 * for i = 0,1,2
428 * x[i] = floor(z)
429 * z = (z-x[i])*2**24
430 *
431 *
432 * y[] output result in an array of double precision numbers.
433 * The dimension of y[] is:
434 * 24-bit precision 1
435 * 53-bit precision 2
436 * 64-bit precision 2
437 * 113-bit precision 3
438 * The actual value is the sum of them. Thus for 113-bit
439 * precison, one may have to do something like:
440 *
441 * long double t,w,r_head, r_tail;
442 * t = (long double)y[2] + (long double)y[1];
443 * w = (long double)y[0];
444 * r_head = t+w;
445 * r_tail = w - (r_head - t);
446 *
447 * e0 The exponent of x[0]
448 *
449 * nx dimension of x[]
450 *
451 * prec an integer indicating the precision:
452 * 0 24 bits (single)
453 * 1 53 bits (double)
454 * 2 64 bits (extended)
455 * 3 113 bits (quad)
456 *
457 * ipio2[]
458 * integer array, contains the (24*i)-th to (24*i+23)-th
459 * bit of 2/pi after binary point. The corresponding
460 * floating value is
461 *
462 * ipio2[i] * 2^(-24(i+1)).
463 *
464 * External function:
465 * double scalbn(), floor();
466 *
467 *
468 * Here is the description of some local variables:
469 *
470 * jk jk+1 is the initial number of terms of ipio2[] needed
471 * in the computation. The recommended value is 2,3,4,
472 * 6 for single, double, extended,and quad.
473 *
474 * jz local integer variable indicating the number of
475 * terms of ipio2[] used.
476 *
477 * jx nx - 1
478 *
479 * jv index for pointing to the suitable ipio2[] for the
480 * computation. In general, we want
481 * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
482 * is an integer. Thus
483 * e0-3-24*jv >= 0 or (e0-3)/24 >= jv
484 * Hence jv = max(0,(e0-3)/24).
485 *
486 * jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
487 *
488 * q[] double array with integral value, representing the
489 * 24-bits chunk of the product of x and 2/pi.
490 *
491 * q0 the corresponding exponent of q[0]. Note that the
492 * exponent for q[i] would be q0-24*i.
493 *
494 * PIo2[] double precision array, obtained by cutting pi/2
495 * into 24 bits chunks.
496 *
497 * f[] ipio2[] in floating point
498 *
499 * iq[] integer array by breaking up q[] in 24-bits chunk.
500 *
501 * fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
502 *
503 * ih integer. If >0 it indicates q[] is >= 0.5, hence
504 * it also indicates the *sign* of the result.
505 *
506 */
507 int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec,
508 const int32_t *ipio2) {
509 /* Constants:
510 * The hexadecimal values are the intended ones for the following
511 * constants. The decimal values may be used, provided that the
512 * compiler will convert from decimal to binary accurately enough
513 * to produce the hexadecimal values shown.
514 */
515 static const int init_jk[] = {2, 3, 4, 6}; /* initial value for jk */
516
517 static const double PIo2[] = {
518 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
519 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
520 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
521 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
522 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
523 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
524 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
525 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
526 };
527
528 static const double
529 zero = 0.0,
530 one = 1.0,
531 two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
532 twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
533
534 int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
535 double z, fw, f[20], fq[20], q[20];
536
537 /* initialize jk*/
538 jk = init_jk[prec];
539 jp = jk;
540
541 /* determine jx,jv,q0, note that 3>q0 */
542 jx = nx - 1;
543 jv = (e0 - 3) / 24;
544 if (jv < 0) jv = 0;
545 q0 = e0 - 24 * (jv + 1);
546
547 /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
548 j = jv - jx;
549 m = jx + jk;
550 for (i = 0; i <= m; i++, j++) {
551 f[i] = (j < 0) ? zero : static_cast<double>(ipio2[j]);
552 }
553
554 /* compute q[0],q[1],...q[jk] */
555 for (i = 0; i <= jk; i++) {
556 for (j = 0, fw = 0.0; j <= jx; j++) fw += x[j] * f[jx + i - j];
557 q[i] = fw;
558 }
559
560 jz = jk;
561 recompute:
562 /* distill q[] into iq[] reversingly */
563 for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) {
564 fw = static_cast<double>(static_cast<int32_t>(twon24 * z));
565 iq[i] = static_cast<int32_t>(z - two24 * fw);
566 z = q[j - 1] + fw;
567 }
568
569 /* compute n */
570 z = scalbn(z, q0); /* actual value of z */
571 z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */
572 n = static_cast<int32_t>(z);
573 z -= static_cast<double>(n);
574 ih = 0;
575 if (q0 > 0) { /* need iq[jz-1] to determine n */
576 i = (iq[jz - 1] >> (24 - q0));
577 n += i;
578 iq[jz - 1] -= i << (24 - q0);
579 ih = iq[jz - 1] >> (23 - q0);
580 } else if (q0 == 0) {
581 ih = iq[jz - 1] >> 23;
582 } else if (z >= 0.5) {
583 ih = 2;
584 }
585
586 if (ih > 0) { /* q > 0.5 */
587 n += 1;
588 carry = 0;
589 for (i = 0; i < jz; i++) { /* compute 1-q */
590 j = iq[i];
591 if (carry == 0) {
592 if (j != 0) {
593 carry = 1;
594 iq[i] = 0x1000000 - j;
595 }
596 } else {
597 iq[i] = 0xffffff - j;
598 }
599 }
600 if (q0 > 0) { /* rare case: chance is 1 in 12 */
601 switch (q0) {
602 case 1:
603 iq[jz - 1] &= 0x7fffff;
604 break;
605 case 2:
606 iq[jz - 1] &= 0x3fffff;
607 break;
608 }
609 }
610 if (ih == 2) {
611 z = one - z;
612 if (carry != 0) z -= scalbn(one, q0);
613 }
614 }
615
616 /* check if recomputation is needed */
617 if (z == zero) {
618 j = 0;
619 for (i = jz - 1; i >= jk; i--) j |= iq[i];
620 if (j == 0) { /* need recomputation */
621 for (k = 1; iq[jk - k] == 0; k++) {
622 /* k = no. of terms needed */
623 }
624
625 for (i = jz + 1; i <= jz + k; i++) { /* add q[jz+1] to q[jz+k] */
626 f[jx + i] = ipio2[jv + i];
627 for (j = 0, fw = 0.0; j <= jx; j++) fw += x[j] * f[jx + i - j];
628 q[i] = fw;
629 }
630 jz += k;
631 goto recompute;
632 }
633 }
634
635 /* chop off zero terms */
636 if (z == 0.0) {
637 jz -= 1;
638 q0 -= 24;
639 while (iq[jz] == 0) {
640 jz--;
641 q0 -= 24;
642 }
643 } else { /* break z into 24-bit if necessary */
644 z = scalbn(z, -q0);
645 if (z >= two24) {
646 fw = static_cast<double>(static_cast<int32_t>(twon24 * z));
647 iq[jz] = z - two24 * fw;
648 jz += 1;
649 q0 += 24;
650 iq[jz] = fw;
651 } else {
652 iq[jz] = z;
653 }
654 }
655
656 /* convert integer "bit" chunk to floating-point value */
657 fw = scalbn(one, q0);
658 for (i = jz; i >= 0; i--) {
659 q[i] = fw * iq[i];
660 fw *= twon24;
661 }
662
663 /* compute PIo2[0,...,jp]*q[jz,...,0] */
664 for (i = jz; i >= 0; i--) {
665 for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++) fw += PIo2[k] * q[i + k];
666 fq[jz - i] = fw;
667 }
668
669 /* compress fq[] into y[] */
670 switch (prec) {
671 case 0:
672 fw = 0.0;
673 for (i = jz; i >= 0; i--) fw += fq[i];
674 y[0] = (ih == 0) ? fw : -fw;
675 break;
676 case 1:
677 case 2:
678 fw = 0.0;
679 for (i = jz; i >= 0; i--) fw += fq[i];
680 y[0] = (ih == 0) ? fw : -fw;
681 fw = fq[0] - fw;
682 for (i = 1; i <= jz; i++) fw += fq[i];
683 y[1] = (ih == 0) ? fw : -fw;
684 break;
685 case 3: /* painful */
686 for (i = jz; i > 0; i--) {
687 fw = fq[i - 1] + fq[i];
688 fq[i] += fq[i - 1] - fw;
689 fq[i - 1] = fw;
690 }
691 for (i = jz; i > 1; i--) {
692 fw = fq[i - 1] + fq[i];
693 fq[i] += fq[i - 1] - fw;
694 fq[i - 1] = fw;
695 }
696 for (fw = 0.0, i = jz; i >= 2; i--) fw += fq[i];
697 if (ih == 0) {
698 y[0] = fq[0];
699 y[1] = fq[1];
700 y[2] = fw;
701 } else {
702 y[0] = -fq[0];
703 y[1] = -fq[1];
704 y[2] = -fw;
705 }
706 }
707 return n & 7;
708 }
709
710 /* __kernel_sin( x, y, iy)
711 * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
712 * Input x is assumed to be bounded by ~pi/4 in magnitude.
713 * Input y is the tail of x.
714 * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
715 *
716 * Algorithm
717 * 1. Since sin(-x) = -sin(x), we need only to consider positive x.
718 * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
719 * 3. sin(x) is approximated by a polynomial of degree 13 on
720 * [0,pi/4]
721 * 3 13
722 * sin(x) ~ x + S1*x + ... + S6*x
723 * where
724 *
725 * |sin(x) 2 4 6 8 10 12 | -58
726 * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2
727 * | x |
728 *
729 * 4. sin(x+y) = sin(x) + sin'(x')*y
730 * ~ sin(x) + (1-x*x/2)*y
731 * For better accuracy, let
732 * 3 2 2 2 2
733 * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
734 * then 3 2
735 * sin(x) = x + (S1*x + (x *(r-y/2)+y))
736 */
737 V8_INLINE double __kernel_sin(double x, double y, int iy) {
738 static const double
739 half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
740 S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
741 S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
742 S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
743 S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
744 S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
745 S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
746
747 double z, r, v;
748 int32_t ix;
749 GET_HIGH_WORD(ix, x);
750 ix &= 0x7fffffff; /* high word of x */
751 if (ix < 0x3e400000) { /* |x| < 2**-27 */
752 if (static_cast<int>(x) == 0) return x;
753 } /* generate inexact */
754 z = x * x;
755 v = z * x;
756 r = S2 + z * (S3 + z * (S4 + z * (S5 + z * S6)));
757 if (iy == 0) {
758 return x + v * (S1 + z * r);
759 } else {
760 return x - ((z * (half * y - v * r) - y) - v * S1);
761 }
762 }
763
171 } // namespace 764 } // namespace
172 765
173 /* atan(x) 766 /* atan(x)
174 * Method 767 * Method
175 * 1. Reduce x to positive by atan(x) = -atan(-x). 768 * 1. Reduce x to positive by atan(x) = -atan(-x).
176 * 2. According to the integer k=4t+0.25 chopped, t=x, the argument 769 * 2. According to the integer k=4t+0.25 chopped, t=x, the argument
177 * is further reduced to one of the following intervals and the 770 * is further reduced to one of the following intervals and the
178 * arctangent of t is evaluated by the corresponding formula: 771 * arctangent of t is evaluated by the corresponding formula:
179 * 772 *
180 * [0,7/16] atan(x) = t-t^3*(a1+t^2*(a2+...(a10+t^2*a11)...) 773 * [0,7/16] atan(x) = t-t^3*(a1+t^2*(a2+...(a10+t^2*a11)...)
(...skipping 204 matching lines...) Expand 10 before | Expand all | Expand 10 after
385 return z; /* atan(+,+) */ 978 return z; /* atan(+,+) */
386 case 1: 979 case 1:
387 return -z; /* atan(-,+) */ 980 return -z; /* atan(-,+) */
388 case 2: 981 case 2:
389 return pi - (z - pi_lo); /* atan(+,-) */ 982 return pi - (z - pi_lo); /* atan(+,-) */
390 default: /* case 3 */ 983 default: /* case 3 */
391 return (z - pi_lo) - pi; /* atan(-,-) */ 984 return (z - pi_lo) - pi; /* atan(-,-) */
392 } 985 }
393 } 986 }
394 987
988 /* cos(x)
989 * Return cosine function of x.
990 *
991 * kernel function:
992 * __kernel_sin ... sine function on [-pi/4,pi/4]
993 * __kernel_cos ... cosine function on [-pi/4,pi/4]
994 * __ieee754_rem_pio2 ... argument reduction routine
995 *
996 * Method.
997 * Let S,C and T denote the sin, cos and tan respectively on
998 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
999 * in [-pi/4 , +pi/4], and let n = k mod 4.
1000 * We have
1001 *
1002 * n sin(x) cos(x) tan(x)
1003 * ----------------------------------------------------------
1004 * 0 S C T
1005 * 1 C -S -1/T
1006 * 2 -S -C T
1007 * 3 -C S -1/T
1008 * ----------------------------------------------------------
1009 *
1010 * Special cases:
1011 * Let trig be any of sin, cos, or tan.
1012 * trig(+-INF) is NaN, with signals;
1013 * trig(NaN) is that NaN;
1014 *
1015 * Accuracy:
1016 * TRIG(x) returns trig(x) nearly rounded
1017 */
1018 double cos(double x) {
1019 double y[2], z = 0.0;
1020 int32_t n, ix;
1021
1022 /* High word of x. */
1023 GET_HIGH_WORD(ix, x);
1024
1025 /* |x| ~< pi/4 */
1026 ix &= 0x7fffffff;
1027 if (ix <= 0x3fe921fb) {
1028 return __kernel_cos(x, z);
1029 } else if (ix >= 0x7ff00000) {
1030 /* cos(Inf or NaN) is NaN */
1031 return x - x;
1032 } else {
1033 /* argument reduction needed */
1034 n = __ieee754_rem_pio2(x, y);
1035 switch (n & 3) {
1036 case 0:
1037 return __kernel_cos(y[0], y[1]);
1038 case 1:
1039 return -__kernel_sin(y[0], y[1], 1);
1040 case 2:
1041 return -__kernel_cos(y[0], y[1]);
1042 default:
1043 return __kernel_sin(y[0], y[1], 1);
1044 }
1045 }
1046 }
1047
395 /* exp(x) 1048 /* exp(x)
396 * Returns the exponential of x. 1049 * Returns the exponential of x.
397 * 1050 *
398 * Method 1051 * Method
399 * 1. Argument reduction: 1052 * 1. Argument reduction:
400 * Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658. 1053 * Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658.
401 * Given x, find r and integer k such that 1054 * Given x, find r and integer k such that
402 * 1055 *
403 * x = k*ln2 + r, |r| <= 0.5*ln2. 1056 * x = k*ln2 + r, |r| <= 0.5*ln2.
404 * 1057 *
(...skipping 998 matching lines...) Expand 10 before | Expand all | Expand 10 after
1403 /* one step Newton iteration to 53 bits with error < 0.667 ulps */ 2056 /* one step Newton iteration to 53 bits with error < 0.667 ulps */
1404 s = t * t; /* t*t is exact */ 2057 s = t * t; /* t*t is exact */
1405 r = x / s; /* error <= 0.5 ulps; |r| < |t| */ 2058 r = x / s; /* error <= 0.5 ulps; |r| < |t| */
1406 w = t + t; /* t+t is exact */ 2059 w = t + t; /* t+t is exact */
1407 r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */ 2060 r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */
1408 t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */ 2061 t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */
1409 2062
1410 return (t); 2063 return (t);
1411 } 2064 }
1412 2065
2066 /* sin(x)
2067 * Return sine function of x.
2068 *
2069 * kernel function:
2070 * __kernel_sin ... sine function on [-pi/4,pi/4]
2071 * __kernel_cos ... cose function on [-pi/4,pi/4]
2072 * __ieee754_rem_pio2 ... argument reduction routine
2073 *
2074 * Method.
2075 * Let S,C and T denote the sin, cos and tan respectively on
2076 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
2077 * in [-pi/4 , +pi/4], and let n = k mod 4.
2078 * We have
2079 *
2080 * n sin(x) cos(x) tan(x)
2081 * ----------------------------------------------------------
2082 * 0 S C T
2083 * 1 C -S -1/T
2084 * 2 -S -C T
2085 * 3 -C S -1/T
2086 * ----------------------------------------------------------
2087 *
2088 * Special cases:
2089 * Let trig be any of sin, cos, or tan.
2090 * trig(+-INF) is NaN, with signals;
2091 * trig(NaN) is that NaN;
2092 *
2093 * Accuracy:
2094 * TRIG(x) returns trig(x) nearly rounded
2095 */
2096 double sin(double x) {
2097 double y[2], z = 0.0;
2098 int32_t n, ix;
2099
2100 /* High word of x. */
2101 GET_HIGH_WORD(ix, x);
2102
2103 /* |x| ~< pi/4 */
2104 ix &= 0x7fffffff;
2105 if (ix <= 0x3fe921fb) {
2106 return __kernel_sin(x, z, 0);
2107 } else if (ix >= 0x7ff00000) {
2108 /* sin(Inf or NaN) is NaN */
2109 return x - x;
2110 } else {
2111 /* argument reduction needed */
2112 n = __ieee754_rem_pio2(x, y);
2113 switch (n & 3) {
2114 case 0:
2115 return __kernel_sin(y[0], y[1], 1);
2116 case 1:
2117 return __kernel_cos(y[0], y[1]);
2118 case 2:
2119 return -__kernel_sin(y[0], y[1], 1);
2120 default:
2121 return -__kernel_cos(y[0], y[1]);
2122 }
2123 }
2124 }
2125
1413 } // namespace ieee754 2126 } // namespace ieee754
1414 } // namespace base 2127 } // namespace base
1415 } // namespace v8 2128 } // namespace v8
OLDNEW
« no previous file with comments | « src/base/ieee754.h ('k') | src/bootstrapper.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698