comparison src/share/vm/runtime/sharedRuntimeTrig.cpp @ 0:a61af66fc99e jdk7-b24

Initial load
author duke
date Sat, 01 Dec 2007 00:00:00 +0000
parents
children fb57d4cf76c2
comparison
equal deleted inserted replaced
-1:000000000000 0:a61af66fc99e
1 /*
2 * Copyright 2001-2005 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 *
23 */
24
25 #include "incls/_precompiled.incl"
26 #include "incls/_sharedRuntimeTrig.cpp.incl"
27
28 // This file contains copies of the fdlibm routines used by
29 // StrictMath. It turns out that it is almost always required to use
30 // these runtime routines; the Intel CPU doesn't meet the Java
31 // specification for sin/cos outside a certain limited argument range,
32 // and the SPARC CPU doesn't appear to have sin/cos instructions. It
33 // also turns out that avoiding the indirect call through function
34 // pointer out to libjava.so in SharedRuntime speeds these routines up
35 // by roughly 15% on both Win32/x86 and Solaris/SPARC.
36
37 // Enabling optimizations in this file causes incorrect code to be
38 // generated; can not figure out how to turn down optimization for one
39 // file in the IDE on Windows
40 #ifdef WIN32
41 # pragma optimize ( "", off )
42 #endif
43
44 #include <math.h>
45
46 // VM_LITTLE_ENDIAN is #defined appropriately in the Makefiles
47 // [jk] this is not 100% correct because the float word order may different
48 // from the byte order (e.g. on ARM)
49 #ifdef VM_LITTLE_ENDIAN
50 # define __HI(x) *(1+(int*)&x)
51 # define __LO(x) *(int*)&x
52 #else
53 # define __HI(x) *(int*)&x
54 # define __LO(x) *(1+(int*)&x)
55 #endif
56
57 static double copysignA(double x, double y) {
58 __HI(x) = (__HI(x)&0x7fffffff)|(__HI(y)&0x80000000);
59 return x;
60 }
61
62 /*
63 * scalbn (double x, int n)
64 * scalbn(x,n) returns x* 2**n computed by exponent
65 * manipulation rather than by actually performing an
66 * exponentiation or a multiplication.
67 */
68
69 static const double
70 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
71 twom54 = 5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */
72 hugeX = 1.0e+300,
73 tiny = 1.0e-300;
74
75 static double scalbnA (double x, int n) {
76 int k,hx,lx;
77 hx = __HI(x);
78 lx = __LO(x);
79 k = (hx&0x7ff00000)>>20; /* extract exponent */
80 if (k==0) { /* 0 or subnormal x */
81 if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */
82 x *= two54;
83 hx = __HI(x);
84 k = ((hx&0x7ff00000)>>20) - 54;
85 if (n< -50000) return tiny*x; /*underflow*/
86 }
87 if (k==0x7ff) return x+x; /* NaN or Inf */
88 k = k+n;
89 if (k > 0x7fe) return hugeX*copysignA(hugeX,x); /* overflow */
90 if (k > 0) /* normal result */
91 {__HI(x) = (hx&0x800fffff)|(k<<20); return x;}
92 if (k <= -54) {
93 if (n > 50000) /* in case integer overflow in n+k */
94 return hugeX*copysignA(hugeX,x); /*overflow*/
95 else return tiny*copysignA(tiny,x); /*underflow*/
96 }
97 k += 54; /* subnormal result */
98 __HI(x) = (hx&0x800fffff)|(k<<20);
99 return x*twom54;
100 }
101
102 /*
103 * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
104 * double x[],y[]; int e0,nx,prec; int ipio2[];
105 *
106 * __kernel_rem_pio2 return the last three digits of N with
107 * y = x - N*pi/2
108 * so that |y| < pi/2.
109 *
110 * The method is to compute the integer (mod 8) and fraction parts of
111 * (2/pi)*x without doing the full multiplication. In general we
112 * skip the part of the product that are known to be a huge integer (
113 * more accurately, = 0 mod 8 ). Thus the number of operations are
114 * independent of the exponent of the input.
115 *
116 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
117 *
118 * Input parameters:
119 * x[] The input value (must be positive) is broken into nx
120 * pieces of 24-bit integers in double precision format.
121 * x[i] will be the i-th 24 bit of x. The scaled exponent
122 * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
123 * match x's up to 24 bits.
124 *
125 * Example of breaking a double positive z into x[0]+x[1]+x[2]:
126 * e0 = ilogb(z)-23
127 * z = scalbn(z,-e0)
128 * for i = 0,1,2
129 * x[i] = floor(z)
130 * z = (z-x[i])*2**24
131 *
132 *
133 * y[] ouput result in an array of double precision numbers.
134 * The dimension of y[] is:
135 * 24-bit precision 1
136 * 53-bit precision 2
137 * 64-bit precision 2
138 * 113-bit precision 3
139 * The actual value is the sum of them. Thus for 113-bit
140 * precsion, one may have to do something like:
141 *
142 * long double t,w,r_head, r_tail;
143 * t = (long double)y[2] + (long double)y[1];
144 * w = (long double)y[0];
145 * r_head = t+w;
146 * r_tail = w - (r_head - t);
147 *
148 * e0 The exponent of x[0]
149 *
150 * nx dimension of x[]
151 *
152 * prec an interger indicating the precision:
153 * 0 24 bits (single)
154 * 1 53 bits (double)
155 * 2 64 bits (extended)
156 * 3 113 bits (quad)
157 *
158 * ipio2[]
159 * integer array, contains the (24*i)-th to (24*i+23)-th
160 * bit of 2/pi after binary point. The corresponding
161 * floating value is
162 *
163 * ipio2[i] * 2^(-24(i+1)).
164 *
165 * External function:
166 * double scalbn(), floor();
167 *
168 *
169 * Here is the description of some local variables:
170 *
171 * jk jk+1 is the initial number of terms of ipio2[] needed
172 * in the computation. The recommended value is 2,3,4,
173 * 6 for single, double, extended,and quad.
174 *
175 * jz local integer variable indicating the number of
176 * terms of ipio2[] used.
177 *
178 * jx nx - 1
179 *
180 * jv index for pointing to the suitable ipio2[] for the
181 * computation. In general, we want
182 * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
183 * is an integer. Thus
184 * e0-3-24*jv >= 0 or (e0-3)/24 >= jv
185 * Hence jv = max(0,(e0-3)/24).
186 *
187 * jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
188 *
189 * q[] double array with integral value, representing the
190 * 24-bits chunk of the product of x and 2/pi.
191 *
192 * q0 the corresponding exponent of q[0]. Note that the
193 * exponent for q[i] would be q0-24*i.
194 *
195 * PIo2[] double precision array, obtained by cutting pi/2
196 * into 24 bits chunks.
197 *
198 * f[] ipio2[] in floating point
199 *
200 * iq[] integer array by breaking up q[] in 24-bits chunk.
201 *
202 * fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
203 *
204 * ih integer. If >0 it indicats q[] is >= 0.5, hence
205 * it also indicates the *sign* of the result.
206 *
207 */
208
209
210 /*
211 * Constants:
212 * The hexadecimal values are the intended ones for the following
213 * constants. The decimal values may be used, provided that the
214 * compiler will convert from decimal to binary accurately enough
215 * to produce the hexadecimal values shown.
216 */
217
218
219 static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
220
221 static const double PIo2[] = {
222 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
223 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
224 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
225 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
226 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
227 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
228 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
229 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
230 };
231
232 static const double
233 zeroB = 0.0,
234 one = 1.0,
235 two24B = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
236 twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
237
238 static int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) {
239 int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
240 double z,fw,f[20],fq[20],q[20];
241
242 /* initialize jk*/
243 jk = init_jk[prec];
244 jp = jk;
245
246 /* determine jx,jv,q0, note that 3>q0 */
247 jx = nx-1;
248 jv = (e0-3)/24; if(jv<0) jv=0;
249 q0 = e0-24*(jv+1);
250
251 /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
252 j = jv-jx; m = jx+jk;
253 for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j];
254
255 /* compute q[0],q[1],...q[jk] */
256 for (i=0;i<=jk;i++) {
257 for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
258 }
259
260 jz = jk;
261 recompute:
262 /* distill q[] into iq[] reversingly */
263 for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
264 fw = (double)((int)(twon24* z));
265 iq[i] = (int)(z-two24B*fw);
266 z = q[j-1]+fw;
267 }
268
269 /* compute n */
270 z = scalbnA(z,q0); /* actual value of z */
271 z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */
272 n = (int) z;
273 z -= (double)n;
274 ih = 0;
275 if(q0>0) { /* need iq[jz-1] to determine n */
276 i = (iq[jz-1]>>(24-q0)); n += i;
277 iq[jz-1] -= i<<(24-q0);
278 ih = iq[jz-1]>>(23-q0);
279 }
280 else if(q0==0) ih = iq[jz-1]>>23;
281 else if(z>=0.5) ih=2;
282
283 if(ih>0) { /* q > 0.5 */
284 n += 1; carry = 0;
285 for(i=0;i<jz ;i++) { /* compute 1-q */
286 j = iq[i];
287 if(carry==0) {
288 if(j!=0) {
289 carry = 1; iq[i] = 0x1000000- j;
290 }
291 } else iq[i] = 0xffffff - j;
292 }
293 if(q0>0) { /* rare case: chance is 1 in 12 */
294 switch(q0) {
295 case 1:
296 iq[jz-1] &= 0x7fffff; break;
297 case 2:
298 iq[jz-1] &= 0x3fffff; break;
299 }
300 }
301 if(ih==2) {
302 z = one - z;
303 if(carry!=0) z -= scalbnA(one,q0);
304 }
305 }
306
307 /* check if recomputation is needed */
308 if(z==zeroB) {
309 j = 0;
310 for (i=jz-1;i>=jk;i--) j |= iq[i];
311 if(j==0) { /* need recomputation */
312 for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */
313
314 for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */
315 f[jx+i] = (double) ipio2[jv+i];
316 for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
317 q[i] = fw;
318 }
319 jz += k;
320 goto recompute;
321 }
322 }
323
324 /* chop off zero terms */
325 if(z==0.0) {
326 jz -= 1; q0 -= 24;
327 while(iq[jz]==0) { jz--; q0-=24;}
328 } else { /* break z into 24-bit if neccessary */
329 z = scalbnA(z,-q0);
330 if(z>=two24B) {
331 fw = (double)((int)(twon24*z));
332 iq[jz] = (int)(z-two24B*fw);
333 jz += 1; q0 += 24;
334 iq[jz] = (int) fw;
335 } else iq[jz] = (int) z ;
336 }
337
338 /* convert integer "bit" chunk to floating-point value */
339 fw = scalbnA(one,q0);
340 for(i=jz;i>=0;i--) {
341 q[i] = fw*(double)iq[i]; fw*=twon24;
342 }
343
344 /* compute PIo2[0,...,jp]*q[jz,...,0] */
345 for(i=jz;i>=0;i--) {
346 for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
347 fq[jz-i] = fw;
348 }
349
350 /* compress fq[] into y[] */
351 switch(prec) {
352 case 0:
353 fw = 0.0;
354 for (i=jz;i>=0;i--) fw += fq[i];
355 y[0] = (ih==0)? fw: -fw;
356 break;
357 case 1:
358 case 2:
359 fw = 0.0;
360 for (i=jz;i>=0;i--) fw += fq[i];
361 y[0] = (ih==0)? fw: -fw;
362 fw = fq[0]-fw;
363 for (i=1;i<=jz;i++) fw += fq[i];
364 y[1] = (ih==0)? fw: -fw;
365 break;
366 case 3: /* painful */
367 for (i=jz;i>0;i--) {
368 fw = fq[i-1]+fq[i];
369 fq[i] += fq[i-1]-fw;
370 fq[i-1] = fw;
371 }
372 for (i=jz;i>1;i--) {
373 fw = fq[i-1]+fq[i];
374 fq[i] += fq[i-1]-fw;
375 fq[i-1] = fw;
376 }
377 for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
378 if(ih==0) {
379 y[0] = fq[0]; y[1] = fq[1]; y[2] = fw;
380 } else {
381 y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
382 }
383 }
384 return n&7;
385 }
386
387
388 /*
389 * ====================================================
390 * Copyright 13 Dec 1993 Sun Microsystems, Inc. All Rights Reserved.
391 *
392 * Developed at SunPro, a Sun Microsystems, Inc. business.
393 * Permission to use, copy, modify, and distribute this
394 * software is freely granted, provided that this notice
395 * is preserved.
396 * ====================================================
397 *
398 */
399
400 /* __ieee754_rem_pio2(x,y)
401 *
402 * return the remainder of x rem pi/2 in y[0]+y[1]
403 * use __kernel_rem_pio2()
404 */
405
406 /*
407 * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
408 */
409 static const int two_over_pi[] = {
410 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
411 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
412 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
413 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
414 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
415 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
416 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
417 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
418 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
419 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
420 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
421 };
422
423 static const int npio2_hw[] = {
424 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
425 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
426 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
427 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
428 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
429 0x404858EB, 0x404921FB,
430 };
431
432 /*
433 * invpio2: 53 bits of 2/pi
434 * pio2_1: first 33 bit of pi/2
435 * pio2_1t: pi/2 - pio2_1
436 * pio2_2: second 33 bit of pi/2
437 * pio2_2t: pi/2 - (pio2_1+pio2_2)
438 * pio2_3: third 33 bit of pi/2
439 * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
440 */
441
442 static const double
443 zeroA = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
444 half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
445 two24A = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
446 invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
447 pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
448 pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
449 pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
450 pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
451 pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
452 pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
453
454 static int __ieee754_rem_pio2(double x, double *y) {
455 double z,w,t,r,fn;
456 double tx[3];
457 int e0,i,j,nx,n,ix,hx,i0;
458
459 i0 = ((*(int*)&two24A)>>30)^1; /* high word index */
460 hx = *(i0+(int*)&x); /* high word of x */
461 ix = hx&0x7fffffff;
462 if(ix<=0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */
463 {y[0] = x; y[1] = 0; return 0;}
464 if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */
465 if(hx>0) {
466 z = x - pio2_1;
467 if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
468 y[0] = z - pio2_1t;
469 y[1] = (z-y[0])-pio2_1t;
470 } else { /* near pi/2, use 33+33+53 bit pi */
471 z -= pio2_2;
472 y[0] = z - pio2_2t;
473 y[1] = (z-y[0])-pio2_2t;
474 }
475 return 1;
476 } else { /* negative x */
477 z = x + pio2_1;
478 if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
479 y[0] = z + pio2_1t;
480 y[1] = (z-y[0])+pio2_1t;
481 } else { /* near pi/2, use 33+33+53 bit pi */
482 z += pio2_2;
483 y[0] = z + pio2_2t;
484 y[1] = (z-y[0])+pio2_2t;
485 }
486 return -1;
487 }
488 }
489 if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
490 t = fabsd(x);
491 n = (int) (t*invpio2+half);
492 fn = (double)n;
493 r = t-fn*pio2_1;
494 w = fn*pio2_1t; /* 1st round good to 85 bit */
495 if(n<32&&ix!=npio2_hw[n-1]) {
496 y[0] = r-w; /* quick check no cancellation */
497 } else {
498 j = ix>>20;
499 y[0] = r-w;
500 i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
501 if(i>16) { /* 2nd iteration needed, good to 118 */
502 t = r;
503 w = fn*pio2_2;
504 r = t-w;
505 w = fn*pio2_2t-((t-r)-w);
506 y[0] = r-w;
507 i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
508 if(i>49) { /* 3rd iteration need, 151 bits acc */
509 t = r; /* will cover all possible cases */
510 w = fn*pio2_3;
511 r = t-w;
512 w = fn*pio2_3t-((t-r)-w);
513 y[0] = r-w;
514 }
515 }
516 }
517 y[1] = (r-y[0])-w;
518 if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
519 else return n;
520 }
521 /*
522 * all other (large) arguments
523 */
524 if(ix>=0x7ff00000) { /* x is inf or NaN */
525 y[0]=y[1]=x-x; return 0;
526 }
527 /* set z = scalbn(|x|,ilogb(x)-23) */
528 *(1-i0+(int*)&z) = *(1-i0+(int*)&x);
529 e0 = (ix>>20)-1046; /* e0 = ilogb(z)-23; */
530 *(i0+(int*)&z) = ix - (e0<<20);
531 for(i=0;i<2;i++) {
532 tx[i] = (double)((int)(z));
533 z = (z-tx[i])*two24A;
534 }
535 tx[2] = z;
536 nx = 3;
537 while(tx[nx-1]==zeroA) nx--; /* skip zero term */
538 n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);
539 if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
540 return n;
541 }
542
543
544 /* __kernel_sin( x, y, iy)
545 * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
546 * Input x is assumed to be bounded by ~pi/4 in magnitude.
547 * Input y is the tail of x.
548 * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
549 *
550 * Algorithm
551 * 1. Since sin(-x) = -sin(x), we need only to consider positive x.
552 * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
553 * 3. sin(x) is approximated by a polynomial of degree 13 on
554 * [0,pi/4]
555 * 3 13
556 * sin(x) ~ x + S1*x + ... + S6*x
557 * where
558 *
559 * |sin(x) 2 4 6 8 10 12 | -58
560 * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2
561 * | x |
562 *
563 * 4. sin(x+y) = sin(x) + sin'(x')*y
564 * ~ sin(x) + (1-x*x/2)*y
565 * For better accuracy, let
566 * 3 2 2 2 2
567 * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
568 * then 3 2
569 * sin(x) = x + (S1*x + (x *(r-y/2)+y))
570 */
571
572 static const double
573 S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
574 S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
575 S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
576 S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
577 S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
578 S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
579
580 static double __kernel_sin(double x, double y, int iy)
581 {
582 double z,r,v;
583 int ix;
584 ix = __HI(x)&0x7fffffff; /* high word of x */
585 if(ix<0x3e400000) /* |x| < 2**-27 */
586 {if((int)x==0) return x;} /* generate inexact */
587 z = x*x;
588 v = z*x;
589 r = S2+z*(S3+z*(S4+z*(S5+z*S6)));
590 if(iy==0) return x+v*(S1+z*r);
591 else return x-((z*(half*y-v*r)-y)-v*S1);
592 }
593
594 /*
595 * __kernel_cos( x, y )
596 * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
597 * Input x is assumed to be bounded by ~pi/4 in magnitude.
598 * Input y is the tail of x.
599 *
600 * Algorithm
601 * 1. Since cos(-x) = cos(x), we need only to consider positive x.
602 * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
603 * 3. cos(x) is approximated by a polynomial of degree 14 on
604 * [0,pi/4]
605 * 4 14
606 * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
607 * where the remez error is
608 *
609 * | 2 4 6 8 10 12 14 | -58
610 * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2
611 * | |
612 *
613 * 4 6 8 10 12 14
614 * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then
615 * cos(x) = 1 - x*x/2 + r
616 * since cos(x+y) ~ cos(x) - sin(x)*y
617 * ~ cos(x) - x*y,
618 * a correction term is necessary in cos(x) and hence
619 * cos(x+y) = 1 - (x*x/2 - (r - x*y))
620 * For better accuracy when x > 0.3, let qx = |x|/4 with
621 * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
622 * Then
623 * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
624 * Note that 1-qx and (x*x/2-qx) is EXACT here, and the
625 * magnitude of the latter is at least a quarter of x*x/2,
626 * thus, reducing the rounding error in the subtraction.
627 */
628
629 static const double
630 C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
631 C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
632 C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
633 C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
634 C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
635 C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
636
637 static double __kernel_cos(double x, double y)
638 {
639 double a,hz,z,r,qx;
640 int ix;
641 ix = __HI(x)&0x7fffffff; /* ix = |x|'s high word*/
642 if(ix<0x3e400000) { /* if x < 2**27 */
643 if(((int)x)==0) return one; /* generate inexact */
644 }
645 z = x*x;
646 r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));
647 if(ix < 0x3FD33333) /* if |x| < 0.3 */
648 return one - (0.5*z - (z*r - x*y));
649 else {
650 if(ix > 0x3fe90000) { /* x > 0.78125 */
651 qx = 0.28125;
652 } else {
653 __HI(qx) = ix-0x00200000; /* x/4 */
654 __LO(qx) = 0;
655 }
656 hz = 0.5*z-qx;
657 a = one-qx;
658 return a - (hz - (z*r-x*y));
659 }
660 }
661
662 /* __kernel_tan( x, y, k )
663 * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
664 * Input x is assumed to be bounded by ~pi/4 in magnitude.
665 * Input y is the tail of x.
666 * Input k indicates whether tan (if k=1) or
667 * -1/tan (if k= -1) is returned.
668 *
669 * Algorithm
670 * 1. Since tan(-x) = -tan(x), we need only to consider positive x.
671 * 2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
672 * 3. tan(x) is approximated by a odd polynomial of degree 27 on
673 * [0,0.67434]
674 * 3 27
675 * tan(x) ~ x + T1*x + ... + T13*x
676 * where
677 *
678 * |tan(x) 2 4 26 | -59.2
679 * |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2
680 * | x |
681 *
682 * Note: tan(x+y) = tan(x) + tan'(x)*y
683 * ~ tan(x) + (1+x*x)*y
684 * Therefore, for better accuracy in computing tan(x+y), let
685 * 3 2 2 2 2
686 * r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
687 * then
688 * 3 2
689 * tan(x+y) = x + (T1*x + (x *(r+y)+y))
690 *
691 * 4. For x in [0.67434,pi/4], let y = pi/4 - x, then
692 * tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
693 * = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
694 */
695
696 static const double
697 pio4 = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
698 pio4lo= 3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */
699 T[] = {
700 3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */
701 1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */
702 5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */
703 2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */
704 8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */
705 3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */
706 1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */
707 5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */
708 2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */
709 7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */
710 7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */
711 -1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */
712 2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */
713 };
714
715 static double __kernel_tan(double x, double y, int iy)
716 {
717 double z,r,v,w,s;
718 int ix,hx;
719 hx = __HI(x); /* high word of x */
720 ix = hx&0x7fffffff; /* high word of |x| */
721 if(ix<0x3e300000) { /* x < 2**-28 */
722 if((int)x==0) { /* generate inexact */
723 if (((ix | __LO(x)) | (iy + 1)) == 0)
724 return one / fabsd(x);
725 else {
726 if (iy == 1)
727 return x;
728 else { /* compute -1 / (x+y) carefully */
729 double a, t;
730
731 z = w = x + y;
732 __LO(z) = 0;
733 v = y - (z - x);
734 t = a = -one / w;
735 __LO(t) = 0;
736 s = one + t * z;
737 return t + a * (s + t * v);
738 }
739 }
740 }
741 }
742 if(ix>=0x3FE59428) { /* |x|>=0.6744 */
743 if(hx<0) {x = -x; y = -y;}
744 z = pio4-x;
745 w = pio4lo-y;
746 x = z+w; y = 0.0;
747 }
748 z = x*x;
749 w = z*z;
750 /* Break x^5*(T[1]+x^2*T[2]+...) into
751 * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
752 * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
753 */
754 r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));
755 v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));
756 s = z*x;
757 r = y + z*(s*(r+v)+y);
758 r += T[0]*s;
759 w = x+r;
760 if(ix>=0x3FE59428) {
761 v = (double)iy;
762 return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r)));
763 }
764 if(iy==1) return w;
765 else { /* if allow error up to 2 ulp,
766 simply return -1.0/(x+r) here */
767 /* compute -1.0/(x+r) accurately */
768 double a,t;
769 z = w;
770 __LO(z) = 0;
771 v = r-(z - x); /* z+v = r+x */
772 t = a = -1.0/w; /* a = -1.0/w */
773 __LO(t) = 0;
774 s = 1.0+t*z;
775 return t+a*(s+t*v);
776 }
777 }
778
779
780 //----------------------------------------------------------------------
781 //
782 // Routines for new sin/cos implementation
783 //
784 //----------------------------------------------------------------------
785
786 /* sin(x)
787 * Return sine function of x.
788 *
789 * kernel function:
790 * __kernel_sin ... sine function on [-pi/4,pi/4]
791 * __kernel_cos ... cose function on [-pi/4,pi/4]
792 * __ieee754_rem_pio2 ... argument reduction routine
793 *
794 * Method.
795 * Let S,C and T denote the sin, cos and tan respectively on
796 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
797 * in [-pi/4 , +pi/4], and let n = k mod 4.
798 * We have
799 *
800 * n sin(x) cos(x) tan(x)
801 * ----------------------------------------------------------
802 * 0 S C T
803 * 1 C -S -1/T
804 * 2 -S -C T
805 * 3 -C S -1/T
806 * ----------------------------------------------------------
807 *
808 * Special cases:
809 * Let trig be any of sin, cos, or tan.
810 * trig(+-INF) is NaN, with signals;
811 * trig(NaN) is that NaN;
812 *
813 * Accuracy:
814 * TRIG(x) returns trig(x) nearly rounded
815 */
816
817 JRT_LEAF(jdouble, SharedRuntime::dsin(jdouble x))
818 double y[2],z=0.0;
819 int n, ix;
820
821 /* High word of x. */
822 ix = __HI(x);
823
824 /* |x| ~< pi/4 */
825 ix &= 0x7fffffff;
826 if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0);
827
828 /* sin(Inf or NaN) is NaN */
829 else if (ix>=0x7ff00000) return x-x;
830
831 /* argument reduction needed */
832 else {
833 n = __ieee754_rem_pio2(x,y);
834 switch(n&3) {
835 case 0: return __kernel_sin(y[0],y[1],1);
836 case 1: return __kernel_cos(y[0],y[1]);
837 case 2: return -__kernel_sin(y[0],y[1],1);
838 default:
839 return -__kernel_cos(y[0],y[1]);
840 }
841 }
842 JRT_END
843
844 /* cos(x)
845 * Return cosine function of x.
846 *
847 * kernel function:
848 * __kernel_sin ... sine function on [-pi/4,pi/4]
849 * __kernel_cos ... cosine function on [-pi/4,pi/4]
850 * __ieee754_rem_pio2 ... argument reduction routine
851 *
852 * Method.
853 * Let S,C and T denote the sin, cos and tan respectively on
854 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
855 * in [-pi/4 , +pi/4], and let n = k mod 4.
856 * We have
857 *
858 * n sin(x) cos(x) tan(x)
859 * ----------------------------------------------------------
860 * 0 S C T
861 * 1 C -S -1/T
862 * 2 -S -C T
863 * 3 -C S -1/T
864 * ----------------------------------------------------------
865 *
866 * Special cases:
867 * Let trig be any of sin, cos, or tan.
868 * trig(+-INF) is NaN, with signals;
869 * trig(NaN) is that NaN;
870 *
871 * Accuracy:
872 * TRIG(x) returns trig(x) nearly rounded
873 */
874
875 JRT_LEAF(jdouble, SharedRuntime::dcos(jdouble x))
876 double y[2],z=0.0;
877 int n, ix;
878
879 /* High word of x. */
880 ix = __HI(x);
881
882 /* |x| ~< pi/4 */
883 ix &= 0x7fffffff;
884 if(ix <= 0x3fe921fb) return __kernel_cos(x,z);
885
886 /* cos(Inf or NaN) is NaN */
887 else if (ix>=0x7ff00000) return x-x;
888
889 /* argument reduction needed */
890 else {
891 n = __ieee754_rem_pio2(x,y);
892 switch(n&3) {
893 case 0: return __kernel_cos(y[0],y[1]);
894 case 1: return -__kernel_sin(y[0],y[1],1);
895 case 2: return -__kernel_cos(y[0],y[1]);
896 default:
897 return __kernel_sin(y[0],y[1],1);
898 }
899 }
900 JRT_END
901
902 /* tan(x)
903 * Return tangent function of x.
904 *
905 * kernel function:
906 * __kernel_tan ... tangent function on [-pi/4,pi/4]
907 * __ieee754_rem_pio2 ... argument reduction routine
908 *
909 * Method.
910 * Let S,C and T denote the sin, cos and tan respectively on
911 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
912 * in [-pi/4 , +pi/4], and let n = k mod 4.
913 * We have
914 *
915 * n sin(x) cos(x) tan(x)
916 * ----------------------------------------------------------
917 * 0 S C T
918 * 1 C -S -1/T
919 * 2 -S -C T
920 * 3 -C S -1/T
921 * ----------------------------------------------------------
922 *
923 * Special cases:
924 * Let trig be any of sin, cos, or tan.
925 * trig(+-INF) is NaN, with signals;
926 * trig(NaN) is that NaN;
927 *
928 * Accuracy:
929 * TRIG(x) returns trig(x) nearly rounded
930 */
931
932 JRT_LEAF(jdouble, SharedRuntime::dtan(jdouble x))
933 double y[2],z=0.0;
934 int n, ix;
935
936 /* High word of x. */
937 ix = __HI(x);
938
939 /* |x| ~< pi/4 */
940 ix &= 0x7fffffff;
941 if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1);
942
943 /* tan(Inf or NaN) is NaN */
944 else if (ix>=0x7ff00000) return x-x; /* NaN */
945
946 /* argument reduction needed */
947 else {
948 n = __ieee754_rem_pio2(x,y);
949 return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /* 1 -- n even
950 -1 -- n odd */
951 }
952 JRT_END
953
954
955 #ifdef WIN32
956 # pragma optimize ( "", on )
957 #endif