Mercurial > hg > graal-compiler
annotate src/share/vm/runtime/sharedRuntimeTrans.cpp @ 2607:008adfd6d850
Fixed the stateBefore of invokes and monitorenter instructions to include the arguments of the instruction.
This is necessary to ensure correct continuation in the interpreter when the stateBefore is used as a deoptimization point.
author | Thomas Wuerthinger <thomas@wuerthinger.net> |
---|---|
date | Fri, 06 May 2011 17:47:17 +0200 |
parents | f95d63e2154a |
children | 63a4eb8bcd23 bdd155477289 |
rev | line source |
---|---|
0 | 1 /* |
1972 | 2 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
0
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
0
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
0
diff
changeset
|
21 * questions. |
0 | 22 * |
23 */ | |
24 | |
1972 | 25 #include "precompiled.hpp" |
26 #include "prims/jni.h" | |
27 #include "runtime/interfaceSupport.hpp" | |
28 #include "runtime/sharedRuntime.hpp" | |
0 | 29 |
30 // This file contains copies of the fdlibm routines used by | |
31 // StrictMath. It turns out that it is almost always required to use | |
32 // these runtime routines; the Intel CPU doesn't meet the Java | |
33 // specification for sin/cos outside a certain limited argument range, | |
34 // and the SPARC CPU doesn't appear to have sin/cos instructions. It | |
35 // also turns out that avoiding the indirect call through function | |
36 // pointer out to libjava.so in SharedRuntime speeds these routines up | |
37 // by roughly 15% on both Win32/x86 and Solaris/SPARC. | |
38 | |
39 // Enabling optimizations in this file causes incorrect code to be | |
40 // generated; can not figure out how to turn down optimization for one | |
41 // file in the IDE on Windows | |
42 #ifdef WIN32 | |
43 # pragma optimize ( "", off ) | |
44 #endif | |
45 | |
46 #include <math.h> | |
47 | |
48 // VM_LITTLE_ENDIAN is #defined appropriately in the Makefiles | |
49 // [jk] this is not 100% correct because the float word order may different | |
50 // from the byte order (e.g. on ARM) | |
51 #ifdef VM_LITTLE_ENDIAN | |
52 # define __HI(x) *(1+(int*)&x) | |
53 # define __LO(x) *(int*)&x | |
54 #else | |
55 # define __HI(x) *(int*)&x | |
56 # define __LO(x) *(1+(int*)&x) | |
57 #endif | |
58 | |
59 double copysign(double x, double y) { | |
60 __HI(x) = (__HI(x)&0x7fffffff)|(__HI(y)&0x80000000); | |
61 return x; | |
62 } | |
63 | |
64 /* | |
65 * ==================================================== | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
0
diff
changeset
|
66 * Copyright (c) 1998 Oracle and/or its affiliates. All rights reserved. |
0 | 67 * |
68 * Developed at SunSoft, a Sun Microsystems, Inc. business. | |
69 * Permission to use, copy, modify, and distribute this | |
70 * software is freely granted, provided that this notice | |
71 * is preserved. | |
72 * ==================================================== | |
73 */ | |
74 | |
75 /* | |
76 * scalbn (double x, int n) | |
77 * scalbn(x,n) returns x* 2**n computed by exponent | |
78 * manipulation rather than by actually performing an | |
79 * exponentiation or a multiplication. | |
80 */ | |
81 | |
82 static const double | |
83 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ | |
84 twom54 = 5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */ | |
85 hugeX = 1.0e+300, | |
86 tiny = 1.0e-300; | |
87 | |
88 double scalbn (double x, int n) { | |
89 int k,hx,lx; | |
90 hx = __HI(x); | |
91 lx = __LO(x); | |
92 k = (hx&0x7ff00000)>>20; /* extract exponent */ | |
93 if (k==0) { /* 0 or subnormal x */ | |
94 if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */ | |
95 x *= two54; | |
96 hx = __HI(x); | |
97 k = ((hx&0x7ff00000)>>20) - 54; | |
98 if (n< -50000) return tiny*x; /*underflow*/ | |
99 } | |
100 if (k==0x7ff) return x+x; /* NaN or Inf */ | |
101 k = k+n; | |
102 if (k > 0x7fe) return hugeX*copysign(hugeX,x); /* overflow */ | |
103 if (k > 0) /* normal result */ | |
104 {__HI(x) = (hx&0x800fffff)|(k<<20); return x;} | |
105 if (k <= -54) { | |
106 if (n > 50000) /* in case integer overflow in n+k */ | |
107 return hugeX*copysign(hugeX,x); /*overflow*/ | |
108 else return tiny*copysign(tiny,x); /*underflow*/ | |
109 } | |
110 k += 54; /* subnormal result */ | |
111 __HI(x) = (hx&0x800fffff)|(k<<20); | |
112 return x*twom54; | |
113 } | |
114 | |
115 /* __ieee754_log(x) | |
116 * Return the logrithm of x | |
117 * | |
118 * Method : | |
119 * 1. Argument Reduction: find k and f such that | |
120 * x = 2^k * (1+f), | |
121 * where sqrt(2)/2 < 1+f < sqrt(2) . | |
122 * | |
123 * 2. Approximation of log(1+f). | |
124 * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) | |
125 * = 2s + 2/3 s**3 + 2/5 s**5 + ....., | |
126 * = 2s + s*R | |
127 * We use a special Reme algorithm on [0,0.1716] to generate | |
128 * a polynomial of degree 14 to approximate R The maximum error | |
129 * of this polynomial approximation is bounded by 2**-58.45. In | |
130 * other words, | |
131 * 2 4 6 8 10 12 14 | |
132 * R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s | |
133 * (the values of Lg1 to Lg7 are listed in the program) | |
134 * and | |
135 * | 2 14 | -58.45 | |
136 * | Lg1*s +...+Lg7*s - R(z) | <= 2 | |
137 * | | | |
138 * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. | |
139 * In order to guarantee error in log below 1ulp, we compute log | |
140 * by | |
141 * log(1+f) = f - s*(f - R) (if f is not too large) | |
142 * log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy) | |
143 * | |
144 * 3. Finally, log(x) = k*ln2 + log(1+f). | |
145 * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo))) | |
146 * Here ln2 is split into two floating point number: | |
147 * ln2_hi + ln2_lo, | |
148 * where n*ln2_hi is always exact for |n| < 2000. | |
149 * | |
150 * Special cases: | |
151 * log(x) is NaN with signal if x < 0 (including -INF) ; | |
152 * log(+INF) is +INF; log(0) is -INF with signal; | |
153 * log(NaN) is that NaN with no signal. | |
154 * | |
155 * Accuracy: | |
156 * according to an error analysis, the error is always less than | |
157 * 1 ulp (unit in the last place). | |
158 * | |
159 * Constants: | |
160 * The hexadecimal values are the intended ones for the following | |
161 * constants. The decimal values may be used, provided that the | |
162 * compiler will convert from decimal to binary accurately enough | |
163 * to produce the hexadecimal values shown. | |
164 */ | |
165 | |
166 static const double | |
167 ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */ | |
168 ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */ | |
169 Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */ | |
170 Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ | |
171 Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */ | |
172 Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */ | |
173 Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */ | |
174 Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ | |
175 Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ | |
176 | |
177 static double zero = 0.0; | |
178 | |
179 static double __ieee754_log(double x) { | |
180 double hfsq,f,s,z,R,w,t1,t2,dk; | |
181 int k,hx,i,j; | |
182 unsigned lx; | |
183 | |
184 hx = __HI(x); /* high word of x */ | |
185 lx = __LO(x); /* low word of x */ | |
186 | |
187 k=0; | |
188 if (hx < 0x00100000) { /* x < 2**-1022 */ | |
189 if (((hx&0x7fffffff)|lx)==0) | |
190 return -two54/zero; /* log(+-0)=-inf */ | |
191 if (hx<0) return (x-x)/zero; /* log(-#) = NaN */ | |
192 k -= 54; x *= two54; /* subnormal number, scale up x */ | |
193 hx = __HI(x); /* high word of x */ | |
194 } | |
195 if (hx >= 0x7ff00000) return x+x; | |
196 k += (hx>>20)-1023; | |
197 hx &= 0x000fffff; | |
198 i = (hx+0x95f64)&0x100000; | |
199 __HI(x) = hx|(i^0x3ff00000); /* normalize x or x/2 */ | |
200 k += (i>>20); | |
201 f = x-1.0; | |
202 if((0x000fffff&(2+hx))<3) { /* |f| < 2**-20 */ | |
203 if(f==zero) { | |
204 if (k==0) return zero; | |
205 else {dk=(double)k; return dk*ln2_hi+dk*ln2_lo;} | |
206 } | |
207 R = f*f*(0.5-0.33333333333333333*f); | |
208 if(k==0) return f-R; else {dk=(double)k; | |
209 return dk*ln2_hi-((R-dk*ln2_lo)-f);} | |
210 } | |
211 s = f/(2.0+f); | |
212 dk = (double)k; | |
213 z = s*s; | |
214 i = hx-0x6147a; | |
215 w = z*z; | |
216 j = 0x6b851-hx; | |
217 t1= w*(Lg2+w*(Lg4+w*Lg6)); | |
218 t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); | |
219 i |= j; | |
220 R = t2+t1; | |
221 if(i>0) { | |
222 hfsq=0.5*f*f; | |
223 if(k==0) return f-(hfsq-s*(hfsq+R)); else | |
224 return dk*ln2_hi-((hfsq-(s*(hfsq+R)+dk*ln2_lo))-f); | |
225 } else { | |
226 if(k==0) return f-s*(f-R); else | |
227 return dk*ln2_hi-((s*(f-R)-dk*ln2_lo)-f); | |
228 } | |
229 } | |
230 | |
231 JRT_LEAF(jdouble, SharedRuntime::dlog(jdouble x)) | |
232 return __ieee754_log(x); | |
233 JRT_END | |
234 | |
235 /* __ieee754_log10(x) | |
236 * Return the base 10 logarithm of x | |
237 * | |
238 * Method : | |
239 * Let log10_2hi = leading 40 bits of log10(2) and | |
240 * log10_2lo = log10(2) - log10_2hi, | |
241 * ivln10 = 1/log(10) rounded. | |
242 * Then | |
243 * n = ilogb(x), | |
244 * if(n<0) n = n+1; | |
245 * x = scalbn(x,-n); | |
246 * log10(x) := n*log10_2hi + (n*log10_2lo + ivln10*log(x)) | |
247 * | |
248 * Note 1: | |
249 * To guarantee log10(10**n)=n, where 10**n is normal, the rounding | |
250 * mode must set to Round-to-Nearest. | |
251 * Note 2: | |
252 * [1/log(10)] rounded to 53 bits has error .198 ulps; | |
253 * log10 is monotonic at all binary break points. | |
254 * | |
255 * Special cases: | |
256 * log10(x) is NaN with signal if x < 0; | |
257 * log10(+INF) is +INF with no signal; log10(0) is -INF with signal; | |
258 * log10(NaN) is that NaN with no signal; | |
259 * log10(10**N) = N for N=0,1,...,22. | |
260 * | |
261 * Constants: | |
262 * The hexadecimal values are the intended ones for the following constants. | |
263 * The decimal values may be used, provided that the compiler will convert | |
264 * from decimal to binary accurately enough to produce the hexadecimal values | |
265 * shown. | |
266 */ | |
267 | |
268 static const double | |
269 ivln10 = 4.34294481903251816668e-01, /* 0x3FDBCB7B, 0x1526E50E */ | |
270 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */ | |
271 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */ | |
272 | |
273 static double __ieee754_log10(double x) { | |
274 double y,z; | |
275 int i,k,hx; | |
276 unsigned lx; | |
277 | |
278 hx = __HI(x); /* high word of x */ | |
279 lx = __LO(x); /* low word of x */ | |
280 | |
281 k=0; | |
282 if (hx < 0x00100000) { /* x < 2**-1022 */ | |
283 if (((hx&0x7fffffff)|lx)==0) | |
284 return -two54/zero; /* log(+-0)=-inf */ | |
285 if (hx<0) return (x-x)/zero; /* log(-#) = NaN */ | |
286 k -= 54; x *= two54; /* subnormal number, scale up x */ | |
287 hx = __HI(x); /* high word of x */ | |
288 } | |
289 if (hx >= 0x7ff00000) return x+x; | |
290 k += (hx>>20)-1023; | |
291 i = ((unsigned)k&0x80000000)>>31; | |
292 hx = (hx&0x000fffff)|((0x3ff-i)<<20); | |
293 y = (double)(k+i); | |
294 __HI(x) = hx; | |
295 z = y*log10_2lo + ivln10*__ieee754_log(x); | |
296 return z+y*log10_2hi; | |
297 } | |
298 | |
299 JRT_LEAF(jdouble, SharedRuntime::dlog10(jdouble x)) | |
300 return __ieee754_log10(x); | |
301 JRT_END | |
302 | |
303 | |
304 /* __ieee754_exp(x) | |
305 * Returns the exponential of x. | |
306 * | |
307 * Method | |
308 * 1. Argument reduction: | |
309 * Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658. | |
310 * Given x, find r and integer k such that | |
311 * | |
312 * x = k*ln2 + r, |r| <= 0.5*ln2. | |
313 * | |
314 * Here r will be represented as r = hi-lo for better | |
315 * accuracy. | |
316 * | |
317 * 2. Approximation of exp(r) by a special rational function on | |
318 * the interval [0,0.34658]: | |
319 * Write | |
320 * R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ... | |
321 * We use a special Reme algorithm on [0,0.34658] to generate | |
322 * a polynomial of degree 5 to approximate R. The maximum error | |
323 * of this polynomial approximation is bounded by 2**-59. In | |
324 * other words, | |
325 * R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5 | |
326 * (where z=r*r, and the values of P1 to P5 are listed below) | |
327 * and | |
328 * | 5 | -59 | |
329 * | 2.0+P1*z+...+P5*z - R(z) | <= 2 | |
330 * | | | |
331 * The computation of exp(r) thus becomes | |
332 * 2*r | |
333 * exp(r) = 1 + ------- | |
334 * R - r | |
335 * r*R1(r) | |
336 * = 1 + r + ----------- (for better accuracy) | |
337 * 2 - R1(r) | |
338 * where | |
339 * 2 4 10 | |
340 * R1(r) = r - (P1*r + P2*r + ... + P5*r ). | |
341 * | |
342 * 3. Scale back to obtain exp(x): | |
343 * From step 1, we have | |
344 * exp(x) = 2^k * exp(r) | |
345 * | |
346 * Special cases: | |
347 * exp(INF) is INF, exp(NaN) is NaN; | |
348 * exp(-INF) is 0, and | |
349 * for finite argument, only exp(0)=1 is exact. | |
350 * | |
351 * Accuracy: | |
352 * according to an error analysis, the error is always less than | |
353 * 1 ulp (unit in the last place). | |
354 * | |
355 * Misc. info. | |
356 * For IEEE double | |
357 * if x > 7.09782712893383973096e+02 then exp(x) overflow | |
358 * if x < -7.45133219101941108420e+02 then exp(x) underflow | |
359 * | |
360 * Constants: | |
361 * The hexadecimal values are the intended ones for the following | |
362 * constants. The decimal values may be used, provided that the | |
363 * compiler will convert from decimal to binary accurately enough | |
364 * to produce the hexadecimal values shown. | |
365 */ | |
366 | |
367 static const double | |
368 one = 1.0, | |
369 halF[2] = {0.5,-0.5,}, | |
370 twom1000= 9.33263618503218878990e-302, /* 2**-1000=0x01700000,0*/ | |
371 o_threshold= 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */ | |
372 u_threshold= -7.45133219101941108420e+02, /* 0xc0874910, 0xD52D3051 */ | |
373 ln2HI[2] ={ 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */ | |
374 -6.93147180369123816490e-01,},/* 0xbfe62e42, 0xfee00000 */ | |
375 ln2LO[2] ={ 1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */ | |
376 -1.90821492927058770002e-10,},/* 0xbdea39ef, 0x35793c76 */ | |
377 invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */ | |
378 P1 = 1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */ | |
379 P2 = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */ | |
380 P3 = 6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */ | |
381 P4 = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */ | |
382 P5 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */ | |
383 | |
384 static double __ieee754_exp(double x) { | |
385 double y,hi=0,lo=0,c,t; | |
386 int k=0,xsb; | |
387 unsigned hx; | |
388 | |
389 hx = __HI(x); /* high word of x */ | |
390 xsb = (hx>>31)&1; /* sign bit of x */ | |
391 hx &= 0x7fffffff; /* high word of |x| */ | |
392 | |
393 /* filter out non-finite argument */ | |
394 if(hx >= 0x40862E42) { /* if |x|>=709.78... */ | |
395 if(hx>=0x7ff00000) { | |
396 if(((hx&0xfffff)|__LO(x))!=0) | |
397 return x+x; /* NaN */ | |
398 else return (xsb==0)? x:0.0; /* exp(+-inf)={inf,0} */ | |
399 } | |
400 if(x > o_threshold) return hugeX*hugeX; /* overflow */ | |
401 if(x < u_threshold) return twom1000*twom1000; /* underflow */ | |
402 } | |
403 | |
404 /* argument reduction */ | |
405 if(hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */ | |
406 if(hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */ | |
407 hi = x-ln2HI[xsb]; lo=ln2LO[xsb]; k = 1-xsb-xsb; | |
408 } else { | |
409 k = (int)(invln2*x+halF[xsb]); | |
410 t = k; | |
411 hi = x - t*ln2HI[0]; /* t*ln2HI is exact here */ | |
412 lo = t*ln2LO[0]; | |
413 } | |
414 x = hi - lo; | |
415 } | |
416 else if(hx < 0x3e300000) { /* when |x|<2**-28 */ | |
417 if(hugeX+x>one) return one+x;/* trigger inexact */ | |
418 } | |
419 else k = 0; | |
420 | |
421 /* x is now in primary range */ | |
422 t = x*x; | |
423 c = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5)))); | |
424 if(k==0) return one-((x*c)/(c-2.0)-x); | |
425 else y = one-((lo-(x*c)/(2.0-c))-hi); | |
426 if(k >= -1021) { | |
427 __HI(y) += (k<<20); /* add k to y's exponent */ | |
428 return y; | |
429 } else { | |
430 __HI(y) += ((k+1000)<<20);/* add k to y's exponent */ | |
431 return y*twom1000; | |
432 } | |
433 } | |
434 | |
435 JRT_LEAF(jdouble, SharedRuntime::dexp(jdouble x)) | |
436 return __ieee754_exp(x); | |
437 JRT_END | |
438 | |
439 /* __ieee754_pow(x,y) return x**y | |
440 * | |
441 * n | |
442 * Method: Let x = 2 * (1+f) | |
443 * 1. Compute and return log2(x) in two pieces: | |
444 * log2(x) = w1 + w2, | |
445 * where w1 has 53-24 = 29 bit trailing zeros. | |
446 * 2. Perform y*log2(x) = n+y' by simulating muti-precision | |
447 * arithmetic, where |y'|<=0.5. | |
448 * 3. Return x**y = 2**n*exp(y'*log2) | |
449 * | |
450 * Special cases: | |
451 * 1. (anything) ** 0 is 1 | |
452 * 2. (anything) ** 1 is itself | |
453 * 3. (anything) ** NAN is NAN | |
454 * 4. NAN ** (anything except 0) is NAN | |
455 * 5. +-(|x| > 1) ** +INF is +INF | |
456 * 6. +-(|x| > 1) ** -INF is +0 | |
457 * 7. +-(|x| < 1) ** +INF is +0 | |
458 * 8. +-(|x| < 1) ** -INF is +INF | |
459 * 9. +-1 ** +-INF is NAN | |
460 * 10. +0 ** (+anything except 0, NAN) is +0 | |
461 * 11. -0 ** (+anything except 0, NAN, odd integer) is +0 | |
462 * 12. +0 ** (-anything except 0, NAN) is +INF | |
463 * 13. -0 ** (-anything except 0, NAN, odd integer) is +INF | |
464 * 14. -0 ** (odd integer) = -( +0 ** (odd integer) ) | |
465 * 15. +INF ** (+anything except 0,NAN) is +INF | |
466 * 16. +INF ** (-anything except 0,NAN) is +0 | |
467 * 17. -INF ** (anything) = -0 ** (-anything) | |
468 * 18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer) | |
469 * 19. (-anything except 0 and inf) ** (non-integer) is NAN | |
470 * | |
471 * Accuracy: | |
472 * pow(x,y) returns x**y nearly rounded. In particular | |
473 * pow(integer,integer) | |
474 * always returns the correct integer provided it is | |
475 * representable. | |
476 * | |
477 * Constants : | |
478 * The hexadecimal values are the intended ones for the following | |
479 * constants. The decimal values may be used, provided that the | |
480 * compiler will convert from decimal to binary accurately enough | |
481 * to produce the hexadecimal values shown. | |
482 */ | |
483 | |
484 static const double | |
485 bp[] = {1.0, 1.5,}, | |
486 dp_h[] = { 0.0, 5.84962487220764160156e-01,}, /* 0x3FE2B803, 0x40000000 */ | |
487 dp_l[] = { 0.0, 1.35003920212974897128e-08,}, /* 0x3E4CFDEB, 0x43CFD006 */ | |
488 zeroX = 0.0, | |
489 two = 2.0, | |
490 two53 = 9007199254740992.0, /* 0x43400000, 0x00000000 */ | |
491 /* poly coefs for (3/2)*(log(x)-2s-2/3*s**3 */ | |
492 L1X = 5.99999999999994648725e-01, /* 0x3FE33333, 0x33333303 */ | |
493 L2X = 4.28571428578550184252e-01, /* 0x3FDB6DB6, 0xDB6FABFF */ | |
494 L3X = 3.33333329818377432918e-01, /* 0x3FD55555, 0x518F264D */ | |
495 L4X = 2.72728123808534006489e-01, /* 0x3FD17460, 0xA91D4101 */ | |
496 L5X = 2.30660745775561754067e-01, /* 0x3FCD864A, 0x93C9DB65 */ | |
497 L6X = 2.06975017800338417784e-01, /* 0x3FCA7E28, 0x4A454EEF */ | |
498 lg2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */ | |
499 lg2_h = 6.93147182464599609375e-01, /* 0x3FE62E43, 0x00000000 */ | |
500 lg2_l = -1.90465429995776804525e-09, /* 0xBE205C61, 0x0CA86C39 */ | |
501 ovt = 8.0085662595372944372e-0017, /* -(1024-log2(ovfl+.5ulp)) */ | |
502 cp = 9.61796693925975554329e-01, /* 0x3FEEC709, 0xDC3A03FD =2/(3ln2) */ | |
503 cp_h = 9.61796700954437255859e-01, /* 0x3FEEC709, 0xE0000000 =(float)cp */ | |
504 cp_l = -7.02846165095275826516e-09, /* 0xBE3E2FE0, 0x145B01F5 =tail of cp_h*/ | |
505 ivln2 = 1.44269504088896338700e+00, /* 0x3FF71547, 0x652B82FE =1/ln2 */ | |
506 ivln2_h = 1.44269502162933349609e+00, /* 0x3FF71547, 0x60000000 =24b 1/ln2*/ | |
507 ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/ | |
508 | |
509 double __ieee754_pow(double x, double y) { | |
510 double z,ax,z_h,z_l,p_h,p_l; | |
511 double y1,t1,t2,r,s,t,u,v,w; | |
512 int i0,i1,i,j,k,yisint,n; | |
513 int hx,hy,ix,iy; | |
514 unsigned lx,ly; | |
515 | |
516 i0 = ((*(int*)&one)>>29)^1; i1=1-i0; | |
517 hx = __HI(x); lx = __LO(x); | |
518 hy = __HI(y); ly = __LO(y); | |
519 ix = hx&0x7fffffff; iy = hy&0x7fffffff; | |
520 | |
521 /* y==zero: x**0 = 1 */ | |
522 if((iy|ly)==0) return one; | |
523 | |
524 /* +-NaN return x+y */ | |
525 if(ix > 0x7ff00000 || ((ix==0x7ff00000)&&(lx!=0)) || | |
526 iy > 0x7ff00000 || ((iy==0x7ff00000)&&(ly!=0))) | |
527 return x+y; | |
528 | |
529 /* determine if y is an odd int when x < 0 | |
530 * yisint = 0 ... y is not an integer | |
531 * yisint = 1 ... y is an odd int | |
532 * yisint = 2 ... y is an even int | |
533 */ | |
534 yisint = 0; | |
535 if(hx<0) { | |
536 if(iy>=0x43400000) yisint = 2; /* even integer y */ | |
537 else if(iy>=0x3ff00000) { | |
538 k = (iy>>20)-0x3ff; /* exponent */ | |
539 if(k>20) { | |
540 j = ly>>(52-k); | |
541 if((unsigned)(j<<(52-k))==ly) yisint = 2-(j&1); | |
542 } else if(ly==0) { | |
543 j = iy>>(20-k); | |
544 if((j<<(20-k))==iy) yisint = 2-(j&1); | |
545 } | |
546 } | |
547 } | |
548 | |
549 /* special value of y */ | |
550 if(ly==0) { | |
551 if (iy==0x7ff00000) { /* y is +-inf */ | |
552 if(((ix-0x3ff00000)|lx)==0) | |
553 return y - y; /* inf**+-1 is NaN */ | |
554 else if (ix >= 0x3ff00000)/* (|x|>1)**+-inf = inf,0 */ | |
555 return (hy>=0)? y: zeroX; | |
556 else /* (|x|<1)**-,+inf = inf,0 */ | |
557 return (hy<0)?-y: zeroX; | |
558 } | |
559 if(iy==0x3ff00000) { /* y is +-1 */ | |
560 if(hy<0) return one/x; else return x; | |
561 } | |
562 if(hy==0x40000000) return x*x; /* y is 2 */ | |
563 if(hy==0x3fe00000) { /* y is 0.5 */ | |
564 if(hx>=0) /* x >= +0 */ | |
565 return sqrt(x); | |
566 } | |
567 } | |
568 | |
569 ax = fabsd(x); | |
570 /* special value of x */ | |
571 if(lx==0) { | |
572 if(ix==0x7ff00000||ix==0||ix==0x3ff00000){ | |
573 z = ax; /*x is +-0,+-inf,+-1*/ | |
574 if(hy<0) z = one/z; /* z = (1/|x|) */ | |
575 if(hx<0) { | |
576 if(((ix-0x3ff00000)|yisint)==0) { | |
1681
126ea7725993
6953477: Increase portability and flexibility of building Hotspot
bobv
parents:
1552
diff
changeset
|
577 #ifdef CAN_USE_NAN_DEFINE |
126ea7725993
6953477: Increase portability and flexibility of building Hotspot
bobv
parents:
1552
diff
changeset
|
578 z = NAN; |
126ea7725993
6953477: Increase portability and flexibility of building Hotspot
bobv
parents:
1552
diff
changeset
|
579 #else |
0 | 580 z = (z-z)/(z-z); /* (-1)**non-int is NaN */ |
1681
126ea7725993
6953477: Increase portability and flexibility of building Hotspot
bobv
parents:
1552
diff
changeset
|
581 #endif |
0 | 582 } else if(yisint==1) |
583 z = -1.0*z; /* (x<0)**odd = -(|x|**odd) */ | |
584 } | |
585 return z; | |
586 } | |
587 } | |
588 | |
589 n = (hx>>31)+1; | |
590 | |
591 /* (x<0)**(non-int) is NaN */ | |
1681
126ea7725993
6953477: Increase portability and flexibility of building Hotspot
bobv
parents:
1552
diff
changeset
|
592 if((n|yisint)==0) |
126ea7725993
6953477: Increase portability and flexibility of building Hotspot
bobv
parents:
1552
diff
changeset
|
593 #ifdef CAN_USE_NAN_DEFINE |
126ea7725993
6953477: Increase portability and flexibility of building Hotspot
bobv
parents:
1552
diff
changeset
|
594 return NAN; |
126ea7725993
6953477: Increase portability and flexibility of building Hotspot
bobv
parents:
1552
diff
changeset
|
595 #else |
126ea7725993
6953477: Increase portability and flexibility of building Hotspot
bobv
parents:
1552
diff
changeset
|
596 return (x-x)/(x-x); |
126ea7725993
6953477: Increase portability and flexibility of building Hotspot
bobv
parents:
1552
diff
changeset
|
597 #endif |
0 | 598 |
599 s = one; /* s (sign of result -ve**odd) = -1 else = 1 */ | |
600 if((n|(yisint-1))==0) s = -one;/* (-ve)**(odd int) */ | |
601 | |
602 /* |y| is huge */ | |
603 if(iy>0x41e00000) { /* if |y| > 2**31 */ | |
604 if(iy>0x43f00000){ /* if |y| > 2**64, must o/uflow */ | |
605 if(ix<=0x3fefffff) return (hy<0)? hugeX*hugeX:tiny*tiny; | |
606 if(ix>=0x3ff00000) return (hy>0)? hugeX*hugeX:tiny*tiny; | |
607 } | |
608 /* over/underflow if x is not close to one */ | |
609 if(ix<0x3fefffff) return (hy<0)? s*hugeX*hugeX:s*tiny*tiny; | |
610 if(ix>0x3ff00000) return (hy>0)? s*hugeX*hugeX:s*tiny*tiny; | |
611 /* now |1-x| is tiny <= 2**-20, suffice to compute | |
612 log(x) by x-x^2/2+x^3/3-x^4/4 */ | |
613 t = ax-one; /* t has 20 trailing zeros */ | |
614 w = (t*t)*(0.5-t*(0.3333333333333333333333-t*0.25)); | |
615 u = ivln2_h*t; /* ivln2_h has 21 sig. bits */ | |
616 v = t*ivln2_l-w*ivln2; | |
617 t1 = u+v; | |
618 __LO(t1) = 0; | |
619 t2 = v-(t1-u); | |
620 } else { | |
621 double ss,s2,s_h,s_l,t_h,t_l; | |
622 n = 0; | |
623 /* take care subnormal number */ | |
624 if(ix<0x00100000) | |
625 {ax *= two53; n -= 53; ix = __HI(ax); } | |
626 n += ((ix)>>20)-0x3ff; | |
627 j = ix&0x000fffff; | |
628 /* determine interval */ | |
629 ix = j|0x3ff00000; /* normalize ix */ | |
630 if(j<=0x3988E) k=0; /* |x|<sqrt(3/2) */ | |
631 else if(j<0xBB67A) k=1; /* |x|<sqrt(3) */ | |
632 else {k=0;n+=1;ix -= 0x00100000;} | |
633 __HI(ax) = ix; | |
634 | |
635 /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ | |
636 u = ax-bp[k]; /* bp[0]=1.0, bp[1]=1.5 */ | |
637 v = one/(ax+bp[k]); | |
638 ss = u*v; | |
639 s_h = ss; | |
640 __LO(s_h) = 0; | |
641 /* t_h=ax+bp[k] High */ | |
642 t_h = zeroX; | |
643 __HI(t_h)=((ix>>1)|0x20000000)+0x00080000+(k<<18); | |
644 t_l = ax - (t_h-bp[k]); | |
645 s_l = v*((u-s_h*t_h)-s_h*t_l); | |
646 /* compute log(ax) */ | |
647 s2 = ss*ss; | |
648 r = s2*s2*(L1X+s2*(L2X+s2*(L3X+s2*(L4X+s2*(L5X+s2*L6X))))); | |
649 r += s_l*(s_h+ss); | |
650 s2 = s_h*s_h; | |
651 t_h = 3.0+s2+r; | |
652 __LO(t_h) = 0; | |
653 t_l = r-((t_h-3.0)-s2); | |
654 /* u+v = ss*(1+...) */ | |
655 u = s_h*t_h; | |
656 v = s_l*t_h+t_l*ss; | |
657 /* 2/(3log2)*(ss+...) */ | |
658 p_h = u+v; | |
659 __LO(p_h) = 0; | |
660 p_l = v-(p_h-u); | |
661 z_h = cp_h*p_h; /* cp_h+cp_l = 2/(3*log2) */ | |
662 z_l = cp_l*p_h+p_l*cp+dp_l[k]; | |
663 /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */ | |
664 t = (double)n; | |
665 t1 = (((z_h+z_l)+dp_h[k])+t); | |
666 __LO(t1) = 0; | |
667 t2 = z_l-(((t1-t)-dp_h[k])-z_h); | |
668 } | |
669 | |
670 /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */ | |
671 y1 = y; | |
672 __LO(y1) = 0; | |
673 p_l = (y-y1)*t1+y*t2; | |
674 p_h = y1*t1; | |
675 z = p_l+p_h; | |
676 j = __HI(z); | |
677 i = __LO(z); | |
678 if (j>=0x40900000) { /* z >= 1024 */ | |
679 if(((j-0x40900000)|i)!=0) /* if z > 1024 */ | |
680 return s*hugeX*hugeX; /* overflow */ | |
681 else { | |
682 if(p_l+ovt>z-p_h) return s*hugeX*hugeX; /* overflow */ | |
683 } | |
684 } else if((j&0x7fffffff)>=0x4090cc00 ) { /* z <= -1075 */ | |
685 if(((j-0xc090cc00)|i)!=0) /* z < -1075 */ | |
686 return s*tiny*tiny; /* underflow */ | |
687 else { | |
688 if(p_l<=z-p_h) return s*tiny*tiny; /* underflow */ | |
689 } | |
690 } | |
691 /* | |
692 * compute 2**(p_h+p_l) | |
693 */ | |
694 i = j&0x7fffffff; | |
695 k = (i>>20)-0x3ff; | |
696 n = 0; | |
697 if(i>0x3fe00000) { /* if |z| > 0.5, set n = [z+0.5] */ | |
698 n = j+(0x00100000>>(k+1)); | |
699 k = ((n&0x7fffffff)>>20)-0x3ff; /* new k for n */ | |
700 t = zeroX; | |
701 __HI(t) = (n&~(0x000fffff>>k)); | |
702 n = ((n&0x000fffff)|0x00100000)>>(20-k); | |
703 if(j<0) n = -n; | |
704 p_h -= t; | |
705 } | |
706 t = p_l+p_h; | |
707 __LO(t) = 0; | |
708 u = t*lg2_h; | |
709 v = (p_l-(t-p_h))*lg2+t*lg2_l; | |
710 z = u+v; | |
711 w = v-(z-u); | |
712 t = z*z; | |
713 t1 = z - t*(P1+t*(P2+t*(P3+t*(P4+t*P5)))); | |
714 r = (z*t1)/(t1-two)-(w+z*w); | |
715 z = one-(r-z); | |
716 j = __HI(z); | |
717 j += (n<<20); | |
718 if((j>>20)<=0) z = scalbn(z,n); /* subnormal output */ | |
719 else __HI(z) += (n<<20); | |
720 return s*z; | |
721 } | |
722 | |
723 | |
724 JRT_LEAF(jdouble, SharedRuntime::dpow(jdouble x, jdouble y)) | |
725 return __ieee754_pow(x, y); | |
726 JRT_END | |
727 | |
728 #ifdef WIN32 | |
729 # pragma optimize ( "", on ) | |
730 #endif |