Mercurial > hg > truffle
annotate src/cpu/x86/vm/vm_version_x86.cpp @ 3101:6ccb95c97e6d
IdealGraphVisualizer: Work around a problem with JSplitPane and the NetBeans editor: setDividerLocation() doesn't work when the split pane has not been laid out and painted yet. JSplitPane then initially uses a tiny width for the left editor component, which causes the editor to calculate invalid offsets and constantly throw exceptions, particularly on mouse events. Thus, defer adding the two components and setting the divider's location.
author | Peter Hofer <peter.hofer@jku.at> |
---|---|
date | Thu, 30 Jun 2011 12:17:27 +0200 |
parents | 15c9a0e16269 |
children | 2a34a4fbc52c |
rev | line source |
---|---|
585 | 1 /* |
2426
1d1603768966
7010070: Update all 2010 Oracle-changed OpenJDK files to have the proper copyright dates - second pass
trims
parents:
2406
diff
changeset
|
2 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved. |
585 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1060
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1060
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1060
diff
changeset
|
21 * questions. |
585 | 22 * |
23 */ | |
24 | |
1972 | 25 #include "precompiled.hpp" |
26 #include "assembler_x86.inline.hpp" | |
27 #include "memory/resourceArea.hpp" | |
28 #include "runtime/java.hpp" | |
29 #include "runtime/stubCodeGenerator.hpp" | |
30 #include "vm_version_x86.hpp" | |
31 #ifdef TARGET_OS_FAMILY_linux | |
32 # include "os_linux.inline.hpp" | |
33 #endif | |
34 #ifdef TARGET_OS_FAMILY_solaris | |
35 # include "os_solaris.inline.hpp" | |
36 #endif | |
37 #ifdef TARGET_OS_FAMILY_windows | |
38 # include "os_windows.inline.hpp" | |
39 #endif | |
585 | 40 |
41 | |
42 int VM_Version::_cpu; | |
43 int VM_Version::_model; | |
44 int VM_Version::_stepping; | |
45 int VM_Version::_cpuFeatures; | |
46 const char* VM_Version::_features_str = ""; | |
47 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; | |
48 | |
49 static BufferBlob* stub_blob; | |
1622 | 50 static const int stub_size = 400; |
585 | 51 |
52 extern "C" { | |
53 typedef void (*getPsrInfo_stub_t)(void*); | |
54 } | |
55 static getPsrInfo_stub_t getPsrInfo_stub = NULL; | |
56 | |
57 | |
58 class VM_Version_StubGenerator: public StubCodeGenerator { | |
59 public: | |
60 | |
61 VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} | |
62 | |
63 address generate_getPsrInfo() { | |
64 // Flags to test CPU type. | |
65 const uint32_t EFL_AC = 0x40000; | |
66 const uint32_t EFL_ID = 0x200000; | |
67 // Values for when we don't have a CPUID instruction. | |
68 const int CPU_FAMILY_SHIFT = 8; | |
69 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); | |
70 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); | |
71 | |
1622 | 72 Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4; |
585 | 73 Label ext_cpuid1, ext_cpuid5, done; |
74 | |
75 StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); | |
76 # define __ _masm-> | |
77 | |
78 address start = __ pc(); | |
79 | |
80 // | |
81 // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info); | |
82 // | |
83 // LP64: rcx and rdx are first and second argument registers on windows | |
84 | |
85 __ push(rbp); | |
86 #ifdef _LP64 | |
87 __ mov(rbp, c_rarg0); // cpuid_info address | |
88 #else | |
89 __ movptr(rbp, Address(rsp, 8)); // cpuid_info address | |
90 #endif | |
91 __ push(rbx); | |
92 __ push(rsi); | |
93 __ pushf(); // preserve rbx, and flags | |
94 __ pop(rax); | |
95 __ push(rax); | |
96 __ mov(rcx, rax); | |
97 // | |
98 // if we are unable to change the AC flag, we have a 386 | |
99 // | |
100 __ xorl(rax, EFL_AC); | |
101 __ push(rax); | |
102 __ popf(); | |
103 __ pushf(); | |
104 __ pop(rax); | |
105 __ cmpptr(rax, rcx); | |
106 __ jccb(Assembler::notEqual, detect_486); | |
107 | |
108 __ movl(rax, CPU_FAMILY_386); | |
109 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); | |
110 __ jmp(done); | |
111 | |
112 // | |
113 // If we are unable to change the ID flag, we have a 486 which does | |
114 // not support the "cpuid" instruction. | |
115 // | |
116 __ bind(detect_486); | |
117 __ mov(rax, rcx); | |
118 __ xorl(rax, EFL_ID); | |
119 __ push(rax); | |
120 __ popf(); | |
121 __ pushf(); | |
122 __ pop(rax); | |
123 __ cmpptr(rcx, rax); | |
124 __ jccb(Assembler::notEqual, detect_586); | |
125 | |
126 __ bind(cpu486); | |
127 __ movl(rax, CPU_FAMILY_486); | |
128 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); | |
129 __ jmp(done); | |
130 | |
131 // | |
132 // At this point, we have a chip which supports the "cpuid" instruction | |
133 // | |
134 __ bind(detect_586); | |
135 __ xorl(rax, rax); | |
136 __ cpuid(); | |
137 __ orl(rax, rax); | |
138 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input | |
139 // value of at least 1, we give up and | |
140 // assume a 486 | |
141 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); | |
142 __ movl(Address(rsi, 0), rax); | |
143 __ movl(Address(rsi, 4), rbx); | |
144 __ movl(Address(rsi, 8), rcx); | |
145 __ movl(Address(rsi,12), rdx); | |
146 | |
1622 | 147 __ cmpl(rax, 0xa); // Is cpuid(0xB) supported? |
148 __ jccb(Assembler::belowEqual, std_cpuid4); | |
149 | |
150 // | |
151 // cpuid(0xB) Processor Topology | |
152 // | |
153 __ movl(rax, 0xb); | |
154 __ xorl(rcx, rcx); // Threads level | |
155 __ cpuid(); | |
156 | |
157 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset()))); | |
158 __ movl(Address(rsi, 0), rax); | |
159 __ movl(Address(rsi, 4), rbx); | |
160 __ movl(Address(rsi, 8), rcx); | |
161 __ movl(Address(rsi,12), rdx); | |
162 | |
163 __ movl(rax, 0xb); | |
164 __ movl(rcx, 1); // Cores level | |
165 __ cpuid(); | |
166 __ push(rax); | |
167 __ andl(rax, 0x1f); // Determine if valid topology level | |
168 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level | |
169 __ andl(rax, 0xffff); | |
170 __ pop(rax); | |
171 __ jccb(Assembler::equal, std_cpuid4); | |
172 | |
173 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset()))); | |
174 __ movl(Address(rsi, 0), rax); | |
175 __ movl(Address(rsi, 4), rbx); | |
176 __ movl(Address(rsi, 8), rcx); | |
177 __ movl(Address(rsi,12), rdx); | |
178 | |
179 __ movl(rax, 0xb); | |
180 __ movl(rcx, 2); // Packages level | |
181 __ cpuid(); | |
182 __ push(rax); | |
183 __ andl(rax, 0x1f); // Determine if valid topology level | |
184 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level | |
185 __ andl(rax, 0xffff); | |
186 __ pop(rax); | |
187 __ jccb(Assembler::equal, std_cpuid4); | |
188 | |
189 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset()))); | |
190 __ movl(Address(rsi, 0), rax); | |
191 __ movl(Address(rsi, 4), rbx); | |
192 __ movl(Address(rsi, 8), rcx); | |
193 __ movl(Address(rsi,12), rdx); | |
585 | 194 |
195 // | |
196 // cpuid(0x4) Deterministic cache params | |
197 // | |
1622 | 198 __ bind(std_cpuid4); |
585 | 199 __ movl(rax, 4); |
1622 | 200 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported? |
201 __ jccb(Assembler::greater, std_cpuid1); | |
202 | |
585 | 203 __ xorl(rcx, rcx); // L1 cache |
204 __ cpuid(); | |
205 __ push(rax); | |
206 __ andl(rax, 0x1f); // Determine if valid cache parameters used | |
207 __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache | |
208 __ pop(rax); | |
209 __ jccb(Assembler::equal, std_cpuid1); | |
210 | |
211 __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset()))); | |
212 __ movl(Address(rsi, 0), rax); | |
213 __ movl(Address(rsi, 4), rbx); | |
214 __ movl(Address(rsi, 8), rcx); | |
215 __ movl(Address(rsi,12), rdx); | |
216 | |
217 // | |
218 // Standard cpuid(0x1) | |
219 // | |
220 __ bind(std_cpuid1); | |
221 __ movl(rax, 1); | |
222 __ cpuid(); | |
223 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); | |
224 __ movl(Address(rsi, 0), rax); | |
225 __ movl(Address(rsi, 4), rbx); | |
226 __ movl(Address(rsi, 8), rcx); | |
227 __ movl(Address(rsi,12), rdx); | |
228 | |
229 __ movl(rax, 0x80000000); | |
230 __ cpuid(); | |
231 __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? | |
232 __ jcc(Assembler::belowEqual, done); | |
233 __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? | |
234 __ jccb(Assembler::belowEqual, ext_cpuid1); | |
235 __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? | |
236 __ jccb(Assembler::belowEqual, ext_cpuid5); | |
237 // | |
238 // Extended cpuid(0x80000008) | |
239 // | |
240 __ movl(rax, 0x80000008); | |
241 __ cpuid(); | |
242 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset()))); | |
243 __ movl(Address(rsi, 0), rax); | |
244 __ movl(Address(rsi, 4), rbx); | |
245 __ movl(Address(rsi, 8), rcx); | |
246 __ movl(Address(rsi,12), rdx); | |
247 | |
248 // | |
249 // Extended cpuid(0x80000005) | |
250 // | |
251 __ bind(ext_cpuid5); | |
252 __ movl(rax, 0x80000005); | |
253 __ cpuid(); | |
254 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset()))); | |
255 __ movl(Address(rsi, 0), rax); | |
256 __ movl(Address(rsi, 4), rbx); | |
257 __ movl(Address(rsi, 8), rcx); | |
258 __ movl(Address(rsi,12), rdx); | |
259 | |
260 // | |
261 // Extended cpuid(0x80000001) | |
262 // | |
263 __ bind(ext_cpuid1); | |
264 __ movl(rax, 0x80000001); | |
265 __ cpuid(); | |
266 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset()))); | |
267 __ movl(Address(rsi, 0), rax); | |
268 __ movl(Address(rsi, 4), rbx); | |
269 __ movl(Address(rsi, 8), rcx); | |
270 __ movl(Address(rsi,12), rdx); | |
271 | |
272 // | |
273 // return | |
274 // | |
275 __ bind(done); | |
276 __ popf(); | |
277 __ pop(rsi); | |
278 __ pop(rbx); | |
279 __ pop(rbp); | |
280 __ ret(0); | |
281 | |
282 # undef __ | |
283 | |
284 return start; | |
285 }; | |
286 }; | |
287 | |
288 | |
289 void VM_Version::get_processor_features() { | |
290 | |
291 _cpu = 4; // 486 by default | |
292 _model = 0; | |
293 _stepping = 0; | |
294 _cpuFeatures = 0; | |
295 _logical_processors_per_package = 1; | |
296 | |
297 if (!Use486InstrsOnly) { | |
298 // Get raw processor info | |
299 getPsrInfo_stub(&_cpuid_info); | |
300 assert_is_initialized(); | |
301 _cpu = extended_cpu_family(); | |
302 _model = extended_cpu_model(); | |
303 _stepping = cpu_stepping(); | |
304 | |
305 if (cpu_family() > 4) { // it supports CPUID | |
306 _cpuFeatures = feature_flags(); | |
307 // Logical processors are only available on P4s and above, | |
308 // and only if hyperthreading is available. | |
309 _logical_processors_per_package = logical_processor_count(); | |
310 } | |
311 } | |
312 | |
313 _supports_cx8 = supports_cmpxchg8(); | |
314 | |
315 #ifdef _LP64 | |
316 // OS should support SSE for x64 and hardware should support at least SSE2. | |
317 if (!VM_Version::supports_sse2()) { | |
318 vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported"); | |
319 } | |
1060 | 320 // in 64 bit the use of SSE2 is the minimum |
321 if (UseSSE < 2) UseSSE = 2; | |
585 | 322 #endif |
323 | |
324 // If the OS doesn't support SSE, we can't use this feature even if the HW does | |
325 if (!os::supports_sse()) | |
326 _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2); | |
327 | |
328 if (UseSSE < 4) { | |
329 _cpuFeatures &= ~CPU_SSE4_1; | |
330 _cpuFeatures &= ~CPU_SSE4_2; | |
331 } | |
332 | |
333 if (UseSSE < 3) { | |
334 _cpuFeatures &= ~CPU_SSE3; | |
335 _cpuFeatures &= ~CPU_SSSE3; | |
336 _cpuFeatures &= ~CPU_SSE4A; | |
337 } | |
338 | |
339 if (UseSSE < 2) | |
340 _cpuFeatures &= ~CPU_SSE2; | |
341 | |
342 if (UseSSE < 1) | |
343 _cpuFeatures &= ~CPU_SSE; | |
344 | |
345 if (logical_processors_per_package() == 1) { | |
346 // HT processor could be installed on a system which doesn't support HT. | |
347 _cpuFeatures &= ~CPU_HT; | |
348 } | |
349 | |
350 char buf[256]; | |
2479 | 351 jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", |
585 | 352 cores_per_cpu(), threads_per_core(), |
353 cpu_family(), _model, _stepping, | |
354 (supports_cmov() ? ", cmov" : ""), | |
355 (supports_cmpxchg8() ? ", cx8" : ""), | |
356 (supports_fxsr() ? ", fxsr" : ""), | |
357 (supports_mmx() ? ", mmx" : ""), | |
358 (supports_sse() ? ", sse" : ""), | |
359 (supports_sse2() ? ", sse2" : ""), | |
360 (supports_sse3() ? ", sse3" : ""), | |
361 (supports_ssse3()? ", ssse3": ""), | |
362 (supports_sse4_1() ? ", sse4.1" : ""), | |
363 (supports_sse4_2() ? ", sse4.2" : ""), | |
643
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
364 (supports_popcnt() ? ", popcnt" : ""), |
585 | 365 (supports_mmx_ext() ? ", mmxext" : ""), |
2479 | 366 (supports_3dnow_prefetch() ? ", 3dnowpref" : ""), |
775
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
367 (supports_lzcnt() ? ", lzcnt": ""), |
585 | 368 (supports_sse4a() ? ", sse4a": ""), |
369 (supports_ht() ? ", ht": "")); | |
370 _features_str = strdup(buf); | |
371 | |
372 // UseSSE is set to the smaller of what hardware supports and what | |
373 // the command line requires. I.e., you cannot set UseSSE to 2 on | |
374 // older Pentiums which do not support it. | |
375 if( UseSSE > 4 ) UseSSE=4; | |
376 if( UseSSE < 0 ) UseSSE=0; | |
377 if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support | |
378 UseSSE = MIN2((intx)3,UseSSE); | |
379 if( !supports_sse3() ) // Drop to 2 if no SSE3 support | |
380 UseSSE = MIN2((intx)2,UseSSE); | |
381 if( !supports_sse2() ) // Drop to 1 if no SSE2 support | |
382 UseSSE = MIN2((intx)1,UseSSE); | |
383 if( !supports_sse () ) // Drop to 0 if no SSE support | |
384 UseSSE = 0; | |
385 | |
386 // On new cpus instructions which update whole XMM register should be used | |
387 // to prevent partial register stall due to dependencies on high half. | |
388 // | |
389 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) | |
390 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) | |
391 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). | |
392 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). | |
393 | |
394 if( is_amd() ) { // AMD cpus specific settings | |
395 if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) { | |
396 // Use it on new AMD cpus starting from Opteron. | |
397 UseAddressNop = true; | |
398 } | |
399 if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) { | |
400 // Use it on new AMD cpus starting from Opteron. | |
401 UseNewLongLShift = true; | |
402 } | |
403 if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { | |
404 if( supports_sse4a() ) { | |
405 UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron | |
406 } else { | |
407 UseXmmLoadAndClearUpper = false; | |
408 } | |
409 } | |
410 if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { | |
411 if( supports_sse4a() ) { | |
412 UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h' | |
413 } else { | |
414 UseXmmRegToRegMoveAll = false; | |
415 } | |
416 } | |
417 if( FLAG_IS_DEFAULT(UseXmmI2F) ) { | |
418 if( supports_sse4a() ) { | |
419 UseXmmI2F = true; | |
420 } else { | |
421 UseXmmI2F = false; | |
422 } | |
423 } | |
424 if( FLAG_IS_DEFAULT(UseXmmI2D) ) { | |
425 if( supports_sse4a() ) { | |
426 UseXmmI2D = true; | |
427 } else { | |
428 UseXmmI2D = false; | |
429 } | |
430 } | |
2406 | 431 if( FLAG_IS_DEFAULT(UseSSE42Intrinsics) ) { |
432 if( supports_sse4_2() && UseSSE >= 4 ) { | |
433 UseSSE42Intrinsics = true; | |
434 } | |
435 } | |
775
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
436 |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
437 // Use count leading zeros count instruction if available. |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
438 if (supports_lzcnt()) { |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
439 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
440 UseCountLeadingZerosInstruction = true; |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
441 } |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
442 } |
2358 | 443 |
444 // On family 21 processors default is no sw prefetch | |
445 if ( cpu_family() == 21 ) { | |
446 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { | |
447 AllocatePrefetchStyle = 0; | |
448 } | |
449 } | |
585 | 450 } |
451 | |
452 if( is_intel() ) { // Intel cpus specific settings | |
453 if( FLAG_IS_DEFAULT(UseStoreImmI16) ) { | |
454 UseStoreImmI16 = false; // don't use it on Intel cpus | |
455 } | |
456 if( cpu_family() == 6 || cpu_family() == 15 ) { | |
457 if( FLAG_IS_DEFAULT(UseAddressNop) ) { | |
458 // Use it on all Intel cpus starting from PentiumPro | |
459 UseAddressNop = true; | |
460 } | |
461 } | |
462 if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { | |
463 UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus | |
464 } | |
465 if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { | |
466 if( supports_sse3() ) { | |
467 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus | |
468 } else { | |
469 UseXmmRegToRegMoveAll = false; | |
470 } | |
471 } | |
472 if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus | |
473 #ifdef COMPILER2 | |
474 if( FLAG_IS_DEFAULT(MaxLoopPad) ) { | |
475 // For new Intel cpus do the next optimization: | |
476 // don't align the beginning of a loop if there are enough instructions | |
477 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) | |
478 // in current fetch line (OptoLoopAlignment) or the padding | |
479 // is big (> MaxLoopPad). | |
480 // Set MaxLoopPad to 11 for new Intel cpus to reduce number of | |
481 // generated NOP instructions. 11 is the largest size of one | |
482 // address NOP instruction '0F 1F' (see Assembler::nop(i)). | |
483 MaxLoopPad = 11; | |
484 } | |
485 #endif // COMPILER2 | |
486 if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) { | |
487 UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus | |
488 } | |
489 if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus | |
490 if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) { | |
491 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus | |
492 } | |
493 } | |
681 | 494 if( supports_sse4_2() && UseSSE >= 4 ) { |
495 if( FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { | |
496 UseSSE42Intrinsics = true; | |
497 } | |
498 } | |
585 | 499 } |
500 } | |
501 | |
643
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
502 // Use population count instruction if available. |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
503 if (supports_popcnt()) { |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
504 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
505 UsePopCountInstruction = true; |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
506 } |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
507 } |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
508 |
1730
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
509 #ifdef COMPILER2 |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
510 if (UseFPUForSpilling) { |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
511 if (UseSSE < 2) { |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
512 // Only supported with SSE2+ |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
513 FLAG_SET_DEFAULT(UseFPUForSpilling, false); |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
514 } |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
515 } |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
516 #endif |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
517 |
585 | 518 assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); |
519 assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); | |
520 | |
521 // set valid Prefetch instruction | |
522 if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0; | |
523 if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3; | |
2479 | 524 if( ReadPrefetchInstr == 3 && !supports_3dnow_prefetch() ) ReadPrefetchInstr = 0; |
525 if( !supports_sse() && supports_3dnow_prefetch() ) ReadPrefetchInstr = 3; | |
585 | 526 |
527 if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0; | |
528 if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3; | |
2479 | 529 if( AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch() ) AllocatePrefetchInstr=0; |
530 if( !supports_sse() && supports_3dnow_prefetch() ) AllocatePrefetchInstr = 3; | |
585 | 531 |
532 // Allocation prefetch settings | |
533 intx cache_line_size = L1_data_cache_line_size(); | |
534 if( cache_line_size > AllocatePrefetchStepSize ) | |
535 AllocatePrefetchStepSize = cache_line_size; | |
536 if( FLAG_IS_DEFAULT(AllocatePrefetchLines) ) | |
537 AllocatePrefetchLines = 3; // Optimistic value | |
538 assert(AllocatePrefetchLines > 0, "invalid value"); | |
539 if( AllocatePrefetchLines < 1 ) // set valid value in product VM | |
540 AllocatePrefetchLines = 1; // Conservative value | |
541 | |
542 AllocatePrefetchDistance = allocate_prefetch_distance(); | |
543 AllocatePrefetchStyle = allocate_prefetch_style(); | |
544 | |
1622 | 545 if( is_intel() && cpu_family() == 6 && supports_sse3() ) { |
546 if( AllocatePrefetchStyle == 2 ) { // watermark prefetching on Core | |
585 | 547 #ifdef _LP64 |
1622 | 548 AllocatePrefetchDistance = 384; |
585 | 549 #else |
1622 | 550 AllocatePrefetchDistance = 320; |
585 | 551 #endif |
1622 | 552 } |
553 if( supports_sse4_2() && supports_ht() ) { // Nehalem based cpus | |
554 AllocatePrefetchDistance = 192; | |
555 AllocatePrefetchLines = 4; | |
1730
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
556 #ifdef COMPILER2 |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
557 if (AggressiveOpts && FLAG_IS_DEFAULT(UseFPUForSpilling)) { |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
558 FLAG_SET_DEFAULT(UseFPUForSpilling, true); |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
559 } |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
560 #endif |
1622 | 561 } |
585 | 562 } |
563 assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value"); | |
564 | |
565 #ifdef _LP64 | |
566 // Prefetch settings | |
567 PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes(); | |
568 PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes(); | |
569 PrefetchFieldsAhead = prefetch_fields_ahead(); | |
570 #endif | |
571 | |
572 #ifndef PRODUCT | |
573 if (PrintMiscellaneous && Verbose) { | |
574 tty->print_cr("Logical CPUs per core: %u", | |
575 logical_processors_per_package()); | |
576 tty->print_cr("UseSSE=%d",UseSSE); | |
577 tty->print("Allocation: "); | |
2479 | 578 if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) { |
585 | 579 tty->print_cr("no prefetching"); |
580 } else { | |
2479 | 581 if (UseSSE == 0 && supports_3dnow_prefetch()) { |
585 | 582 tty->print("PREFETCHW"); |
583 } else if (UseSSE >= 1) { | |
584 if (AllocatePrefetchInstr == 0) { | |
585 tty->print("PREFETCHNTA"); | |
586 } else if (AllocatePrefetchInstr == 1) { | |
587 tty->print("PREFETCHT0"); | |
588 } else if (AllocatePrefetchInstr == 2) { | |
589 tty->print("PREFETCHT2"); | |
590 } else if (AllocatePrefetchInstr == 3) { | |
591 tty->print("PREFETCHW"); | |
592 } | |
593 } | |
594 if (AllocatePrefetchLines > 1) { | |
595 tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); | |
596 } else { | |
597 tty->print_cr(" %d, one line", AllocatePrefetchDistance); | |
598 } | |
599 } | |
600 | |
601 if (PrefetchCopyIntervalInBytes > 0) { | |
602 tty->print_cr("PrefetchCopyIntervalInBytes %d", PrefetchCopyIntervalInBytes); | |
603 } | |
604 if (PrefetchScanIntervalInBytes > 0) { | |
605 tty->print_cr("PrefetchScanIntervalInBytes %d", PrefetchScanIntervalInBytes); | |
606 } | |
607 if (PrefetchFieldsAhead > 0) { | |
608 tty->print_cr("PrefetchFieldsAhead %d", PrefetchFieldsAhead); | |
609 } | |
610 } | |
611 #endif // !PRODUCT | |
612 } | |
613 | |
614 void VM_Version::initialize() { | |
615 ResourceMark rm; | |
616 // Making this stub must be FIRST use of assembler | |
617 | |
618 stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size); | |
619 if (stub_blob == NULL) { | |
620 vm_exit_during_initialization("Unable to allocate getPsrInfo_stub"); | |
621 } | |
1748 | 622 CodeBuffer c(stub_blob); |
585 | 623 VM_Version_StubGenerator g(&c); |
624 getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t, | |
625 g.generate_getPsrInfo()); | |
626 | |
627 get_processor_features(); | |
628 } |