Mercurial > hg > truffle
annotate src/cpu/x86/vm/vm_version_x86.cpp @ 4582:b24386206122
Made all vm builds go into subdirectories, even product builds to simplify building the various types of VMs (server, client and graal).
Made HotSpot build jobs use the number of CPUs on the host machine.
author | Doug Simon <doug.simon@oracle.com> |
---|---|
date | Mon, 13 Feb 2012 23:13:37 +0100 |
parents | 04b9a2566eec |
children | 33df1aeaebbf |
rev | line source |
---|---|
/*
 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "assembler_x86.inline.hpp"
#include "memory/resourceArea.hpp"
#include "runtime/java.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "vm_version_x86.hpp"
#ifdef TARGET_OS_FAMILY_linux
# include "os_linux.inline.hpp"
#endif
#ifdef TARGET_OS_FAMILY_solaris
# include "os_solaris.inline.hpp"
#endif
#ifdef TARGET_OS_FAMILY_windows
# include "os_windows.inline.hpp"
#endif
#ifdef TARGET_OS_FAMILY_bsd
# include "os_bsd.inline.hpp"
#endif
585 | 43 |
44 | |
45 int VM_Version::_cpu; | |
46 int VM_Version::_model; | |
47 int VM_Version::_stepping; | |
48 int VM_Version::_cpuFeatures; | |
49 const char* VM_Version::_features_str = ""; | |
50 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; | |
51 | |
52 static BufferBlob* stub_blob; | |
1622 | 53 static const int stub_size = 400; |
585 | 54 |
55 extern "C" { | |
56 typedef void (*getPsrInfo_stub_t)(void*); | |
57 } | |
58 static getPsrInfo_stub_t getPsrInfo_stub = NULL; | |
59 | |
60 | |
61 class VM_Version_StubGenerator: public StubCodeGenerator { | |
62 public: | |
63 | |
64 VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} | |
65 | |
66 address generate_getPsrInfo() { | |
67 // Flags to test CPU type. | |
68 const uint32_t EFL_AC = 0x40000; | |
69 const uint32_t EFL_ID = 0x200000; | |
70 // Values for when we don't have a CPUID instruction. | |
71 const int CPU_FAMILY_SHIFT = 8; | |
72 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); | |
73 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); | |
74 | |
1622 | 75 Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4; |
585 | 76 Label ext_cpuid1, ext_cpuid5, done; |
77 | |
78 StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); | |
79 # define __ _masm-> | |
80 | |
81 address start = __ pc(); | |
82 | |
83 // | |
84 // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info); | |
85 // | |
86 // LP64: rcx and rdx are first and second argument registers on windows | |
87 | |
88 __ push(rbp); | |
89 #ifdef _LP64 | |
90 __ mov(rbp, c_rarg0); // cpuid_info address | |
91 #else | |
92 __ movptr(rbp, Address(rsp, 8)); // cpuid_info address | |
93 #endif | |
94 __ push(rbx); | |
95 __ push(rsi); | |
96 __ pushf(); // preserve rbx, and flags | |
97 __ pop(rax); | |
98 __ push(rax); | |
99 __ mov(rcx, rax); | |
100 // | |
101 // if we are unable to change the AC flag, we have a 386 | |
102 // | |
103 __ xorl(rax, EFL_AC); | |
104 __ push(rax); | |
105 __ popf(); | |
106 __ pushf(); | |
107 __ pop(rax); | |
108 __ cmpptr(rax, rcx); | |
109 __ jccb(Assembler::notEqual, detect_486); | |
110 | |
111 __ movl(rax, CPU_FAMILY_386); | |
112 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); | |
113 __ jmp(done); | |
114 | |
115 // | |
116 // If we are unable to change the ID flag, we have a 486 which does | |
117 // not support the "cpuid" instruction. | |
118 // | |
119 __ bind(detect_486); | |
120 __ mov(rax, rcx); | |
121 __ xorl(rax, EFL_ID); | |
122 __ push(rax); | |
123 __ popf(); | |
124 __ pushf(); | |
125 __ pop(rax); | |
126 __ cmpptr(rcx, rax); | |
127 __ jccb(Assembler::notEqual, detect_586); | |
128 | |
129 __ bind(cpu486); | |
130 __ movl(rax, CPU_FAMILY_486); | |
131 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); | |
132 __ jmp(done); | |
133 | |
134 // | |
135 // At this point, we have a chip which supports the "cpuid" instruction | |
136 // | |
137 __ bind(detect_586); | |
138 __ xorl(rax, rax); | |
139 __ cpuid(); | |
140 __ orl(rax, rax); | |
141 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input | |
142 // value of at least 1, we give up and | |
143 // assume a 486 | |
144 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); | |
145 __ movl(Address(rsi, 0), rax); | |
146 __ movl(Address(rsi, 4), rbx); | |
147 __ movl(Address(rsi, 8), rcx); | |
148 __ movl(Address(rsi,12), rdx); | |
149 | |
1622 | 150 __ cmpl(rax, 0xa); // Is cpuid(0xB) supported? |
151 __ jccb(Assembler::belowEqual, std_cpuid4); | |
152 | |
153 // | |
154 // cpuid(0xB) Processor Topology | |
155 // | |
156 __ movl(rax, 0xb); | |
157 __ xorl(rcx, rcx); // Threads level | |
158 __ cpuid(); | |
159 | |
160 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset()))); | |
161 __ movl(Address(rsi, 0), rax); | |
162 __ movl(Address(rsi, 4), rbx); | |
163 __ movl(Address(rsi, 8), rcx); | |
164 __ movl(Address(rsi,12), rdx); | |
165 | |
166 __ movl(rax, 0xb); | |
167 __ movl(rcx, 1); // Cores level | |
168 __ cpuid(); | |
169 __ push(rax); | |
170 __ andl(rax, 0x1f); // Determine if valid topology level | |
171 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level | |
172 __ andl(rax, 0xffff); | |
173 __ pop(rax); | |
174 __ jccb(Assembler::equal, std_cpuid4); | |
175 | |
176 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset()))); | |
177 __ movl(Address(rsi, 0), rax); | |
178 __ movl(Address(rsi, 4), rbx); | |
179 __ movl(Address(rsi, 8), rcx); | |
180 __ movl(Address(rsi,12), rdx); | |
181 | |
182 __ movl(rax, 0xb); | |
183 __ movl(rcx, 2); // Packages level | |
184 __ cpuid(); | |
185 __ push(rax); | |
186 __ andl(rax, 0x1f); // Determine if valid topology level | |
187 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level | |
188 __ andl(rax, 0xffff); | |
189 __ pop(rax); | |
190 __ jccb(Assembler::equal, std_cpuid4); | |
191 | |
192 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset()))); | |
193 __ movl(Address(rsi, 0), rax); | |
194 __ movl(Address(rsi, 4), rbx); | |
195 __ movl(Address(rsi, 8), rcx); | |
196 __ movl(Address(rsi,12), rdx); | |
585 | 197 |
198 // | |
199 // cpuid(0x4) Deterministic cache params | |
200 // | |
1622 | 201 __ bind(std_cpuid4); |
585 | 202 __ movl(rax, 4); |
1622 | 203 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported? |
204 __ jccb(Assembler::greater, std_cpuid1); | |
205 | |
585 | 206 __ xorl(rcx, rcx); // L1 cache |
207 __ cpuid(); | |
208 __ push(rax); | |
209 __ andl(rax, 0x1f); // Determine if valid cache parameters used | |
210 __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache | |
211 __ pop(rax); | |
212 __ jccb(Assembler::equal, std_cpuid1); | |
213 | |
214 __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset()))); | |
215 __ movl(Address(rsi, 0), rax); | |
216 __ movl(Address(rsi, 4), rbx); | |
217 __ movl(Address(rsi, 8), rcx); | |
218 __ movl(Address(rsi,12), rdx); | |
219 | |
220 // | |
221 // Standard cpuid(0x1) | |
222 // | |
223 __ bind(std_cpuid1); | |
224 __ movl(rax, 1); | |
225 __ cpuid(); | |
226 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); | |
227 __ movl(Address(rsi, 0), rax); | |
228 __ movl(Address(rsi, 4), rbx); | |
229 __ movl(Address(rsi, 8), rcx); | |
230 __ movl(Address(rsi,12), rdx); | |
231 | |
232 __ movl(rax, 0x80000000); | |
233 __ cpuid(); | |
234 __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? | |
235 __ jcc(Assembler::belowEqual, done); | |
236 __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? | |
237 __ jccb(Assembler::belowEqual, ext_cpuid1); | |
238 __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? | |
239 __ jccb(Assembler::belowEqual, ext_cpuid5); | |
240 // | |
241 // Extended cpuid(0x80000008) | |
242 // | |
243 __ movl(rax, 0x80000008); | |
244 __ cpuid(); | |
245 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset()))); | |
246 __ movl(Address(rsi, 0), rax); | |
247 __ movl(Address(rsi, 4), rbx); | |
248 __ movl(Address(rsi, 8), rcx); | |
249 __ movl(Address(rsi,12), rdx); | |
250 | |
251 // | |
252 // Extended cpuid(0x80000005) | |
253 // | |
254 __ bind(ext_cpuid5); | |
255 __ movl(rax, 0x80000005); | |
256 __ cpuid(); | |
257 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset()))); | |
258 __ movl(Address(rsi, 0), rax); | |
259 __ movl(Address(rsi, 4), rbx); | |
260 __ movl(Address(rsi, 8), rcx); | |
261 __ movl(Address(rsi,12), rdx); | |
262 | |
263 // | |
264 // Extended cpuid(0x80000001) | |
265 // | |
266 __ bind(ext_cpuid1); | |
267 __ movl(rax, 0x80000001); | |
268 __ cpuid(); | |
269 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset()))); | |
270 __ movl(Address(rsi, 0), rax); | |
271 __ movl(Address(rsi, 4), rbx); | |
272 __ movl(Address(rsi, 8), rcx); | |
273 __ movl(Address(rsi,12), rdx); | |
274 | |
275 // | |
276 // return | |
277 // | |
278 __ bind(done); | |
279 __ popf(); | |
280 __ pop(rsi); | |
281 __ pop(rbx); | |
282 __ pop(rbp); | |
283 __ ret(0); | |
284 | |
285 # undef __ | |
286 | |
287 return start; | |
288 }; | |
289 }; | |
290 | |
291 | |
292 void VM_Version::get_processor_features() { | |
293 | |
294 _cpu = 4; // 486 by default | |
295 _model = 0; | |
296 _stepping = 0; | |
297 _cpuFeatures = 0; | |
298 _logical_processors_per_package = 1; | |
299 | |
300 if (!Use486InstrsOnly) { | |
301 // Get raw processor info | |
302 getPsrInfo_stub(&_cpuid_info); | |
303 assert_is_initialized(); | |
304 _cpu = extended_cpu_family(); | |
305 _model = extended_cpu_model(); | |
306 _stepping = cpu_stepping(); | |
307 | |
308 if (cpu_family() > 4) { // it supports CPUID | |
309 _cpuFeatures = feature_flags(); | |
310 // Logical processors are only available on P4s and above, | |
311 // and only if hyperthreading is available. | |
312 _logical_processors_per_package = logical_processor_count(); | |
313 } | |
314 } | |
315 | |
316 _supports_cx8 = supports_cmpxchg8(); | |
317 | |
318 #ifdef _LP64 | |
319 // OS should support SSE for x64 and hardware should support at least SSE2. | |
320 if (!VM_Version::supports_sse2()) { | |
321 vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported"); | |
322 } | |
1060 | 323 // in 64 bit the use of SSE2 is the minimum |
324 if (UseSSE < 2) UseSSE = 2; | |
585 | 325 #endif |
326 | |
3787
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
327 #ifdef AMD64 |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
328 // flush_icache_stub have to be generated first. |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
329 // That is why Icache line size is hard coded in ICache class, |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
330 // see icache_x86.hpp. It is also the reason why we can't use |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
331 // clflush instruction in 32-bit VM since it could be running |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
332 // on CPU which does not support it. |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
333 // |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
334 // The only thing we can do is to verify that flushed |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
335 // ICache::line_size has correct value. |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
336 guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported"); |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
337 // clflush_size is size in quadwords (8 bytes). |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
338 guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported"); |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
339 #endif |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
340 |
585 | 341 // If the OS doesn't support SSE, we can't use this feature even if the HW does |
342 if (!os::supports_sse()) | |
343 _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2); | |
344 | |
345 if (UseSSE < 4) { | |
346 _cpuFeatures &= ~CPU_SSE4_1; | |
347 _cpuFeatures &= ~CPU_SSE4_2; | |
348 } | |
349 | |
350 if (UseSSE < 3) { | |
351 _cpuFeatures &= ~CPU_SSE3; | |
352 _cpuFeatures &= ~CPU_SSSE3; | |
353 _cpuFeatures &= ~CPU_SSE4A; | |
354 } | |
355 | |
356 if (UseSSE < 2) | |
357 _cpuFeatures &= ~CPU_SSE2; | |
358 | |
359 if (UseSSE < 1) | |
360 _cpuFeatures &= ~CPU_SSE; | |
361 | |
362 if (logical_processors_per_package() == 1) { | |
363 // HT processor could be installed on a system which doesn't support HT. | |
364 _cpuFeatures &= ~CPU_HT; | |
365 } | |
366 | |
367 char buf[256]; | |
2479 | 368 jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", |
585 | 369 cores_per_cpu(), threads_per_core(), |
370 cpu_family(), _model, _stepping, | |
371 (supports_cmov() ? ", cmov" : ""), | |
372 (supports_cmpxchg8() ? ", cx8" : ""), | |
373 (supports_fxsr() ? ", fxsr" : ""), | |
374 (supports_mmx() ? ", mmx" : ""), | |
375 (supports_sse() ? ", sse" : ""), | |
376 (supports_sse2() ? ", sse2" : ""), | |
377 (supports_sse3() ? ", sse3" : ""), | |
378 (supports_ssse3()? ", ssse3": ""), | |
379 (supports_sse4_1() ? ", sse4.1" : ""), | |
380 (supports_sse4_2() ? ", sse4.2" : ""), | |
643
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
381 (supports_popcnt() ? ", popcnt" : ""), |
585 | 382 (supports_mmx_ext() ? ", mmxext" : ""), |
2479 | 383 (supports_3dnow_prefetch() ? ", 3dnowpref" : ""), |
775
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
384 (supports_lzcnt() ? ", lzcnt": ""), |
585 | 385 (supports_sse4a() ? ", sse4a": ""), |
386 (supports_ht() ? ", ht": "")); | |
387 _features_str = strdup(buf); | |
388 | |
389 // UseSSE is set to the smaller of what hardware supports and what | |
390 // the command line requires. I.e., you cannot set UseSSE to 2 on | |
391 // older Pentiums which do not support it. | |
392 if( UseSSE > 4 ) UseSSE=4; | |
393 if( UseSSE < 0 ) UseSSE=0; | |
394 if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support | |
395 UseSSE = MIN2((intx)3,UseSSE); | |
396 if( !supports_sse3() ) // Drop to 2 if no SSE3 support | |
397 UseSSE = MIN2((intx)2,UseSSE); | |
398 if( !supports_sse2() ) // Drop to 1 if no SSE2 support | |
399 UseSSE = MIN2((intx)1,UseSSE); | |
400 if( !supports_sse () ) // Drop to 0 if no SSE support | |
401 UseSSE = 0; | |
402 | |
403 // On new cpus instructions which update whole XMM register should be used | |
404 // to prevent partial register stall due to dependencies on high half. | |
405 // | |
406 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) | |
407 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) | |
408 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). | |
409 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). | |
410 | |
411 if( is_amd() ) { // AMD cpus specific settings | |
412 if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) { | |
413 // Use it on new AMD cpus starting from Opteron. | |
414 UseAddressNop = true; | |
415 } | |
416 if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) { | |
417 // Use it on new AMD cpus starting from Opteron. | |
418 UseNewLongLShift = true; | |
419 } | |
420 if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { | |
421 if( supports_sse4a() ) { | |
422 UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron | |
423 } else { | |
424 UseXmmLoadAndClearUpper = false; | |
425 } | |
426 } | |
427 if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { | |
428 if( supports_sse4a() ) { | |
429 UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h' | |
430 } else { | |
431 UseXmmRegToRegMoveAll = false; | |
432 } | |
433 } | |
434 if( FLAG_IS_DEFAULT(UseXmmI2F) ) { | |
435 if( supports_sse4a() ) { | |
436 UseXmmI2F = true; | |
437 } else { | |
438 UseXmmI2F = false; | |
439 } | |
440 } | |
441 if( FLAG_IS_DEFAULT(UseXmmI2D) ) { | |
442 if( supports_sse4a() ) { | |
443 UseXmmI2D = true; | |
444 } else { | |
445 UseXmmI2D = false; | |
446 } | |
447 } | |
2406 | 448 if( FLAG_IS_DEFAULT(UseSSE42Intrinsics) ) { |
449 if( supports_sse4_2() && UseSSE >= 4 ) { | |
450 UseSSE42Intrinsics = true; | |
451 } | |
452 } | |
775
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
453 |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
454 // Use count leading zeros count instruction if available. |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
455 if (supports_lzcnt()) { |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
456 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
457 UseCountLeadingZerosInstruction = true; |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
458 } |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
459 } |
2358 | 460 |
3276
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
461 // some defaults for AMD family 15h |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
462 if ( cpu_family() == 0x15 ) { |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
463 // On family 15h processors default is no sw prefetch |
2358 | 464 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { |
465 AllocatePrefetchStyle = 0; | |
466 } | |
3276
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
467 // Also, if some other prefetch style is specified, default instruction type is PREFETCHW |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
468 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
469 AllocatePrefetchInstr = 3; |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
470 } |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
471 // On family 15h processors use XMM and UnalignedLoadStores for Array Copy |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
472 if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) { |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
473 UseXMMForArrayCopy = true; |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
474 } |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
475 if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) { |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
476 UseUnalignedLoadStores = true; |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
477 } |
2358 | 478 } |
3276
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
479 |
585 | 480 } |
481 | |
482 if( is_intel() ) { // Intel cpus specific settings | |
483 if( FLAG_IS_DEFAULT(UseStoreImmI16) ) { | |
484 UseStoreImmI16 = false; // don't use it on Intel cpus | |
485 } | |
486 if( cpu_family() == 6 || cpu_family() == 15 ) { | |
487 if( FLAG_IS_DEFAULT(UseAddressNop) ) { | |
488 // Use it on all Intel cpus starting from PentiumPro | |
489 UseAddressNop = true; | |
490 } | |
491 } | |
492 if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { | |
493 UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus | |
494 } | |
495 if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { | |
496 if( supports_sse3() ) { | |
497 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus | |
498 } else { | |
499 UseXmmRegToRegMoveAll = false; | |
500 } | |
501 } | |
502 if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus | |
503 #ifdef COMPILER2 | |
504 if( FLAG_IS_DEFAULT(MaxLoopPad) ) { | |
505 // For new Intel cpus do the next optimization: | |
506 // don't align the beginning of a loop if there are enough instructions | |
507 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) | |
508 // in current fetch line (OptoLoopAlignment) or the padding | |
509 // is big (> MaxLoopPad). | |
510 // Set MaxLoopPad to 11 for new Intel cpus to reduce number of | |
511 // generated NOP instructions. 11 is the largest size of one | |
512 // address NOP instruction '0F 1F' (see Assembler::nop(i)). | |
513 MaxLoopPad = 11; | |
514 } | |
515 #endif // COMPILER2 | |
516 if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) { | |
517 UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus | |
518 } | |
519 if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus | |
520 if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) { | |
521 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus | |
522 } | |
523 } | |
681 | 524 if( supports_sse4_2() && UseSSE >= 4 ) { |
525 if( FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { | |
526 UseSSE42Intrinsics = true; | |
527 } | |
528 } | |
585 | 529 } |
530 } | |
531 | |
643
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
532 // Use population count instruction if available. |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
533 if (supports_popcnt()) { |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
534 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
535 UsePopCountInstruction = true; |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
536 } |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
537 } |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
538 |
1730
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
539 #ifdef COMPILER2 |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
540 if (UseFPUForSpilling) { |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
541 if (UseSSE < 2) { |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
542 // Only supported with SSE2+ |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
543 FLAG_SET_DEFAULT(UseFPUForSpilling, false); |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
544 } |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
545 } |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
546 #endif |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
547 |
585 | 548 assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); |
549 assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); | |
550 | |
551 // set valid Prefetch instruction | |
552 if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0; | |
553 if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3; | |
2479 | 554 if( ReadPrefetchInstr == 3 && !supports_3dnow_prefetch() ) ReadPrefetchInstr = 0; |
555 if( !supports_sse() && supports_3dnow_prefetch() ) ReadPrefetchInstr = 3; | |
585 | 556 |
557 if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0; | |
558 if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3; | |
2479 | 559 if( AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch() ) AllocatePrefetchInstr=0; |
560 if( !supports_sse() && supports_3dnow_prefetch() ) AllocatePrefetchInstr = 3; | |
585 | 561 |
562 // Allocation prefetch settings | |
3854 | 563 intx cache_line_size = prefetch_data_size(); |
585 | 564 if( cache_line_size > AllocatePrefetchStepSize ) |
565 AllocatePrefetchStepSize = cache_line_size; | |
3854 | 566 |
585 | 567 assert(AllocatePrefetchLines > 0, "invalid value"); |
3854 | 568 if( AllocatePrefetchLines < 1 ) // set valid value in product VM |
569 AllocatePrefetchLines = 3; | |
570 assert(AllocateInstancePrefetchLines > 0, "invalid value"); | |
571 if( AllocateInstancePrefetchLines < 1 ) // set valid value in product VM | |
572 AllocateInstancePrefetchLines = 1; | |
585 | 573 |
574 AllocatePrefetchDistance = allocate_prefetch_distance(); | |
575 AllocatePrefetchStyle = allocate_prefetch_style(); | |
576 | |
1622 | 577 if( is_intel() && cpu_family() == 6 && supports_sse3() ) { |
578 if( AllocatePrefetchStyle == 2 ) { // watermark prefetching on Core | |
585 | 579 #ifdef _LP64 |
1622 | 580 AllocatePrefetchDistance = 384; |
585 | 581 #else |
1622 | 582 AllocatePrefetchDistance = 320; |
585 | 583 #endif |
1622 | 584 } |
585 if( supports_sse4_2() && supports_ht() ) { // Nehalem based cpus | |
586 AllocatePrefetchDistance = 192; | |
587 AllocatePrefetchLines = 4; | |
1730
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
588 #ifdef COMPILER2 |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
589 if (AggressiveOpts && FLAG_IS_DEFAULT(UseFPUForSpilling)) { |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
590 FLAG_SET_DEFAULT(UseFPUForSpilling, true); |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
591 } |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
592 #endif |
1622 | 593 } |
585 | 594 } |
595 assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value"); | |
596 | |
597 #ifdef _LP64 | |
598 // Prefetch settings | |
599 PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes(); | |
600 PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes(); | |
601 PrefetchFieldsAhead = prefetch_fields_ahead(); | |
602 #endif | |
603 | |
604 #ifndef PRODUCT | |
605 if (PrintMiscellaneous && Verbose) { | |
606 tty->print_cr("Logical CPUs per core: %u", | |
607 logical_processors_per_package()); | |
608 tty->print_cr("UseSSE=%d",UseSSE); | |
3854 | 609 tty->print("Allocation"); |
2479 | 610 if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) { |
3854 | 611 tty->print_cr(": no prefetching"); |
585 | 612 } else { |
3854 | 613 tty->print(" prefetching: "); |
2479 | 614 if (UseSSE == 0 && supports_3dnow_prefetch()) { |
585 | 615 tty->print("PREFETCHW"); |
616 } else if (UseSSE >= 1) { | |
617 if (AllocatePrefetchInstr == 0) { | |
618 tty->print("PREFETCHNTA"); | |
619 } else if (AllocatePrefetchInstr == 1) { | |
620 tty->print("PREFETCHT0"); | |
621 } else if (AllocatePrefetchInstr == 2) { | |
622 tty->print("PREFETCHT2"); | |
623 } else if (AllocatePrefetchInstr == 3) { | |
624 tty->print("PREFETCHW"); | |
625 } | |
626 } | |
627 if (AllocatePrefetchLines > 1) { | |
3854 | 628 tty->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); |
585 | 629 } else { |
3854 | 630 tty->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize); |
585 | 631 } |
632 } | |
633 | |
634 if (PrefetchCopyIntervalInBytes > 0) { | |
635 tty->print_cr("PrefetchCopyIntervalInBytes %d", PrefetchCopyIntervalInBytes); | |
636 } | |
637 if (PrefetchScanIntervalInBytes > 0) { | |
638 tty->print_cr("PrefetchScanIntervalInBytes %d", PrefetchScanIntervalInBytes); | |
639 } | |
640 if (PrefetchFieldsAhead > 0) { | |
641 tty->print_cr("PrefetchFieldsAhead %d", PrefetchFieldsAhead); | |
642 } | |
643 } | |
644 #endif // !PRODUCT | |
645 } | |
646 | |
647 void VM_Version::initialize() { | |
648 ResourceMark rm; | |
649 // Making this stub must be FIRST use of assembler | |
650 | |
651 stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size); | |
652 if (stub_blob == NULL) { | |
653 vm_exit_during_initialization("Unable to allocate getPsrInfo_stub"); | |
654 } | |
1748 | 655 CodeBuffer c(stub_blob); |
585 | 656 VM_Version_StubGenerator g(&c); |
657 getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t, | |
658 g.generate_getPsrInfo()); | |
659 | |
660 get_processor_features(); | |
661 } |