Mercurial > hg > truffle
annotate src/cpu/x86/vm/vm_version_x86.cpp @ 20304:a22acf6d7598
8048112: G1 Full GC needs to support the case when the very first region is not available
Summary: Refactor preparation for compaction during Full GC so that it lazily initializes the first compaction point. This also avoids problems later when the first region may not be committed. Also reviewed by K. Barrett.
Reviewed-by: brutisso
author | tschatzl |
---|---|
date | Mon, 21 Jul 2014 10:00:31 +0200 |
parents | 78bbf4d43a14 |
children | 52b4284cb496 b1bc1af04c6e |
rev | line source |
---|---|
585 | 1 /* |
17937
78bbf4d43a14
8037816: Fix for 8036122 breaks build with Xcode5/clang
drchase
parents:
17913
diff
changeset
|
2 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. |
585 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1060
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1060
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1060
diff
changeset
|
21 * questions. |
585 | 22 * |
23 */ | |
24 | |
1972 | 25 #include "precompiled.hpp" |
7199
cd3d6a6b95d9
8003240: x86: move MacroAssembler into separate file
twisti
parents:
6943
diff
changeset
|
26 #include "asm/macroAssembler.hpp" |
cd3d6a6b95d9
8003240: x86: move MacroAssembler into separate file
twisti
parents:
6943
diff
changeset
|
27 #include "asm/macroAssembler.inline.hpp" |
1972 | 28 #include "memory/resourceArea.hpp" |
29 #include "runtime/java.hpp" | |
30 #include "runtime/stubCodeGenerator.hpp" | |
31 #include "vm_version_x86.hpp" | |
32 #ifdef TARGET_OS_FAMILY_linux | |
33 # include "os_linux.inline.hpp" | |
34 #endif | |
35 #ifdef TARGET_OS_FAMILY_solaris | |
36 # include "os_solaris.inline.hpp" | |
37 #endif | |
38 #ifdef TARGET_OS_FAMILY_windows | |
39 # include "os_windows.inline.hpp" | |
40 #endif | |
3960 | 41 #ifdef TARGET_OS_FAMILY_bsd |
42 # include "os_bsd.inline.hpp" | |
43 #endif | |
585 | 44 |
45 | |
46 int VM_Version::_cpu; | |
47 int VM_Version::_model; | |
48 int VM_Version::_stepping; | |
49 int VM_Version::_cpuFeatures; | |
50 const char* VM_Version::_features_str = ""; | |
51 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; | |
52 | |
17739
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
53 // Address of instruction which causes SEGV |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
54 address VM_Version::_cpuinfo_segv_addr = 0; |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
55 // Address of instruction after the one which causes SEGV |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
56 address VM_Version::_cpuinfo_cont_addr = 0; |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
57 |
585 | 58 static BufferBlob* stub_blob; |
17739
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
59 static const int stub_size = 600; |
585 | 60 |
61 extern "C" { | |
17829
0118c8c7b80f
8038633: crash in VM_Version::get_processor_features() on startup
kvn
parents:
17780
diff
changeset
|
62 typedef void (*get_cpu_info_stub_t)(void*); |
585 | 63 } |
17829
0118c8c7b80f
8038633: crash in VM_Version::get_processor_features() on startup
kvn
parents:
17780
diff
changeset
|
64 static get_cpu_info_stub_t get_cpu_info_stub = NULL; |
585 | 65 |
66 | |
67 class VM_Version_StubGenerator: public StubCodeGenerator { | |
68 public: | |
69 | |
70 VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} | |
71 | |
17829
0118c8c7b80f
8038633: crash in VM_Version::get_processor_features() on startup
kvn
parents:
17780
diff
changeset
|
72 address generate_get_cpu_info() { |
585 | 73 // Flags to test CPU type. |
4961
0368109684cb
7132070: Use a mach_port_t as the OSThread thread_id rather than pthread_t on BSD/OSX
sla
parents:
4771
diff
changeset
|
74 const uint32_t HS_EFL_AC = 0x40000; |
0368109684cb
7132070: Use a mach_port_t as the OSThread thread_id rather than pthread_t on BSD/OSX
sla
parents:
4771
diff
changeset
|
75 const uint32_t HS_EFL_ID = 0x200000; |
585 | 76 // Values for when we don't have a CPUID instruction. |
77 const int CPU_FAMILY_SHIFT = 8; | |
78 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); | |
79 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); | |
80 | |
1622 | 81 Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4; |
4771 | 82 Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, done; |
585 | 83 |
17829
0118c8c7b80f
8038633: crash in VM_Version::get_processor_features() on startup
kvn
parents:
17780
diff
changeset
|
84 StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); |
585 | 85 # define __ _masm-> |
86 | |
87 address start = __ pc(); | |
88 | |
89 // | |
17829
0118c8c7b80f
8038633: crash in VM_Version::get_processor_features() on startup
kvn
parents:
17780
diff
changeset
|
90 // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info); |
585 | 91 // |
92 // LP64: rcx and rdx are first and second argument registers on windows | |
93 | |
94 __ push(rbp); | |
95 #ifdef _LP64 | |
96 __ mov(rbp, c_rarg0); // cpuid_info address | |
97 #else | |
98 __ movptr(rbp, Address(rsp, 8)); // cpuid_info address | |
99 #endif | |
100 __ push(rbx); | |
101 __ push(rsi); | |
102 __ pushf(); // preserve rbx, rsi, and flags | |
103 __ pop(rax); | |
104 __ push(rax); | |
105 __ mov(rcx, rax); | |
106 // | |
107 // if we are unable to change the AC flag, we have a 386 | |
108 // | |
4961
0368109684cb
7132070: Use a mach_port_t as the OSThread thread_id rather than pthread_t on BSD/OSX
sla
parents:
4771
diff
changeset
|
109 __ xorl(rax, HS_EFL_AC); |
585 | 110 __ push(rax); |
111 __ popf(); | |
112 __ pushf(); | |
113 __ pop(rax); | |
114 __ cmpptr(rax, rcx); | |
115 __ jccb(Assembler::notEqual, detect_486); | |
116 | |
117 __ movl(rax, CPU_FAMILY_386); | |
118 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); | |
119 __ jmp(done); | |
120 | |
121 // | |
122 // If we are unable to change the ID flag, we have a 486 which does | |
123 // not support the "cpuid" instruction. | |
124 // | |
125 __ bind(detect_486); | |
126 __ mov(rax, rcx); | |
4961
0368109684cb
7132070: Use a mach_port_t as the OSThread thread_id rather than pthread_t on BSD/OSX
sla
parents:
4771
diff
changeset
|
127 __ xorl(rax, HS_EFL_ID); |
585 | 128 __ push(rax); |
129 __ popf(); | |
130 __ pushf(); | |
131 __ pop(rax); | |
132 __ cmpptr(rcx, rax); | |
133 __ jccb(Assembler::notEqual, detect_586); | |
134 | |
135 __ bind(cpu486); | |
136 __ movl(rax, CPU_FAMILY_486); | |
137 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); | |
138 __ jmp(done); | |
139 | |
140 // | |
141 // At this point, we have a chip which supports the "cpuid" instruction | |
142 // | |
143 __ bind(detect_586); | |
144 __ xorl(rax, rax); | |
145 __ cpuid(); | |
146 __ orl(rax, rax); | |
147 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input | |
148 // value of at least 1, we give up and | |
149 // assume a 486 | |
150 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); | |
151 __ movl(Address(rsi, 0), rax); | |
152 __ movl(Address(rsi, 4), rbx); | |
153 __ movl(Address(rsi, 8), rcx); | |
154 __ movl(Address(rsi,12), rdx); | |
155 | |
1622 | 156 __ cmpl(rax, 0xa); // Is cpuid(0xB) supported? |
157 __ jccb(Assembler::belowEqual, std_cpuid4); | |
158 | |
159 // | |
160 // cpuid(0xB) Processor Topology | |
161 // | |
162 __ movl(rax, 0xb); | |
163 __ xorl(rcx, rcx); // Threads level | |
164 __ cpuid(); | |
165 | |
166 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset()))); | |
167 __ movl(Address(rsi, 0), rax); | |
168 __ movl(Address(rsi, 4), rbx); | |
169 __ movl(Address(rsi, 8), rcx); | |
170 __ movl(Address(rsi,12), rdx); | |
171 | |
172 __ movl(rax, 0xb); | |
173 __ movl(rcx, 1); // Cores level | |
174 __ cpuid(); | |
175 __ push(rax); | |
176 __ andl(rax, 0x1f); // Determine if valid topology level | |
177 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level | |
178 __ andl(rax, 0xffff); | |
179 __ pop(rax); | |
180 __ jccb(Assembler::equal, std_cpuid4); | |
181 | |
182 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset()))); | |
183 __ movl(Address(rsi, 0), rax); | |
184 __ movl(Address(rsi, 4), rbx); | |
185 __ movl(Address(rsi, 8), rcx); | |
186 __ movl(Address(rsi,12), rdx); | |
187 | |
188 __ movl(rax, 0xb); | |
189 __ movl(rcx, 2); // Packages level | |
190 __ cpuid(); | |
191 __ push(rax); | |
192 __ andl(rax, 0x1f); // Determine if valid topology level | |
193 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level | |
194 __ andl(rax, 0xffff); | |
195 __ pop(rax); | |
196 __ jccb(Assembler::equal, std_cpuid4); | |
197 | |
198 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset()))); | |
199 __ movl(Address(rsi, 0), rax); | |
200 __ movl(Address(rsi, 4), rbx); | |
201 __ movl(Address(rsi, 8), rcx); | |
202 __ movl(Address(rsi,12), rdx); | |
585 | 203 |
204 // | |
205 // cpuid(0x4) Deterministic cache params | |
206 // | |
1622 | 207 __ bind(std_cpuid4); |
585 | 208 __ movl(rax, 4); |
1622 | 209 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported? |
210 __ jccb(Assembler::greater, std_cpuid1); | |
211 | |
585 | 212 __ xorl(rcx, rcx); // L1 cache |
213 __ cpuid(); | |
214 __ push(rax); | |
215 __ andl(rax, 0x1f); // Determine if valid cache parameters used | |
216 __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache | |
217 __ pop(rax); | |
218 __ jccb(Assembler::equal, std_cpuid1); | |
219 | |
220 __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset()))); | |
221 __ movl(Address(rsi, 0), rax); | |
222 __ movl(Address(rsi, 4), rbx); | |
223 __ movl(Address(rsi, 8), rcx); | |
224 __ movl(Address(rsi,12), rdx); | |
225 | |
226 // | |
227 // Standard cpuid(0x1) | |
228 // | |
229 __ bind(std_cpuid1); | |
230 __ movl(rax, 1); | |
231 __ cpuid(); | |
232 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); | |
233 __ movl(Address(rsi, 0), rax); | |
234 __ movl(Address(rsi, 4), rbx); | |
235 __ movl(Address(rsi, 8), rcx); | |
236 __ movl(Address(rsi,12), rdx); | |
237 | |
4759 | 238 // |
239 // Check if OS has enabled XGETBV instruction to access XCR0 | |
240 // (OSXSAVE feature flag) and CPU supports AVX | |
241 // | |
17739
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
242 __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx |
4759 | 243 __ cmpl(rcx, 0x18000000); |
17739
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
244 __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported |
4759 | 245 |
246 // | |
247 // XCR0, XFEATURE_ENABLED_MASK register | |
248 // | |
249 __ xorl(rcx, rcx); // zero for XCR0 register | |
250 __ xgetbv(); | |
251 __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); | |
252 __ movl(Address(rsi, 0), rax); | |
253 __ movl(Address(rsi, 4), rdx); | |
254 | |
17739
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
255 __ andl(rax, 0x6); // xcr0 bits sse | ymm |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
256 __ cmpl(rax, 0x6); |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
257 __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
258 |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
259 // |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
260 // Some OSes have a bug where the upper 128 bits of YMM |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
261 // registers are not restored after signal processing. |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
262 // Generate SEGV here (reference through NULL) |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
263 // and check upper YMM bits after it. |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
264 // |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
265 VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts |
17913 | 266 intx saved_useavx = UseAVX; |
267 intx saved_usesse = UseSSE; | |
268 UseAVX = 1; | |
269 UseSSE = 2; | |
17739
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
270 |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
271 // load value into all 32 bytes of ymm7 register |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
272 __ movl(rcx, VM_Version::ymm_test_value()); |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
273 |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
274 __ movdl(xmm0, rcx); |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
275 __ pshufd(xmm0, xmm0, 0x00); |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
276 __ vinsertf128h(xmm0, xmm0, xmm0); |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
277 __ vmovdqu(xmm7, xmm0); |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
278 #ifdef _LP64 |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
279 __ vmovdqu(xmm8, xmm0); |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
280 __ vmovdqu(xmm15, xmm0); |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
281 #endif |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
282 |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
283 __ xorl(rsi, rsi); |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
284 VM_Version::set_cpuinfo_segv_addr( __ pc() ); |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
285 // Generate SEGV |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
286 __ movl(rax, Address(rsi, 0)); |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
287 |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
288 VM_Version::set_cpuinfo_cont_addr( __ pc() ); |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
289 // Returns here after signal. Save xmm0, xmm7 (plus xmm8 and xmm15 on LP64) to check them later. |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
290 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset()))); |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
291 __ vmovdqu(Address(rsi, 0), xmm0); |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
292 __ vmovdqu(Address(rsi, 32), xmm7); |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
293 #ifdef _LP64 |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
294 __ vmovdqu(Address(rsi, 64), xmm8); |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
295 __ vmovdqu(Address(rsi, 96), xmm15); |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
296 #endif |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
297 |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
298 VM_Version::clean_cpuFeatures(); |
17913 | 299 UseAVX = saved_useavx; |
300 UseSSE = saved_usesse; | |
17739
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
301 |
4759 | 302 // |
303 // cpuid(0x7) Structured Extended Features | |
304 // | |
305 __ bind(sef_cpuid); | |
306 __ movl(rax, 7); | |
307 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported? | |
308 __ jccb(Assembler::greater, ext_cpuid); | |
309 | |
310 __ xorl(rcx, rcx); | |
311 __ cpuid(); | |
312 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); | |
313 __ movl(Address(rsi, 0), rax); | |
314 __ movl(Address(rsi, 4), rbx); | |
315 | |
316 // | |
317 // Extended cpuid(0x80000000) | |
318 // | |
319 __ bind(ext_cpuid); | |
585 | 320 __ movl(rax, 0x80000000); |
321 __ cpuid(); | |
322 __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? | |
323 __ jcc(Assembler::belowEqual, done); | |
324 __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? | |
325 __ jccb(Assembler::belowEqual, ext_cpuid1); | |
4749
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
326 __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported? |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
327 __ jccb(Assembler::belowEqual, ext_cpuid5); |
585 | 328 __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? |
4749
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
329 __ jccb(Assembler::belowEqual, ext_cpuid7); |
585 | 330 // |
331 // Extended cpuid(0x80000008) | |
332 // | |
333 __ movl(rax, 0x80000008); | |
334 __ cpuid(); | |
335 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset()))); | |
336 __ movl(Address(rsi, 0), rax); | |
337 __ movl(Address(rsi, 4), rbx); | |
338 __ movl(Address(rsi, 8), rcx); | |
339 __ movl(Address(rsi,12), rdx); | |
340 | |
341 // | |
4749
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
342 // Extended cpuid(0x80000007) |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
343 // |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
344 __ bind(ext_cpuid7); |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
345 __ movl(rax, 0x80000007); |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
346 __ cpuid(); |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
347 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset()))); |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
348 __ movl(Address(rsi, 0), rax); |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
349 __ movl(Address(rsi, 4), rbx); |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
350 __ movl(Address(rsi, 8), rcx); |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
351 __ movl(Address(rsi,12), rdx); |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
352 |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
353 // |
585 | 354 // Extended cpuid(0x80000005) |
355 // | |
356 __ bind(ext_cpuid5); | |
357 __ movl(rax, 0x80000005); | |
358 __ cpuid(); | |
359 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset()))); | |
360 __ movl(Address(rsi, 0), rax); | |
361 __ movl(Address(rsi, 4), rbx); | |
362 __ movl(Address(rsi, 8), rcx); | |
363 __ movl(Address(rsi,12), rdx); | |
364 | |
365 // | |
366 // Extended cpuid(0x80000001) | |
367 // | |
368 __ bind(ext_cpuid1); | |
369 __ movl(rax, 0x80000001); | |
370 __ cpuid(); | |
371 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset()))); | |
372 __ movl(Address(rsi, 0), rax); | |
373 __ movl(Address(rsi, 4), rbx); | |
374 __ movl(Address(rsi, 8), rcx); | |
375 __ movl(Address(rsi,12), rdx); | |
376 | |
377 // | |
378 // return | |
379 // | |
380 __ bind(done); | |
381 __ popf(); | |
382 __ pop(rsi); | |
383 __ pop(rbx); | |
384 __ pop(rbp); | |
385 __ ret(0); | |
386 | |
387 # undef __ | |
388 | |
389 return start; | |
390 }; | |
391 }; | |
392 | |
393 | |
17829
0118c8c7b80f
8038633: crash in VM_Version::get_processor_features() on startup
kvn
parents:
17780
diff
changeset
|
394 void VM_Version::get_cpu_info_wrapper() { |
0118c8c7b80f
8038633: crash in VM_Version::get_processor_features() on startup
kvn
parents:
17780
diff
changeset
|
395 get_cpu_info_stub(&_cpuid_info); |
0118c8c7b80f
8038633: crash in VM_Version::get_processor_features() on startup
kvn
parents:
17780
diff
changeset
|
396 } |
0118c8c7b80f
8038633: crash in VM_Version::get_processor_features() on startup
kvn
parents:
17780
diff
changeset
|
397 |
0118c8c7b80f
8038633: crash in VM_Version::get_processor_features() on startup
kvn
parents:
17780
diff
changeset
|
398 #ifndef CALL_TEST_FUNC_WITH_WRAPPER_IF_NEEDED |
0118c8c7b80f
8038633: crash in VM_Version::get_processor_features() on startup
kvn
parents:
17780
diff
changeset
|
399 #define CALL_TEST_FUNC_WITH_WRAPPER_IF_NEEDED(f) f() |
0118c8c7b80f
8038633: crash in VM_Version::get_processor_features() on startup
kvn
parents:
17780
diff
changeset
|
400 #endif |
0118c8c7b80f
8038633: crash in VM_Version::get_processor_features() on startup
kvn
parents:
17780
diff
changeset
|
401 |
585 | 402 void VM_Version::get_processor_features() { |
403 | |
404 _cpu = 4; // 486 by default | |
405 _model = 0; | |
406 _stepping = 0; | |
407 _cpuFeatures = 0; | |
408 _logical_processors_per_package = 1; | |
409 | |
410 if (!Use486InstrsOnly) { | |
411 // Get raw processor info | |
17829
0118c8c7b80f
8038633: crash in VM_Version::get_processor_features() on startup
kvn
parents:
17780
diff
changeset
|
412 |
0118c8c7b80f
8038633: crash in VM_Version::get_processor_features() on startup
kvn
parents:
17780
diff
changeset
|
413 // Some platforms (like Win*) need a wrapper around here |
0118c8c7b80f
8038633: crash in VM_Version::get_processor_features() on startup
kvn
parents:
17780
diff
changeset
|
414 // in order to properly handle SEGV for YMM registers test. |
0118c8c7b80f
8038633: crash in VM_Version::get_processor_features() on startup
kvn
parents:
17780
diff
changeset
|
415 CALL_TEST_FUNC_WITH_WRAPPER_IF_NEEDED(get_cpu_info_wrapper); |
0118c8c7b80f
8038633: crash in VM_Version::get_processor_features() on startup
kvn
parents:
17780
diff
changeset
|
416 |
585 | 417 assert_is_initialized(); |
418 _cpu = extended_cpu_family(); | |
419 _model = extended_cpu_model(); | |
420 _stepping = cpu_stepping(); | |
421 | |
422 if (cpu_family() > 4) { // it supports CPUID | |
423 _cpuFeatures = feature_flags(); | |
424 // Logical processors are only available on P4s and above, | |
425 // and only if hyperthreading is available. | |
426 _logical_processors_per_package = logical_processor_count(); | |
427 } | |
428 } | |
429 | |
430 _supports_cx8 = supports_cmpxchg8(); | |
6795
7eca5de9e0b6
7023898: Intrinsify AtomicLongFieldUpdater.getAndIncrement()
roland
parents:
6794
diff
changeset
|
431 // xchg and xadd instructions |
7eca5de9e0b6
7023898: Intrinsify AtomicLongFieldUpdater.getAndIncrement()
roland
parents:
6794
diff
changeset
|
432 _supports_atomic_getset4 = true; |
7eca5de9e0b6
7023898: Intrinsify AtomicLongFieldUpdater.getAndIncrement()
roland
parents:
6794
diff
changeset
|
433 _supports_atomic_getadd4 = true; |
7eca5de9e0b6
7023898: Intrinsify AtomicLongFieldUpdater.getAndIncrement()
roland
parents:
6794
diff
changeset
|
434 LP64_ONLY(_supports_atomic_getset8 = true); |
7eca5de9e0b6
7023898: Intrinsify AtomicLongFieldUpdater.getAndIncrement()
roland
parents:
6794
diff
changeset
|
435 LP64_ONLY(_supports_atomic_getadd8 = true); |
585 | 436 |
437 #ifdef _LP64 | |
438 // OS should support SSE for x64 and hardware should support at least SSE2. | |
439 if (!VM_Version::supports_sse2()) { | |
440 vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported"); | |
441 } | |
1060 | 442 // in 64 bit the use of SSE2 is the minimum |
443 if (UseSSE < 2) UseSSE = 2; | |
585 | 444 #endif |
445 | |
3787
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
446 #ifdef AMD64 |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
447 // flush_icache_stub has to be generated first. |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
448 // That is why Icache line size is hard coded in ICache class, |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
449 // see icache_x86.hpp. It is also the reason why we can't use |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
450 // clflush instruction in 32-bit VM since it could be running |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
451 // on CPU which does not support it. |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
452 // |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
453 // The only thing we can do is to verify that flushed |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
454 // ICache::line_size has correct value. |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
455 guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported"); |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
456 // clflush_size is size in quadwords (8 bytes). |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
457 guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported"); |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
458 #endif |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
459 |
585 | 460 // If the OS doesn't support SSE, we can't use this feature even if the HW does |
461 if (!os::supports_sse()) | |
462 _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2); | |
463 | |
464 if (UseSSE < 4) { | |
465 _cpuFeatures &= ~CPU_SSE4_1; | |
466 _cpuFeatures &= ~CPU_SSE4_2; | |
467 } | |
468 | |
469 if (UseSSE < 3) { | |
470 _cpuFeatures &= ~CPU_SSE3; | |
471 _cpuFeatures &= ~CPU_SSSE3; | |
472 _cpuFeatures &= ~CPU_SSE4A; | |
473 } | |
474 | |
475 if (UseSSE < 2) | |
476 _cpuFeatures &= ~CPU_SSE2; | |
477 | |
478 if (UseSSE < 1) | |
479 _cpuFeatures &= ~CPU_SSE; | |
480 | |
4759 | 481 if (UseAVX < 2) |
482 _cpuFeatures &= ~CPU_AVX2; | |
483 | |
484 if (UseAVX < 1) | |
485 _cpuFeatures &= ~CPU_AVX; | |
486 | |
6894 | 487 if (!UseAES && !FLAG_IS_DEFAULT(UseAES)) |
488 _cpuFeatures &= ~CPU_AES; | |
489 | |
585 | 490 if (logical_processors_per_package() == 1) { |
491 // HT processor could be installed on a system which doesn't support HT. | |
492 _cpuFeatures &= ~CPU_HT; | |
493 } | |
494 | |
495 char buf[256]; | |
17780 | 496 jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", |
585 | 497 cores_per_cpu(), threads_per_core(), |
498 cpu_family(), _model, _stepping, | |
499 (supports_cmov() ? ", cmov" : ""), | |
500 (supports_cmpxchg8() ? ", cx8" : ""), | |
501 (supports_fxsr() ? ", fxsr" : ""), | |
502 (supports_mmx() ? ", mmx" : ""), | |
503 (supports_sse() ? ", sse" : ""), | |
504 (supports_sse2() ? ", sse2" : ""), | |
505 (supports_sse3() ? ", sse3" : ""), | |
506 (supports_ssse3()? ", ssse3": ""), | |
507 (supports_sse4_1() ? ", sse4.1" : ""), | |
508 (supports_sse4_2() ? ", sse4.2" : ""), | |
643
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
509 (supports_popcnt() ? ", popcnt" : ""), |
4759 | 510 (supports_avx() ? ", avx" : ""), |
511 (supports_avx2() ? ", avx2" : ""), | |
6894 | 512 (supports_aes() ? ", aes" : ""), |
17780 | 513 (supports_clmul() ? ", clmul" : ""), |
7474
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7427
diff
changeset
|
514 (supports_erms() ? ", erms" : ""), |
17780 | 515 (supports_rtm() ? ", rtm" : ""), |
585 | 516 (supports_mmx_ext() ? ", mmxext" : ""), |
2479 | 517 (supports_3dnow_prefetch() ? ", 3dnowpref" : ""), |
775
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
518 (supports_lzcnt() ? ", lzcnt": ""), |
585 | 519 (supports_sse4a() ? ", sse4a": ""), |
4749
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
520 (supports_ht() ? ", ht": ""), |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
521 (supports_tsc() ? ", tsc": ""), |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
522 (supports_tscinv_bit() ? ", tscinvbit": ""), |
17729
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
523 (supports_tscinv() ? ", tscinv": ""), |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
524 (supports_bmi1() ? ", bmi1" : ""), |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
525 (supports_bmi2() ? ", bmi2" : "")); |
585 | 526 _features_str = strdup(buf); |
527 | |
528 // UseSSE is set to the smaller of what hardware supports and what | |
529 // the command line requires. I.e., you cannot set UseSSE to 2 on | |
530 // older Pentiums which do not support it. | |
4759 | 531 if (UseSSE > 4) UseSSE=4; |
532 if (UseSSE < 0) UseSSE=0; | |
533 if (!supports_sse4_1()) // Drop to 3 if no SSE4 support | |
585 | 534 UseSSE = MIN2((intx)3,UseSSE); |
4759 | 535 if (!supports_sse3()) // Drop to 2 if no SSE3 support |
585 | 536 UseSSE = MIN2((intx)2,UseSSE); |
4759 | 537 if (!supports_sse2()) // Drop to 1 if no SSE2 support |
585 | 538 UseSSE = MIN2((intx)1,UseSSE); |
4759 | 539 if (!supports_sse ()) // Drop to 0 if no SSE support |
585 | 540 UseSSE = 0; |
541 | |
4759 | 542 if (UseAVX > 2) UseAVX=2; |
543 if (UseAVX < 0) UseAVX=0; | |
544 if (!supports_avx2()) // Drop to 1 if no AVX2 support | |
545 UseAVX = MIN2((intx)1,UseAVX); | |
546 if (!supports_avx ()) // Drop to 0 if no AVX support | |
547 UseAVX = 0; | |
548 | |
6894 | 549 // Use AES instructions if available. |
550 if (supports_aes()) { | |
551 if (FLAG_IS_DEFAULT(UseAES)) { | |
552 UseAES = true; | |
553 } | |
554 } else if (UseAES) { | |
555 if (!FLAG_IS_DEFAULT(UseAES)) | |
17780 | 556 warning("AES instructions are not available on this CPU"); |
6894 | 557 FLAG_SET_DEFAULT(UseAES, false); |
558 } | |
559 | |
11080
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
560 // Use CLMUL instructions if available. |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
561 if (supports_clmul()) { |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
562 if (FLAG_IS_DEFAULT(UseCLMUL)) { |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
563 UseCLMUL = true; |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
564 } |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
565 } else if (UseCLMUL) { |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
566 if (!FLAG_IS_DEFAULT(UseCLMUL)) |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
567 warning("CLMUL instructions not available on this CPU (AVX may also be required)"); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
568 FLAG_SET_DEFAULT(UseCLMUL, false); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
569 } |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
570 |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
571 if (UseCLMUL && (UseAVX > 0) && (UseSSE > 2)) { |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
572 if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
573 UseCRC32Intrinsics = true; |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
574 } |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
575 } else if (UseCRC32Intrinsics) { |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
576 if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
577 warning("CRC32 Intrinsics requires AVX and CLMUL instructions (not available on this CPU)"); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
578 FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
579 } |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
580 |
6894 | 581 // The AES intrinsic stubs require AES instruction support (of course) |
7427 | 582 // but also require sse3 mode for the instructions they use. |
583 if (UseAES && (UseSSE > 2)) { | |
6894 | 584 if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { |
585 UseAESIntrinsics = true; | |
586 } | |
587 } else if (UseAESIntrinsics) { | |
588 if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) | |
17780 | 589 warning("AES intrinsics are not available on this CPU"); |
6894 | 590 FLAG_SET_DEFAULT(UseAESIntrinsics, false); |
591 } | |
592 | |
17780 | 593 // Adjust RTM (Restricted Transactional Memory) flags |
594 if (!supports_rtm() && UseRTMLocking) { | |
595 // Can't continue because UseRTMLocking affects UseBiasedLocking flag | |
596 // setting during arguments processing. See use_biased_locking(). | |
597 // VM_Version_init() is executed after UseBiasedLocking is used | |
598 // in Thread::allocate(). | |
599 vm_exit_during_initialization("RTM instructions are not available on this CPU"); | |
600 } | |
601 | |
602 #if INCLUDE_RTM_OPT | |
603 if (UseRTMLocking) { | |
604 if (!FLAG_IS_CMDLINE(UseRTMLocking)) { | |
605 // RTM locking should be used only for applications with | |
606 // high lock contention. For now we do not use it by default. | |
607 vm_exit_during_initialization("UseRTMLocking flag should be only set on command line"); | |
608 } | |
609 if (!is_power_of_2(RTMTotalCountIncrRate)) { | |
610 warning("RTMTotalCountIncrRate must be a power of 2, resetting it to 64"); | |
611 FLAG_SET_DEFAULT(RTMTotalCountIncrRate, 64); | |
612 } | |
613 if (RTMAbortRatio < 0 || RTMAbortRatio > 100) { | |
614 warning("RTMAbortRatio must be in the range 0 to 100, resetting it to 50"); | |
615 FLAG_SET_DEFAULT(RTMAbortRatio, 50); | |
616 } | |
617 } else { // !UseRTMLocking | |
618 if (UseRTMForStackLocks) { | |
619 if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) { | |
620 warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off"); | |
621 } | |
622 FLAG_SET_DEFAULT(UseRTMForStackLocks, false); | |
623 } | |
624 if (UseRTMDeopt) { | |
625 FLAG_SET_DEFAULT(UseRTMDeopt, false); | |
626 } | |
627 if (PrintPreciseRTMLockingStatistics) { | |
628 FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false); | |
629 } | |
630 } | |
631 #else | |
632 if (UseRTMLocking) { | |
633 // Only C2 does RTM locking optimization. | |
634 // Can't continue because UseRTMLocking affects UseBiasedLocking flag | |
635 // setting during arguments processing. See use_biased_locking(). | |
636 vm_exit_during_initialization("RTM locking optimization is not supported in this VM"); | |
637 } | |
638 #endif | |
639 | |
6179
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
640 #ifdef COMPILER2 |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
641 if (UseFPUForSpilling) { |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
642 if (UseSSE < 2) { |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
643 // Only supported with SSE2+ |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
644 FLAG_SET_DEFAULT(UseFPUForSpilling, false); |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
645 } |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
646 } |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
647 if (MaxVectorSize > 0) { |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
648 if (!is_power_of_2(MaxVectorSize)) { |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
649 warning("MaxVectorSize must be a power of 2"); |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
650 FLAG_SET_DEFAULT(MaxVectorSize, 32); |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
651 } |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
652 if (MaxVectorSize > 32) { |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
653 FLAG_SET_DEFAULT(MaxVectorSize, 32); |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
654 } |
17739
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
655 if (MaxVectorSize > 16 && (UseAVX == 0 || !os_supports_avx_vectors())) { |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
656 // 32 bytes vectors (in YMM) are only supported with AVX+ |
6179
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
657 FLAG_SET_DEFAULT(MaxVectorSize, 16); |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
658 } |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
659 if (UseSSE < 2) { |
17739
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
660 // Vectors (in XMM) are only supported with SSE2+ |
6179
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
661 FLAG_SET_DEFAULT(MaxVectorSize, 0); |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
662 } |
17739
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
663 #ifdef ASSERT |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
664 if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) { |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
665 tty->print_cr("State of YMM registers after signal handle:"); |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
666 int nreg = 2 LP64_ONLY(+2); |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
667 const char* ymm_name[4] = {"0", "7", "8", "15"}; |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
668 for (int i = 0; i < nreg; i++) { |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
669 tty->print("YMM%s:", ymm_name[i]); |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
670 for (int j = 7; j >=0; j--) { |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
671 tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]); |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
672 } |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
673 tty->cr(); |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
674 } |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
675 } |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
676 #endif |
6179
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
677 } |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
678 #endif |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
679 |
585 | 680 // On new cpus instructions which update whole XMM register should be used |
681 // to prevent partial register stall due to dependencies on high half. | |
682 // | |
683 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) | |
684 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) | |
685 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). | |
686 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). | |
687 | |
688 if( is_amd() ) { // AMD cpus specific settings | |
689 if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) { | |
690 // Use it on new AMD cpus starting from Opteron. | |
691 UseAddressNop = true; | |
692 } | |
693 if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) { | |
694 // Use it on new AMD cpus starting from Opteron. | |
695 UseNewLongLShift = true; | |
696 } | |
697 if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { | |
698 if( supports_sse4a() ) { | |
699 UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron | |
700 } else { | |
701 UseXmmLoadAndClearUpper = false; | |
702 } | |
703 } | |
704 if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { | |
705 if( supports_sse4a() ) { | |
706 UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h' | |
707 } else { | |
708 UseXmmRegToRegMoveAll = false; | |
709 } | |
710 } | |
711 if( FLAG_IS_DEFAULT(UseXmmI2F) ) { | |
712 if( supports_sse4a() ) { | |
713 UseXmmI2F = true; | |
714 } else { | |
715 UseXmmI2F = false; | |
716 } | |
717 } | |
718 if( FLAG_IS_DEFAULT(UseXmmI2D) ) { | |
719 if( supports_sse4a() ) { | |
720 UseXmmI2D = true; | |
721 } else { | |
722 UseXmmI2D = false; | |
723 } | |
724 } | |
2406 | 725 if( FLAG_IS_DEFAULT(UseSSE42Intrinsics) ) { |
726 if( supports_sse4_2() && UseSSE >= 4 ) { | |
727 UseSSE42Intrinsics = true; | |
728 } | |
729 } | |
775
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
730 |
3276
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
731 // some defaults for AMD family 15h |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
732 if ( cpu_family() == 0x15 ) { |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
733 // On family 15h processors default is no sw prefetch |
2358 | 734 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { |
735 AllocatePrefetchStyle = 0; | |
736 } | |
3276
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
737 // Also, if some other prefetch style is specified, default instruction type is PREFETCHW |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
738 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
739 AllocatePrefetchInstr = 3; |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
740 } |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
741 // On family 15h processors use XMM and UnalignedLoadStores for Array Copy |
6794 | 742 if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { |
3276
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
743 UseXMMForArrayCopy = true; |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
744 } |
6794 | 745 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { |
3276
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
746 UseUnalignedLoadStores = true; |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
747 } |
2358 | 748 } |
3276
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
749 |
6179
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
750 #ifdef COMPILER2 |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
751 if (MaxVectorSize > 16) { |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
752 // Limit vectors size to 16 bytes on current AMD cpus. |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
753 FLAG_SET_DEFAULT(MaxVectorSize, 16); |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
754 } |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
755 #endif // COMPILER2 |
585 | 756 } |
757 | |
758 if( is_intel() ) { // Intel cpus specific settings | |
759 if( FLAG_IS_DEFAULT(UseStoreImmI16) ) { | |
760 UseStoreImmI16 = false; // don't use it on Intel cpus | |
761 } | |
762 if( cpu_family() == 6 || cpu_family() == 15 ) { | |
763 if( FLAG_IS_DEFAULT(UseAddressNop) ) { | |
764 // Use it on all Intel cpus starting from PentiumPro | |
765 UseAddressNop = true; | |
766 } | |
767 } | |
768 if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { | |
769 UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus | |
770 } | |
771 if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { | |
772 if( supports_sse3() ) { | |
773 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus | |
774 } else { | |
775 UseXmmRegToRegMoveAll = false; | |
776 } | |
777 } | |
778 if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus | |
779 #ifdef COMPILER2 | |
780 if( FLAG_IS_DEFAULT(MaxLoopPad) ) { | |
781 // For new Intel cpus do the next optimization: | |
782 // don't align the beginning of a loop if there are enough instructions | |
783 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) | |
784 // in current fetch line (OptoLoopAlignment) or the padding | |
785 // is big (> MaxLoopPad). | |
786 // Set MaxLoopPad to 11 for new Intel cpus to reduce number of | |
787 // generated NOP instructions. 11 is the largest size of one | |
788 // address NOP instruction '0F 1F' (see Assembler::nop(i)). | |
789 MaxLoopPad = 11; | |
790 } | |
791 #endif // COMPILER2 | |
6794 | 792 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { |
585 | 793 UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus |
794 } | |
6794 | 795 if (supports_sse4_2() && supports_ht()) { // Newest Intel cpus |
796 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { | |
585 | 797 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus |
798 } | |
799 } | |
6794 | 800 if (supports_sse4_2() && UseSSE >= 4) { |
801 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { | |
681 | 802 UseSSE42Intrinsics = true; |
803 } | |
804 } | |
585 | 805 } |
806 } | |
807 | |
17729
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
808 // Use count leading zeros instruction if available. |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
809 if (supports_lzcnt()) { |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
810 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
811 UseCountLeadingZerosInstruction = true; |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
812 } |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
813 } else if (UseCountLeadingZerosInstruction) { |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
814 warning("lzcnt instruction is not available on this CPU"); |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
815 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false); |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
816 } |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
817 |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
818 if (supports_bmi1()) { |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
819 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) { |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
820 UseBMI1Instructions = true; |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
821 } |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
822 } else if (UseBMI1Instructions) { |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
823 warning("BMI1 instructions are not available on this CPU"); |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
824 FLAG_SET_DEFAULT(UseBMI1Instructions, false); |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
825 } |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
826 |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
827 // Use count trailing zeros instruction if available |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
828 if (supports_bmi1()) { |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
829 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
830 UseCountTrailingZerosInstruction = UseBMI1Instructions; |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
831 } |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
832 } else if (UseCountTrailingZerosInstruction) { |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
833 warning("tzcnt instruction is not available on this CPU"); |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
834 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false); |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
835 } |
8a8ff6b577ed
8031321: Support Intel bit manipulation instructions
iveresov
parents:
11080
diff
changeset
|
836 |
643
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
837 // Use population count instruction if available. |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
838 if (supports_popcnt()) { |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
839 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
840 UsePopCountInstruction = true; |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
841 } |
4759 | 842 } else if (UsePopCountInstruction) { |
843 warning("POPCNT instruction is not available on this CPU"); | |
844 FLAG_SET_DEFAULT(UsePopCountInstruction, false); | |
643
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
845 } |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
846 |
7474
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7427
diff
changeset
|
847 // Use fast-string operations if available. |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7427
diff
changeset
|
848 if (supports_erms()) { |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7427
diff
changeset
|
849 if (FLAG_IS_DEFAULT(UseFastStosb)) { |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7427
diff
changeset
|
850 UseFastStosb = true; |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7427
diff
changeset
|
851 } |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7427
diff
changeset
|
852 } else if (UseFastStosb) { |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7427
diff
changeset
|
853 warning("fast-string operations are not available on this CPU"); |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7427
diff
changeset
|
854 FLAG_SET_DEFAULT(UseFastStosb, false); |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7427
diff
changeset
|
855 } |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7427
diff
changeset
|
856 |
6794 | 857 #ifdef COMPILER2 |
858 if (FLAG_IS_DEFAULT(AlignVector)) { | |
859 // Modern processors allow misaligned memory operations for vectors. | |
860 AlignVector = !UseUnalignedLoadStores; | |
861 } | |
862 #endif // COMPILER2 | |
863 | |
585 | 864 assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); |
865 assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); | |
866 | |
867 // set valid Prefetch instruction | |
868 if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0; | |
869 if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3; | |
2479 | 870 if( ReadPrefetchInstr == 3 && !supports_3dnow_prefetch() ) ReadPrefetchInstr = 0; |
871 if( !supports_sse() && supports_3dnow_prefetch() ) ReadPrefetchInstr = 3; | |
585 | 872 |
873 if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0; | |
874 if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3; | |
2479 | 875 if( AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch() ) AllocatePrefetchInstr=0; |
876 if( !supports_sse() && supports_3dnow_prefetch() ) AllocatePrefetchInstr = 3; | |
585 | 877 |
878 // Allocation prefetch settings | |
3854 | 879 intx cache_line_size = prefetch_data_size(); |
585 | 880 if( cache_line_size > AllocatePrefetchStepSize ) |
881 AllocatePrefetchStepSize = cache_line_size; | |
3854 | 882 |
585 | 883 assert(AllocatePrefetchLines > 0, "invalid value"); |
3854 | 884 if( AllocatePrefetchLines < 1 ) // set valid value in product VM |
885 AllocatePrefetchLines = 3; | |
886 assert(AllocateInstancePrefetchLines > 0, "invalid value"); | |
887 if( AllocateInstancePrefetchLines < 1 ) // set valid value in product VM | |
888 AllocateInstancePrefetchLines = 1; | |
585 | 889 |
890 AllocatePrefetchDistance = allocate_prefetch_distance(); | |
891 AllocatePrefetchStyle = allocate_prefetch_style(); | |
892 | |
1622 | 893 if( is_intel() && cpu_family() == 6 && supports_sse3() ) { |
894 if( AllocatePrefetchStyle == 2 ) { // watermark prefetching on Core | |
585 | 895 #ifdef _LP64 |
1622 | 896 AllocatePrefetchDistance = 384; |
585 | 897 #else |
1622 | 898 AllocatePrefetchDistance = 320; |
585 | 899 #endif |
1622 | 900 } |
901 if( supports_sse4_2() && supports_ht() ) { // Nehalem based cpus | |
902 AllocatePrefetchDistance = 192; | |
903 AllocatePrefetchLines = 4; | |
1730
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
904 #ifdef COMPILER2 |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
905 if (AggressiveOpts && FLAG_IS_DEFAULT(UseFPUForSpilling)) { |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
906 FLAG_SET_DEFAULT(UseFPUForSpilling, true); |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
907 } |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
908 #endif |
1622 | 909 } |
585 | 910 } |
911 assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value"); | |
912 | |
913 #ifdef _LP64 | |
914 // Prefetch settings | |
915 PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes(); | |
916 PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes(); | |
917 PrefetchFieldsAhead = prefetch_fields_ahead(); | |
918 #endif | |
919 | |
7587 | 920 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && |
921 (cache_line_size > ContendedPaddingWidth)) | |
922 ContendedPaddingWidth = cache_line_size; | |
923 | |
585 | 924 #ifndef PRODUCT |
925 if (PrintMiscellaneous && Verbose) { | |
926 tty->print_cr("Logical CPUs per core: %u", | |
927 logical_processors_per_package()); | |
17937
78bbf4d43a14
8037816: Fix for 8036122 breaks build with Xcode5/clang
drchase
parents:
17913
diff
changeset
|
928 tty->print("UseSSE=%d", (int) UseSSE); |
4759 | 929 if (UseAVX > 0) { |
17937
78bbf4d43a14
8037816: Fix for 8036122 breaks build with Xcode5/clang
drchase
parents:
17913
diff
changeset
|
930 tty->print(" UseAVX=%d", (int) UseAVX); |
4759 | 931 } |
6894 | 932 if (UseAES) { |
933 tty->print(" UseAES=1"); | |
934 } | |
17739
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
935 #ifdef COMPILER2 |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
936 if (MaxVectorSize > 0) { |
17937
78bbf4d43a14
8037816: Fix for 8036122 breaks build with Xcode5/clang
drchase
parents:
17913
diff
changeset
|
937 tty->print(" MaxVectorSize=%d", (int) MaxVectorSize); |
17739
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
938 } |
98af1e198e73
8037226: compiler/7196199/Test7196199.java fails on 32-bit linux with MaxVectorSize > 16
kvn
parents:
17729
diff
changeset
|
939 #endif |
4759 | 940 tty->cr(); |
3854 | 941 tty->print("Allocation"); |
2479 | 942 if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) { |
3854 | 943 tty->print_cr(": no prefetching"); |
585 | 944 } else { |
3854 | 945 tty->print(" prefetching: "); |
2479 | 946 if (UseSSE == 0 && supports_3dnow_prefetch()) { |
585 | 947 tty->print("PREFETCHW"); |
948 } else if (UseSSE >= 1) { | |
949 if (AllocatePrefetchInstr == 0) { | |
950 tty->print("PREFETCHNTA"); | |
951 } else if (AllocatePrefetchInstr == 1) { | |
952 tty->print("PREFETCHT0"); | |
953 } else if (AllocatePrefetchInstr == 2) { | |
954 tty->print("PREFETCHT2"); | |
955 } else if (AllocatePrefetchInstr == 3) { | |
956 tty->print("PREFETCHW"); | |
957 } | |
958 } | |
959 if (AllocatePrefetchLines > 1) { | |
17937
78bbf4d43a14
8037816: Fix for 8036122 breaks build with Xcode5/clang
drchase
parents:
17913
diff
changeset
|
960 tty->print_cr(" at distance %d, %d lines of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize); |
585 | 961 } else { |
17937
78bbf4d43a14
8037816: Fix for 8036122 breaks build with Xcode5/clang
drchase
parents:
17913
diff
changeset
|
962 tty->print_cr(" at distance %d, one line of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchStepSize); |
585 | 963 } |
964 } | |
965 | |
966 if (PrefetchCopyIntervalInBytes > 0) { | |
17937
78bbf4d43a14
8037816: Fix for 8036122 breaks build with Xcode5/clang
drchase
parents:
17913
diff
changeset
|
967 tty->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes); |
585 | 968 } |
969 if (PrefetchScanIntervalInBytes > 0) { | |
17937
78bbf4d43a14
8037816: Fix for 8036122 breaks build with Xcode5/clang
drchase
parents:
17913
diff
changeset
|
970 tty->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes); |
585 | 971 } |
972 if (PrefetchFieldsAhead > 0) { | |
17937
78bbf4d43a14
8037816: Fix for 8036122 breaks build with Xcode5/clang
drchase
parents:
17913
diff
changeset
|
973 tty->print_cr("PrefetchFieldsAhead %d", (int) PrefetchFieldsAhead); |
585 | 974 } |
7587 | 975 if (ContendedPaddingWidth > 0) { |
17937
78bbf4d43a14
8037816: Fix for 8036122 breaks build with Xcode5/clang
drchase
parents:
17913
diff
changeset
|
976 tty->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth); |
7587 | 977 } |
585 | 978 } |
979 #endif // !PRODUCT | |
980 } | |
981 | |
17780 | 982 bool VM_Version::use_biased_locking() { |
983 #if INCLUDE_RTM_OPT | |
984 // RTM locking is most useful when there is high lock contention and | |
985 // low data contention. With high lock contention the lock is usually | |
986 // inflated and biased locking is not suitable for that case. | |
987 // RTM locking code requires that biased locking is off. | |
988 // Note: we can't switch off UseBiasedLocking in get_processor_features() | |
989 // because it is used by Thread::allocate() which is called before | |
990 // VM_Version::initialize(). | |
991 if (UseRTMLocking && UseBiasedLocking) { | |
992 if (FLAG_IS_DEFAULT(UseBiasedLocking)) { | |
993 FLAG_SET_DEFAULT(UseBiasedLocking, false); | |
994 } else { | |
995 warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag." ); | |
996 UseBiasedLocking = false; | |
997 } | |
998 } | |
999 #endif | |
1000 return UseBiasedLocking; | |
1001 } | |
1002 | |
585 | 1003 void VM_Version::initialize() { |
1004 ResourceMark rm; | |
1005 // Making this stub must be FIRST use of assembler | |
1006 | |
17829
0118c8c7b80f
8038633: crash in VM_Version::get_processor_features() on startup
kvn
parents:
17780
diff
changeset
|
1007 stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); |
585 | 1008 if (stub_blob == NULL) { |
17829
0118c8c7b80f
8038633: crash in VM_Version::get_processor_features() on startup
kvn
parents:
17780
diff
changeset
|
1009 vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); |
585 | 1010 } |
1748 | 1011 CodeBuffer c(stub_blob); |
585 | 1012 VM_Version_StubGenerator g(&c); |
17829
0118c8c7b80f
8038633: crash in VM_Version::get_processor_features() on startup
kvn
parents:
17780
diff
changeset
|
1013 get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, |
0118c8c7b80f
8038633: crash in VM_Version::get_processor_features() on startup
kvn
parents:
17780
diff
changeset
|
1014 g.generate_get_cpu_info()); |
585 | 1015 |
1016 get_processor_features(); | |
1017 } |