Mercurial > hg > truffle
annotate src/cpu/x86/vm/vm_version_x86.cpp @ 15388:769fc3629f59
Add phase FlowSensitiveReductionPhase.
It is possible to remove GuardingPiNodes, CheckCastNodes, and FixedGuards during
HighTier under certain conditions (control-flow sensitive conditions).
The phase added in this commit (FlowSensitiveReductionPhase) does that,
and in addition replaces usages with "downcasting" PiNodes when possible
thus resulting in more precise object stamps (e.g., non-null).
Finally, usages of floating, side-effect-free expressions are also simplified
(as per control-flow sensitive conditions).
The newly added phase runs only during HighTier and can be deactivated
using Graal option FlowSensitiveReduction (it is active by default).
author | Miguel Garcia <miguel.m.garcia@oracle.com> |
---|---|
date | Fri, 25 Apr 2014 16:50:52 +0200 |
parents | 6b0fd0964b87 |
children | b51e29501f30 52b4284cb496 |
rev | line source |
---|---|
585 | 1 /* |
11080
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
2 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. |
585 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1060
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1060
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1060
diff
changeset
|
21 * questions. |
585 | 22 * |
23 */ | |
24 | |
1972 | 25 #include "precompiled.hpp" |
7199
cd3d6a6b95d9
8003240: x86: move MacroAssembler into separate file
twisti
parents:
6943
diff
changeset
|
26 #include "asm/macroAssembler.hpp" |
cd3d6a6b95d9
8003240: x86: move MacroAssembler into separate file
twisti
parents:
6943
diff
changeset
|
27 #include "asm/macroAssembler.inline.hpp" |
1972 | 28 #include "memory/resourceArea.hpp" |
29 #include "runtime/java.hpp" | |
30 #include "runtime/stubCodeGenerator.hpp" | |
31 #include "vm_version_x86.hpp" | |
32 #ifdef TARGET_OS_FAMILY_linux | |
33 # include "os_linux.inline.hpp" | |
34 #endif | |
35 #ifdef TARGET_OS_FAMILY_solaris | |
36 # include "os_solaris.inline.hpp" | |
37 #endif | |
38 #ifdef TARGET_OS_FAMILY_windows | |
39 # include "os_windows.inline.hpp" | |
40 #endif | |
3960 | 41 #ifdef TARGET_OS_FAMILY_bsd |
42 # include "os_bsd.inline.hpp" | |
43 #endif | |
585 | 44 |
45 | |
// Definitions of VM_Version's static data members. get_processor_features()
// in this file assigns _cpu, _model, _stepping, _cpuFeatures and _features_str;
// _cpuid_info is the buffer handed to getPsrInfo_stub and starts zero-initialized.
46 int VM_Version::_cpu; | |
47 int VM_Version::_model; | |
48 int VM_Version::_stepping; | |
49 int VM_Version::_cpuFeatures; | |
50 const char* VM_Version::_features_str = ""; | |
51 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; | |
52 | |
// NOTE(review): the uses of stub_blob and stub_size lie outside this chunk;
// presumably the blob (stub_size bytes) holds the generated getPsrInfo stub -- confirm.
53 static BufferBlob* stub_blob; | |
4771 | 54 static const int stub_size = 550; |
585 | 55 |
// C-linkage signature of the generated stub: one untyped pointer argument, no result.
// get_processor_features() invokes it as getPsrInfo_stub(&_cpuid_info).
56 extern "C" { | |
57 typedef void (*getPsrInfo_stub_t)(void*); | |
58 } | |
// Entry point of the stub; NULL until assigned (the assignment is not visible in this chunk).
59 static getPsrInfo_stub_t getPsrInfo_stub = NULL; | |
60 | |
61 | |
// Stub generator whose single method emits the "getPsrInfo_stub" routine (see the
// StubCodeMark below). The emitted code fills a VM_Version::CpuidInfo record with
// raw CPUID / XGETBV results; presumably its entry address is what gets installed
// as getPsrInfo_stub -- the installation is not visible in this chunk.
// NOTE(review): this listing is an hg "annotate" rendering; the leading numbers,
// "|" separators and changeset rows are page residue, not C++.
62 class VM_Version_StubGenerator: public StubCodeGenerator { | |
63 public: | |
64 | |
65 VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} | |
66 | |
// Emits the stub and returns the address of its first instruction.
// Emitted-code outline: save rbp/rbx/rsi and EFLAGS; probe the EFLAGS AC and ID
// bits to recognise 386/486-class CPUs without CPUID; otherwise record CPUID
// leaves 0, 0xB (sub-leaves 0-2), 4, 1, XCR0 (when OSXSAVE and AVX are reported),
// leaf 7, and the extended leaves 0x80000008/7/5/1 as far as the CPU reports
// them; finally restore flags and registers and return.
67 address generate_getPsrInfo() { | |
68 // Flags to test CPU type. | |
4961
0368109684cb
7132070: Use a mach_port_t as the OSThread thread_id rather than pthread_t on BSD/OSX
sla
parents:
4771
diff
changeset
|
69 const uint32_t HS_EFL_AC = 0x40000; |
0368109684cb
7132070: Use a mach_port_t as the OSThread thread_id rather than pthread_t on BSD/OSX
sla
parents:
4771
diff
changeset
|
70 const uint32_t HS_EFL_ID = 0x200000; |
585 | 71 // Values for when we don't have a CPUID instruction. |
// The family number is placed at bit 8 of the value stored in the std_cpuid1 slot.
72 const int CPU_FAMILY_SHIFT = 8; | |
73 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); | |
74 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); | |
75 | |
1622 | 76 Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4; |
4771 | 77 Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, done; |
585 | 78 |
79 StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); | |
// Shorthand used throughout the generator body; undefined again before the return.
80 # define __ _masm-> | |
81 | |
82 address start = __ pc(); | |
83 | |
84 // | |
85 // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info); | |
86 // | |
87 // LP64: rcx and rdx are first and second argument registers on windows | |
88 | |
// Prologue: rbp is saved and then reused as the base pointer to the CpuidInfo
// record for the rest of the stub. In the non-LP64 case the argument is read from
// rsp+8, i.e. above the just-pushed rbp and the return address.
89 __ push(rbp); | |
90 #ifdef _LP64 | |
91 __ mov(rbp, c_rarg0); // cpuid_info address | |
92 #else | |
93 __ movptr(rbp, Address(rsp, 8)); // cpuid_info address | |
94 #endif | |
95 __ push(rbx); | |
96 __ push(rsi); | |
// pushf/pop/push leaves a copy of the original flags on the stack (consumed by the
// popf at 'done') and another copy in rax; rcx keeps the original value for the
// comparisons below.
97 __ pushf(); // preserve rbx, and flags | |
98 __ pop(rax); | |
99 __ push(rax); | |
100 __ mov(rcx, rax); | |
101 // | |
102 // if we are unable to change the AC flag, we have a 386 | |
103 // | |
4961
0368109684cb
7132070: Use a mach_port_t as the OSThread thread_id rather than pthread_t on BSD/OSX
sla
parents:
4771
diff
changeset
|
104 __ xorl(rax, HS_EFL_AC); |
585 | 105 __ push(rax); |
106 __ popf(); | |
107 __ pushf(); | |
108 __ pop(rax); | |
109 __ cmpptr(rax, rcx); | |
110 __ jccb(Assembler::notEqual, detect_486); | |
111 | |
112 __ movl(rax, CPU_FAMILY_386); | |
113 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); | |
114 __ jmp(done); | |
115 | |
116 // | |
117 // If we are unable to change the ID flag, we have a 486 which does | |
118 // not support the "cpuid" instruction. | |
119 // | |
120 __ bind(detect_486); | |
121 __ mov(rax, rcx); | |
4961
0368109684cb
7132070: Use a mach_port_t as the OSThread thread_id rather than pthread_t on BSD/OSX
sla
parents:
4771
diff
changeset
|
122 __ xorl(rax, HS_EFL_ID); |
585 | 123 __ push(rax); |
124 __ popf(); | |
125 __ pushf(); | |
126 __ pop(rax); | |
127 __ cmpptr(rcx, rax); | |
128 __ jccb(Assembler::notEqual, detect_586); | |
129 | |
130 __ bind(cpu486); | |
131 __ movl(rax, CPU_FAMILY_486); | |
132 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); | |
133 __ jmp(done); | |
134 | |
135 // | |
136 // At this point, we have a chip which supports the "cpuid" instruction | |
137 // | |
138 __ bind(detect_586); | |
139 __ xorl(rax, rax); | |
140 __ cpuid(); | |
141 __ orl(rax, rax); | |
142 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input | |
143 // value of at least 1, we give up and | |
144 // assume a 486 | |
// Record leaf 0 (eax holds the maximum standard leaf, which later checks re-read
// from this slot).
145 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); | |
146 __ movl(Address(rsi, 0), rax); | |
147 __ movl(Address(rsi, 4), rbx); | |
148 __ movl(Address(rsi, 8), rcx); | |
149 __ movl(Address(rsi,12), rdx); | |
150 | |
1622 | 151 __ cmpl(rax, 0xa); // Is cpuid(0xB) supported? |
152 __ jccb(Assembler::belowEqual, std_cpuid4); | |
153 | |
154 // | |
155 // cpuid(0xB) Processor Topology | |
156 // | |
157 __ movl(rax, 0xb); | |
158 __ xorl(rcx, rcx); // Threads level | |
159 __ cpuid(); | |
160 | |
161 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset()))); | |
162 __ movl(Address(rsi, 0), rax); | |
163 __ movl(Address(rsi, 4), rbx); | |
164 __ movl(Address(rsi, 8), rcx); | |
165 __ movl(Address(rsi,12), rdx); | |
166 | |
167 __ movl(rax, 0xb); | |
168 __ movl(rcx, 1); // Cores level | |
169 __ cpuid(); | |
// eax is saved around the validity test; the pop restores it without touching the
// flags produced by the final andl, which the following jccb consumes.
170 __ push(rax); | |
171 __ andl(rax, 0x1f); // Determine if valid topology level | |
172 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level | |
173 __ andl(rax, 0xffff); | |
174 __ pop(rax); | |
175 __ jccb(Assembler::equal, std_cpuid4); | |
176 | |
177 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset()))); | |
178 __ movl(Address(rsi, 0), rax); | |
179 __ movl(Address(rsi, 4), rbx); | |
180 __ movl(Address(rsi, 8), rcx); | |
181 __ movl(Address(rsi,12), rdx); | |
182 | |
183 __ movl(rax, 0xb); | |
184 __ movl(rcx, 2); // Packages level | |
185 __ cpuid(); | |
186 __ push(rax); | |
187 __ andl(rax, 0x1f); // Determine if valid topology level | |
188 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level | |
189 __ andl(rax, 0xffff); | |
190 __ pop(rax); | |
191 __ jccb(Assembler::equal, std_cpuid4); | |
192 | |
193 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset()))); | |
194 __ movl(Address(rsi, 0), rax); | |
195 __ movl(Address(rsi, 4), rbx); | |
196 __ movl(Address(rsi, 8), rcx); | |
197 __ movl(Address(rsi,12), rdx); | |
585 | 198 |
199 // | |
200 // cpuid(0x4) Deterministic cache params | |
201 // | |
1622 | 202 __ bind(std_cpuid4); |
// rax = 4 serves both as the value compared against the stored maximum leaf and as
// the leaf number for the cpuid that follows when the leaf is available.
585 | 203 __ movl(rax, 4); |
1622 | 204 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported? |
205 __ jccb(Assembler::greater, std_cpuid1); | |
206 | |
585 | 207 __ xorl(rcx, rcx); // L1 cache |
208 __ cpuid(); | |
209 __ push(rax); | |
210 __ andl(rax, 0x1f); // Determine if valid cache parameters used | |
211 __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache | |
212 __ pop(rax); | |
213 __ jccb(Assembler::equal, std_cpuid1); | |
214 | |
215 __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset()))); | |
216 __ movl(Address(rsi, 0), rax); | |
217 __ movl(Address(rsi, 4), rbx); | |
218 __ movl(Address(rsi, 8), rcx); | |
219 __ movl(Address(rsi,12), rdx); | |
220 | |
221 // | |
222 // Standard cpuid(0x1) | |
223 // | |
224 __ bind(std_cpuid1); | |
225 __ movl(rax, 1); | |
226 __ cpuid(); | |
227 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); | |
228 __ movl(Address(rsi, 0), rax); | |
229 __ movl(Address(rsi, 4), rbx); | |
230 __ movl(Address(rsi, 8), rcx); | |
231 __ movl(Address(rsi,12), rdx); | |
232 | |
4759 | 233 // |
234 // Check if OS has enabled XGETBV instruction to access XCR0 | |
235 // (OSXSAVE feature flag) and CPU supports AVX | |
236 // | |
// 0x18000000 selects bits 27 and 28 of the leaf-1 ecx value still held in rcx;
// both must be set for the XGETBV path to be taken.
237 __ andl(rcx, 0x18000000); | |
238 __ cmpl(rcx, 0x18000000); | |
239 __ jccb(Assembler::notEqual, sef_cpuid); | |
240 | |
241 // | |
242 // XCR0, XFEATURE_ENABLED_MASK register | |
243 // | |
244 __ xorl(rcx, rcx); // zero for XCR0 register | |
245 __ xgetbv(); | |
// Only rax and rdx are stored: the 64-bit XCR0 value is returned split across them.
246 __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); | |
247 __ movl(Address(rsi, 0), rax); | |
248 __ movl(Address(rsi, 4), rdx); | |
249 | |
250 // | |
251 // cpuid(0x7) Structured Extended Features | |
252 // | |
253 __ bind(sef_cpuid); | |
254 __ movl(rax, 7); | |
255 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported? | |
256 __ jccb(Assembler::greater, ext_cpuid); | |
257 | |
258 __ xorl(rcx, rcx); | |
259 __ cpuid(); | |
// Only eax and ebx of leaf 7 are recorded here.
260 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); | |
261 __ movl(Address(rsi, 0), rax); | |
262 __ movl(Address(rsi, 4), rbx); | |
263 | |
264 // | |
265 // Extended cpuid(0x80000000) | |
266 // | |
267 __ bind(ext_cpuid); | |
585 | 268 __ movl(rax, 0x80000000); |
269 __ cpuid(); | |
// rax now holds the highest extended leaf. The comparisons below jump to the
// highest block that is available; each block then falls through to the lower
// ones (0x80000008 -> 0x80000007 -> 0x80000005 -> 0x80000001).
270 __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? | |
271 __ jcc(Assembler::belowEqual, done); | |
272 __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? | |
273 __ jccb(Assembler::belowEqual, ext_cpuid1); | |
4749
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
274 __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported? |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
275 __ jccb(Assembler::belowEqual, ext_cpuid5); |
585 | 276 __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? |
4749
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
277 __ jccb(Assembler::belowEqual, ext_cpuid7); |
585 | 278 // |
279 // Extended cpuid(0x80000008) | |
280 // | |
281 __ movl(rax, 0x80000008); | |
282 __ cpuid(); | |
283 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset()))); | |
284 __ movl(Address(rsi, 0), rax); | |
285 __ movl(Address(rsi, 4), rbx); | |
286 __ movl(Address(rsi, 8), rcx); | |
287 __ movl(Address(rsi,12), rdx); | |
288 | |
289 // | |
4749
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
290 // Extended cpuid(0x80000007) |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
291 // |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
292 __ bind(ext_cpuid7); |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
293 __ movl(rax, 0x80000007); |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
294 __ cpuid(); |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
295 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset()))); |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
296 __ movl(Address(rsi, 0), rax); |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
297 __ movl(Address(rsi, 4), rbx); |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
298 __ movl(Address(rsi, 8), rcx); |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
299 __ movl(Address(rsi,12), rdx); |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
300 |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
301 // |
585 | 302 // Extended cpuid(0x80000005) |
303 // | |
304 __ bind(ext_cpuid5); | |
305 __ movl(rax, 0x80000005); | |
306 __ cpuid(); | |
307 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset()))); | |
308 __ movl(Address(rsi, 0), rax); | |
309 __ movl(Address(rsi, 4), rbx); | |
310 __ movl(Address(rsi, 8), rcx); | |
311 __ movl(Address(rsi,12), rdx); | |
312 | |
313 // | |
314 // Extended cpuid(0x80000001) | |
315 // | |
316 __ bind(ext_cpuid1); | |
317 __ movl(rax, 0x80000001); | |
318 __ cpuid(); | |
319 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset()))); | |
320 __ movl(Address(rsi, 0), rax); | |
321 __ movl(Address(rsi, 4), rbx); | |
322 __ movl(Address(rsi, 8), rcx); | |
323 __ movl(Address(rsi,12), rdx); | |
324 | |
325 // | |
326 // return | |
327 // | |
328 __ bind(done); | |
// Epilogue: popf restores the flags copy pushed in the prologue, then the saved
// registers are popped in the reverse of their push order.
329 __ popf(); | |
330 __ pop(rsi); | |
331 __ pop(rbx); | |
332 __ pop(rbp); | |
333 __ ret(0); | |
334 | |
335 # undef __ | |
336 | |
337 return start; | |
338 }; | |
339 }; | |
340 | |
341 | |
342 void VM_Version::get_processor_features() { | |
343 | |
344 _cpu = 4; // 486 by default | |
345 _model = 0; | |
346 _stepping = 0; | |
347 _cpuFeatures = 0; | |
348 _logical_processors_per_package = 1; | |
349 | |
350 if (!Use486InstrsOnly) { | |
351 // Get raw processor info | |
352 getPsrInfo_stub(&_cpuid_info); | |
353 assert_is_initialized(); | |
354 _cpu = extended_cpu_family(); | |
355 _model = extended_cpu_model(); | |
356 _stepping = cpu_stepping(); | |
357 | |
358 if (cpu_family() > 4) { // it supports CPUID | |
359 _cpuFeatures = feature_flags(); | |
360 // Logical processors are only available on P4s and above, | |
361 // and only if hyperthreading is available. | |
362 _logical_processors_per_package = logical_processor_count(); | |
363 } | |
364 } | |
365 | |
366 _supports_cx8 = supports_cmpxchg8(); | |
6795
7eca5de9e0b6
7023898: Intrinsify AtomicLongFieldUpdater.getAndIncrement()
roland
parents:
6794
diff
changeset
|
367 // xchg and xadd instructions |
7eca5de9e0b6
7023898: Intrinsify AtomicLongFieldUpdater.getAndIncrement()
roland
parents:
6794
diff
changeset
|
368 _supports_atomic_getset4 = true; |
7eca5de9e0b6
7023898: Intrinsify AtomicLongFieldUpdater.getAndIncrement()
roland
parents:
6794
diff
changeset
|
369 _supports_atomic_getadd4 = true; |
7eca5de9e0b6
7023898: Intrinsify AtomicLongFieldUpdater.getAndIncrement()
roland
parents:
6794
diff
changeset
|
370 LP64_ONLY(_supports_atomic_getset8 = true); |
7eca5de9e0b6
7023898: Intrinsify AtomicLongFieldUpdater.getAndIncrement()
roland
parents:
6794
diff
changeset
|
371 LP64_ONLY(_supports_atomic_getadd8 = true); |
585 | 372 |
373 #ifdef _LP64 | |
374 // OS should support SSE for x64 and hardware should support at least SSE2. | |
375 if (!VM_Version::supports_sse2()) { | |
376 vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported"); | |
377 } | |
1060 | 378 // in 64 bit the use of SSE2 is the minimum |
379 if (UseSSE < 2) UseSSE = 2; | |
585 | 380 #endif |
381 | |
3787
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
382 #ifdef AMD64 |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
383 // flush_icache_stub has to be generated first. |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
384 // That is why Icache line size is hard coded in ICache class, |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
385 // see icache_x86.hpp. It is also the reason why we can't use |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
386 // clflush instruction in 32-bit VM since it could be running |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
387 // on CPU which does not support it. |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
388 // |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
389 // The only thing we can do is to verify that flushed |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
390 // ICache::line_size has correct value. |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
391 guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported"); |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
392 // clflush_size is size in quadwords (8 bytes). |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
393 guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported"); |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
394 #endif |
6ae7a1561b53
6990015: Incorrect Icache line size is used for 64 bit x86
kvn
parents:
3276
diff
changeset
|
395 |
585 | 396 // If the OS doesn't support SSE, we can't use this feature even if the HW does |
397 if (!os::supports_sse()) | |
398 _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2); | |
399 | |
400 if (UseSSE < 4) { | |
401 _cpuFeatures &= ~CPU_SSE4_1; | |
402 _cpuFeatures &= ~CPU_SSE4_2; | |
403 } | |
404 | |
405 if (UseSSE < 3) { | |
406 _cpuFeatures &= ~CPU_SSE3; | |
407 _cpuFeatures &= ~CPU_SSSE3; | |
408 _cpuFeatures &= ~CPU_SSE4A; | |
409 } | |
410 | |
411 if (UseSSE < 2) | |
412 _cpuFeatures &= ~CPU_SSE2; | |
413 | |
414 if (UseSSE < 1) | |
415 _cpuFeatures &= ~CPU_SSE; | |
416 | |
4759 | 417 if (UseAVX < 2) |
418 _cpuFeatures &= ~CPU_AVX2; | |
419 | |
420 if (UseAVX < 1) | |
421 _cpuFeatures &= ~CPU_AVX; | |
422 | |
6894 | 423 if (!UseAES && !FLAG_IS_DEFAULT(UseAES)) |
424 _cpuFeatures &= ~CPU_AES; | |
425 | |
585 | 426 if (logical_processors_per_package() == 1) { |
427 // HT processor could be installed on a system which doesn't support HT. | |
428 _cpuFeatures &= ~CPU_HT; | |
429 } | |
430 | |
431 char buf[256]; | |
7474
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7427
diff
changeset
|
432 jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", |
585 | 433 cores_per_cpu(), threads_per_core(), |
434 cpu_family(), _model, _stepping, | |
435 (supports_cmov() ? ", cmov" : ""), | |
436 (supports_cmpxchg8() ? ", cx8" : ""), | |
437 (supports_fxsr() ? ", fxsr" : ""), | |
438 (supports_mmx() ? ", mmx" : ""), | |
439 (supports_sse() ? ", sse" : ""), | |
440 (supports_sse2() ? ", sse2" : ""), | |
441 (supports_sse3() ? ", sse3" : ""), | |
442 (supports_ssse3()? ", ssse3": ""), | |
443 (supports_sse4_1() ? ", sse4.1" : ""), | |
444 (supports_sse4_2() ? ", sse4.2" : ""), | |
643
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
445 (supports_popcnt() ? ", popcnt" : ""), |
4759 | 446 (supports_avx() ? ", avx" : ""), |
447 (supports_avx2() ? ", avx2" : ""), | |
6894 | 448 (supports_aes() ? ", aes" : ""), |
11080
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
449 (supports_clmul() ? ", clmul" : ""), |
7474
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7427
diff
changeset
|
450 (supports_erms() ? ", erms" : ""), |
585 | 451 (supports_mmx_ext() ? ", mmxext" : ""), |
2479 | 452 (supports_3dnow_prefetch() ? ", 3dnowpref" : ""), |
775
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
453 (supports_lzcnt() ? ", lzcnt": ""), |
585 | 454 (supports_sse4a() ? ", sse4a": ""), |
4749
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
455 (supports_ht() ? ", ht": ""), |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
456 (supports_tsc() ? ", tsc": ""), |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
457 (supports_tscinv_bit() ? ", tscinvbit": ""), |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3960
diff
changeset
|
458 (supports_tscinv() ? ", tscinv": "")); |
585 | 459 _features_str = strdup(buf); |
460 | |
461 // UseSSE is set to the smaller of what hardware supports and what | |
462 // the command line requires. I.e., you cannot set UseSSE to 2 on | |
463 // older Pentiums which do not support it. | |
4759 | 464 if (UseSSE > 4) UseSSE=4; |
465 if (UseSSE < 0) UseSSE=0; | |
466 if (!supports_sse4_1()) // Drop to 3 if no SSE4 support | |
585 | 467 UseSSE = MIN2((intx)3,UseSSE); |
4759 | 468 if (!supports_sse3()) // Drop to 2 if no SSE3 support |
585 | 469 UseSSE = MIN2((intx)2,UseSSE); |
4759 | 470 if (!supports_sse2()) // Drop to 1 if no SSE2 support |
585 | 471 UseSSE = MIN2((intx)1,UseSSE); |
4759 | 472 if (!supports_sse ()) // Drop to 0 if no SSE support |
585 | 473 UseSSE = 0; |
474 | |
4759 | 475 if (UseAVX > 2) UseAVX=2; |
476 if (UseAVX < 0) UseAVX=0; | |
477 if (!supports_avx2()) // Drop to 1 if no AVX2 support | |
478 UseAVX = MIN2((intx)1,UseAVX); | |
479 if (!supports_avx ()) // Drop to 0 if no AVX support | |
480 UseAVX = 0; | |
481 | |
6894 | 482 // Use AES instructions if available. |
483 if (supports_aes()) { | |
484 if (FLAG_IS_DEFAULT(UseAES)) { | |
485 UseAES = true; | |
486 } | |
487 } else if (UseAES) { | |
488 if (!FLAG_IS_DEFAULT(UseAES)) | |
489 warning("AES instructions not available on this CPU"); | |
490 FLAG_SET_DEFAULT(UseAES, false); | |
491 } | |
492 | |
11080
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
493 // Use CLMUL instructions if available. |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
494 if (supports_clmul()) { |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
495 if (FLAG_IS_DEFAULT(UseCLMUL)) { |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
496 UseCLMUL = true; |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
497 } |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
498 } else if (UseCLMUL) { |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
499 if (!FLAG_IS_DEFAULT(UseCLMUL)) |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
500 warning("CLMUL instructions not available on this CPU (AVX may also be required)"); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
501 FLAG_SET_DEFAULT(UseCLMUL, false); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
502 } |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
503 |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
504 if (UseCLMUL && (UseAVX > 0) && (UseSSE > 2)) { |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
505 if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
506 UseCRC32Intrinsics = true; |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
507 } |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
508 } else if (UseCRC32Intrinsics) { |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
509 if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
510 warning("CRC32 Intrinsics requires AVX and CLMUL instructions (not available on this CPU)"); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
511 FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
512 } |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7638
diff
changeset
|
513 |
6894 | 514 // The AES intrinsic stubs require AES instruction support (of course) |
7427 | 515 // but also require sse3 mode for the instructions they use. |
516 if (UseAES && (UseSSE > 2)) { | |
6894 | 517 if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { |
518 UseAESIntrinsics = true; | |
519 } | |
520 } else if (UseAESIntrinsics) { | |
521 if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) | |
522 warning("AES intrinsics not available on this CPU"); | |
523 FLAG_SET_DEFAULT(UseAESIntrinsics, false); | |
524 } | |
525 | |
6179
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
526 #ifdef COMPILER2 |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
527 if (UseFPUForSpilling) { |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
528 if (UseSSE < 2) { |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
529 // Only supported with SSE2+ |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
530 FLAG_SET_DEFAULT(UseFPUForSpilling, false); |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
531 } |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
532 } |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
533 if (MaxVectorSize > 0) { |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
534 if (!is_power_of_2(MaxVectorSize)) { |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
535 warning("MaxVectorSize must be a power of 2"); |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
536 FLAG_SET_DEFAULT(MaxVectorSize, 32); |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
537 } |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
538 if (MaxVectorSize > 32) { |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
539 FLAG_SET_DEFAULT(MaxVectorSize, 32); |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
540 } |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
541 if (MaxVectorSize > 16 && UseAVX == 0) { |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
542 // Only supported with AVX+ |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
543 FLAG_SET_DEFAULT(MaxVectorSize, 16); |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
544 } |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
545 if (UseSSE < 2) { |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
546 // Only supported with SSE2+ |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
547 FLAG_SET_DEFAULT(MaxVectorSize, 0); |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
548 } |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
549 } |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
550 #endif |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
551 |
585 | 552 // On new cpus instructions which update whole XMM register should be used |
553 // to prevent partial register stall due to dependencies on high half. | |
554 // | |
555 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) | |
556 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) | |
557 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). | |
558 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). | |
559 | |
560 if( is_amd() ) { // AMD cpus specific settings | |
561 if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) { | |
562 // Use it on new AMD cpus starting from Opteron. | |
563 UseAddressNop = true; | |
564 } | |
565 if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) { | |
566 // Use it on new AMD cpus starting from Opteron. | |
567 UseNewLongLShift = true; | |
568 } | |
569 if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { | |
570 if( supports_sse4a() ) { | |
571 UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron | |
572 } else { | |
573 UseXmmLoadAndClearUpper = false; | |
574 } | |
575 } | |
576 if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { | |
577 if( supports_sse4a() ) { | |
578 UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h' | |
579 } else { | |
580 UseXmmRegToRegMoveAll = false; | |
581 } | |
582 } | |
583 if( FLAG_IS_DEFAULT(UseXmmI2F) ) { | |
584 if( supports_sse4a() ) { | |
585 UseXmmI2F = true; | |
586 } else { | |
587 UseXmmI2F = false; | |
588 } | |
589 } | |
590 if( FLAG_IS_DEFAULT(UseXmmI2D) ) { | |
591 if( supports_sse4a() ) { | |
592 UseXmmI2D = true; | |
593 } else { | |
594 UseXmmI2D = false; | |
595 } | |
596 } | |
2406 | 597 if( FLAG_IS_DEFAULT(UseSSE42Intrinsics) ) { |
598 if( supports_sse4_2() && UseSSE >= 4 ) { | |
599 UseSSE42Intrinsics = true; | |
600 } | |
601 } | |
775
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
602 |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
603 // Use the count-leading-zeros (LZCNT) instruction if available. |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
604 if (supports_lzcnt()) { |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
605 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
606 UseCountLeadingZerosInstruction = true; |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
607 } |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
681
diff
changeset
|
608 } |
2358 | 609 |
3276
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
610 // some defaults for AMD family 15h |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
611 if ( cpu_family() == 0x15 ) { |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
612 // On family 15h processors default is no sw prefetch |
2358 | 613 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { |
614 AllocatePrefetchStyle = 0; | |
615 } | |
3276
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
616 // Also, if some other prefetch style is specified, default instruction type is PREFETCHW |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
617 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
618 AllocatePrefetchInstr = 3; |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
619 } |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
620 // On family 15h processors use XMM and UnalignedLoadStores for Array Copy |
6794 | 621 if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { |
3276
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
622 UseXMMForArrayCopy = true; |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
623 } |
6794 | 624 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { |
3276
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
625 UseUnalignedLoadStores = true; |
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
626 } |
2358 | 627 } |
3276
2a34a4fbc52c
7037812: few more defaults changes for new AMD processors
kvn
parents:
2479
diff
changeset
|
628 |
6179
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
629 #ifdef COMPILER2 |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
630 if (MaxVectorSize > 16) { |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
631 // Limit vectors size to 16 bytes on current AMD cpus. |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
632 FLAG_SET_DEFAULT(MaxVectorSize, 16); |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
633 } |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
4961
diff
changeset
|
634 #endif // COMPILER2 |
585 | 635 } |
636 | |
637 if( is_intel() ) { // Intel cpus specific settings | |
638 if( FLAG_IS_DEFAULT(UseStoreImmI16) ) { | |
639 UseStoreImmI16 = false; // don't use it on Intel cpus | |
640 } | |
641 if( cpu_family() == 6 || cpu_family() == 15 ) { | |
642 if( FLAG_IS_DEFAULT(UseAddressNop) ) { | |
643 // Use it on all Intel cpus starting from PentiumPro | |
644 UseAddressNop = true; | |
645 } | |
646 } | |
647 if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { | |
648 UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus | |
649 } | |
650 if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { | |
651 if( supports_sse3() ) { | |
652 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus | |
653 } else { | |
654 UseXmmRegToRegMoveAll = false; | |
655 } | |
656 } | |
657 if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus | |
658 #ifdef COMPILER2 | |
659 if( FLAG_IS_DEFAULT(MaxLoopPad) ) { | |
660 // For new Intel cpus do the next optimization: | |
661 // don't align the beginning of a loop if there are enough instructions | |
662 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) | |
663 // in current fetch line (OptoLoopAlignment) or the padding | |
664 // is big (> MaxLoopPad). | |
665 // Set MaxLoopPad to 11 for new Intel cpus to reduce number of | |
666 // generated NOP instructions. 11 is the largest size of one | |
667 // address NOP instruction '0F 1F' (see Assembler::nop(i)). | |
668 MaxLoopPad = 11; | |
669 } | |
670 #endif // COMPILER2 | |
6794 | 671 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { |
585 | 672 UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus |
673 } | |
6794 | 674 if (supports_sse4_2() && supports_ht()) { // Newest Intel cpus |
675 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { | |
585 | 676 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus |
677 } | |
678 } | |
6794 | 679 if (supports_sse4_2() && UseSSE >= 4) { |
680 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { | |
681 | 681 UseSSE42Intrinsics = true; |
682 } | |
683 } | |
585 | 684 } |
685 } | |
7638
522c328b8b77
8003878: compiler/7196199 test failed on OS X since 8b54, jdk7u12b01
kvn
parents:
7592
diff
changeset
|
686 #if defined(COMPILER2) && defined(_ALLBSD_SOURCE) |
522c328b8b77
8003878: compiler/7196199 test failed on OS X since 8b54, jdk7u12b01
kvn
parents:
7592
diff
changeset
|
687 if (MaxVectorSize > 16) { |
522c328b8b77
8003878: compiler/7196199 test failed on OS X since 8b54, jdk7u12b01
kvn
parents:
7592
diff
changeset
|
688 // Limit vectors size to 16 bytes on BSD until it fixes |
522c328b8b77
8003878: compiler/7196199 test failed on OS X since 8b54, jdk7u12b01
kvn
parents:
7592
diff
changeset
|
689 // restoring upper 128bit of YMM registers on return |
522c328b8b77
8003878: compiler/7196199 test failed on OS X since 8b54, jdk7u12b01
kvn
parents:
7592
diff
changeset
|
690 // from signal handler. |
522c328b8b77
8003878: compiler/7196199 test failed on OS X since 8b54, jdk7u12b01
kvn
parents:
7592
diff
changeset
|
691 FLAG_SET_DEFAULT(MaxVectorSize, 16); |
522c328b8b77
8003878: compiler/7196199 test failed on OS X since 8b54, jdk7u12b01
kvn
parents:
7592
diff
changeset
|
692 } |
522c328b8b77
8003878: compiler/7196199 test failed on OS X since 8b54, jdk7u12b01
kvn
parents:
7592
diff
changeset
|
693 #endif // COMPILER2 |
585 | 694 |
643
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
695 // Use population count instruction if available. |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
696 if (supports_popcnt()) { |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
697 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
698 UsePopCountInstruction = true; |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
699 } |
4759 | 700 } else if (UsePopCountInstruction) { |
701 warning("POPCNT instruction is not available on this CPU"); | |
702 FLAG_SET_DEFAULT(UsePopCountInstruction, false); | |
643
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
703 } |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
704 |
7474
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7427
diff
changeset
|
705 // Use fast-string operations if available. |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7427
diff
changeset
|
706 if (supports_erms()) { |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7427
diff
changeset
|
707 if (FLAG_IS_DEFAULT(UseFastStosb)) { |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7427
diff
changeset
|
708 UseFastStosb = true; |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7427
diff
changeset
|
709 } |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7427
diff
changeset
|
710 } else if (UseFastStosb) { |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7427
diff
changeset
|
711 warning("fast-string operations are not available on this CPU"); |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7427
diff
changeset
|
712 FLAG_SET_DEFAULT(UseFastStosb, false); |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7427
diff
changeset
|
713 } |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7427
diff
changeset
|
714 |
6794 | 715 #ifdef COMPILER2 |
716 if (FLAG_IS_DEFAULT(AlignVector)) { | |
717 // Modern processors allow misaligned memory operations for vectors. | |
718 AlignVector = !UseUnalignedLoadStores; | |
719 } | |
720 #endif // COMPILER2 | |
721 | |
585 | 722 assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); |
723 assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); | |
724 | |
725 // set valid Prefetch instruction | |
726 if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0; | |
727 if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3; | |
2479 | 728 if( ReadPrefetchInstr == 3 && !supports_3dnow_prefetch() ) ReadPrefetchInstr = 0; |
729 if( !supports_sse() && supports_3dnow_prefetch() ) ReadPrefetchInstr = 3; | |
585 | 730 |
731 if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0; | |
732 if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3; | |
2479 | 733 if( AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch() ) AllocatePrefetchInstr=0; |
734 if( !supports_sse() && supports_3dnow_prefetch() ) AllocatePrefetchInstr = 3; | |
585 | 735 |
736 // Allocation prefetch settings | |
3854 | 737 intx cache_line_size = prefetch_data_size(); |
585 | 738 if( cache_line_size > AllocatePrefetchStepSize ) |
739 AllocatePrefetchStepSize = cache_line_size; | |
3854 | 740 |
585 | 741 assert(AllocatePrefetchLines > 0, "invalid value"); |
3854 | 742 if( AllocatePrefetchLines < 1 ) // set valid value in product VM |
743 AllocatePrefetchLines = 3; | |
744 assert(AllocateInstancePrefetchLines > 0, "invalid value"); | |
745 if( AllocateInstancePrefetchLines < 1 ) // set valid value in product VM | |
746 AllocateInstancePrefetchLines = 1; | |
585 | 747 |
748 AllocatePrefetchDistance = allocate_prefetch_distance(); | |
749 AllocatePrefetchStyle = allocate_prefetch_style(); | |
750 | |
1622 | 751 if( is_intel() && cpu_family() == 6 && supports_sse3() ) { |
752 if( AllocatePrefetchStyle == 2 ) { // watermark prefetching on Core | |
585 | 753 #ifdef _LP64 |
1622 | 754 AllocatePrefetchDistance = 384; |
585 | 755 #else |
1622 | 756 AllocatePrefetchDistance = 320; |
585 | 757 #endif |
1622 | 758 } |
759 if( supports_sse4_2() && supports_ht() ) { // Nehalem based cpus | |
760 AllocatePrefetchDistance = 192; | |
761 AllocatePrefetchLines = 4; | |
1730
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
762 #ifdef COMPILER2 |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
763 if (AggressiveOpts && FLAG_IS_DEFAULT(UseFPUForSpilling)) { |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
764 FLAG_SET_DEFAULT(UseFPUForSpilling, true); |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
765 } |
f55c4f82ab9d
6978249: spill between cpu and fpu registers when those moves are fast
never
parents:
1622
diff
changeset
|
766 #endif |
1622 | 767 } |
585 | 768 } |
769 assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value"); | |
770 | |
771 #ifdef _LP64 | |
772 // Prefetch settings | |
773 PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes(); | |
774 PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes(); | |
775 PrefetchFieldsAhead = prefetch_fields_ahead(); | |
776 #endif | |
777 | |
7587 | 778 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && |
779 (cache_line_size > ContendedPaddingWidth)) | |
780 ContendedPaddingWidth = cache_line_size; | |
781 | |
585 | 782 #ifndef PRODUCT |
783 if (PrintMiscellaneous && Verbose) { | |
784 tty->print_cr("Logical CPUs per core: %u", | |
785 logical_processors_per_package()); | |
4759 | 786 tty->print("UseSSE=%d",UseSSE); |
787 if (UseAVX > 0) { | |
788 tty->print(" UseAVX=%d",UseAVX); | |
789 } | |
6894 | 790 if (UseAES) { |
791 tty->print(" UseAES=1"); | |
792 } | |
4759 | 793 tty->cr(); |
3854 | 794 tty->print("Allocation"); |
2479 | 795 if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) { |
3854 | 796 tty->print_cr(": no prefetching"); |
585 | 797 } else { |
3854 | 798 tty->print(" prefetching: "); |
2479 | 799 if (UseSSE == 0 && supports_3dnow_prefetch()) { |
585 | 800 tty->print("PREFETCHW"); |
801 } else if (UseSSE >= 1) { | |
802 if (AllocatePrefetchInstr == 0) { | |
803 tty->print("PREFETCHNTA"); | |
804 } else if (AllocatePrefetchInstr == 1) { | |
805 tty->print("PREFETCHT0"); | |
806 } else if (AllocatePrefetchInstr == 2) { | |
807 tty->print("PREFETCHT2"); | |
808 } else if (AllocatePrefetchInstr == 3) { | |
809 tty->print("PREFETCHW"); | |
810 } | |
811 } | |
812 if (AllocatePrefetchLines > 1) { | |
3854 | 813 tty->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); |
585 | 814 } else { |
3854 | 815 tty->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize); |
585 | 816 } |
817 } | |
818 | |
819 if (PrefetchCopyIntervalInBytes > 0) { | |
820 tty->print_cr("PrefetchCopyIntervalInBytes %d", PrefetchCopyIntervalInBytes); | |
821 } | |
822 if (PrefetchScanIntervalInBytes > 0) { | |
823 tty->print_cr("PrefetchScanIntervalInBytes %d", PrefetchScanIntervalInBytes); | |
824 } | |
825 if (PrefetchFieldsAhead > 0) { | |
826 tty->print_cr("PrefetchFieldsAhead %d", PrefetchFieldsAhead); | |
827 } | |
7587 | 828 if (ContendedPaddingWidth > 0) { |
829 tty->print_cr("ContendedPaddingWidth %d", ContendedPaddingWidth); | |
830 } | |
585 | 831 } |
832 #endif // !PRODUCT | |
833 } | |
834 | |
// VM_Version::initialize: builds the getPsrInfo stub and then runs CPU
// feature detection.
//
// Steps, in order:
//   1. Allocate a BufferBlob named "getPsrInfo_stub" of stub_size bytes and
//      store it in stub_blob; the VM exits during initialization if the
//      allocation fails.
//   2. Generate the stub code into that blob through VM_Version_StubGenerator
//      and publish the resulting entry point in getPsrInfo_stub.
//   3. Call get_processor_features().
//      NOTE(review): get_processor_features() presumably invokes the stub
//      generated in step 2 -- its beginning is outside this view; confirm.
835 void VM_Version::initialize() { | |
836 ResourceMark rm; | |
837 // Making this stub must be FIRST use of assembler | |
838 | |
// Allocate the code blob that will hold the generated stub.
839 stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size); | |
840 if (stub_blob == NULL) { | |
// Without the blob the stub cannot be generated, so startup is aborted.
841 vm_exit_during_initialization("Unable to allocate getPsrInfo_stub"); | |
842 } | |
// Wrap the blob in a CodeBuffer and hand it to the stub generator.
1748 | 843 CodeBuffer c(stub_blob); |
585 | 844 VM_Version_StubGenerator g(&c); |
// Cast the generated stub's address to a callable function pointer.
845 getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t, | |
846 g.generate_getPsrInfo()); | |
847 | |
// The stub is in place; now determine the processor features.
848 get_processor_features(); | |
849 } |