Mercurial > hg > truffle
annotate src/cpu/x86/vm/vm_version_x86.hpp @ 15388:769fc3629f59
Add phase FlowSensitiveReductionPhase.
It is possible to remove GuardingPiNodes, CheckCastNodes, and FixedGuards during
HighTier under certain conditions (control-flow sensitive conditions).
The phase added in this commit (FlowSensitiveReductionPhase) does that,
and in addition replaces usages with "downcasting" PiNodes when possible
thus resulting in more precise object stamps (e.g., non-null).
Finally, usages of floating, side-effect-free expressions are also simplified
(as per control-flow sensitive conditions).
The newly added phase runs only during HighTier and can be deactivated
using Graal option FlowSensitiveReduction (it is active by default).
author | Miguel Garcia <miguel.m.garcia@oracle.com> |
---|---|
date | Fri, 25 Apr 2014 16:50:52 +0200 |
parents | eefb0224149d |
children | b51e29501f30 52b4284cb496 |
rev | line source |
---|---|
585 | 1 /* |
11080
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7474
diff
changeset
|
2 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. |
585 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
775
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
775
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
775
diff
changeset
|
21 * questions. |
585 | 22 * |
23 */ | |
24 | |
1972 | 25 #ifndef CPU_X86_VM_VM_VERSION_X86_HPP |
26 #define CPU_X86_VM_VM_VERSION_X86_HPP | |
27 | |
28 #include "runtime/globals_extension.hpp" | |
29 #include "runtime/vm_version.hpp" | |
30 | |
585 | 31 class VM_Version : public Abstract_VM_Version { |
13517
eefb0224149d
derive CPUFeatures from VM_Version::_cpuFeatures
Tom Rodriguez <tom.rodriguez@oracle.com>
parents:
11173
diff
changeset
|
32 friend class VMStructs; |
585 | 33 public: |
34 // cpuid result register layouts. These are all unions of a uint32_t | |
35 // (in case anyone wants access to the register as a whole) and a bitfield. | |
36 | |
// cpuid function 1, EAX: processor signature.
// 'value' is the raw register; 'bits' overlays the individual fields, declared
// in ascending bit order (assumes the compiler allocates bit-fields LSB-first,
// as the x86 ABIs do).
union StdCpuid1Eax {
  uint32_t value;
  struct {
    uint32_t stepping   : 4,
             model      : 4,
             family     : 4,
             proc_type  : 2,
                        : 2,
             ext_model  : 4,
             ext_family : 8,
                        : 4;
  } bits;
};
50 | |
// cpuid function 1, EBX (example layout; only threads_per_cpu is read in the
// visible part of this file). Four byte-wide fields, lowest byte first.
union StdCpuid1Ebx { // example, unused
  uint32_t value;
  struct {
    uint32_t brand_id         : 8,
             clflush_size     : 8,
             threads_per_cpu  : 8,
             apic_id          : 8;
  } bits;
};
60 | |
// cpuid function 1, ECX: feature bits. Unnamed bit-fields are padding for
// bits this code does not look at. Fields are declared in ascending bit order
// (assumes LSB-first bit-field allocation, as on the x86 ABIs).
union StdCpuid1Ecx {
  uint32_t value;
  struct {
    uint32_t sse3     : 1,
             clmul    : 1,
                      : 1,
             monitor  : 1,
                      : 1,
             vmx      : 1,
                      : 1,
             est      : 1,
                      : 1,
             ssse3    : 1,
             cid      : 1,
                      : 2,
             cmpxchg16: 1,
                      : 4,
             dca      : 1,
             sse4_1   : 1,
             sse4_2   : 1,
                      : 2,
             popcnt   : 1,
                      : 1,
             aes      : 1,
                      : 1,
             osxsave  : 1,
             avx      : 1,
                      : 3;
  } bits;
};
91 | |
// cpuid function 1, EDX: feature bits. Unnamed bit-fields are padding for
// bits this code does not look at. Fields are declared in ascending bit order
// (assumes LSB-first bit-field allocation, as on the x86 ABIs).
union StdCpuid1Edx {
  uint32_t value;
  struct {
    uint32_t          : 4,
             tsc      : 1,
                      : 3,
             cmpxchg8 : 1,
                      : 6,
             cmov     : 1,
                      : 3,
             clflush  : 1,
                      : 3,
             mmx      : 1,
             fxsr     : 1,
             sse      : 1,
             sse2     : 1,
                      : 1,
             ht       : 1,
                      : 3;
  } bits;
};
113 | |
// cpuid function 4 (deterministic cache parameters), EAX.
// cores_per_cpu occupies the top 6 bits; readers in this file add 1 to it.
union DcpCpuid4Eax {
  uint32_t value;
  struct {
    uint32_t cache_type    : 5,
                           : 21,
             cores_per_cpu : 6;
  } bits;
};
122 | |
// cpuid function 4 (deterministic cache parameters), EBX.
// L1_line_size is the low 12 bits; readers in this file add 1 to it.
union DcpCpuid4Ebx {
  uint32_t value;
  struct {
    uint32_t L1_line_size  : 12,
             partitions    : 10,
             associativity : 10;
  } bits;
};
131 | |
// cpuid function 0xB (processor topology), EBX: the low 16 bits hold the
// logical processor count for the queried topology level.
union TplCpuidBEbx {
  uint32_t value;
  struct {
    uint32_t logical_cpus : 16,
                          : 16;
  } bits;
};
139 | |
// cpuid function 0x80000001, ECX: extended feature bits.
//
// Bit positions follow the vendor CPUID documentation: LahfSahf = 0,
// CmpLegacy = 1, lzcnt (ABM) = 5, sse4a = 6, misalignsse = 7,
// prefetchw (3DNowPrefetch) = 8. Only bits 2..4 are skipped between CmpLegacy
// and lzcnt, so the padding there must be 3 bits wide; a 4-bit pad would make
// every later field read the bit one position too high.
// Fields are declared in ascending bit order (assumes LSB-first bit-field
// allocation, as on the x86 ABIs).
union ExtCpuid1Ecx {
  uint32_t value;
  struct {
    uint32_t LahfSahf     : 1,
             CmpLegacy    : 1,
                          : 3,
             lzcnt        : 1,
             sse4a        : 1,
             misalignsse  : 1,
             prefetchw    : 1,
                          : 23;
  } bits;
};
153 | |
// cpuid function 0x80000001, EDX: extended feature bits.
// Fields are declared in ascending bit order (assumes LSB-first bit-field
// allocation, as on the x86 ABIs); tdnow is the topmost bit.
union ExtCpuid1Edx {
  uint32_t value;
  struct {
    uint32_t           : 22,
             mmx_amd   : 1,
             mmx       : 1,
             fxsr      : 1,
                       : 4,
             long_mode : 1,
             tdnow2    : 1,
             tdnow     : 1;
  } bits;
};
167 | |
// cpuid function 0x80000005, ECX/EDX: L1 cache descriptor (used for both the
// data and the instruction cache register). Four byte-wide fields, lowest
// byte first.
union ExtCpuid5Ex {
  uint32_t value;
  struct {
    uint32_t L1_line_size : 8,
             L1_tag_lines : 8,
             L1_assoc     : 8,
             L1_size      : 8;
  } bits;
};
177 | |
// cpuid function 0x80000007, EDX: only the invariant-TSC flag (bit 8, assuming
// LSB-first bit-field allocation) is decoded.
union ExtCpuid7Edx {
  uint32_t value;
  struct {
    uint32_t                : 8,
             tsc_invariance : 1,
                            : 23;
  } bits;
};
186 | |
// cpuid function 0x80000008, ECX: the low byte holds the core count field;
// readers in this file add 1 to it.
union ExtCpuid8Ecx {
  uint32_t value;
  struct {
    uint32_t cores_per_cpu : 8,
                           : 24;
  } bits;
};
194 | |
// cpuid function 7 (structured extended features), EAX: kept as a raw value,
// no individual bits are decoded.
union SefCpuid7Eax {
  uint32_t value;
};
198 | |
// cpuid function 7 (structured extended features), EBX: feature bits.
// Fields are declared in ascending bit order (assumes LSB-first bit-field
// allocation, as on the x86 ABIs).
union SefCpuid7Ebx {
  uint32_t value;
  struct {
    uint32_t fsgsbase : 1,
                      : 2,
             bmi1     : 1,
                      : 1,
             avx2     : 1,
                      : 2,
             bmi2     : 1,
             erms     : 1,
                      : 22;
  } bits;
};
213 | |
// Low half of extended control register XCR0 (the XFEATURE_ENABLED_MASK
// register): which register state the OS has enabled. feature_flags() checks
// sse and ymm before reporting AVX.
union XemXcr0Eax {
  uint32_t value;
  struct {
    uint32_t x87 : 1,
             sse : 1,
             ymm : 1,
                 : 29;
  } bits;
};
223 | |
585 | 224 protected: |
4749
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
225 static int _cpu; |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
226 static int _model; |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
227 static int _stepping; |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
228 static int _cpuFeatures; // features returned by the "cpuid" instruction |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
229 // 0 if this instruction is not available |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
230 static const char* _features_str; |
585 | 231 |
4749
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
// Bit masks stored in _cpuFeatures. Each enumerator is a distinct single bit;
// feature_flags() sets them from the raw cpuid data and the supports_*()
// predicates test them.
enum {
  CPU_CX8              = (1 << 0), // next bits are from cpuid 1 (EDX)
  CPU_CMOV             = (1 << 1),
  CPU_FXSR             = (1 << 2),
  CPU_HT               = (1 << 3),
  CPU_MMX              = (1 << 4),
  CPU_3DNOW_PREFETCH   = (1 << 5), // Processor supports 3dnow prefetch and prefetchw instructions
                                   // may not necessarily support other 3dnow instructions
  CPU_SSE              = (1 << 6),
  CPU_SSE2             = (1 << 7),
  CPU_SSE3             = (1 << 8), // SSE3 comes from cpuid 1 (ECX)
  CPU_SSSE3            = (1 << 9),
  CPU_SSE4A            = (1 << 10),
  CPU_SSE4_1           = (1 << 11),
  CPU_SSE4_2           = (1 << 12),
  CPU_POPCNT           = (1 << 13),
  CPU_LZCNT            = (1 << 14),
  CPU_TSC              = (1 << 15),
  CPU_TSCINV           = (1 << 16),
  CPU_AVX              = (1 << 17),
  CPU_AVX2             = (1 << 18),
  CPU_AES              = (1 << 19),
  CPU_ERMS             = (1 << 20), // enhanced 'rep movsb/stosb' instructions
  CPU_CLMUL            = (1 << 21)  // carryless multiply for CRC
} cpuFeatureFlags;
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
257 |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
// Known cpu family and model numbers. CPU_FAMILY_* values are compared against
// extended_cpu_family(); CPU_MODEL_* values against extended_cpu_model().
enum {
  // AMD
  CPU_FAMILY_AMD_11H       = 0x11,
  // Intel
  CPU_FAMILY_INTEL_CORE    = 6,
  CPU_MODEL_NEHALEM        = 0x1e,
  CPU_MODEL_NEHALEM_EP     = 0x1a,
  CPU_MODEL_NEHALEM_EX     = 0x2e,
  CPU_MODEL_WESTMERE       = 0x25,
  CPU_MODEL_WESTMERE_EP    = 0x2c,
  CPU_MODEL_WESTMERE_EX    = 0x2f,
  CPU_MODEL_SANDYBRIDGE    = 0x2a,
  CPU_MODEL_SANDYBRIDGE_EP = 0x2d,
  CPU_MODEL_IVYBRIDGE_EP   = 0x3a
} cpuExtendedFamily;
585 | 273 |
274 // cpuid information block. All info derived from executing cpuid with | |
275 // various function numbers is stored here. Intel and AMD info is | |
276 // merged in this block: accessor methods disentangle it. | |
277 // | |
278 // The info block is laid out in subblocks of 4 dwords corresponding to | |
279 // eax, ebx, ecx and edx, whether or not they contain anything useful. | |
280 struct CpuidInfo { | |
281 // cpuid function 0 | |
282 uint32_t std_max_function; | |
283 uint32_t std_vendor_name_0; | |
284 uint32_t std_vendor_name_1; | |
285 uint32_t std_vendor_name_2; | |
286 | |
287 // cpuid function 1 | |
288 StdCpuid1Eax std_cpuid1_eax; | |
289 StdCpuid1Ebx std_cpuid1_ebx; | |
290 StdCpuid1Ecx std_cpuid1_ecx; | |
291 StdCpuid1Edx std_cpuid1_edx; | |
292 | |
293 // cpuid function 4 (deterministic cache parameters) | |
294 DcpCpuid4Eax dcp_cpuid4_eax; | |
295 DcpCpuid4Ebx dcp_cpuid4_ebx; | |
296 uint32_t dcp_cpuid4_ecx; // unused currently | |
297 uint32_t dcp_cpuid4_edx; // unused currently | |
298 | |
4759 | 299 // cpuid function 7 (structured extended features) |
300 SefCpuid7Eax sef_cpuid7_eax; | |
301 SefCpuid7Ebx sef_cpuid7_ebx; | |
302 uint32_t sef_cpuid7_ecx; // unused currently | |
303 uint32_t sef_cpuid7_edx; // unused currently | |
304 | |
1622 | 305 // cpuid function 0xB (processor topology) |
306 // ecx = 0 | |
307 uint32_t tpl_cpuidB0_eax; | |
308 TplCpuidBEbx tpl_cpuidB0_ebx; | |
309 uint32_t tpl_cpuidB0_ecx; // unused currently | |
310 uint32_t tpl_cpuidB0_edx; // unused currently | |
311 | |
312 // ecx = 1 | |
313 uint32_t tpl_cpuidB1_eax; | |
314 TplCpuidBEbx tpl_cpuidB1_ebx; | |
315 uint32_t tpl_cpuidB1_ecx; // unused currently | |
316 uint32_t tpl_cpuidB1_edx; // unused currently | |
317 | |
318 // ecx = 2 | |
319 uint32_t tpl_cpuidB2_eax; | |
320 TplCpuidBEbx tpl_cpuidB2_ebx; | |
321 uint32_t tpl_cpuidB2_ecx; // unused currently | |
322 uint32_t tpl_cpuidB2_edx; // unused currently | |
323 | |
585 | 324 // cpuid function 0x80000000 // example, unused |
325 uint32_t ext_max_function; | |
326 uint32_t ext_vendor_name_0; | |
327 uint32_t ext_vendor_name_1; | |
328 uint32_t ext_vendor_name_2; | |
329 | |
330 // cpuid function 0x80000001 | |
331 uint32_t ext_cpuid1_eax; // reserved | |
332 uint32_t ext_cpuid1_ebx; // reserved | |
333 ExtCpuid1Ecx ext_cpuid1_ecx; | |
334 ExtCpuid1Edx ext_cpuid1_edx; | |
335 | |
336 // cpuid functions 0x80000002 thru 0x80000004: example, unused | |
337 uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3; | |
338 uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7; | |
339 uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11; | |
340 | |
4933
4a24c4f648bd
7142113: Add Ivy Bridge to the known Intel x86 cpu families
phh
parents:
4771
diff
changeset
|
341 // cpuid function 0x80000005 // AMD L1, Intel reserved |
585 | 342 uint32_t ext_cpuid5_eax; // unused currently |
343 uint32_t ext_cpuid5_ebx; // reserved | |
344 ExtCpuid5Ex ext_cpuid5_ecx; // L1 data cache info (AMD) | |
345 ExtCpuid5Ex ext_cpuid5_edx; // L1 instruction cache info (AMD) | |
346 | |
4749
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
347 // cpuid function 0x80000007 |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
348 uint32_t ext_cpuid7_eax; // reserved |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
349 uint32_t ext_cpuid7_ebx; // reserved |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
350 uint32_t ext_cpuid7_ecx; // reserved |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
351 ExtCpuid7Edx ext_cpuid7_edx; // tscinv |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
352 |
585 | 353 // cpuid function 0x80000008 |
354 uint32_t ext_cpuid8_eax; // unused currently | |
355 uint32_t ext_cpuid8_ebx; // reserved | |
356 ExtCpuid8Ecx ext_cpuid8_ecx; | |
357 uint32_t ext_cpuid8_edx; // reserved | |
4759 | 358 |
359 // extended control register XCR0 (the XFEATURE_ENABLED_MASK register) | |
360 XemXcr0Eax xem_xcr0_eax; | |
361 uint32_t xem_xcr0_edx; // reserved | |
585 | 362 }; |
363 | |
364 // The actual cpuid info block | |
365 static CpuidInfo _cpuid_info; | |
366 | |
367 // Extractors and predicates | |
368 static uint32_t extended_cpu_family() { | |
369 uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family; | |
370 result += _cpuid_info.std_cpuid1_eax.bits.ext_family; | |
371 return result; | |
372 } | |
4749
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
373 |
585 | 374 static uint32_t extended_cpu_model() { |
375 uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model; | |
376 result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4; | |
377 return result; | |
378 } | |
4749
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
379 |
585 | 380 static uint32_t cpu_stepping() { |
381 uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping; | |
382 return result; | |
383 } | |
4749
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
384 |
585 | 385 static uint logical_processor_count() { |
386 uint result = threads_per_core(); | |
387 return result; | |
388 } | |
4749
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
389 |
585 | 390 static uint32_t feature_flags() { |
391 uint32_t result = 0; | |
392 if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0) | |
393 result |= CPU_CX8; | |
394 if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0) | |
395 result |= CPU_CMOV; | |
1789
a83b0246bb77
6934483: GCC 4.5 errors "suggest parentheses around something..." when compiling with -Werror and -Wall
twisti
parents:
1647
diff
changeset
|
396 if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || (is_amd() && |
a83b0246bb77
6934483: GCC 4.5 errors "suggest parentheses around something..." when compiling with -Werror and -Wall
twisti
parents:
1647
diff
changeset
|
397 _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0)) |
585 | 398 result |= CPU_FXSR; |
399 // HT flag is set for multi-core processors also. | |
400 if (threads_per_core() > 1) | |
401 result |= CPU_HT; | |
1789
a83b0246bb77
6934483: GCC 4.5 errors "suggest parentheses around something..." when compiling with -Werror and -Wall
twisti
parents:
1647
diff
changeset
|
402 if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || (is_amd() && |
a83b0246bb77
6934483: GCC 4.5 errors "suggest parentheses around something..." when compiling with -Werror and -Wall
twisti
parents:
1647
diff
changeset
|
403 _cpuid_info.ext_cpuid1_edx.bits.mmx != 0)) |
585 | 404 result |= CPU_MMX; |
405 if (_cpuid_info.std_cpuid1_edx.bits.sse != 0) | |
406 result |= CPU_SSE; | |
407 if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0) | |
408 result |= CPU_SSE2; | |
409 if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0) | |
410 result |= CPU_SSE3; | |
411 if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0) | |
412 result |= CPU_SSSE3; | |
413 if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0) | |
414 result |= CPU_SSE4_1; | |
415 if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) | |
416 result |= CPU_SSE4_2; | |
643
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
417 if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0) |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
418 result |= CPU_POPCNT; |
4759 | 419 if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 && |
420 _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 && | |
421 _cpuid_info.xem_xcr0_eax.bits.sse != 0 && | |
422 _cpuid_info.xem_xcr0_eax.bits.ymm != 0) { | |
423 result |= CPU_AVX; | |
424 if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0) | |
425 result |= CPU_AVX2; | |
426 } | |
4749
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
427 if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0) |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
428 result |= CPU_TSC; |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
429 if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0) |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
430 result |= CPU_TSCINV; |
6894 | 431 if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0) |
432 result |= CPU_AES; | |
7474
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
6894
diff
changeset
|
433 if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0) |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
6894
diff
changeset
|
434 result |= CPU_ERMS; |
11080
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7474
diff
changeset
|
435 if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0) |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7474
diff
changeset
|
436 result |= CPU_CLMUL; |
775
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
643
diff
changeset
|
437 |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
643
diff
changeset
|
438 // AMD features. |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
643
diff
changeset
|
439 if (is_amd()) { |
2479 | 440 if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) || |
441 (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0)) | |
442 result |= CPU_3DNOW_PREFETCH; | |
775
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
643
diff
changeset
|
443 if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
643
diff
changeset
|
444 result |= CPU_LZCNT; |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
643
diff
changeset
|
445 if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
643
diff
changeset
|
446 result |= CPU_SSE4A; |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
643
diff
changeset
|
447 } |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
643
diff
changeset
|
448 |
585 | 449 return result; |
450 } | |
451 | |
452 static void get_processor_features(); | |
453 | |
454 public: | |
455 // Offsets for cpuid asm stub | |
456 static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } | |
457 static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); } | |
458 static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); } | |
4759 | 459 static ByteSize sef_cpuid7_offset() { return byte_offset_of(CpuidInfo, sef_cpuid7_eax); } |
585 | 460 static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); } |
461 static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); } | |
4749
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
462 static ByteSize ext_cpuid7_offset() { return byte_offset_of(CpuidInfo, ext_cpuid7_eax); } |
585 | 463 static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); } |
1622 | 464 static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); } |
465 static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); } | |
466 static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); } | |
4759 | 467 static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); } |
585 | 468 |
469 // Initialization | |
470 static void initialize(); | |
471 | |
472 // Asserts | |
473 static void assert_is_initialized() { | |
474 assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized"); | |
475 } | |
476 | |
477 // | |
478 // Processor family: | |
479 // 3 - 386 | |
480 // 4 - 486 | |
481 // 5 - Pentium | |
482 // 6 - PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon, | |
483 // Pentium M, Core Solo, Core Duo, Core2 Duo | |
484 // family 6 model: 9, 13, 14, 15 | |
485 // 0x0f - Pentium 4, Opteron | |
486 // | |
487 // Note: The cpu family should be used to select between | |
488 // instruction sequences which are valid on all Intel | |
489 // processors. Use the feature test functions below to | |
490 // determine whether a particular instruction is supported. | |
491 // | |
492 static int cpu_family() { return _cpu;} | |
493 static bool is_P6() { return cpu_family() >= 6; } | |
494 static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA' | |
495 static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG' | |
496 | |
1647 | 497 static bool supports_processor_topology() { |
498 return (_cpuid_info.std_max_function >= 0xB) && | |
499 // eax[4:0] | ebx[0:15] == 0 indicates invalid topology level. | |
500 // Some cpus have max cpuid >= 0xB but do not support processor topology. | |
7474
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
6894
diff
changeset
|
501 (((_cpuid_info.tpl_cpuidB0_eax & 0x1f) | _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus) != 0); |
1647 | 502 } |
503 | |
585 | 504 static uint cores_per_cpu() { |
505 uint result = 1; | |
506 if (is_intel()) { | |
1647 | 507 if (supports_processor_topology()) { |
1622 | 508 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / |
509 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; | |
510 } else { | |
511 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); | |
512 } | |
585 | 513 } else if (is_amd()) { |
514 result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); | |
515 } | |
516 return result; | |
517 } | |
518 | |
519 static uint threads_per_core() { | |
520 uint result = 1; | |
1647 | 521 if (is_intel() && supports_processor_topology()) { |
1622 | 522 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; |
523 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { | |
585 | 524 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / |
525 cores_per_cpu(); | |
526 } | |
527 return result; | |
528 } | |
529 | |
3854 | 530 static intx prefetch_data_size() { |
585 | 531 intx result = 0; |
532 if (is_intel()) { | |
533 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); | |
534 } else if (is_amd()) { | |
535 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; | |
536 } | |
537 if (result < 32) // not defined ? | |
538 result = 32; // 32 bytes by default on x86 and other x64 | |
539 return result; | |
540 } | |
541 | |
542 // | |
543 // Feature identification | |
544 // | |
545 static bool supports_cpuid() { return _cpuFeatures != 0; } | |
546 static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; } | |
547 static bool supports_cmov() { return (_cpuFeatures & CPU_CMOV) != 0; } | |
548 static bool supports_fxsr() { return (_cpuFeatures & CPU_FXSR) != 0; } | |
549 static bool supports_ht() { return (_cpuFeatures & CPU_HT) != 0; } | |
550 static bool supports_mmx() { return (_cpuFeatures & CPU_MMX) != 0; } | |
551 static bool supports_sse() { return (_cpuFeatures & CPU_SSE) != 0; } | |
552 static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; } | |
553 static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; } | |
554 static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; } | |
555 static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } | |
556 static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } | |
643
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
557 static bool supports_popcnt() { return (_cpuFeatures & CPU_POPCNT) != 0; } |
4759 | 558 static bool supports_avx() { return (_cpuFeatures & CPU_AVX) != 0; } |
559 static bool supports_avx2() { return (_cpuFeatures & CPU_AVX2) != 0; } | |
4749
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
560 static bool supports_tsc() { return (_cpuFeatures & CPU_TSC) != 0; } |
6894 | 561 static bool supports_aes() { return (_cpuFeatures & CPU_AES) != 0; } |
7474
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
6894
diff
changeset
|
562 static bool supports_erms() { return (_cpuFeatures & CPU_ERMS) != 0; } |
11080
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7474
diff
changeset
|
563 static bool supports_clmul() { return (_cpuFeatures & CPU_CLMUL) != 0; } |
4749
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
564 |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
565 // Intel features |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
566 static bool is_intel_family_core() { return is_intel() && |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
567 extended_cpu_family() == CPU_FAMILY_INTEL_CORE; } |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
568 |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
569 static bool is_intel_tsc_synched_at_init() { |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
570 if (is_intel_family_core()) { |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
571 uint32_t ext_model = extended_cpu_model(); |
4933
4a24c4f648bd
7142113: Add Ivy Bridge to the known Intel x86 cpu families
phh
parents:
4771
diff
changeset
|
572 if (ext_model == CPU_MODEL_NEHALEM_EP || |
4a24c4f648bd
7142113: Add Ivy Bridge to the known Intel x86 cpu families
phh
parents:
4771
diff
changeset
|
573 ext_model == CPU_MODEL_WESTMERE_EP || |
4a24c4f648bd
7142113: Add Ivy Bridge to the known Intel x86 cpu families
phh
parents:
4771
diff
changeset
|
574 ext_model == CPU_MODEL_SANDYBRIDGE_EP || |
4a24c4f648bd
7142113: Add Ivy Bridge to the known Intel x86 cpu families
phh
parents:
4771
diff
changeset
|
575 ext_model == CPU_MODEL_IVYBRIDGE_EP) { |
4a24c4f648bd
7142113: Add Ivy Bridge to the known Intel x86 cpu families
phh
parents:
4771
diff
changeset
|
576 // <= 2-socket invariant tsc support. EX versions are usually used |
4a24c4f648bd
7142113: Add Ivy Bridge to the known Intel x86 cpu families
phh
parents:
4771
diff
changeset
|
577 // in > 2-socket systems and likely don't synchronize tscs at |
4a24c4f648bd
7142113: Add Ivy Bridge to the known Intel x86 cpu families
phh
parents:
4771
diff
changeset
|
578 // initialization. |
4a24c4f648bd
7142113: Add Ivy Bridge to the known Intel x86 cpu families
phh
parents:
4771
diff
changeset
|
579 // Code that uses tsc values must be prepared for them to arbitrarily |
4a24c4f648bd
7142113: Add Ivy Bridge to the known Intel x86 cpu families
phh
parents:
4771
diff
changeset
|
580 // jump forward or backward. |
4749
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
581 return true; |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
582 } |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
583 } |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
584 return false; |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
585 } |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
586 |
585 | 587 // AMD features |
2479 | 588 static bool supports_3dnow_prefetch() { return (_cpuFeatures & CPU_3DNOW_PREFETCH) != 0; } |
585 | 589 static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; } |
775
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
643
diff
changeset
|
590 static bool supports_lzcnt() { return (_cpuFeatures & CPU_LZCNT) != 0; } |
585 | 591 static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; } |
592 | |
4749
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
593 static bool is_amd_Barcelona() { return is_amd() && |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
594 extended_cpu_family() == CPU_FAMILY_AMD_11H; } |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
595 |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
596 // Intel and AMD newer cores support fast timestamps well |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
597 static bool supports_tscinv_bit() { |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
598 return (_cpuFeatures & CPU_TSCINV) != 0; |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
599 } |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
600 static bool supports_tscinv() { |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
601 return supports_tscinv_bit() && |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
602 ( (is_amd() && !is_amd_Barcelona()) || |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
603 is_intel_tsc_synched_at_init() ); |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
604 } |
7ab5f6318694
7125934: Add a fast unordered timestamp capability to Hotspot on x86/x64
phh
parents:
3865
diff
changeset
|
605 |
1914
ae065c367d93
6987135: Performance regression on Intel platform with 32-bits edition between 6u13 and 6u14.
kvn
parents:
1789
diff
changeset
|
606 // Intel Core and newer cpus have fast IDIV instruction (excluding Atom). |
ae065c367d93
6987135: Performance regression on Intel platform with 32-bits edition between 6u13 and 6u14.
kvn
parents:
1789
diff
changeset
|
607 static bool has_fast_idiv() { return is_intel() && cpu_family() == 6 && |
ae065c367d93
6987135: Performance regression on Intel platform with 32-bits edition between 6u13 and 6u14.
kvn
parents:
1789
diff
changeset
|
608 supports_sse3() && _model != 0x1C; } |
ae065c367d93
6987135: Performance regression on Intel platform with 32-bits edition between 6u13 and 6u14.
kvn
parents:
1789
diff
changeset
|
609 |
// Unconditionally true for this platform; no feature bit is consulted.
static bool supports_compare_and_exchange() {
  return true;
}
611 | |
612 static const char* cpu_features() { return _features_str; } | |
613 | |
614 static intx allocate_prefetch_distance() { | |
615 // This method should be called before allocate_prefetch_style(). | |
616 // | |
617 // Hardware prefetching (distance/size in bytes): | |
618 // Pentium 3 - 64 / 32 | |
619 // Pentium 4 - 256 / 128 | |
620 // Athlon - 64 / 32 ???? | |
621 // Opteron - 128 / 64 only when 2 sequential cache lines accessed | |
622 // Core - 128 / 64 | |
623 // | |
624 // Software prefetching (distance in bytes / instruction with best score): | |
625 // Pentium 3 - 128 / prefetchnta | |
626 // Pentium 4 - 512 / prefetchnta | |
627 // Athlon - 128 / prefetchnta | |
628 // Opteron - 256 / prefetchnta | |
629 // Core - 256 / prefetchnta | |
630 // It will be used only when AllocatePrefetchStyle > 0 | |
631 | |
632 intx count = AllocatePrefetchDistance; | |
633 if (count < 0) { // default ? | |
634 if (is_amd()) { // AMD | |
635 if (supports_sse2()) | |
636 count = 256; // Opteron | |
637 else | |
638 count = 128; // Athlon | |
639 } else { // Intel | |
640 if (supports_sse2()) | |
641 if (cpu_family() == 6) { | |
642 count = 256; // Pentium M, Core, Core2 | |
643 } else { | |
644 count = 512; // Pentium 4 | |
645 } | |
646 else | |
647 count = 128; // Pentium 3 (and all other old CPUs) | |
648 } | |
649 } | |
650 return count; | |
651 } | |
652 static intx allocate_prefetch_style() { | |
653 assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); | |
654 // Return 0 if AllocatePrefetchDistance was not defined. | |
655 return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0; | |
656 } | |
657 | |
658 // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from | |
659 // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap. | |
660 // Tested intervals from 128 to 2048 in increments of 64 == one cache line. | |
661 // 256 bytes (4 dcache lines) was the nearest runner-up to 576. | |
662 | |
663 // gc copy/scan is disabled if prefetchw isn't supported, because | |
664 // Prefetch::write emits an inlined prefetchw on Linux. | |
665 // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. | |
666 // The used prefetcht0 instruction works for both amd64 and em64t. | |
667 static intx prefetch_copy_interval_in_bytes() { | |
668 intx interval = PrefetchCopyIntervalInBytes; | |
669 return interval >= 0 ? interval : 576; | |
670 } | |
671 static intx prefetch_scan_interval_in_bytes() { | |
672 intx interval = PrefetchScanIntervalInBytes; | |
673 return interval >= 0 ? interval : 576; | |
674 } | |
675 static intx prefetch_fields_ahead() { | |
676 intx count = PrefetchFieldsAhead; | |
677 return count >= 0 ? count : 1; | |
678 } | |
679 }; | |
1972 | 680 |
681 #endif // CPU_X86_VM_VM_VERSION_X86_HPP |