Mercurial > hg > truffle
annotate src/cpu/x86/vm/vm_version_x86.hpp @ 3096:8073f5ad1d87
IdealGraphVisualizer: Rename predecessors to "Nodes Above" and successors to "Nodes Below" and actions "Expand Predecessors" and "Expand Successors" to "Expand Above" and "Expand Below" to avoid ambiguity with the Graal concept of successors and predecessors
author | Peter Hofer <peter.hofer@jku.at> |
---|---|
date | Wed, 29 Jun 2011 18:27:14 +0200 |
parents | 15c9a0e16269 |
children | fe189d4a44e9 6ae7a1561b53 |
rev | line source |
---|---|
585 | 1 /* |
1622 | 2 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All Rights Reserved. |
585 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
775
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
775
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
775
diff
changeset
|
21 * questions. |
585 | 22 * |
23 */ | |
24 | |
1972 | 25 #ifndef CPU_X86_VM_VM_VERSION_X86_HPP |
26 #define CPU_X86_VM_VM_VERSION_X86_HPP | |
27 | |
28 #include "runtime/globals_extension.hpp" | |
29 #include "runtime/vm_version.hpp" | |
30 | |
585 | 31 class VM_Version : public Abstract_VM_Version { |
32 public: | |
33 // cpuid result register layouts. These are all unions of a uint32_t | |
34 // (in case anyone wants access to the register as a whole) and a bitfield. | |
35 | |
// EAX as stored for cpuid function 1: readable either as the raw 32-bit
// register (value) or field by field (bits).
// Bit positions in the comments assume the compiler allocates bit-fields
// starting at the least significant bit.
union StdCpuid1Eax {
  uint32_t value;
  struct {
    uint32_t stepping   : 4;  // bits  3:0
    uint32_t model      : 4;  // bits  7:4
    uint32_t family     : 4;  // bits 11:8
    uint32_t proc_type  : 2;  // bits 13:12
    uint32_t            : 2;  // bits 15:14, unnamed padding
    uint32_t ext_model  : 4;  // bits 19:16
    uint32_t ext_family : 8;  // bits 27:20
    uint32_t            : 4;  // bits 31:28, unnamed padding
  } bits;
};
49 | |
// EBX as stored for cpuid function 1: four byte-wide fields overlaid on the
// raw register value.
// Bit positions assume least-significant-bit-first bit-field allocation.
union StdCpuid1Ebx {
  uint32_t value;
  struct {
    uint32_t brand_id        : 8;  // bits  7:0
    uint32_t clflush_size    : 8;  // bits 15:8
    uint32_t threads_per_cpu : 8;  // bits 23:16
    uint32_t apic_id         : 8;  // bits 31:24
  } bits;
};
59 | |
// ECX as stored for cpuid function 1: single-bit feature indicators overlaid
// on the raw register value. Unnamed fields skip bits this code does not read.
// Bit positions assume least-significant-bit-first bit-field allocation.
union StdCpuid1Ecx {
  uint32_t value;
  struct {
    uint32_t sse3      : 1;  // bit  0
    uint32_t           : 2;  // bits 2:1
    uint32_t monitor   : 1;  // bit  3
    uint32_t           : 1;  // bit  4
    uint32_t vmx       : 1;  // bit  5
    uint32_t           : 1;  // bit  6
    uint32_t est       : 1;  // bit  7
    uint32_t           : 1;  // bit  8
    uint32_t ssse3     : 1;  // bit  9
    uint32_t cid       : 1;  // bit 10
    uint32_t           : 2;  // bits 12:11
    uint32_t cmpxchg16 : 1;  // bit 13
    uint32_t           : 4;  // bits 17:14
    uint32_t dca       : 1;  // bit 18
    uint32_t sse4_1    : 1;  // bit 19
    uint32_t sse4_2    : 1;  // bit 20
    uint32_t           : 2;  // bits 22:21
    uint32_t popcnt    : 1;  // bit 23
    uint32_t           : 8;  // bits 31:24
  } bits;
};
84 | |
// EDX as stored for cpuid function 1: single-bit feature indicators overlaid
// on the raw register value. Unnamed fields skip bits this code does not read.
// Bit positions assume least-significant-bit-first bit-field allocation.
union StdCpuid1Edx {
  uint32_t value;
  struct {
    uint32_t          : 4;  // bits  3:0
    uint32_t tsc      : 1;  // bit   4
    uint32_t          : 3;  // bits  7:5
    uint32_t cmpxchg8 : 1;  // bit   8
    uint32_t          : 6;  // bits 14:9
    uint32_t cmov     : 1;  // bit  15
    uint32_t          : 7;  // bits 22:16
    uint32_t mmx      : 1;  // bit  23
    uint32_t fxsr     : 1;  // bit  24
    uint32_t sse      : 1;  // bit  25
    uint32_t sse2     : 1;  // bit  26
    uint32_t          : 1;  // bit  27
    uint32_t ht       : 1;  // bit  28
    uint32_t          : 3;  // bits 31:29
  } bits;
};
104 | |
// EAX as stored for cpuid function 4 (deterministic cache parameters).
// Bit positions assume least-significant-bit-first bit-field allocation.
union DcpCpuid4Eax {
  uint32_t value;
  struct {
    uint32_t cache_type    : 5;   // bits  4:0
    uint32_t               : 21;  // bits 25:5, unnamed padding
    uint32_t cores_per_cpu : 6;   // bits 31:26; readers add 1 to this field
  } bits;
};
113 | |
// EBX as stored for cpuid function 4 (deterministic cache parameters).
// Bit positions assume least-significant-bit-first bit-field allocation.
union DcpCpuid4Ebx {
  uint32_t value;
  struct {
    uint32_t L1_line_size  : 12;  // bits 11:0; readers add 1 to this field
    uint32_t partitions    : 10;  // bits 21:12
    uint32_t associativity : 10;  // bits 31:22
  } bits;
};
122 | |
// EBX as stored for cpuid function 0xB (processor topology); the same layout
// is used for each queried level.
// Bit positions assume least-significant-bit-first bit-field allocation.
union TplCpuidBEbx {
  uint32_t value;
  struct {
    uint32_t logical_cpus : 16;  // bits 15:0
    uint32_t              : 16;  // bits 31:16, unnamed padding
  } bits;
};
130 | |
// ECX as stored for cpuid function 0x80000001: single-bit feature indicators
// overlaid on the raw register value.
//
// The padding after CmpLegacy is 3 bits (bits 4:2), not 4: the documented
// positions are LZCNT/ABM = bit 5, SSE4A = bit 6, MisAlignSse = bit 7 and
// 3DNowPrefetch = bit 8. With a 4-bit pad every named field from lzcnt on
// read the bit one position too high (e.g. lzcnt reported the SSE4A bit).
// Bit positions assume least-significant-bit-first bit-field allocation.
union ExtCpuid1Ecx {
  uint32_t value;
  struct {
    uint32_t LahfSahf    : 1,   // bit  0
             CmpLegacy   : 1,   // bit  1
                         : 3,   // bits 4:2, not read here
             lzcnt       : 1,   // bit  5
             sse4a       : 1,   // bit  6
             misalignsse : 1,   // bit  7
             prefetchw   : 1,   // bit  8
                         : 23;  // bits 31:9, not read here
  } bits;
};
144 | |
// EDX as stored for cpuid function 0x80000001: single-bit feature indicators
// overlaid on the raw register value.
// Bit positions assume least-significant-bit-first bit-field allocation.
union ExtCpuid1Edx {
  uint32_t value;
  struct {
    uint32_t           : 22;  // bits 21:0, not read here
    uint32_t mmx_amd   : 1;   // bit 22
    uint32_t mmx       : 1;   // bit 23
    uint32_t fxsr      : 1;   // bit 24
    uint32_t           : 4;   // bits 28:25, not read here
    uint32_t long_mode : 1;   // bit 29
    uint32_t tdnow2    : 1;   // bit 30
    uint32_t tdnow     : 1;   // bit 31
  } bits;
};
158 | |
// Register layout shared by ECX and EDX of cpuid function 0x80000005:
// four byte-wide L1 cache descriptors overlaid on the raw register value.
// Bit positions assume least-significant-bit-first bit-field allocation.
union ExtCpuid5Ex {
  uint32_t value;
  struct {
    uint32_t L1_line_size : 8;  // bits  7:0
    uint32_t L1_tag_lines : 8;  // bits 15:8
    uint32_t L1_assoc     : 8;  // bits 23:16
    uint32_t L1_size      : 8;  // bits 31:24
  } bits;
};
168 | |
// ECX as stored for cpuid function 0x80000008.
// Bit positions assume least-significant-bit-first bit-field allocation.
union ExtCpuid8Ecx {
  uint32_t value;
  struct {
    uint32_t cores_per_cpu : 8;   // bits 7:0; readers add 1 to this field
    uint32_t               : 24;  // bits 31:8, unnamed padding
  } bits;
};
176 | |
177 protected: | |
178 static int _cpu; | |
179 static int _model; | |
180 static int _stepping; | |
181 static int _cpuFeatures; // features returned by the "cpuid" instruction | |
182 // 0 if this instruction is not available | |
183 static const char* _features_str; | |
184 | |
// Bit masks stored in _cpuFeatures; each constant occupies its own bit.
enum {
  // Next bits are from cpuid 1 (EDX).
  CPU_CX8            = 0x0001,
  CPU_CMOV           = 0x0002,
  CPU_FXSR           = 0x0004,
  CPU_HT             = 0x0008,
  CPU_MMX            = 0x0010,
  // Processor supports 3dnow prefetch and prefetchw instructions;
  // it may not necessarily support other 3dnow instructions.
  CPU_3DNOW_PREFETCH = 0x0020,
  CPU_SSE            = 0x0040,
  CPU_SSE2           = 0x0080,
  // SSE3 comes from cpuid 1 (ECX).
  CPU_SSE3           = 0x0100,
  CPU_SSSE3          = 0x0200,
  CPU_SSE4A          = 0x0400,
  CPU_SSE4_1         = 0x0800,
  CPU_SSE4_2         = 0x1000,
  CPU_POPCNT         = 0x2000,
  CPU_LZCNT          = 0x4000
} cpuFeatureFlags;
203 | |
204 // cpuid information block. All info derived from executing cpuid with | |
205 // various function numbers is stored here. Intel and AMD info is | |
206 // merged in this block: accessor methods disentangle it. | |
207 // | |
208 // The info block is laid out in subblocks of 4 dwords corresponding to | |
209 // eax, ebx, ecx and edx, whether or not they contain anything useful. | |
210 struct CpuidInfo { | |
211 // cpuid function 0 | |
212 uint32_t std_max_function; | |
213 uint32_t std_vendor_name_0; | |
214 uint32_t std_vendor_name_1; | |
215 uint32_t std_vendor_name_2; | |
216 | |
217 // cpuid function 1 | |
218 StdCpuid1Eax std_cpuid1_eax; | |
219 StdCpuid1Ebx std_cpuid1_ebx; | |
220 StdCpuid1Ecx std_cpuid1_ecx; | |
221 StdCpuid1Edx std_cpuid1_edx; | |
222 | |
223 // cpuid function 4 (deterministic cache parameters) | |
224 DcpCpuid4Eax dcp_cpuid4_eax; | |
225 DcpCpuid4Ebx dcp_cpuid4_ebx; | |
226 uint32_t dcp_cpuid4_ecx; // unused currently | |
227 uint32_t dcp_cpuid4_edx; // unused currently | |
228 | |
1622 | 229 // cpuid function 0xB (processor topology) |
230 // ecx = 0 | |
231 uint32_t tpl_cpuidB0_eax; | |
232 TplCpuidBEbx tpl_cpuidB0_ebx; | |
233 uint32_t tpl_cpuidB0_ecx; // unused currently | |
234 uint32_t tpl_cpuidB0_edx; // unused currently | |
235 | |
236 // ecx = 1 | |
237 uint32_t tpl_cpuidB1_eax; | |
238 TplCpuidBEbx tpl_cpuidB1_ebx; | |
239 uint32_t tpl_cpuidB1_ecx; // unused currently | |
240 uint32_t tpl_cpuidB1_edx; // unused currently | |
241 | |
242 // ecx = 2 | |
243 uint32_t tpl_cpuidB2_eax; | |
244 TplCpuidBEbx tpl_cpuidB2_ebx; | |
245 uint32_t tpl_cpuidB2_ecx; // unused currently | |
246 uint32_t tpl_cpuidB2_edx; // unused currently | |
247 | |
585 | 248 // cpuid function 0x80000000 // example, unused |
249 uint32_t ext_max_function; | |
250 uint32_t ext_vendor_name_0; | |
251 uint32_t ext_vendor_name_1; | |
252 uint32_t ext_vendor_name_2; | |
253 | |
254 // cpuid function 0x80000001 | |
255 uint32_t ext_cpuid1_eax; // reserved | |
256 uint32_t ext_cpuid1_ebx; // reserved | |
257 ExtCpuid1Ecx ext_cpuid1_ecx; | |
258 ExtCpuid1Edx ext_cpuid1_edx; | |
259 | |
260 // cpuid functions 0x80000002 thru 0x80000004: example, unused | |
261 uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3; | |
262 uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7; | |
263 uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11; | |
264 | |
265 // cpuid function 0x80000005 //AMD L1, Intel reserved | |
266 uint32_t ext_cpuid5_eax; // unused currently | |
267 uint32_t ext_cpuid5_ebx; // reserved | |
268 ExtCpuid5Ex ext_cpuid5_ecx; // L1 data cache info (AMD) | |
269 ExtCpuid5Ex ext_cpuid5_edx; // L1 instruction cache info (AMD) | |
270 | |
271 // cpuid function 0x80000008 | |
272 uint32_t ext_cpuid8_eax; // unused currently | |
273 uint32_t ext_cpuid8_ebx; // reserved | |
274 ExtCpuid8Ecx ext_cpuid8_ecx; | |
275 uint32_t ext_cpuid8_edx; // reserved | |
276 }; | |
277 | |
278 // The actual cpuid info block | |
279 static CpuidInfo _cpuid_info; | |
280 | |
281 // Extractors and predicates | |
282 static uint32_t extended_cpu_family() { | |
283 uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family; | |
284 result += _cpuid_info.std_cpuid1_eax.bits.ext_family; | |
285 return result; | |
286 } | |
287 static uint32_t extended_cpu_model() { | |
288 uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model; | |
289 result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4; | |
290 return result; | |
291 } | |
292 static uint32_t cpu_stepping() { | |
293 uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping; | |
294 return result; | |
295 } | |
296 static uint logical_processor_count() { | |
297 uint result = threads_per_core(); | |
298 return result; | |
299 } | |
300 static uint32_t feature_flags() { | |
301 uint32_t result = 0; | |
302 if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0) | |
303 result |= CPU_CX8; | |
304 if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0) | |
305 result |= CPU_CMOV; | |
1789
a83b0246bb77
6934483: GCC 4.5 errors "suggest parentheses around something..." when compiling with -Werror and -Wall
twisti
parents:
1647
diff
changeset
|
306 if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || (is_amd() && |
a83b0246bb77
6934483: GCC 4.5 errors "suggest parentheses around something..." when compiling with -Werror and -Wall
twisti
parents:
1647
diff
changeset
|
307 _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0)) |
585 | 308 result |= CPU_FXSR; |
309 // HT flag is set for multi-core processors also. | |
310 if (threads_per_core() > 1) | |
311 result |= CPU_HT; | |
1789
a83b0246bb77
6934483: GCC 4.5 errors "suggest parentheses around something..." when compiling with -Werror and -Wall
twisti
parents:
1647
diff
changeset
|
312 if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || (is_amd() && |
a83b0246bb77
6934483: GCC 4.5 errors "suggest parentheses around something..." when compiling with -Werror and -Wall
twisti
parents:
1647
diff
changeset
|
313 _cpuid_info.ext_cpuid1_edx.bits.mmx != 0)) |
585 | 314 result |= CPU_MMX; |
315 if (_cpuid_info.std_cpuid1_edx.bits.sse != 0) | |
316 result |= CPU_SSE; | |
317 if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0) | |
318 result |= CPU_SSE2; | |
319 if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0) | |
320 result |= CPU_SSE3; | |
321 if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0) | |
322 result |= CPU_SSSE3; | |
323 if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0) | |
324 result |= CPU_SSE4_1; | |
325 if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) | |
326 result |= CPU_SSE4_2; | |
643
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
327 if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0) |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
328 result |= CPU_POPCNT; |
775
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
643
diff
changeset
|
329 |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
643
diff
changeset
|
330 // AMD features. |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
643
diff
changeset
|
331 if (is_amd()) { |
2479 | 332 if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) || |
333 (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0)) | |
334 result |= CPU_3DNOW_PREFETCH; | |
775
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
643
diff
changeset
|
335 if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
643
diff
changeset
|
336 result |= CPU_LZCNT; |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
643
diff
changeset
|
337 if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
643
diff
changeset
|
338 result |= CPU_SSE4A; |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
643
diff
changeset
|
339 } |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
643
diff
changeset
|
340 |
585 | 341 return result; |
342 } | |
343 | |
344 static void get_processor_features(); | |
345 | |
346 public: | |
347 // Offsets for cpuid asm stub | |
348 static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } | |
349 static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); } | |
350 static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); } | |
351 static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); } | |
352 static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); } | |
353 static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); } | |
1622 | 354 static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); } |
355 static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); } | |
356 static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); } | |
585 | 357 |
358 // Initialization | |
359 static void initialize(); | |
360 | |
361 // Asserts | |
362 static void assert_is_initialized() { | |
363 assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized"); | |
364 } | |
365 | |
366 // | |
367 // Processor family: | |
368 // 3 - 386 | |
369 // 4 - 486 | |
370 // 5 - Pentium | |
371 // 6 - PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon, | |
372 // Pentium M, Core Solo, Core Duo, Core2 Duo | |
373 // family 6 model: 9, 13, 14, 15 | |
374 // 0x0f - Pentium 4, Opteron | |
375 // | |
376 // Note: The cpu family should be used to select between | |
377 // instruction sequences which are valid on all Intel | |
378 // processors. Use the feature test functions below to | |
379 // determine whether a particular instruction is supported. | |
380 // | |
381 static int cpu_family() { return _cpu;} | |
382 static bool is_P6() { return cpu_family() >= 6; } | |
383 | |
384 static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA' | |
385 static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG' | |
386 | |
1647 | 387 static bool supports_processor_topology() { |
388 return (_cpuid_info.std_max_function >= 0xB) && | |
389 // eax[4:0] | ebx[0:15] == 0 indicates invalid topology level. | |
390 // Some cpus have max cpuid >= 0xB but do not support processor topology. | |
391 ((_cpuid_info.tpl_cpuidB0_eax & 0x1f | _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus) != 0); | |
392 } | |
393 | |
585 | 394 static uint cores_per_cpu() { |
395 uint result = 1; | |
396 if (is_intel()) { | |
1647 | 397 if (supports_processor_topology()) { |
1622 | 398 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / |
399 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; | |
400 } else { | |
401 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); | |
402 } | |
585 | 403 } else if (is_amd()) { |
404 result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); | |
405 } | |
406 return result; | |
407 } | |
408 | |
409 static uint threads_per_core() { | |
410 uint result = 1; | |
1647 | 411 if (is_intel() && supports_processor_topology()) { |
1622 | 412 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; |
413 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { | |
585 | 414 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / |
415 cores_per_cpu(); | |
416 } | |
417 return result; | |
418 } | |
419 | |
420 static intx L1_data_cache_line_size() { | |
421 intx result = 0; | |
422 if (is_intel()) { | |
423 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); | |
424 } else if (is_amd()) { | |
425 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; | |
426 } | |
427 if (result < 32) // not defined ? | |
428 result = 32; // 32 bytes by default on x86 and other x64 | |
429 return result; | |
430 } | |
431 | |
432 // | |
433 // Feature identification | |
434 // | |
435 static bool supports_cpuid() { return _cpuFeatures != 0; } | |
436 static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; } | |
437 static bool supports_cmov() { return (_cpuFeatures & CPU_CMOV) != 0; } | |
438 static bool supports_fxsr() { return (_cpuFeatures & CPU_FXSR) != 0; } | |
439 static bool supports_ht() { return (_cpuFeatures & CPU_HT) != 0; } | |
440 static bool supports_mmx() { return (_cpuFeatures & CPU_MMX) != 0; } | |
441 static bool supports_sse() { return (_cpuFeatures & CPU_SSE) != 0; } | |
442 static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; } | |
443 static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; } | |
444 static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; } | |
445 static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } | |
446 static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } | |
643
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
585
diff
changeset
|
447 static bool supports_popcnt() { return (_cpuFeatures & CPU_POPCNT) != 0; } |
585 | 448 // |
449 // AMD features | |
450 // | |
2479 | 451 static bool supports_3dnow_prefetch() { return (_cpuFeatures & CPU_3DNOW_PREFETCH) != 0; } |
585 | 452 static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; } |
775
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
643
diff
changeset
|
453 static bool supports_lzcnt() { return (_cpuFeatures & CPU_LZCNT) != 0; } |
585 | 454 static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; } |
455 | |
1914
ae065c367d93
6987135: Performance regression on Intel platform with 32-bits edition between 6u13 and 6u14.
kvn
parents:
1789
diff
changeset
|
456 // Intel Core and newer cpus have fast IDIV instruction (excluding Atom). |
ae065c367d93
6987135: Performance regression on Intel platform with 32-bits edition between 6u13 and 6u14.
kvn
parents:
1789
diff
changeset
|
457 static bool has_fast_idiv() { return is_intel() && cpu_family() == 6 && |
ae065c367d93
6987135: Performance regression on Intel platform with 32-bits edition between 6u13 and 6u14.
kvn
parents:
1789
diff
changeset
|
458 supports_sse3() && _model != 0x1C; } |
ae065c367d93
6987135: Performance regression on Intel platform with 32-bits edition between 6u13 and 6u14.
kvn
parents:
1789
diff
changeset
|
459 |
585 | 460 static bool supports_compare_and_exchange() { return true; } |
461 | |
462 static const char* cpu_features() { return _features_str; } | |
463 | |
464 static intx allocate_prefetch_distance() { | |
465 // This method should be called before allocate_prefetch_style(). | |
466 // | |
467 // Hardware prefetching (distance/size in bytes): | |
468 // Pentium 3 - 64 / 32 | |
469 // Pentium 4 - 256 / 128 | |
470 // Athlon - 64 / 32 ???? | |
471 // Opteron - 128 / 64 only when 2 sequential cache lines accessed | |
472 // Core - 128 / 64 | |
473 // | |
474 // Software prefetching (distance in bytes / instruction with best score): | |
475 // Pentium 3 - 128 / prefetchnta | |
476 // Pentium 4 - 512 / prefetchnta | |
477 // Athlon - 128 / prefetchnta | |
478 // Opteron - 256 / prefetchnta | |
479 // Core - 256 / prefetchnta | |
480 // It will be used only when AllocatePrefetchStyle > 0 | |
481 | |
482 intx count = AllocatePrefetchDistance; | |
483 if (count < 0) { // default ? | |
484 if (is_amd()) { // AMD | |
485 if (supports_sse2()) | |
486 count = 256; // Opteron | |
487 else | |
488 count = 128; // Athlon | |
489 } else { // Intel | |
490 if (supports_sse2()) | |
491 if (cpu_family() == 6) { | |
492 count = 256; // Pentium M, Core, Core2 | |
493 } else { | |
494 count = 512; // Pentium 4 | |
495 } | |
496 else | |
497 count = 128; // Pentium 3 (and all other old CPUs) | |
498 } | |
499 } | |
500 return count; | |
501 } | |
502 static intx allocate_prefetch_style() { | |
503 assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); | |
504 // Return 0 if AllocatePrefetchDistance was not defined. | |
505 return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0; | |
506 } | |
507 | |
508 // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from | |
509 // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap. | |
510 // Tested intervals from 128 to 2048 in increments of 64 == one cache line. | |
511 // 256 bytes (4 dcache lines) was the nearest runner-up to 576. | |
512 | |
513 // gc copy/scan is disabled if prefetchw isn't supported, because | |
514 // Prefetch::write emits an inlined prefetchw on Linux. | |
515 // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. | |
516 // The used prefetcht0 instruction works for both amd64 and em64t. | |
517 static intx prefetch_copy_interval_in_bytes() { | |
518 intx interval = PrefetchCopyIntervalInBytes; | |
519 return interval >= 0 ? interval : 576; | |
520 } | |
521 static intx prefetch_scan_interval_in_bytes() { | |
522 intx interval = PrefetchScanIntervalInBytes; | |
523 return interval >= 0 ? interval : 576; | |
524 } | |
525 static intx prefetch_fields_ahead() { | |
526 intx count = PrefetchFieldsAhead; | |
527 return count >= 0 ? count : 1; | |
528 } | |
529 }; | |
1972 | 530 |
531 #endif // CPU_X86_VM_VM_VERSION_X86_HPP |