Mercurial > hg > truffle
comparison src/cpu/ppc/vm/vm_version_ppc.cpp @ 14408:ec28f9c041ff
8019972: PPC64 (part 9): platform files for interpreter only VM.
Summary: With this change the HotSpot core build works on Linux/PPC64. The VM successfully executes simple test programs.
Reviewed-by: kvn
author | goetz |
---|---|
date | Fri, 02 Aug 2013 16:46:45 +0200 |
parents | |
children | 41b780b43b74 |
comparison
equal
deleted
inserted
replaced
14407:94c202aa2646 | 14408:ec28f9c041ff |
---|---|
1 /* | |
2 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. | |
3 * Copyright 2012, 2013 SAP AG. All rights reserved. | |
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 * | |
6 * This code is free software; you can redistribute it and/or modify it | |
7 * under the terms of the GNU General Public License version 2 only, as | |
8 * published by the Free Software Foundation. | |
9 * | |
10 * This code is distributed in the hope that it will be useful, but WITHOUT | |
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
13 * version 2 for more details (a copy is included in the LICENSE file that | |
14 * accompanied this code). | |
15 * | |
16 * You should have received a copy of the GNU General Public License version | |
17 * 2 along with this work; if not, write to the Free Software Foundation, | |
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
19 * | |
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | |
21 * or visit www.oracle.com if you need additional information or have any | |
22 * questions. | |
23 * | |
24 */ | |
25 | |
26 #include "precompiled.hpp" | |
27 #include "assembler_ppc.inline.hpp" | |
28 #include "compiler/disassembler.hpp" | |
29 #include "memory/resourceArea.hpp" | |
30 #include "runtime/java.hpp" | |
31 #include "runtime/stubCodeGenerator.hpp" | |
32 #include "utilities/defaultStream.hpp" | |
33 #include "vm_version_ppc.hpp" | |
34 #ifdef TARGET_OS_FAMILY_aix | |
35 # include "os_aix.inline.hpp" | |
36 #endif | |
37 #ifdef TARGET_OS_FAMILY_linux | |
38 # include "os_linux.inline.hpp" | |
39 #endif | |
40 | |
41 # include <sys/sysinfo.h> | |
42 | |
// Bitmask of detected CPU features. Starts out as unknown_m and is
// overwritten at the end of determine_features().
int VM_Version::_features = VM_Version::unknown_m;
// Cache line size in bytes. The default is replaced by the value that
// determine_features() measures by counting the bytes zeroed by one dcbz.
int VM_Version::_measured_cache_line_size = 128; // default value
// Printable feature list; initialize() replaces the empty string with a
// strdup'ed "ppc64 ..." string.
const char* VM_Version::_features_str = "";
// Set to true only while determine_features() executes its probe stub.
bool VM_Version::_is_determine_features_test_running = false;
47 | |
48 | |
// Prints a warning to the error stream if 'flag' is currently true and was
// not left at its default value. The text tells the user that the flag
// requires -XX:+UseSIGTRAP and will be disabled; initialize() uses it for the
// TrapBased* flags when UseSIGTRAP is off.
//
// The body is wrapped in do { } while (0) so that "MSG(flag);" expands to
// exactly one statement. A bare 'if' followed by the caller's ';' would
// break (or silently re-bind the 'else' of) an enclosing if/else.
#define MSG(flag)                                                        \
  do {                                                                   \
    if (flag && !FLAG_IS_DEFAULT(flag)) {                                \
      jio_fprintf(defaultStream::error_stream(),                         \
                  "warning: -XX:+" #flag " requires -XX:+UseSIGTRAP\n"   \
                  " -XX:+" #flag " will be disabled!\n");                \
    }                                                                    \
  } while (0)
54 | |
55 void VM_Version::initialize() { | |
56 | |
57 // Test which instructions are supported and measure cache line size. | |
58 determine_features(); | |
59 | |
60 // If PowerArchitecturePPC64 hasn't been specified explicitly determine from features. | |
61 if (FLAG_IS_DEFAULT(PowerArchitecturePPC64)) { | |
62 if (VM_Version::has_popcntw()) { | |
63 FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 7); | |
64 } else if (VM_Version::has_cmpb()) { | |
65 FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 6); | |
66 } else if (VM_Version::has_popcntb()) { | |
67 FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 5); | |
68 } else { | |
69 FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 0); | |
70 } | |
71 } | |
72 guarantee(PowerArchitecturePPC64 == 0 || PowerArchitecturePPC64 == 5 || | |
73 PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7, | |
74 "PowerArchitecturePPC64 should be 0, 5, 6 or 7"); | |
75 | |
76 if (!UseSIGTRAP) { | |
77 MSG(TrapBasedICMissChecks); | |
78 MSG(TrapBasedNotEntrantChecks); | |
79 MSG(TrapBasedNullChecks); | |
80 MSG(TrapBasedRangeChecks); | |
81 FLAG_SET_ERGO(bool, TrapBasedNotEntrantChecks, false); | |
82 FLAG_SET_ERGO(bool, TrapBasedNullChecks, false); | |
83 FLAG_SET_ERGO(bool, TrapBasedICMissChecks, false); | |
84 FLAG_SET_ERGO(bool, TrapBasedRangeChecks, false); | |
85 } | |
86 | |
87 #ifdef COMPILER2 | |
88 // On Power6 test for section size. | |
89 if (PowerArchitecturePPC64 == 6) | |
90 determine_section_size(); | |
91 // TODO: PPC port else | |
92 // TODO: PPC port PdScheduling::power6SectorSize = 0x20; | |
93 | |
94 MaxVectorSize = 8; | |
95 #endif | |
96 | |
97 // Create and print feature-string. | |
98 char buf[(num_features+1) * 16]; // max 16 chars per feature | |
99 jio_snprintf(buf, sizeof(buf), | |
100 "ppc64%s%s%s%s%s%s%s%s", | |
101 (has_fsqrt() ? " fsqrt" : ""), | |
102 (has_isel() ? " isel" : ""), | |
103 (has_lxarxeh() ? " lxarxeh" : ""), | |
104 (has_cmpb() ? " cmpb" : ""), | |
105 //(has_mftgpr()? " mftgpr" : ""), | |
106 (has_popcntb() ? " popcntb" : ""), | |
107 (has_popcntw() ? " popcntw" : ""), | |
108 (has_fcfids() ? " fcfids" : ""), | |
109 (has_vand() ? " vand" : "") | |
110 // Make sure number of %s matches num_features! | |
111 ); | |
112 _features_str = strdup(buf); | |
113 NOT_PRODUCT(if (Verbose) print_features();); | |
114 | |
115 // PPC64 supports 8-byte compare-exchange operations (see | |
116 // Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr) | |
117 // and 'atomic long memory ops' (see Unsafe_GetLongVolatile). | |
118 _supports_cx8 = true; | |
119 | |
120 UseSSE = 0; // Only on x86 and x64 | |
121 | |
122 intx cache_line_size = _measured_cache_line_size; | |
123 | |
124 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) AllocatePrefetchStyle = 1; | |
125 | |
126 if (AllocatePrefetchStyle == 4) { | |
127 AllocatePrefetchStepSize = cache_line_size; // need exact value | |
128 if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 12; // use larger blocks by default | |
129 if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 2*cache_line_size; // default is not defined ? | |
130 } else { | |
131 if (cache_line_size > AllocatePrefetchStepSize) AllocatePrefetchStepSize = cache_line_size; | |
132 if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 3; // Optimistic value | |
133 if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 3*cache_line_size; // default is not defined ? | |
134 } | |
135 | |
136 assert(AllocatePrefetchLines > 0, "invalid value"); | |
137 if (AllocatePrefetchLines < 1) // Set valid value in product VM. | |
138 AllocatePrefetchLines = 1; // Conservative value | |
139 | |
140 if (AllocatePrefetchStyle == 3 && AllocatePrefetchDistance < cache_line_size) | |
141 AllocatePrefetchStyle = 1; // fall back if inappropriate | |
142 | |
143 assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); | |
144 } | |
145 | |
146 void VM_Version::print_features() { | |
147 tty->print_cr("Version: %s cache_line_size = %d", cpu_features(), get_cache_line_size()); | |
148 } | |
149 | |
150 #ifdef COMPILER2 | |
151 // Determine section size on power6: If section size is 8 instructions, | |
152 // there should be a difference between the two testloops of ~15 %. If | |
153 // no difference is detected the section is assumed to be 32 instructions. | |
154 void VM_Version::determine_section_size() { | |
155 | |
156 int unroll = 80; | |
157 | |
158 const int code_size = (2* unroll * 32 + 100)*BytesPerInstWord; | |
159 | |
160 // Allocate space for the code | |
161 ResourceMark rm; | |
162 CodeBuffer cb("detect_section_size", code_size, 0); | |
163 MacroAssembler* a = new MacroAssembler(&cb); | |
164 | |
165 uint32_t *code = (uint32_t *)a->pc(); | |
166 // emit code. | |
167 void (*test1)() = (void(*)())(void *)a->emit_fd(); | |
168 | |
169 Label l1; | |
170 | |
171 a->li(R4, 1); | |
172 a->sldi(R4, R4, 28); | |
173 a->b(l1); | |
174 a->align(CodeEntryAlignment); | |
175 | |
176 a->bind(l1); | |
177 | |
178 for (int i = 0; i < unroll; i++) { | |
179 // Schleife 1 | |
180 // ------- sector 0 ------------ | |
181 // ;; 0 | |
182 a->nop(); // 1 | |
183 a->fpnop0(); // 2 | |
184 a->fpnop1(); // 3 | |
185 a->addi(R4,R4, -1); // 4 | |
186 | |
187 // ;; 1 | |
188 a->nop(); // 5 | |
189 a->fmr(F6, F6); // 6 | |
190 a->fmr(F7, F7); // 7 | |
191 a->endgroup(); // 8 | |
192 // ------- sector 8 ------------ | |
193 | |
194 // ;; 2 | |
195 a->nop(); // 9 | |
196 a->nop(); // 10 | |
197 a->fmr(F8, F8); // 11 | |
198 a->fmr(F9, F9); // 12 | |
199 | |
200 // ;; 3 | |
201 a->nop(); // 13 | |
202 a->fmr(F10, F10); // 14 | |
203 a->fmr(F11, F11); // 15 | |
204 a->endgroup(); // 16 | |
205 // -------- sector 16 ------------- | |
206 | |
207 // ;; 4 | |
208 a->nop(); // 17 | |
209 a->nop(); // 18 | |
210 a->fmr(F15, F15); // 19 | |
211 a->fmr(F16, F16); // 20 | |
212 | |
213 // ;; 5 | |
214 a->nop(); // 21 | |
215 a->fmr(F17, F17); // 22 | |
216 a->fmr(F18, F18); // 23 | |
217 a->endgroup(); // 24 | |
218 // ------- sector 24 ------------ | |
219 | |
220 // ;; 6 | |
221 a->nop(); // 25 | |
222 a->nop(); // 26 | |
223 a->fmr(F19, F19); // 27 | |
224 a->fmr(F20, F20); // 28 | |
225 | |
226 // ;; 7 | |
227 a->nop(); // 29 | |
228 a->fmr(F21, F21); // 30 | |
229 a->fmr(F22, F22); // 31 | |
230 a->brnop0(); // 32 | |
231 | |
232 // ------- sector 32 ------------ | |
233 } | |
234 | |
235 // ;; 8 | |
236 a->cmpdi(CCR0, R4, unroll);// 33 | |
237 a->bge(CCR0, l1); // 34 | |
238 a->blr(); | |
239 | |
240 // emit code. | |
241 void (*test2)() = (void(*)())(void *)a->emit_fd(); | |
242 // uint32_t *code = (uint32_t *)a->pc(); | |
243 | |
244 Label l2; | |
245 | |
246 a->li(R4, 1); | |
247 a->sldi(R4, R4, 28); | |
248 a->b(l2); | |
249 a->align(CodeEntryAlignment); | |
250 | |
251 a->bind(l2); | |
252 | |
253 for (int i = 0; i < unroll; i++) { | |
254 // Schleife 2 | |
255 // ------- sector 0 ------------ | |
256 // ;; 0 | |
257 a->brnop0(); // 1 | |
258 a->nop(); // 2 | |
259 //a->cmpdi(CCR0, R4, unroll); | |
260 a->fpnop0(); // 3 | |
261 a->fpnop1(); // 4 | |
262 a->addi(R4,R4, -1); // 5 | |
263 | |
264 // ;; 1 | |
265 | |
266 a->nop(); // 6 | |
267 a->fmr(F6, F6); // 7 | |
268 a->fmr(F7, F7); // 8 | |
269 // ------- sector 8 --------------- | |
270 | |
271 // ;; 2 | |
272 a->endgroup(); // 9 | |
273 | |
274 // ;; 3 | |
275 a->nop(); // 10 | |
276 a->nop(); // 11 | |
277 a->fmr(F8, F8); // 12 | |
278 | |
279 // ;; 4 | |
280 a->fmr(F9, F9); // 13 | |
281 a->nop(); // 14 | |
282 a->fmr(F10, F10); // 15 | |
283 | |
284 // ;; 5 | |
285 a->fmr(F11, F11); // 16 | |
286 // -------- sector 16 ------------- | |
287 | |
288 // ;; 6 | |
289 a->endgroup(); // 17 | |
290 | |
291 // ;; 7 | |
292 a->nop(); // 18 | |
293 a->nop(); // 19 | |
294 a->fmr(F15, F15); // 20 | |
295 | |
296 // ;; 8 | |
297 a->fmr(F16, F16); // 21 | |
298 a->nop(); // 22 | |
299 a->fmr(F17, F17); // 23 | |
300 | |
301 // ;; 9 | |
302 a->fmr(F18, F18); // 24 | |
303 // -------- sector 24 ------------- | |
304 | |
305 // ;; 10 | |
306 a->endgroup(); // 25 | |
307 | |
308 // ;; 11 | |
309 a->nop(); // 26 | |
310 a->nop(); // 27 | |
311 a->fmr(F19, F19); // 28 | |
312 | |
313 // ;; 12 | |
314 a->fmr(F20, F20); // 29 | |
315 a->nop(); // 30 | |
316 a->fmr(F21, F21); // 31 | |
317 | |
318 // ;; 13 | |
319 a->fmr(F22, F22); // 32 | |
320 } | |
321 | |
322 // -------- sector 32 ------------- | |
323 // ;; 14 | |
324 a->cmpdi(CCR0, R4, unroll); // 33 | |
325 a->bge(CCR0, l2); // 34 | |
326 | |
327 a->blr(); | |
328 uint32_t *code_end = (uint32_t *)a->pc(); | |
329 a->flush(); | |
330 | |
331 double loop1_seconds,loop2_seconds, rel_diff; | |
332 uint64_t start1, stop1; | |
333 | |
334 start1 = os::current_thread_cpu_time(false); | |
335 (*test1)(); | |
336 stop1 = os::current_thread_cpu_time(false); | |
337 loop1_seconds = (stop1- start1) / (1000 *1000 *1000.0); | |
338 | |
339 | |
340 start1 = os::current_thread_cpu_time(false); | |
341 (*test2)(); | |
342 stop1 = os::current_thread_cpu_time(false); | |
343 | |
344 loop2_seconds = (stop1 - start1) / (1000 *1000 *1000.0); | |
345 | |
346 rel_diff = (loop2_seconds - loop1_seconds) / loop1_seconds *100; | |
347 | |
348 if (PrintAssembly) { | |
349 ttyLocker ttyl; | |
350 tty->print_cr("Decoding section size detection stub at " INTPTR_FORMAT " before execution:", code); | |
351 Disassembler::decode((u_char*)code, (u_char*)code_end, tty); | |
352 tty->print_cr("Time loop1 :%f", loop1_seconds); | |
353 tty->print_cr("Time loop2 :%f", loop2_seconds); | |
354 tty->print_cr("(time2 - time1) / time1 = %f %%", rel_diff); | |
355 | |
356 if (rel_diff > 12.0) { | |
357 tty->print_cr("Section Size 8 Instructions"); | |
358 } else{ | |
359 tty->print_cr("Section Size 32 Instructions or Power5"); | |
360 } | |
361 } | |
362 | |
363 #if 0 // TODO: PPC port | |
364 // Set sector size (if not set explicitly). | |
365 if (FLAG_IS_DEFAULT(Power6SectorSize128PPC64)) { | |
366 if (rel_diff > 12.0) { | |
367 PdScheduling::power6SectorSize = 0x20; | |
368 } else { | |
369 PdScheduling::power6SectorSize = 0x80; | |
370 } | |
371 } else if (Power6SectorSize128PPC64) { | |
372 PdScheduling::power6SectorSize = 0x80; | |
373 } else { | |
374 PdScheduling::power6SectorSize = 0x20; | |
375 } | |
376 #endif | |
377 if (UsePower6SchedulerPPC64) Unimplemented(); | |
378 } | |
379 #endif // COMPILER2 | |
380 | |
381 void VM_Version::determine_features() { | |
382 const int code_size = (num_features+1+2*7)*BytesPerInstWord; // 7 InstWords for each call (function descriptor + blr instruction) | |
383 int features = 0; | |
384 | |
385 // create test area | |
386 enum { BUFFER_SIZE = 2*4*K }; // needs to be >=2* max cache line size (cache line size can't exceed min page size) | |
387 char test_area[BUFFER_SIZE]; | |
388 char *mid_of_test_area = &test_area[BUFFER_SIZE>>1]; | |
389 | |
390 // Allocate space for the code | |
391 ResourceMark rm; | |
392 CodeBuffer cb("detect_cpu_features", code_size, 0); | |
393 MacroAssembler* a = new MacroAssembler(&cb); | |
394 | |
395 // emit code. | |
396 void (*test)(address addr, uint64_t offset)=(void(*)(address addr, uint64_t offset))(void *)a->emit_fd(); | |
397 uint32_t *code = (uint32_t *)a->pc(); | |
398 // Don't use R0 in ldarx. | |
399 // keep R3_ARG1 = R3 unmodified, it contains &field (see below) | |
400 // keep R4_ARG2 = R4 unmodified, it contains offset = 0 (see below) | |
401 a->fsqrt(F3, F4); // code[0] -> fsqrt_m | |
402 a->isel(R7, R5, R6, 0); // code[1] -> isel_m | |
403 a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1);// code[2] -> lxarx_m | |
404 a->cmpb(R7, R5, R6); // code[3] -> bcmp | |
405 //a->mftgpr(R7, F3); // code[4] -> mftgpr | |
406 a->popcntb(R7, R5); // code[5] -> popcntb | |
407 a->popcntw(R7, R5); // code[6] -> popcntw | |
408 a->fcfids(F3, F4); // code[7] -> fcfids | |
409 a->vand(VR0, VR0, VR0); // code[8] -> vand | |
410 a->blr(); | |
411 | |
412 // Emit function to set one cache line to zero | |
413 void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->emit_fd(); // emit function descriptor and get pointer to it | |
414 a->dcbz(R3_ARG1); // R3_ARG1 = R3 = addr | |
415 a->blr(); | |
416 | |
417 uint32_t *code_end = (uint32_t *)a->pc(); | |
418 a->flush(); | |
419 | |
420 // Print the detection code. | |
421 if (PrintAssembly) { | |
422 ttyLocker ttyl; | |
423 tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " before execution:", code); | |
424 Disassembler::decode((u_char*)code, (u_char*)code_end, tty); | |
425 } | |
426 | |
427 // Measure cache line size. | |
428 memset(test_area, 0xFF, BUFFER_SIZE); // fill test area with 0xFF | |
429 (*zero_cacheline_func_ptr)(mid_of_test_area); // call function which executes dcbz to the middle | |
430 int count = 0; // count zeroed bytes | |
431 for (int i = 0; i < BUFFER_SIZE; i++) if (test_area[i] == 0) count++; | |
432 guarantee(is_power_of_2(count), "cache line size needs to be a power of 2"); | |
433 _measured_cache_line_size = count; | |
434 | |
435 // Execute code. Illegal instructions will be replaced by 0 in the signal handler. | |
436 VM_Version::_is_determine_features_test_running = true; | |
437 (*test)((address)mid_of_test_area, (uint64_t)0); | |
438 VM_Version::_is_determine_features_test_running = false; | |
439 | |
440 // determine which instructions are legal. | |
441 int feature_cntr = 0; | |
442 if (code[feature_cntr++]) features |= fsqrt_m; | |
443 if (code[feature_cntr++]) features |= isel_m; | |
444 if (code[feature_cntr++]) features |= lxarxeh_m; | |
445 if (code[feature_cntr++]) features |= cmpb_m; | |
446 //if(code[feature_cntr++])features |= mftgpr_m; | |
447 if (code[feature_cntr++]) features |= popcntb_m; | |
448 if (code[feature_cntr++]) features |= popcntw_m; | |
449 if (code[feature_cntr++]) features |= fcfids_m; | |
450 if (code[feature_cntr++]) features |= vand_m; | |
451 | |
452 // Print the detection code. | |
453 if (PrintAssembly) { | |
454 ttyLocker ttyl; | |
455 tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " after execution:", code); | |
456 Disassembler::decode((u_char*)code, (u_char*)code_end, tty); | |
457 } | |
458 | |
459 _features = features; | |
460 } | |
461 | |
462 | |
463 static int saved_features = 0; | |
464 | |
465 void VM_Version::allow_all() { | |
466 saved_features = _features; | |
467 _features = all_features_m; | |
468 } | |
469 | |
470 void VM_Version::revert() { | |
471 _features = saved_features; | |
472 } |