Mercurial > hg > graal-jvmci-8
comparison src/cpu/ppc/vm/vm_version_ppc.cpp @ 14445:67fa91961822
8029940: PPC64 (part 122): C2 compiler port
Reviewed-by: kvn
author | goetz |
---|---|
date | Wed, 11 Dec 2013 00:06:11 +0100 |
parents | 41b780b43b74 |
children | e5e8aa897002 |
comparison
equal
deleted
inserted
replaced
14444:492e67693373 | 14445:67fa91961822 |
---|---|
87 MSG(TrapBasedRangeChecks); | 87 MSG(TrapBasedRangeChecks); |
88 FLAG_SET_ERGO(bool, TrapBasedRangeChecks, false); | 88 FLAG_SET_ERGO(bool, TrapBasedRangeChecks, false); |
89 } | 89 } |
90 | 90 |
91 // On Power6 test for section size. | 91 // On Power6 test for section size. |
92 if (PowerArchitecturePPC64 == 6) | 92 if (PowerArchitecturePPC64 == 6) { |
93 determine_section_size(); | 93 determine_section_size(); |
94 // TODO: PPC port else | 94 // TODO: PPC port } else { |
95 // TODO: PPC port PdScheduling::power6SectorSize = 0x20; | 95 // TODO: PPC port PdScheduling::power6SectorSize = 0x20; |
| 96 } |
96 | 97 |
97 MaxVectorSize = 8; | 98 MaxVectorSize = 8; |
98 #endif | 99 #endif |
99 | 100 |
100 // Create and print feature-string. | 101 // Create and print feature-string. |
101 char buf[(num_features+1) * 16]; // max 16 chars per feature | 102 char buf[(num_features+1) * 16]; // Max 16 chars per feature. |
102 jio_snprintf(buf, sizeof(buf), | 103 jio_snprintf(buf, sizeof(buf), |
103 "ppc64%s%s%s%s%s%s%s%s", | 104 "ppc64%s%s%s%s%s%s%s%s", |
104 (has_fsqrt() ? " fsqrt" : ""), | 105 (has_fsqrt() ? " fsqrt" : ""), |
105 (has_isel() ? " isel" : ""), | 106 (has_isel() ? " isel" : ""), |
106 (has_lxarxeh() ? " lxarxeh" : ""), | 107 (has_lxarxeh() ? " lxarxeh" : ""), |
125 intx cache_line_size = _measured_cache_line_size; | 126 intx cache_line_size = _measured_cache_line_size; |
126 | 127 |
127 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) AllocatePrefetchStyle = 1; | 128 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) AllocatePrefetchStyle = 1; |
128 | 129 |
129 if (AllocatePrefetchStyle == 4) { | 130 if (AllocatePrefetchStyle == 4) { |
130 AllocatePrefetchStepSize = cache_line_size; // need exact value | 131 AllocatePrefetchStepSize = cache_line_size; // Need exact value. |
131 if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 12; // use larger blocks by default | 132 if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 12; // Use larger blocks by default. |
132 if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 2*cache_line_size; // default is not defined ? | 133 if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 2*cache_line_size; // Default is not defined? |
133 } else { | 134 } else { |
134 if (cache_line_size > AllocatePrefetchStepSize) AllocatePrefetchStepSize = cache_line_size; | 135 if (cache_line_size > AllocatePrefetchStepSize) AllocatePrefetchStepSize = cache_line_size; |
135 if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 3; // Optimistic value | 136 if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 3; // Optimistic value. |
136 if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 3*cache_line_size; // default is not defined ? | 137 if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 3*cache_line_size; // Default is not defined? |
137 } | 138 } |
138 | 139 |
139 assert(AllocatePrefetchLines > 0, "invalid value"); | 140 assert(AllocatePrefetchLines > 0, "invalid value"); |
140 if (AllocatePrefetchLines < 1) // Set valid value in product VM. | 141 if (AllocatePrefetchLines < 1) // Set valid value in product VM. |
141 AllocatePrefetchLines = 1; // Conservative value | 142 AllocatePrefetchLines = 1; // Conservative value. |
142 | 143 |
143 if (AllocatePrefetchStyle == 3 && AllocatePrefetchDistance < cache_line_size) | 144 if (AllocatePrefetchStyle == 3 && AllocatePrefetchDistance < cache_line_size) |
144 AllocatePrefetchStyle = 1; // fall back if inappropriate | 145 AllocatePrefetchStyle = 1; // Fall back if inappropriate. |
145 | 146 |
146 assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); | 147 assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); |
147 } | 148 } |
148 | 149 |
149 void VM_Version::print_features() { | 150 void VM_Version::print_features() { |
158 | 159 |
159 int unroll = 80; | 160 int unroll = 80; |
160 | 161 |
161 const int code_size = (2* unroll * 32 + 100)*BytesPerInstWord; | 162 const int code_size = (2* unroll * 32 + 100)*BytesPerInstWord; |
162 | 163 |
163 // Allocate space for the code | 164 // Allocate space for the code. |
164 ResourceMark rm; | 165 ResourceMark rm; |
165 CodeBuffer cb("detect_section_size", code_size, 0); | 166 CodeBuffer cb("detect_section_size", code_size, 0); |
166 MacroAssembler* a = new MacroAssembler(&cb); | 167 MacroAssembler* a = new MacroAssembler(&cb); |
167 | 168 |
168 uint32_t *code = (uint32_t *)a->pc(); | 169 uint32_t *code = (uint32_t *)a->pc(); |
169 // emit code. | 170 // Emit code. |
170 void (*test1)() = (void(*)())(void *)a->emit_fd(); | 171 void (*test1)() = (void(*)())(void *)a->emit_fd(); |
171 | 172 |
172 Label l1; | 173 Label l1; |
173 | 174 |
174 a->li(R4, 1); | 175 a->li(R4, 1); |
187 a->fpnop1(); // 3 | 188 a->fpnop1(); // 3 |
188 a->addi(R4,R4, -1); // 4 | 189 a->addi(R4,R4, -1); // 4 |
189 | 190 |
190 // ;; 1 | 191 // ;; 1 |
191 a->nop(); // 5 | 192 a->nop(); // 5 |
192 a->fmr(F6, F6); // 6 | 193 a->fmr(F6, F6); // 6 |
193 a->fmr(F7, F7); // 7 | 194 a->fmr(F7, F7); // 7 |
194 a->endgroup(); // 8 | 195 a->endgroup(); // 8 |
195 // ------- sector 8 ------------ | 196 // ------- sector 8 ------------ |
196 | 197 |
197 // ;; 2 | 198 // ;; 2 |
198 a->nop(); // 9 | 199 a->nop(); // 9 |
199 a->nop(); // 10 | 200 a->nop(); // 10 |
200 a->fmr(F8, F8); // 11 | 201 a->fmr(F8, F8); // 11 |
201 a->fmr(F9, F9); // 12 | 202 a->fmr(F9, F9); // 12 |
202 | 203 |
203 // ;; 3 | 204 // ;; 3 |
204 a->nop(); // 13 | 205 a->nop(); // 13 |
205 a->fmr(F10, F10); // 14 | 206 a->fmr(F10, F10); // 14 |
206 a->fmr(F11, F11); // 15 | 207 a->fmr(F11, F11); // 15 |
207 a->endgroup(); // 16 | 208 a->endgroup(); // 16 |
208 // -------- sector 16 ------------- | 209 // -------- sector 16 ------------- |
209 | 210 |
210 // ;; 4 | 211 // ;; 4 |
211 a->nop(); // 17 | 212 a->nop(); // 17 |
212 a->nop(); // 18 | 213 a->nop(); // 18 |
213 a->fmr(F15, F15); // 19 | 214 a->fmr(F15, F15); // 19 |
214 a->fmr(F16, F16); // 20 | 215 a->fmr(F16, F16); // 20 |
215 | 216 |
216 // ;; 5 | 217 // ;; 5 |
217 a->nop(); // 21 | 218 a->nop(); // 21 |
218 a->fmr(F17, F17); // 22 | 219 a->fmr(F17, F17); // 22 |
219 a->fmr(F18, F18); // 23 | 220 a->fmr(F18, F18); // 23 |
220 a->endgroup(); // 24 | 221 a->endgroup(); // 24 |
221 // ------- sector 24 ------------ | 222 // ------- sector 24 ------------ |
222 | 223 |
223 // ;; 6 | 224 // ;; 6 |
224 a->nop(); // 25 | 225 a->nop(); // 25 |
225 a->nop(); // 26 | 226 a->nop(); // 26 |
226 a->fmr(F19, F19); // 27 | 227 a->fmr(F19, F19); // 27 |
227 a->fmr(F20, F20); // 28 | 228 a->fmr(F20, F20); // 28 |
228 | 229 |
229 // ;; 7 | 230 // ;; 7 |
230 a->nop(); // 29 | 231 a->nop(); // 29 |
231 a->fmr(F21, F21); // 30 | 232 a->fmr(F21, F21); // 30 |
232 a->fmr(F22, F22); // 31 | 233 a->fmr(F22, F22); // 31 |
233 a->brnop0(); // 32 | 234 a->brnop0(); // 32 |
234 | 235 |
235 // ------- sector 32 ------------ | 236 // ------- sector 32 ------------ |
236 } | 237 } |
237 | 238 |
238 // ;; 8 | 239 // ;; 8 |
239 a->cmpdi(CCR0, R4, unroll);// 33 | 240 a->cmpdi(CCR0, R4, unroll); // 33 |
240 a->bge(CCR0, l1); // 34 | 241 a->bge(CCR0, l1); // 34 |
241 a->blr(); | 242 a->blr(); |
242 | 243 |
243 // emit code. | 244 // Emit code. |
244 void (*test2)() = (void(*)())(void *)a->emit_fd(); | 245 void (*test2)() = (void(*)())(void *)a->emit_fd(); |
245 // uint32_t *code = (uint32_t *)a->pc(); | 246 // uint32_t *code = (uint32_t *)a->pc(); |
246 | 247 |
247 Label l2; | 248 Label l2; |
248 | 249 |
380 if (UsePower6SchedulerPPC64) Unimplemented(); | 381 if (UsePower6SchedulerPPC64) Unimplemented(); |
381 } | 382 } |
382 #endif // COMPILER2 | 383 #endif // COMPILER2 |
383 | 384 |
384 void VM_Version::determine_features() { | 385 void VM_Version::determine_features() { |
385 const int code_size = (num_features+1+2*7)*BytesPerInstWord; // 7 InstWords for each call (function descriptor + blr instruction) | 386 // 7 InstWords for each call (function descriptor + blr instruction). |
| 387 const int code_size = (num_features+1+2*7)*BytesPerInstWord; |
386 int features = 0; | 388 int features = 0; |
387 | 389 |
388 // create test area | 390 // create test area |
389 enum { BUFFER_SIZE = 2*4*K }; // needs to be >=2* max cache line size (cache line size can't exceed min page size) | 391 enum { BUFFER_SIZE = 2*4*K }; // Needs to be >=2* max cache line size (cache line size can't exceed min page size). |
390 char test_area[BUFFER_SIZE]; | 392 char test_area[BUFFER_SIZE]; |
391 char *mid_of_test_area = &test_area[BUFFER_SIZE>>1]; | 393 char *mid_of_test_area = &test_area[BUFFER_SIZE>>1]; |
392 | 394 |
393 // Allocate space for the code | 395 // Allocate space for the code. |
394 ResourceMark rm; | 396 ResourceMark rm; |
395 CodeBuffer cb("detect_cpu_features", code_size, 0); | 397 CodeBuffer cb("detect_cpu_features", code_size, 0); |
396 MacroAssembler* a = new MacroAssembler(&cb); | 398 MacroAssembler* a = new MacroAssembler(&cb); |
397 | 399 |
398 // emit code. | 400 // Emit code. |
399 void (*test)(address addr, uint64_t offset)=(void(*)(address addr, uint64_t offset))(void *)a->emit_fd(); | 401 void (*test)(address addr, uint64_t offset)=(void(*)(address addr, uint64_t offset))(void *)a->emit_fd(); |
400 uint32_t *code = (uint32_t *)a->pc(); | 402 uint32_t *code = (uint32_t *)a->pc(); |
401 // Don't use R0 in ldarx. | 403 // Don't use R0 in ldarx. |
402 // keep R3_ARG1 = R3 unmodified, it contains &field (see below) | 404 // Keep R3_ARG1 unmodified, it contains &field (see below). |
403 // keep R4_ARG2 = R4 unmodified, it contains offset = 0 (see below) | 405 // Keep R4_ARG2 unmodified, it contains offset = 0 (see below). |
404 a->fsqrt(F3, F4); // code[0] -> fsqrt_m | 406 a->fsqrt(F3, F4); // code[0] -> fsqrt_m |
405 a->isel(R7, R5, R6, 0); // code[1] -> isel_m | 407 a->isel(R7, R5, R6, 0); // code[1] -> isel_m |
406 a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1);// code[2] -> lxarx_m | 408 a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[2] -> lxarx_m |
407 a->cmpb(R7, R5, R6); // code[3] -> bcmp | 409 a->cmpb(R7, R5, R6); // code[3] -> bcmp |
408 //a->mftgpr(R7, F3); // code[4] -> mftgpr | 410 //a->mftgpr(R7, F3); // code[4] -> mftgpr |
409 a->popcntb(R7, R5); // code[5] -> popcntb | 411 a->popcntb(R7, R5); // code[5] -> popcntb |
410 a->popcntw(R7, R5); // code[6] -> popcntw | 412 a->popcntw(R7, R5); // code[6] -> popcntw |
411 a->fcfids(F3, F4); // code[7] -> fcfids | 413 a->fcfids(F3, F4); // code[7] -> fcfids |
412 a->vand(VR0, VR0, VR0); // code[8] -> vand | 414 a->vand(VR0, VR0, VR0); // code[8] -> vand |
413 a->blr(); | 415 a->blr(); |
414 | 416 |
415 // Emit function to set one cache line to zero | 417 // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it. |
416 void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->emit_fd(); // emit function descriptor and get pointer to it | 418 void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->emit_fd(); |
417 a->dcbz(R3_ARG1); // R3_ARG1 = R3 = addr | 419 a->dcbz(R3_ARG1); // R3_ARG1 = addr |
418 a->blr(); | 420 a->blr(); |
419 | 421 |
420 uint32_t *code_end = (uint32_t *)a->pc(); | 422 uint32_t *code_end = (uint32_t *)a->pc(); |
421 a->flush(); | 423 a->flush(); |
422 | 424 |
426 tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " before execution:", code); | 428 tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " before execution:", code); |
427 Disassembler::decode((u_char*)code, (u_char*)code_end, tty); | 429 Disassembler::decode((u_char*)code, (u_char*)code_end, tty); |
428 } | 430 } |
429 | 431 |
430 // Measure cache line size. | 432 // Measure cache line size. |
431 memset(test_area, 0xFF, BUFFER_SIZE); // fill test area with 0xFF | 433 memset(test_area, 0xFF, BUFFER_SIZE); // Fill test area with 0xFF. |
432 (*zero_cacheline_func_ptr)(mid_of_test_area); // call function which executes dcbz to the middle | 434 (*zero_cacheline_func_ptr)(mid_of_test_area); // Call function which executes dcbz to the middle. |
433 int count = 0; // count zeroed bytes | 435 int count = 0; // count zeroed bytes |
434 for (int i = 0; i < BUFFER_SIZE; i++) if (test_area[i] == 0) count++; | 436 for (int i = 0; i < BUFFER_SIZE; i++) if (test_area[i] == 0) count++; |
435 guarantee(is_power_of_2(count), "cache line size needs to be a power of 2"); | 437 guarantee(is_power_of_2(count), "cache line size needs to be a power of 2"); |
436 _measured_cache_line_size = count; | 438 _measured_cache_line_size = count; |
437 | 439 |