Mercurial > hg > graal-compiler
comparison src/cpu/x86/vm/vm_version_x86_32.cpp @ 405:2649e5276dd7
6532536: Optimize arraycopy stubs for Intel cpus
Summary: Use SSE2 movdqu in arraycopy stubs on the newest Intel CPUs
Reviewed-by: rasbold
author | kvn |
---|---|
date | Tue, 14 Oct 2008 15:10:26 -0700 |
parents | dc7f315e41f7 |
children |
comparison
equal
deleted
inserted
replaced
404:78c058bc5cdc | 405:2649e5276dd7 |
---|---|
240 } | 240 } |
241 } | 241 } |
242 _supports_cx8 = supports_cmpxchg8(); | 242 _supports_cx8 = supports_cmpxchg8(); |
243 // if the OS doesn't support SSE, we can't use this feature even if the HW does | 243 // if the OS doesn't support SSE, we can't use this feature even if the HW does |
244 if( !os::supports_sse()) | 244 if( !os::supports_sse()) |
245 _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4|CPU_SSE4A); | 245 _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2); |
246 if (UseSSE < 4) | 246 if (UseSSE < 4) { |
247 _cpuFeatures &= ~CPU_SSE4; | 247 _cpuFeatures &= ~CPU_SSE4_1; |
248 _cpuFeatures &= ~CPU_SSE4_2; | |
249 } | |
248 if (UseSSE < 3) { | 250 if (UseSSE < 3) { |
249 _cpuFeatures &= ~CPU_SSE3; | 251 _cpuFeatures &= ~CPU_SSE3; |
250 _cpuFeatures &= ~CPU_SSSE3; | 252 _cpuFeatures &= ~CPU_SSSE3; |
251 _cpuFeatures &= ~CPU_SSE4A; | 253 _cpuFeatures &= ~CPU_SSE4A; |
252 } | 254 } |
259 // HT processor could be installed on a system which doesn't support HT. | 261 // HT processor could be installed on a system which doesn't support HT. |
260 _cpuFeatures &= ~CPU_HT; | 262 _cpuFeatures &= ~CPU_HT; |
261 } | 263 } |
262 | 264 |
263 char buf[256]; | 265 char buf[256]; |
264 jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s", | 266 jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", |
265 cores_per_cpu(), threads_per_core(), | 267 cores_per_cpu(), threads_per_core(), |
266 cpu_family(), _model, _stepping, | 268 cpu_family(), _model, _stepping, |
267 (supports_cmov() ? ", cmov" : ""), | 269 (supports_cmov() ? ", cmov" : ""), |
268 (supports_cmpxchg8() ? ", cx8" : ""), | 270 (supports_cmpxchg8() ? ", cx8" : ""), |
269 (supports_fxsr() ? ", fxsr" : ""), | 271 (supports_fxsr() ? ", fxsr" : ""), |
270 (supports_mmx() ? ", mmx" : ""), | 272 (supports_mmx() ? ", mmx" : ""), |
271 (supports_sse() ? ", sse" : ""), | 273 (supports_sse() ? ", sse" : ""), |
272 (supports_sse2() ? ", sse2" : ""), | 274 (supports_sse2() ? ", sse2" : ""), |
273 (supports_sse3() ? ", sse3" : ""), | 275 (supports_sse3() ? ", sse3" : ""), |
274 (supports_ssse3()? ", ssse3": ""), | 276 (supports_ssse3()? ", ssse3": ""), |
275 (supports_sse4() ? ", sse4" : ""), | 277 (supports_sse4_1() ? ", sse4.1" : ""), |
278 (supports_sse4_2() ? ", sse4.2" : ""), | |
276 (supports_mmx_ext() ? ", mmxext" : ""), | 279 (supports_mmx_ext() ? ", mmxext" : ""), |
277 (supports_3dnow() ? ", 3dnow" : ""), | 280 (supports_3dnow() ? ", 3dnow" : ""), |
278 (supports_3dnow2() ? ", 3dnowext" : ""), | 281 (supports_3dnow2() ? ", 3dnowext" : ""), |
279 (supports_sse4a() ? ", sse4a": ""), | 282 (supports_sse4a() ? ", sse4a": ""), |
280 (supports_ht() ? ", ht": "")); | 283 (supports_ht() ? ", ht": "")); |
283 // UseSSE is set to the smaller of what hardware supports and what | 286 // UseSSE is set to the smaller of what hardware supports and what |
284 // the command line requires. I.e., you cannot set UseSSE to 2 on | 287 // the command line requires. I.e., you cannot set UseSSE to 2 on |
285 // older Pentiums which do not support it. | 288 // older Pentiums which do not support it. |
286 if( UseSSE > 4 ) UseSSE=4; | 289 if( UseSSE > 4 ) UseSSE=4; |
287 if( UseSSE < 0 ) UseSSE=0; | 290 if( UseSSE < 0 ) UseSSE=0; |
288 if( !supports_sse4() ) // Drop to 3 if no SSE4 support | 291 if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support |
289 UseSSE = MIN2((intx)3,UseSSE); | 292 UseSSE = MIN2((intx)3,UseSSE); |
290 if( !supports_sse3() ) // Drop to 2 if no SSE3 support | 293 if( !supports_sse3() ) // Drop to 2 if no SSE3 support |
291 UseSSE = MIN2((intx)2,UseSSE); | 294 UseSSE = MIN2((intx)2,UseSSE); |
292 if( !supports_sse2() ) // Drop to 1 if no SSE2 support | 295 if( !supports_sse2() ) // Drop to 1 if no SSE2 support |
293 UseSSE = MIN2((intx)1,UseSSE); | 296 UseSSE = MIN2((intx)1,UseSSE); |
373 // generated NOP instructions. 11 is the largest size of one | 376 // generated NOP instructions. 11 is the largest size of one |
374 // address NOP instruction '0F 1F' (see Assembler::nop(i)). | 377 // address NOP instruction '0F 1F' (see Assembler::nop(i)). |
375 MaxLoopPad = 11; | 378 MaxLoopPad = 11; |
376 } | 379 } |
377 #endif // COMPILER2 | 380 #endif // COMPILER2 |
381 if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) { | |
382 UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus | |
383 } | |
384 if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus | |
385 if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) { | |
386 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus | |
387 } | |
388 } | |
378 } | 389 } |
379 } | 390 } |
380 | 391 |
381 assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); | 392 assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); |
382 assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); | 393 assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); |
411 } | 422 } |
412 assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value"); | 423 assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value"); |
413 | 424 |
414 #ifndef PRODUCT | 425 #ifndef PRODUCT |
415 if (PrintMiscellaneous && Verbose) { | 426 if (PrintMiscellaneous && Verbose) { |
416 tty->print_cr("Logical CPUs per package: %u", | 427 tty->print_cr("Logical CPUs per core: %u", |
417 logical_processors_per_package()); | 428 logical_processors_per_package()); |
418 tty->print_cr("UseSSE=%d",UseSSE); | 429 tty->print_cr("UseSSE=%d",UseSSE); |
419 tty->print("Allocation: "); | 430 tty->print("Allocation: "); |
420 if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow()) { | 431 if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow()) { |
421 tty->print_cr("no prefetching"); | 432 tty->print_cr("no prefetching"); |