comparison src/cpu/x86/vm/vm_version_x86_32.cpp @ 405:2649e5276dd7

6532536: Optimize arraycopy stubs for Intel cpus Summary: Use SSE2 movdqu in arraycopy stubs on the newest Intel cpus Reviewed-by: rasbold
author kvn
date Tue, 14 Oct 2008 15:10:26 -0700
parents dc7f315e41f7
children
comparison
equal deleted inserted replaced
404:78c058bc5cdc 405:2649e5276dd7
240 } 240 }
241 } 241 }
242 _supports_cx8 = supports_cmpxchg8(); 242 _supports_cx8 = supports_cmpxchg8();
243 // if the OS doesn't support SSE, we can't use this feature even if the HW does 243 // if the OS doesn't support SSE, we can't use this feature even if the HW does
244 if( !os::supports_sse()) 244 if( !os::supports_sse())
245 _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4|CPU_SSE4A); 245 _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);
246 if (UseSSE < 4) 246 if (UseSSE < 4) {
247 _cpuFeatures &= ~CPU_SSE4; 247 _cpuFeatures &= ~CPU_SSE4_1;
248 _cpuFeatures &= ~CPU_SSE4_2;
249 }
248 if (UseSSE < 3) { 250 if (UseSSE < 3) {
249 _cpuFeatures &= ~CPU_SSE3; 251 _cpuFeatures &= ~CPU_SSE3;
250 _cpuFeatures &= ~CPU_SSSE3; 252 _cpuFeatures &= ~CPU_SSSE3;
251 _cpuFeatures &= ~CPU_SSE4A; 253 _cpuFeatures &= ~CPU_SSE4A;
252 } 254 }
259 // HT processor could be installed on a system which doesn't support HT. 261 // HT processor could be installed on a system which doesn't support HT.
260 _cpuFeatures &= ~CPU_HT; 262 _cpuFeatures &= ~CPU_HT;
261 } 263 }
262 264
263 char buf[256]; 265 char buf[256];
264 jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 266 jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
265 cores_per_cpu(), threads_per_core(), 267 cores_per_cpu(), threads_per_core(),
266 cpu_family(), _model, _stepping, 268 cpu_family(), _model, _stepping,
267 (supports_cmov() ? ", cmov" : ""), 269 (supports_cmov() ? ", cmov" : ""),
268 (supports_cmpxchg8() ? ", cx8" : ""), 270 (supports_cmpxchg8() ? ", cx8" : ""),
269 (supports_fxsr() ? ", fxsr" : ""), 271 (supports_fxsr() ? ", fxsr" : ""),
270 (supports_mmx() ? ", mmx" : ""), 272 (supports_mmx() ? ", mmx" : ""),
271 (supports_sse() ? ", sse" : ""), 273 (supports_sse() ? ", sse" : ""),
272 (supports_sse2() ? ", sse2" : ""), 274 (supports_sse2() ? ", sse2" : ""),
273 (supports_sse3() ? ", sse3" : ""), 275 (supports_sse3() ? ", sse3" : ""),
274 (supports_ssse3()? ", ssse3": ""), 276 (supports_ssse3()? ", ssse3": ""),
275 (supports_sse4() ? ", sse4" : ""), 277 (supports_sse4_1() ? ", sse4.1" : ""),
278 (supports_sse4_2() ? ", sse4.2" : ""),
276 (supports_mmx_ext() ? ", mmxext" : ""), 279 (supports_mmx_ext() ? ", mmxext" : ""),
277 (supports_3dnow() ? ", 3dnow" : ""), 280 (supports_3dnow() ? ", 3dnow" : ""),
278 (supports_3dnow2() ? ", 3dnowext" : ""), 281 (supports_3dnow2() ? ", 3dnowext" : ""),
279 (supports_sse4a() ? ", sse4a": ""), 282 (supports_sse4a() ? ", sse4a": ""),
280 (supports_ht() ? ", ht": "")); 283 (supports_ht() ? ", ht": ""));
283 // UseSSE is set to the smaller of what hardware supports and what 286 // UseSSE is set to the smaller of what hardware supports and what
284 // the command line requires. I.e., you cannot set UseSSE to 2 on 287 // the command line requires. I.e., you cannot set UseSSE to 2 on
285 // older Pentiums which do not support it. 288 // older Pentiums which do not support it.
286 if( UseSSE > 4 ) UseSSE=4; 289 if( UseSSE > 4 ) UseSSE=4;
287 if( UseSSE < 0 ) UseSSE=0; 290 if( UseSSE < 0 ) UseSSE=0;
288 if( !supports_sse4() ) // Drop to 3 if no SSE4 support 291 if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
289 UseSSE = MIN2((intx)3,UseSSE); 292 UseSSE = MIN2((intx)3,UseSSE);
290 if( !supports_sse3() ) // Drop to 2 if no SSE3 support 293 if( !supports_sse3() ) // Drop to 2 if no SSE3 support
291 UseSSE = MIN2((intx)2,UseSSE); 294 UseSSE = MIN2((intx)2,UseSSE);
292 if( !supports_sse2() ) // Drop to 1 if no SSE2 support 295 if( !supports_sse2() ) // Drop to 1 if no SSE2 support
293 UseSSE = MIN2((intx)1,UseSSE); 296 UseSSE = MIN2((intx)1,UseSSE);
373 // generated NOP instructions. 11 is the largest size of one 376 // generated NOP instructions. 11 is the largest size of one
374 // address NOP instruction '0F 1F' (see Assembler::nop(i)). 377 // address NOP instruction '0F 1F' (see Assembler::nop(i)).
375 MaxLoopPad = 11; 378 MaxLoopPad = 11;
376 } 379 }
377 #endif // COMPILER2 380 #endif // COMPILER2
381 if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
382 UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
383 }
384 if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus
385 if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
386 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
387 }
388 }
378 } 389 }
379 } 390 }
380 391
381 assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); 392 assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
382 assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); 393 assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
411 } 422 }
412 assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value"); 423 assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
413 424
414 #ifndef PRODUCT 425 #ifndef PRODUCT
415 if (PrintMiscellaneous && Verbose) { 426 if (PrintMiscellaneous && Verbose) {
416 tty->print_cr("Logical CPUs per package: %u", 427 tty->print_cr("Logical CPUs per core: %u",
417 logical_processors_per_package()); 428 logical_processors_per_package());
418 tty->print_cr("UseSSE=%d",UseSSE); 429 tty->print_cr("UseSSE=%d",UseSSE);
419 tty->print("Allocation: "); 430 tty->print("Allocation: ");
420 if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow()) { 431 if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow()) {
421 tty->print_cr("no prefetching"); 432 tty->print_cr("no prefetching");