Mercurial > hg > graal-jvmci-8
diff src/cpu/x86/vm/macroAssembler_x86.cpp @ 8873:e961c11b85fe
8011102: Clear AVX registers after return from JNI call
Summary: Execute vzeroupper instruction after JNI call and on exits in jit compiled code which use 256bit vectors.
Reviewed-by: roland
author | kvn |
---|---|
date | Wed, 03 Apr 2013 11:12:57 -0700 |
parents | a5de0cc2f91c |
children | b9a918201d47 b800986664f4 |
line wrap: on
line diff
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Apr 02 09:30:07 2013 +0200 +++ b/src/cpu/x86/vm/macroAssembler_x86.cpp Wed Apr 03 11:12:57 2013 -0700 @@ -4765,6 +4765,31 @@ pop_CPU_state(); } +void MacroAssembler::restore_cpu_control_state_after_jni() { + // Either restore the MXCSR register after returning from the JNI Call + // or verify that it wasn't changed (with -Xcheck:jni flag). + if (VM_Version::supports_sse()) { + if (RestoreMXCSROnJNICalls) { + ldmxcsr(ExternalAddress(StubRoutines::addr_mxcsr_std())); + } else if (CheckJNICalls) { + call(RuntimeAddress(StubRoutines::x86::verify_mxcsr_entry())); + } + } + if (VM_Version::supports_avx()) { + // Clear upper bits of YMM registers to avoid SSE <-> AVX transition penalty. + vzeroupper(); + } + +#ifndef _LP64 + // Either restore the x87 floating pointer control word after returning + // from the JNI call or verify that it wasn't changed. + if (CheckJNICalls) { + call(RuntimeAddress(StubRoutines::x86::verify_fpu_cntrl_wrd_entry())); + } +#endif // _LP64 +} + + void MacroAssembler::load_klass(Register dst, Register src) { #ifdef _LP64 if (UseCompressedKlassPointers) { @@ -5759,6 +5784,8 @@ addptr(result, stride2); subl(cnt2, stride2); jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP); + // clean upper bits of YMM registers + vzeroupper(); // compare wide vectors tail bind(COMPARE_WIDE_TAIL); @@ -5772,6 +5799,8 @@ // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors. bind(VECTOR_NOT_EQUAL); + // clean upper bits of YMM registers + vzeroupper(); lea(str1, Address(str1, result, scale)); lea(str2, Address(str2, result, scale)); jmp(COMPARE_16_CHARS); @@ -6028,6 +6057,10 @@ // That's it bind(DONE); + if (UseAVX >= 2) { + // clean upper bits of YMM registers + vzeroupper(); + } } void MacroAssembler::generate_fill(BasicType t, bool aligned, @@ -6157,6 +6190,10 @@ vmovdqu(Address(to, 0), xtmp); addptr(to, 32); subl(count, 8 << shift); + + BIND(L_check_fill_8_bytes); + // clean upper bits of YMM registers + vzeroupper(); } else { // Fill 32-byte chunks pshufd(xtmp, xtmp, 0); @@ -6180,8 +6217,9 @@ addptr(to, 32); subl(count, 8 << shift); jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); + + BIND(L_check_fill_8_bytes); } - BIND(L_check_fill_8_bytes); addl(count, 8 << shift); jccb(Assembler::zero, L_exit); jmpb(L_fill_8_bytes); @@ -6316,6 +6354,10 @@ jccb(Assembler::lessEqual, L_copy_16_chars); bind(L_copy_16_chars_exit); + if (UseAVX >= 2) { + // clean upper bits of YMM registers + vzeroupper(); + } subptr(len, 8); jccb(Assembler::greater, L_copy_8_chars_exit);