Mercurial > hg > graal-jvmci-8
comparison src/cpu/x86/vm/macroAssembler_x86.cpp @ 8873:e961c11b85fe
8011102: Clear AVX registers after return from JNI call
Summary: Execute the vzeroupper instruction after a JNI call and on exits in JIT-compiled code that uses 256-bit vectors.
Reviewed-by: roland
author | kvn |
---|---|
date | Wed, 03 Apr 2013 11:12:57 -0700 |
parents | a5de0cc2f91c |
children | b9a918201d47 b800986664f4 |
comparison
equal
deleted
inserted
replaced
8872:53028d751155 | 8873:e961c11b85fe |
---|---|
4763 bind(L); | 4763 bind(L); |
4764 } | 4764 } |
4765 pop_CPU_state(); | 4765 pop_CPU_state(); |
4766 } | 4766 } |
4767 | 4767 |
4768 void MacroAssembler::restore_cpu_control_state_after_jni() { | |
4769 // Either restore the MXCSR register after returning from the JNI Call | |
4770 // or verify that it wasn't changed (with -Xcheck:jni flag). | |
4771 if (VM_Version::supports_sse()) { | |
4772 if (RestoreMXCSROnJNICalls) { | |
4773 ldmxcsr(ExternalAddress(StubRoutines::addr_mxcsr_std())); | |
4774 } else if (CheckJNICalls) { | |
4775 call(RuntimeAddress(StubRoutines::x86::verify_mxcsr_entry())); | |
4776 } | |
4777 } | |
4778 if (VM_Version::supports_avx()) { | |
4779 // Clear upper bits of YMM registers to avoid SSE <-> AVX transition penalty. | |
4780 vzeroupper(); | |
4781 } | |
4782 | |
4783 #ifndef _LP64 | |
4784 // Either restore the x87 floating-point control word after returning | |
4785 // from the JNI call or verify that it wasn't changed. | |
4786 if (CheckJNICalls) { | |
4787 call(RuntimeAddress(StubRoutines::x86::verify_fpu_cntrl_wrd_entry())); | |
4788 } | |
4789 #endif // _LP64 | |
4790 } | |
4791 | |
4792 | |
4768 void MacroAssembler::load_klass(Register dst, Register src) { | 4793 void MacroAssembler::load_klass(Register dst, Register src) { |
4769 #ifdef _LP64 | 4794 #ifdef _LP64 |
4770 if (UseCompressedKlassPointers) { | 4795 if (UseCompressedKlassPointers) { |
4771 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); | 4796 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); |
4772 decode_klass_not_null(dst); | 4797 decode_klass_not_null(dst); |
5757 vptest(vec1, vec1); | 5782 vptest(vec1, vec1); |
5758 jccb(Assembler::notZero, VECTOR_NOT_EQUAL); | 5783 jccb(Assembler::notZero, VECTOR_NOT_EQUAL); |
5759 addptr(result, stride2); | 5784 addptr(result, stride2); |
5760 subl(cnt2, stride2); | 5785 subl(cnt2, stride2); |
5761 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP); | 5786 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP); |
5787 // clean upper bits of YMM registers | |
5788 vzeroupper(); | |
5762 | 5789 |
5763 // compare wide vectors tail | 5790 // compare wide vectors tail |
5764 bind(COMPARE_WIDE_TAIL); | 5791 bind(COMPARE_WIDE_TAIL); |
5765 testptr(result, result); | 5792 testptr(result, result); |
5766 jccb(Assembler::zero, LENGTH_DIFF_LABEL); | 5793 jccb(Assembler::zero, LENGTH_DIFF_LABEL); |
5770 negptr(result); | 5797 negptr(result); |
5771 jmpb(COMPARE_WIDE_VECTORS_LOOP); | 5798 jmpb(COMPARE_WIDE_VECTORS_LOOP); |
5772 | 5799 |
5773 // Identifies the mismatching (higher or lower) 16 bytes in the 32-byte vectors. | 5800 // Identifies the mismatching (higher or lower) 16 bytes in the 32-byte vectors. |
5774 bind(VECTOR_NOT_EQUAL); | 5801 bind(VECTOR_NOT_EQUAL); |
5802 // clean upper bits of YMM registers | |
5803 vzeroupper(); | |
5775 lea(str1, Address(str1, result, scale)); | 5804 lea(str1, Address(str1, result, scale)); |
5776 lea(str2, Address(str2, result, scale)); | 5805 lea(str2, Address(str2, result, scale)); |
5777 jmp(COMPARE_16_CHARS); | 5806 jmp(COMPARE_16_CHARS); |
5778 | 5807 |
5779 // Compare tail chars, length between 1 to 15 chars | 5808 // Compare tail chars, length between 1 to 15 chars |
6026 bind(FALSE_LABEL); | 6055 bind(FALSE_LABEL); |
6027 xorl(result, result); // return false | 6056 xorl(result, result); // return false |
6028 | 6057 |
6029 // That's it | 6058 // That's it |
6030 bind(DONE); | 6059 bind(DONE); |
6060 if (UseAVX >= 2) { | |
6061 // clean upper bits of YMM registers | |
6062 vzeroupper(); | |
6063 } | |
6031 } | 6064 } |
6032 | 6065 |
6033 void MacroAssembler::generate_fill(BasicType t, bool aligned, | 6066 void MacroAssembler::generate_fill(BasicType t, bool aligned, |
6034 Register to, Register value, Register count, | 6067 Register to, Register value, Register count, |
6035 Register rtmp, XMMRegister xtmp) { | 6068 Register rtmp, XMMRegister xtmp) { |
6155 addl(count, 8 << shift); | 6188 addl(count, 8 << shift); |
6156 jccb(Assembler::less, L_check_fill_8_bytes); | 6189 jccb(Assembler::less, L_check_fill_8_bytes); |
6157 vmovdqu(Address(to, 0), xtmp); | 6190 vmovdqu(Address(to, 0), xtmp); |
6158 addptr(to, 32); | 6191 addptr(to, 32); |
6159 subl(count, 8 << shift); | 6192 subl(count, 8 << shift); |
6193 | |
6194 BIND(L_check_fill_8_bytes); | |
6195 // clean upper bits of YMM registers | |
6196 vzeroupper(); | |
6160 } else { | 6197 } else { |
6161 // Fill 32-byte chunks | 6198 // Fill 32-byte chunks |
6162 pshufd(xtmp, xtmp, 0); | 6199 pshufd(xtmp, xtmp, 0); |
6163 | 6200 |
6164 subl(count, 8 << shift); | 6201 subl(count, 8 << shift); |
6178 } | 6215 } |
6179 | 6216 |
6180 addptr(to, 32); | 6217 addptr(to, 32); |
6181 subl(count, 8 << shift); | 6218 subl(count, 8 << shift); |
6182 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); | 6219 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); |
6220 | |
6221 BIND(L_check_fill_8_bytes); | |
6183 } | 6222 } |
6184 BIND(L_check_fill_8_bytes); | |
6185 addl(count, 8 << shift); | 6223 addl(count, 8 << shift); |
6186 jccb(Assembler::zero, L_exit); | 6224 jccb(Assembler::zero, L_exit); |
6187 jmpb(L_fill_8_bytes); | 6225 jmpb(L_fill_8_bytes); |
6188 | 6226 |
6189 // | 6227 // |
6314 bind(L_chars_16_check); | 6352 bind(L_chars_16_check); |
6315 addptr(len, 16); | 6353 addptr(len, 16); |
6316 jccb(Assembler::lessEqual, L_copy_16_chars); | 6354 jccb(Assembler::lessEqual, L_copy_16_chars); |
6317 | 6355 |
6318 bind(L_copy_16_chars_exit); | 6356 bind(L_copy_16_chars_exit); |
6357 if (UseAVX >= 2) { | |
6358 // clean upper bits of YMM registers | |
6359 vzeroupper(); | |
6360 } | |
6319 subptr(len, 8); | 6361 subptr(len, 8); |
6320 jccb(Assembler::greater, L_copy_8_chars_exit); | 6362 jccb(Assembler::greater, L_copy_8_chars_exit); |
6321 | 6363 |
6322 bind(L_copy_8_chars); | 6364 bind(L_copy_8_chars); |
6323 movdqu(tmp3Reg, Address(src, len, Address::times_2, -16)); | 6365 movdqu(tmp3Reg, Address(src, len, Address::times_2, -16)); |