comparison src/cpu/x86/vm/macroAssembler_x86.cpp @ 8873:e961c11b85fe

8011102: Clear AVX registers after return from JNI call
Summary: Execute the vzeroupper instruction after JNI calls and on exits from JIT-compiled code that uses 256-bit vectors.
Reviewed-by: roland
author kvn
date Wed, 03 Apr 2013 11:12:57 -0700
parents a5de0cc2f91c
children b9a918201d47 b800986664f4
comparing 8872:53028d751155 with 8873:e961c11b85fe
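The hazard this change addresses: once 256-bit AVX instructions have written the upper halves of the YMM registers, subsequently executed legacy SSE instructions pay a state-transition penalty on Intel hardware until a vzeroupper (or vzeroall) clears that upper state. Both native code reached via JNI and JIT-compiled 256-bit vector loops can leave the registers dirty. A minimal standalone sketch of the remedy using compiler intrinsics rather than HotSpot's assembler (illustrative only; _mm256_zeroupper() is the intrinsic form of the vzeroupper instruction this changeset emits; compile with -mavx):

    #include <immintrin.h>

    // After 256-bit work, clear the upper YMM halves so that any legacy-SSE
    // code executed next does not incur the AVX <-> SSE transition penalty.
    void add_floats(float* dst, const float* a, const float* b, int n) {
      int i = 0;
      for (; i + 8 <= n; i += 8) {
        __m256 va = _mm256_loadu_ps(a + i);   // 256-bit loads dirty full YMM
        __m256 vb = _mm256_loadu_ps(b + i);
        _mm256_storeu_ps(dst + i, _mm256_add_ps(va, vb));
      }
      _mm256_zeroupper();                     // == vzeroupper
      for (; i < n; i++)                      // scalar/SSE tail runs clean
        dst[i] = a[i] + b[i];
    }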
@@ -4763 +4763 @@
     bind(L);
   }
   pop_CPU_state();
 }

+void MacroAssembler::restore_cpu_control_state_after_jni() {
+  // Either restore the MXCSR register after returning from the JNI Call
+  // or verify that it wasn't changed (with -Xcheck:jni flag).
+  if (VM_Version::supports_sse()) {
+    if (RestoreMXCSROnJNICalls) {
+      ldmxcsr(ExternalAddress(StubRoutines::addr_mxcsr_std()));
+    } else if (CheckJNICalls) {
+      call(RuntimeAddress(StubRoutines::x86::verify_mxcsr_entry()));
+    }
+  }
+  if (VM_Version::supports_avx()) {
+    // Clear upper bits of YMM registers to avoid SSE <-> AVX transition penalty.
+    vzeroupper();
+  }
+
+#ifndef _LP64
+  // Either restore the x87 floating point control word after returning
+  // from the JNI call or verify that it wasn't changed.
+  if (CheckJNICalls) {
+    call(RuntimeAddress(StubRoutines::x86::verify_fpu_cntrl_wrd_entry()));
+  }
+#endif // _LP64
+}
+
+
 void MacroAssembler::load_klass(Register dst, Register src) {
 #ifdef _LP64
   if (UseCompressedKlassPointers) {
     movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
     decode_klass_not_null(dst);
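For the MXCSR half of the new restore_cpu_control_state_after_jni() helper: native code is free to change the SSE control/status register (rounding mode, exception masks), so the JVM either reloads a canonical value or, under -Xcheck:jni, verifies it. A hedged user-level sketch of the same save-and-restore idea with standard intrinsics (call_native_library() is a hypothetical stand-in for the JNI call; HotSpot itself reloads the canonical value from StubRoutines::addr_mxcsr_std() rather than saving the caller's):

    #include <xmmintrin.h>

    // Hypothetical stand-in for a call into a native library via JNI.
    static void call_native_library() { /* may alter MXCSR */ }

    void guarded_native_call() {
      unsigned int saved = _mm_getcsr();  // capture MXCSR (rounding, masks)
      call_native_library();              // callee might clobber it
      _mm_setcsr(saved);                  // restore, analogous to ldmxcsr above
    }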
@@ -5757 +5782 @@
     vptest(vec1, vec1);
     jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
     addptr(result, stride2);
     subl(cnt2, stride2);
     jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
+    // clean upper bits of YMM registers
+    vzeroupper();

     // compare wide vectors tail
     bind(COMPARE_WIDE_TAIL);
     testptr(result, result);
     jccb(Assembler::zero, LENGTH_DIFF_LABEL);
@@ -5770 +5797 @@
     negptr(result);
     jmpb(COMPARE_WIDE_VECTORS_LOOP);

     // Identifies the mismatching (higher or lower) 16 bytes in the 32-byte vectors.
     bind(VECTOR_NOT_EQUAL);
+    // clean upper bits of YMM registers
+    vzeroupper();
     lea(str1, Address(str1, result, scale));
     lea(str2, Address(str2, result, scale));
     jmp(COMPARE_16_CHARS);

     // Compare tail chars, length between 1 and 15 chars
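The two vzeroupper insertions above sit on the exit paths of the 32-byte compare loop, so the 16-byte and scalar tail code that follows never runs with dirty upper YMM halves. A rough standalone analogue of that loop shape (an assumed simplification, not the HotSpot routine; needs AVX2, compile with -mavx2):

    #include <immintrin.h>
    #include <stddef.h>
    #include <stdint.h>

    bool equal_bytes(const uint8_t* s1, const uint8_t* s2, size_t n) {
      size_t i = 0;
      for (; i + 32 <= n; i += 32) {         // COMPARE_WIDE_VECTORS_LOOP
        __m256i a = _mm256_loadu_si256((const __m256i*)(s1 + i));
        __m256i b = _mm256_loadu_si256((const __m256i*)(s2 + i));
        __m256i d = _mm256_xor_si256(a, b);  // like the vpxor feeding vptest
        if (!_mm256_testz_si256(d, d)) {     // like vptest / jcc notZero
          _mm256_zeroupper();                // VECTOR_NOT_EQUAL exit path
          return false;
        }
      }
      _mm256_zeroupper();                    // normal loop exit, before the tail
      for (; i < n; i++)                     // scalar tail comparison
        if (s1[i] != s2[i]) return false;
      return true;
    }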
@@ -6026 +6055 @@
   bind(FALSE_LABEL);
   xorl(result, result); // return false

   // That's it
   bind(DONE);
+  if (UseAVX >= 2) {
+    // clean upper bits of YMM registers
+    vzeroupper();
+  }
 }

 void MacroAssembler::generate_fill(BasicType t, bool aligned,
                                    Register to, Register value, Register count,
                                    Register rtmp, XMMRegister xtmp) {
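A design point in the hunk above: the cleanup is guarded by UseAVX >= 2 because this routine only emits 256-bit instructions at that level, so an unconditional vzeroupper would be dead cost on SSE-only paths. The same only-clean-if-dirtied idea in a small runtime-dispatch sketch (cpu_supports_avx2() is a hypothetical feature probe):

    #include <immintrin.h>

    static bool cpu_supports_avx2() { return true; }  // stub; real code would query cpuid

    // Only paths that could have dirtied the upper YMM halves need cleanup;
    // mirrors the UseAVX >= 2 guard around vzeroupper() in the hunk above.
    void finish_vector_routine(bool used_wide_vectors) {
      if (used_wide_vectors && cpu_supports_avx2()) {
        _mm256_zeroupper();
      }
    }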
@@ -6155 +6188 @@
       addl(count, 8 << shift);
       jccb(Assembler::less, L_check_fill_8_bytes);
       vmovdqu(Address(to, 0), xtmp);
       addptr(to, 32);
       subl(count, 8 << shift);
+
+      BIND(L_check_fill_8_bytes);
+      // clean upper bits of YMM registers
+      vzeroupper();
     } else {
       // Fill 32-byte chunks
       pshufd(xtmp, xtmp, 0);

       subl(count, 8 << shift);
@@ -6178 +6215 @@
       }

       addptr(to, 32);
       subl(count, 8 << shift);
       jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
+
+      BIND(L_check_fill_8_bytes);
     }
-    BIND(L_check_fill_8_bytes);
     addl(count, 8 << shift);
     jccb(Assembler::zero, L_exit);
     jmpb(L_fill_8_bytes);

     //
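After this change, both fill paths arrive at L_check_fill_8_bytes with the YMM state already cleared, instead of sharing a single bind after the branch. A condensed sketch of the fill shape (assumed simplification; _mm256_set1_epi8 splats the value much as the pshufd sequence above does; compile with -mavx):

    #include <immintrin.h>
    #include <stddef.h>
    #include <stdint.h>

    void fill_bytes(uint8_t* to, uint8_t value, size_t count) {
      __m256i v = _mm256_set1_epi8((char)value);       // broadcast fill value
      size_t i = 0;
      for (; i + 32 <= count; i += 32)                 // 32-byte chunk loop
        _mm256_storeu_si256((__m256i*)(to + i), v);    // like vmovdqu above
      _mm256_zeroupper();                              // clear YMM before tail
      for (; i < count; i++)                           // tail handled scalar here
        to[i] = value;
    }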
@@ -6314 +6352 @@
   bind(L_chars_16_check);
   addptr(len, 16);
   jccb(Assembler::lessEqual, L_copy_16_chars);

   bind(L_copy_16_chars_exit);
+  if (UseAVX >= 2) {
+    // clean upper bits of YMM registers
+    vzeroupper();
+  }
   subptr(len, 8);
   jccb(Assembler::greater, L_copy_8_chars_exit);

   bind(L_copy_8_chars);
   movdqu(tmp3Reg, Address(src, len, Address::times_2, -16));
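The same pattern one more time: L_copy_16_chars_exit clears the YMM state (again only under UseAVX >= 2, since only then were 32-byte moves emitted) before the routine drops to 16-byte movdqu chunks. A standalone sketch of that chunked copy with the cleared transition point (illustrative names, not the HotSpot code; compile with -mavx):

    #include <immintrin.h>
    #include <stddef.h>
    #include <stdint.h>

    void copy_chars(uint16_t* dst, const uint16_t* src, size_t len) {
      size_t i = 0;
      for (; i + 16 <= len; i += 16) {                 // 16-char (32-byte) chunks
        __m256i v = _mm256_loadu_si256((const __m256i*)(src + i));
        _mm256_storeu_si256((__m256i*)(dst + i), v);
      }
      _mm256_zeroupper();                              // exit: clean upper YMM
      for (; i + 8 <= len; i += 8) {                   // 8-char (16-byte) SSE chunks
        __m128i v = _mm_loadu_si128((const __m128i*)(src + i));
        _mm_storeu_si128((__m128i*)(dst + i), v);
      }
      for (; i < len; i++) dst[i] = src[i];            // final scalar tail
    }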