comparison src/cpu/x86/vm/assembler_x86.cpp @ 6792:137868b7aa6f

7196199: java/text/Bidi/Bug6665028.java failed: Bidi run count incorrect
Summary: Save whole XMM/YMM registers in safepoint interrupt handler.
Reviewed-by: roland, twisti
author kvn
date Mon, 17 Sep 2012 19:39:07 -0700
parents da91efe96a93
children d8ce2825b193 8e47bac5643a
comparing 6791:8d3cc6612bd1 to 6792:137868b7aa6f
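Context for the change, with a hedged illustration: before this patch, `fp_runtime_fallback` spilled each XMM register as a single jdouble (`movdbl`), so only the low 64 bits of each register survived the runtime call; once C2 generates 256-bit AVX vectors (`MaxVectorSize > 16`), the upper bits of a live YMM register were silently clobbered, which appears to be what broke the Bidi test. The sketch below is not HotSpot code — it uses plain AVX intrinsics (compile with `-mavx`) to show the truncation a 64-bit save/restore causes:

```cpp
#include <immintrin.h>
#include <cstdio>

int main() {
  // A YMM register holding four live doubles across a "runtime call".
  __m256d live = _mm256_set_pd(4.0, 3.0, 2.0, 1.0);

  // Old scheme: movdbl spills one jdouble -- only the low 64 bits.
  double slot;
  _mm_store_sd(&slot, _mm256_castpd256_pd128(live));

  live = _mm256_setzero_pd();  // the callee clobbers the register

  // Old restore: lane 0 comes back, the other three lanes are lost.
  live = _mm256_insertf128_pd(_mm256_setzero_pd(), _mm_load_sd(&slot), 0);

  double out[4];
  _mm256_storeu_pd(out, live);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // prints: 1 0 0 0
  return 0;
}
```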
@@ -3494,18 +3494,72 @@
   // 0x00 - insert into lower 128 bits
   // 0x01 - insert into upper 128 bits
   emit_byte(0x01);
 }
 
+void Assembler::vinsertf128h(XMMRegister dst, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  bool vector256 = true;
+  assert(dst != xnoreg, "sanity");
+  int dst_enc = dst->encoding();
+  // swap src<->dst for encoding
+  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
+  emit_byte(0x18);
+  emit_operand(dst, src);
+  // 0x01 - insert into upper 128 bits
+  emit_byte(0x01);
+}
+
+void Assembler::vextractf128h(Address dst, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  bool vector256 = true;
+  assert(src != xnoreg, "sanity");
+  int src_enc = src->encoding();
+  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
+  emit_byte(0x19);
+  emit_operand(src, dst);
+  // 0x01 - extract from upper 128 bits
+  emit_byte(0x01);
+}
+
 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx2(), "");
   bool vector256 = true;
   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
   emit_byte(0x38);
   emit_byte(0xC0 | encode);
   // 0x00 - insert into lower 128 bits
   // 0x01 - insert into upper 128 bits
+  emit_byte(0x01);
+}
+
+void Assembler::vinserti128h(XMMRegister dst, Address src) {
+  assert(VM_Version::supports_avx2(), "");
+  InstructionMark im(this);
+  bool vector256 = true;
+  assert(dst != xnoreg, "sanity");
+  int dst_enc = dst->encoding();
+  // swap src<->dst for encoding
+  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
+  emit_byte(0x38);
+  emit_operand(dst, src);
+  // 0x01 - insert into upper 128 bits
+  emit_byte(0x01);
+}
+
+void Assembler::vextracti128h(Address dst, XMMRegister src) {
+  assert(VM_Version::supports_avx2(), "");
+  InstructionMark im(this);
+  bool vector256 = true;
+  assert(src != xnoreg, "sanity");
+  int src_enc = src->encoding();
+  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
+  emit_byte(0x39);
+  emit_operand(src, dst);
+  // 0x01 - extract from upper 128 bits
   emit_byte(0x01);
 }
 
 void Assembler::vzeroupper() {
   assert(VM_Version::supports_avx(), "");
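The four memory-form emitters added here correspond to VINSERTF128/VEXTRACTF128 (AVX) and VINSERTI128/VEXTRACTI128 (AVX2); all four live in the VEX 0F 3A opcode map (the 0x18/0x19/0x38/0x39 bytes emitted above) and hard-code an imm8 of 0x01 to select the upper 128-bit lane. The "swap src<->dst" comment reflects that in the memory forms the XMM register goes through the ModRM reg field while the address takes r/m. A minimal semantics check of that lane selector, using Intel intrinsics rather than the assembler (compile with `-mavx`; illustrative, not HotSpot code):

```cpp
#include <immintrin.h>
#include <cstdio>

int main() {
  __m256d v = _mm256_set_pd(4.0, 3.0, 2.0, 1.0);  // lower lane {1,2}, upper lane {3,4}

  // vextractf128 ..., 0x01: pull out the upper 128 bits.
  double lane[2];
  _mm_storeu_pd(lane, _mm256_extractf128_pd(v, 1));
  printf("upper lane: %g %g\n", lane[0], lane[1]);  // 3 4

  // vinsertf128 ..., 0x01: replace the upper 128 bits, keep the lower.
  v = _mm256_insertf128_pd(v, _mm_set_pd(40.0, 30.0), 1);
  double out[4];
  _mm256_storeu_pd(out, v);
  printf("after insert: %g %g %g %g\n", out[0], out[1], out[2], out[3]);  // 1 2 30 40
  return 0;
}
```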
@@ -8905,41 +8959,66 @@
 
 void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
   pusha();
 
   // if we are coming from c1, xmm registers may be live
-  if (UseSSE >= 1) {
-    subptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8));
-  }
   int off = 0;
   if (UseSSE == 1) {
+    subptr(rsp, sizeof(jdouble)*8);
     movflt(Address(rsp,off++*sizeof(jdouble)),xmm0);
     movflt(Address(rsp,off++*sizeof(jdouble)),xmm1);
     movflt(Address(rsp,off++*sizeof(jdouble)),xmm2);
     movflt(Address(rsp,off++*sizeof(jdouble)),xmm3);
     movflt(Address(rsp,off++*sizeof(jdouble)),xmm4);
     movflt(Address(rsp,off++*sizeof(jdouble)),xmm5);
     movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
     movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
   } else if (UseSSE >= 2) {
-    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm0);
-    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm1);
-    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm2);
-    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm3);
-    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm4);
-    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm5);
-    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm6);
-    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm7);
+#ifdef COMPILER2
+    if (MaxVectorSize > 16) {
+      assert(UseAVX > 0, "256-bit vectors are supported only with AVX");
+      // Save upper half of the YMM registers
+      subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
+      vextractf128h(Address(rsp,  0),xmm0);
+      vextractf128h(Address(rsp, 16),xmm1);
+      vextractf128h(Address(rsp, 32),xmm2);
+      vextractf128h(Address(rsp, 48),xmm3);
+      vextractf128h(Address(rsp, 64),xmm4);
+      vextractf128h(Address(rsp, 80),xmm5);
+      vextractf128h(Address(rsp, 96),xmm6);
+      vextractf128h(Address(rsp,112),xmm7);
 #ifdef _LP64
-    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm8);
-    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm9);
-    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm10);
-    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm11);
-    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm12);
-    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm13);
-    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm14);
-    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm15);
+      vextractf128h(Address(rsp,128),xmm8);
+      vextractf128h(Address(rsp,144),xmm9);
+      vextractf128h(Address(rsp,160),xmm10);
+      vextractf128h(Address(rsp,176),xmm11);
+      vextractf128h(Address(rsp,192),xmm12);
+      vextractf128h(Address(rsp,208),xmm13);
+      vextractf128h(Address(rsp,224),xmm14);
+      vextractf128h(Address(rsp,240),xmm15);
+#endif
+    }
+#endif
+    // Save whole 128-bit (16-byte) XMM registers
+    subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
+    movdqu(Address(rsp,off++*16),xmm0);
+    movdqu(Address(rsp,off++*16),xmm1);
+    movdqu(Address(rsp,off++*16),xmm2);
+    movdqu(Address(rsp,off++*16),xmm3);
+    movdqu(Address(rsp,off++*16),xmm4);
+    movdqu(Address(rsp,off++*16),xmm5);
+    movdqu(Address(rsp,off++*16),xmm6);
+    movdqu(Address(rsp,off++*16),xmm7);
+#ifdef _LP64
+    movdqu(Address(rsp,off++*16),xmm8);
+    movdqu(Address(rsp,off++*16),xmm9);
+    movdqu(Address(rsp,off++*16),xmm10);
+    movdqu(Address(rsp,off++*16),xmm11);
+    movdqu(Address(rsp,off++*16),xmm12);
+    movdqu(Address(rsp,off++*16),xmm13);
+    movdqu(Address(rsp,off++*16),xmm14);
+    movdqu(Address(rsp,off++*16),xmm15);
 #endif
   }
 
   // Preserve registers across runtime call
   int incoming_argument_and_return_value_offset = -1;
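The two `subptr` adjustments each reserve one 16-byte slot per register: 16 registers on LP64, 8 on 32-bit, so 256 (or 128) bytes for the YMM upper halves and the same again for the full 128-bit XMM contents. A small self-contained check of that arithmetic (names are illustrative, not HotSpot's):

```cpp
#include <cstdio>

int main() {
  for (int nregs : {16, 8}) {  // LP64_ONLY(16) vs. NOT_LP64(8)
    int area = 16 * nregs;     // subptr(rsp, 16 * nregs): 16 bytes per slot
    printf("%2d regs: %3d bytes per area, last slot at offset %3d\n",
           nregs, area, 16 * (nregs - 1));
  }
  return 0;
}
```

For the LP64 case this prints a 256-byte area with the last slot at offset 240, matching `vextractf128h(Address(rsp,240),xmm15)` above.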
@@ -9013,32 +9092,56 @@
     movflt(xmm3, Address(rsp,off++*sizeof(jdouble)));
     movflt(xmm4, Address(rsp,off++*sizeof(jdouble)));
     movflt(xmm5, Address(rsp,off++*sizeof(jdouble)));
     movflt(xmm6, Address(rsp,off++*sizeof(jdouble)));
     movflt(xmm7, Address(rsp,off++*sizeof(jdouble)));
+    addptr(rsp, sizeof(jdouble)*8);
   } else if (UseSSE >= 2) {
-    movdbl(xmm0, Address(rsp,off++*sizeof(jdouble)));
-    movdbl(xmm1, Address(rsp,off++*sizeof(jdouble)));
-    movdbl(xmm2, Address(rsp,off++*sizeof(jdouble)));
-    movdbl(xmm3, Address(rsp,off++*sizeof(jdouble)));
-    movdbl(xmm4, Address(rsp,off++*sizeof(jdouble)));
-    movdbl(xmm5, Address(rsp,off++*sizeof(jdouble)));
-    movdbl(xmm6, Address(rsp,off++*sizeof(jdouble)));
-    movdbl(xmm7, Address(rsp,off++*sizeof(jdouble)));
+    // Restore whole 128-bit (16-byte) XMM registers
+    movdqu(xmm0, Address(rsp,off++*16));
+    movdqu(xmm1, Address(rsp,off++*16));
+    movdqu(xmm2, Address(rsp,off++*16));
+    movdqu(xmm3, Address(rsp,off++*16));
+    movdqu(xmm4, Address(rsp,off++*16));
+    movdqu(xmm5, Address(rsp,off++*16));
+    movdqu(xmm6, Address(rsp,off++*16));
+    movdqu(xmm7, Address(rsp,off++*16));
 #ifdef _LP64
-    movdbl(xmm8, Address(rsp,off++*sizeof(jdouble)));
-    movdbl(xmm9, Address(rsp,off++*sizeof(jdouble)));
-    movdbl(xmm10, Address(rsp,off++*sizeof(jdouble)));
-    movdbl(xmm11, Address(rsp,off++*sizeof(jdouble)));
-    movdbl(xmm12, Address(rsp,off++*sizeof(jdouble)));
-    movdbl(xmm13, Address(rsp,off++*sizeof(jdouble)));
-    movdbl(xmm14, Address(rsp,off++*sizeof(jdouble)));
-    movdbl(xmm15, Address(rsp,off++*sizeof(jdouble)));
+    movdqu(xmm8, Address(rsp,off++*16));
+    movdqu(xmm9, Address(rsp,off++*16));
+    movdqu(xmm10, Address(rsp,off++*16));
+    movdqu(xmm11, Address(rsp,off++*16));
+    movdqu(xmm12, Address(rsp,off++*16));
+    movdqu(xmm13, Address(rsp,off++*16));
+    movdqu(xmm14, Address(rsp,off++*16));
+    movdqu(xmm15, Address(rsp,off++*16));
 #endif
-  }
-  if (UseSSE >= 1) {
-    addptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8));
+    addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
+#ifdef COMPILER2
+    if (MaxVectorSize > 16) {
+      // Restore upper half of the YMM registers.
+      vinsertf128h(xmm0, Address(rsp,  0));
+      vinsertf128h(xmm1, Address(rsp, 16));
+      vinsertf128h(xmm2, Address(rsp, 32));
+      vinsertf128h(xmm3, Address(rsp, 48));
+      vinsertf128h(xmm4, Address(rsp, 64));
+      vinsertf128h(xmm5, Address(rsp, 80));
+      vinsertf128h(xmm6, Address(rsp, 96));
+      vinsertf128h(xmm7, Address(rsp,112));
+#ifdef _LP64
+      vinsertf128h(xmm8, Address(rsp,128));
+      vinsertf128h(xmm9, Address(rsp,144));
+      vinsertf128h(xmm10, Address(rsp,160));
+      vinsertf128h(xmm11, Address(rsp,176));
+      vinsertf128h(xmm12, Address(rsp,192));
+      vinsertf128h(xmm13, Address(rsp,208));
+      vinsertf128h(xmm14, Address(rsp,224));
+      vinsertf128h(xmm15, Address(rsp,240));
+#endif
+      addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
+    }
+#endif
   }
   popa();
 }
 
 static const double pi_4 = 0.7853981633974483;
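One detail worth noting in the restore path: the save pushed the YMM upper halves first and the 128-bit XMM area second, so the XMM area sits at the top of the stack and is popped first, then the upper halves — an exact mirror of the save. A sketch of that LIFO discipline with an emulated stack pointer (illustrative only, not HotSpot code; compile with `-mavx`):

```cpp
#include <immintrin.h>
#include <cstdio>

// Emulate rsp with a downward-growing cursor into a plain buffer.
alignas(16) static unsigned char stack_buf[64];
static unsigned char* sp = stack_buf + sizeof(stack_buf);

int main() {
  __m256d reg = _mm256_set_pd(4.0, 3.0, 2.0, 1.0);

  sp -= 16;  // save the upper half first (vextractf128h area)
  _mm_storeu_pd((double*)sp, _mm256_extractf128_pd(reg, 1));
  sp -= 16;  // then the full low 128 bits (movdqu area)
  _mm_storeu_pd((double*)sp, _mm256_castpd256_pd128(reg));

  reg = _mm256_setzero_pd();  // the runtime call clobbers the register

  // Restore in reverse: low half from the stack top, then the upper half.
  __m128d lo = _mm_loadu_pd((double*)sp);  sp += 16;
  __m128d hi = _mm_loadu_pd((double*)sp);  sp += 16;
  reg = _mm256_insertf128_pd(_mm256_castpd128_pd256(lo), hi, 1);

  double out[4];
  _mm256_storeu_pd(out, reg);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // 1 2 3 4
  return 0;
}
```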