Mercurial > hg > truffle
comparison src/cpu/x86/vm/assembler_x86.cpp @ 6792:137868b7aa6f
7196199: java/text/Bidi/Bug6665028.java failed: Bidi run count incorrect
Summary: Save whole XMM/YMM registers in safepoint interrupt handler.
Reviewed-by: roland, twisti
author | kvn |
---|---|
date | Mon, 17 Sep 2012 19:39:07 -0700 |
parents | da91efe96a93 |
children | d8ce2825b193 8e47bac5643a |
comparison
equal
deleted
inserted
replaced
6791:8d3cc6612bd1 | 6792:137868b7aa6f |
---|---|
3494 // 0x00 - insert into lower 128 bits | 3494 // 0x00 - insert into lower 128 bits |
3495 // 0x01 - insert into upper 128 bits | 3495 // 0x01 - insert into upper 128 bits |
3496 emit_byte(0x01); | 3496 emit_byte(0x01); |
3497 } | 3497 } |
3498 | 3498 |
3499 void Assembler::vinsertf128h(XMMRegister dst, Address src) { | |
3500 assert(VM_Version::supports_avx(), ""); | |
3501 InstructionMark im(this); | |
3502 bool vector256 = true; | |
3503 assert(dst != xnoreg, "sanity"); | |
3504 int dst_enc = dst->encoding(); | |
3505 // swap src<->dst for encoding | |
3506 vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256); | |
3507 emit_byte(0x18); | |
3508 emit_operand(dst, src); | |
3509 // 0x01 - insert into upper 128 bits | |
3510 emit_byte(0x01); | |
3511 } | |
3512 | |
3513 void Assembler::vextractf128h(Address dst, XMMRegister src) { | |
3514 assert(VM_Version::supports_avx(), ""); | |
3515 InstructionMark im(this); | |
3516 bool vector256 = true; | |
3517 assert(src != xnoreg, "sanity"); | |
3518 int src_enc = src->encoding(); | |
3519 vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256); | |
3520 emit_byte(0x19); | |
3521 emit_operand(src, dst); | |
3522 // 0x01 - extract from upper 128 bits | |
3523 emit_byte(0x01); | |
3524 } | |
3525 | |
3499 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { | 3526 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { |
3500 assert(VM_Version::supports_avx2(), ""); | 3527 assert(VM_Version::supports_avx2(), ""); |
3501 bool vector256 = true; | 3528 bool vector256 = true; |
3502 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A); | 3529 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A); |
3503 emit_byte(0x38); | 3530 emit_byte(0x38); |
3504 emit_byte(0xC0 | encode); | 3531 emit_byte(0xC0 | encode); |
3505 // 0x00 - insert into lower 128 bits | 3532 // 0x00 - insert into lower 128 bits |
3506 // 0x01 - insert into upper 128 bits | 3533 // 0x01 - insert into upper 128 bits |
3534 emit_byte(0x01); | |
3535 } | |
3536 | |
3537 void Assembler::vinserti128h(XMMRegister dst, Address src) { | |
3538 assert(VM_Version::supports_avx2(), ""); | |
3539 InstructionMark im(this); | |
3540 bool vector256 = true; | |
3541 assert(dst != xnoreg, "sanity"); | |
3542 int dst_enc = dst->encoding(); | |
3543 // swap src<->dst for encoding | |
3544 vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256); | |
3545 emit_byte(0x38); | |
3546 emit_operand(dst, src); | |
3547 // 0x01 - insert into upper 128 bits | |
3548 emit_byte(0x01); | |
3549 } | |
3550 | |
3551 void Assembler::vextracti128h(Address dst, XMMRegister src) { | |
3552 assert(VM_Version::supports_avx2(), ""); | |
3553 InstructionMark im(this); | |
3554 bool vector256 = true; | |
3555 assert(src != xnoreg, "sanity"); | |
3556 int src_enc = src->encoding(); | |
3557 vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256); | |
3558 emit_byte(0x39); | |
3559 emit_operand(src, dst); | |
3560 // 0x01 - extract from upper 128 bits | |
3507 emit_byte(0x01); | 3561 emit_byte(0x01); |
3508 } | 3562 } |
3509 | 3563 |
3510 void Assembler::vzeroupper() { | 3564 void Assembler::vzeroupper() { |
3511 assert(VM_Version::supports_avx(), ""); | 3565 assert(VM_Version::supports_avx(), ""); |
8905 | 8959 |
8906 void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) { | 8960 void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) { |
8907 pusha(); | 8961 pusha(); |
8908 | 8962 |
8909 // if we are coming from c1, xmm registers may be live | 8963 // if we are coming from c1, xmm registers may be live |
8910 if (UseSSE >= 1) { | |
8911 subptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8)); | |
8912 } | |
8913 int off = 0; | 8964 int off = 0; |
8914 if (UseSSE == 1) { | 8965 if (UseSSE == 1) { |
8966 subptr(rsp, sizeof(jdouble)*8); | |
8915 movflt(Address(rsp,off++*sizeof(jdouble)),xmm0); | 8967 movflt(Address(rsp,off++*sizeof(jdouble)),xmm0); |
8916 movflt(Address(rsp,off++*sizeof(jdouble)),xmm1); | 8968 movflt(Address(rsp,off++*sizeof(jdouble)),xmm1); |
8917 movflt(Address(rsp,off++*sizeof(jdouble)),xmm2); | 8969 movflt(Address(rsp,off++*sizeof(jdouble)),xmm2); |
8918 movflt(Address(rsp,off++*sizeof(jdouble)),xmm3); | 8970 movflt(Address(rsp,off++*sizeof(jdouble)),xmm3); |
8919 movflt(Address(rsp,off++*sizeof(jdouble)),xmm4); | 8971 movflt(Address(rsp,off++*sizeof(jdouble)),xmm4); |
8920 movflt(Address(rsp,off++*sizeof(jdouble)),xmm5); | 8972 movflt(Address(rsp,off++*sizeof(jdouble)),xmm5); |
8921 movflt(Address(rsp,off++*sizeof(jdouble)),xmm6); | 8973 movflt(Address(rsp,off++*sizeof(jdouble)),xmm6); |
8922 movflt(Address(rsp,off++*sizeof(jdouble)),xmm7); | 8974 movflt(Address(rsp,off++*sizeof(jdouble)),xmm7); |
8923 } else if (UseSSE >= 2) { | 8975 } else if (UseSSE >= 2) { |
8924 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm0); | 8976 #ifdef COMPILER2 |
8925 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm1); | 8977 if (MaxVectorSize > 16) { |
8926 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm2); | 8978 assert(UseAVX > 0, "256bit vectors are supported only with AVX"); |
8927 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm3); | 8979 // Save upper half of YMM registers |
8928 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm4); | 8980 subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); |
8929 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm5); | 8981 vextractf128h(Address(rsp, 0),xmm0); |
8930 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm6); | 8982 vextractf128h(Address(rsp, 16),xmm1); |
8931 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm7); | 8983 vextractf128h(Address(rsp, 32),xmm2); |
8984 vextractf128h(Address(rsp, 48),xmm3); | |
8985 vextractf128h(Address(rsp, 64),xmm4); | |
8986 vextractf128h(Address(rsp, 80),xmm5); | |
8987 vextractf128h(Address(rsp, 96),xmm6); | |
8988 vextractf128h(Address(rsp,112),xmm7); | |
8932 #ifdef _LP64 | 8989 #ifdef _LP64 |
8933 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm8); | 8990 vextractf128h(Address(rsp,128),xmm8); |
8934 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm9); | 8991 vextractf128h(Address(rsp,144),xmm9); |
8935 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm10); | 8992 vextractf128h(Address(rsp,160),xmm10); |
8936 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm11); | 8993 vextractf128h(Address(rsp,176),xmm11); |
8937 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm12); | 8994 vextractf128h(Address(rsp,192),xmm12); |
8938 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm13); | 8995 vextractf128h(Address(rsp,208),xmm13); |
8939 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm14); | 8996 vextractf128h(Address(rsp,224),xmm14); |
8940 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm15); | 8997 vextractf128h(Address(rsp,240),xmm15); |
8998 #endif | |
8999 } | |
9000 #endif | |
9001 // Save whole 128bit (16 bytes) XMM registers | |
9002 subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); | |
9003 movdqu(Address(rsp,off++*16),xmm0); | |
9004 movdqu(Address(rsp,off++*16),xmm1); | |
9005 movdqu(Address(rsp,off++*16),xmm2); | |
9006 movdqu(Address(rsp,off++*16),xmm3); | |
9007 movdqu(Address(rsp,off++*16),xmm4); | |
9008 movdqu(Address(rsp,off++*16),xmm5); | |
9009 movdqu(Address(rsp,off++*16),xmm6); | |
9010 movdqu(Address(rsp,off++*16),xmm7); | |
9011 #ifdef _LP64 | |
9012 movdqu(Address(rsp,off++*16),xmm8); | |
9013 movdqu(Address(rsp,off++*16),xmm9); | |
9014 movdqu(Address(rsp,off++*16),xmm10); | |
9015 movdqu(Address(rsp,off++*16),xmm11); | |
9016 movdqu(Address(rsp,off++*16),xmm12); | |
9017 movdqu(Address(rsp,off++*16),xmm13); | |
9018 movdqu(Address(rsp,off++*16),xmm14); | |
9019 movdqu(Address(rsp,off++*16),xmm15); | |
8941 #endif | 9020 #endif |
8942 } | 9021 } |
8943 | 9022 |
8944 // Preserve registers across runtime call | 9023 // Preserve registers across runtime call |
8945 int incoming_argument_and_return_value_offset = -1; | 9024 int incoming_argument_and_return_value_offset = -1; |
9013 movflt(xmm3, Address(rsp,off++*sizeof(jdouble))); | 9092 movflt(xmm3, Address(rsp,off++*sizeof(jdouble))); |
9014 movflt(xmm4, Address(rsp,off++*sizeof(jdouble))); | 9093 movflt(xmm4, Address(rsp,off++*sizeof(jdouble))); |
9015 movflt(xmm5, Address(rsp,off++*sizeof(jdouble))); | 9094 movflt(xmm5, Address(rsp,off++*sizeof(jdouble))); |
9016 movflt(xmm6, Address(rsp,off++*sizeof(jdouble))); | 9095 movflt(xmm6, Address(rsp,off++*sizeof(jdouble))); |
9017 movflt(xmm7, Address(rsp,off++*sizeof(jdouble))); | 9096 movflt(xmm7, Address(rsp,off++*sizeof(jdouble))); |
9097 addptr(rsp, sizeof(jdouble)*8); | |
9018 } else if (UseSSE >= 2) { | 9098 } else if (UseSSE >= 2) { |
9019 movdbl(xmm0, Address(rsp,off++*sizeof(jdouble))); | 9099 // Restore whole 128bit (16 bytes) XMM registers |
9020 movdbl(xmm1, Address(rsp,off++*sizeof(jdouble))); | 9100 movdqu(xmm0, Address(rsp,off++*16)); |
9021 movdbl(xmm2, Address(rsp,off++*sizeof(jdouble))); | 9101 movdqu(xmm1, Address(rsp,off++*16)); |
9022 movdbl(xmm3, Address(rsp,off++*sizeof(jdouble))); | 9102 movdqu(xmm2, Address(rsp,off++*16)); |
9023 movdbl(xmm4, Address(rsp,off++*sizeof(jdouble))); | 9103 movdqu(xmm3, Address(rsp,off++*16)); |
9024 movdbl(xmm5, Address(rsp,off++*sizeof(jdouble))); | 9104 movdqu(xmm4, Address(rsp,off++*16)); |
9025 movdbl(xmm6, Address(rsp,off++*sizeof(jdouble))); | 9105 movdqu(xmm5, Address(rsp,off++*16)); |
9026 movdbl(xmm7, Address(rsp,off++*sizeof(jdouble))); | 9106 movdqu(xmm6, Address(rsp,off++*16)); |
9107 movdqu(xmm7, Address(rsp,off++*16)); | |
9027 #ifdef _LP64 | 9108 #ifdef _LP64 |
9028 movdbl(xmm8, Address(rsp,off++*sizeof(jdouble))); | 9109 movdqu(xmm8, Address(rsp,off++*16)); |
9029 movdbl(xmm9, Address(rsp,off++*sizeof(jdouble))); | 9110 movdqu(xmm9, Address(rsp,off++*16)); |
9030 movdbl(xmm10, Address(rsp,off++*sizeof(jdouble))); | 9111 movdqu(xmm10, Address(rsp,off++*16)); |
9031 movdbl(xmm11, Address(rsp,off++*sizeof(jdouble))); | 9112 movdqu(xmm11, Address(rsp,off++*16)); |
9032 movdbl(xmm12, Address(rsp,off++*sizeof(jdouble))); | 9113 movdqu(xmm12, Address(rsp,off++*16)); |
9033 movdbl(xmm13, Address(rsp,off++*sizeof(jdouble))); | 9114 movdqu(xmm13, Address(rsp,off++*16)); |
9034 movdbl(xmm14, Address(rsp,off++*sizeof(jdouble))); | 9115 movdqu(xmm14, Address(rsp,off++*16)); |
9035 movdbl(xmm15, Address(rsp,off++*sizeof(jdouble))); | 9116 movdqu(xmm15, Address(rsp,off++*16)); |
9036 #endif | 9117 #endif |
9037 } | 9118 addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); |
9038 if (UseSSE >= 1) { | 9119 #ifdef COMPILER2 |
9039 addptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8)); | 9120 if (MaxVectorSize > 16) { |
9121 // Restore upper half of YMM registers. | |
9122 vinsertf128h(xmm0, Address(rsp, 0)); | |
9123 vinsertf128h(xmm1, Address(rsp, 16)); | |
9124 vinsertf128h(xmm2, Address(rsp, 32)); | |
9125 vinsertf128h(xmm3, Address(rsp, 48)); | |
9126 vinsertf128h(xmm4, Address(rsp, 64)); | |
9127 vinsertf128h(xmm5, Address(rsp, 80)); | |
9128 vinsertf128h(xmm6, Address(rsp, 96)); | |
9129 vinsertf128h(xmm7, Address(rsp,112)); | |
9130 #ifdef _LP64 | |
9131 vinsertf128h(xmm8, Address(rsp,128)); | |
9132 vinsertf128h(xmm9, Address(rsp,144)); | |
9133 vinsertf128h(xmm10, Address(rsp,160)); | |
9134 vinsertf128h(xmm11, Address(rsp,176)); | |
9135 vinsertf128h(xmm12, Address(rsp,192)); | |
9136 vinsertf128h(xmm13, Address(rsp,208)); | |
9137 vinsertf128h(xmm14, Address(rsp,224)); | |
9138 vinsertf128h(xmm15, Address(rsp,240)); | |
9139 #endif | |
9140 addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); | |
9141 } | |
9142 #endif | |
9040 } | 9143 } |
9041 popa(); | 9144 popa(); |
9042 } | 9145 } |
9043 | 9146 |
9044 static const double pi_4 = 0.7853981633974483; | 9147 static const double pi_4 = 0.7853981633974483; |