# HG changeset patch # User kvn # Date 1325187470 28800 # Node ID 8940fd98d540456989d35adfeeb427658c73172d # Parent 7faca6dfa2ed2f5f14b5a9a294de07639080956f# Parent d12a66fa3820e2cc065f98992f8885f9621f7cb8 Merge diff -r 7faca6dfa2ed -r 8940fd98d540 make/bsd/makefiles/adlc.make --- a/make/bsd/makefiles/adlc.make Tue Dec 27 12:38:49 2011 -0800 +++ b/make/bsd/makefiles/adlc.make Thu Dec 29 11:37:50 2011 -0800 @@ -39,9 +39,16 @@ SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad -SOURCES.AD = \ +ifeq ("${Platform_arch_model}", "${Platform_arch}") + SOURCES.AD = \ $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) +else + SOURCES.AD = \ + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \ + $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) +endif EXEC = $(OUTDIR)/adlc diff -r 7faca6dfa2ed -r 8940fd98d540 make/linux/makefiles/adlc.make --- a/make/linux/makefiles/adlc.make Tue Dec 27 12:38:49 2011 -0800 +++ b/make/linux/makefiles/adlc.make Thu Dec 29 11:37:50 2011 -0800 @@ -39,9 +39,16 @@ SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad -SOURCES.AD = \ +ifeq ("${Platform_arch_model}", "${Platform_arch}") + SOURCES.AD = \ $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) +else + SOURCES.AD = \ + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \ + $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) +endif EXEC = $(OUTDIR)/adlc diff -r 7faca6dfa2ed -r 8940fd98d540 make/solaris/makefiles/adlc.make --- a/make/solaris/makefiles/adlc.make Tue Dec 27 12:38:49 2011 -0800 +++ b/make/solaris/makefiles/adlc.make Thu Dec 29 11:37:50 2011 -0800 @@ -40,9 +40,16 @@ SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad -SOURCES.AD = \ +ifeq ("${Platform_arch_model}", "${Platform_arch}") + SOURCES.AD = \ $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) +else + SOURCES.AD = \ + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \ + $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) +endif EXEC = $(OUTDIR)/adlc diff -r 7faca6dfa2ed -r 8940fd98d540 make/windows/makefiles/adlc.make --- a/make/windows/makefiles/adlc.make Tue Dec 27 12:38:49 2011 -0800 +++ b/make/windows/makefiles/adlc.make Thu Dec 29 11:37:50 2011 -0800 @@ -53,6 +53,17 @@ /I "$(WorkSpace)\src\os\windows\vm" \ /I "$(WorkSpace)\src\cpu\$(Platform_arch)\vm" +!if "$(Platform_arch_model)" == "$(Platform_arch)" +SOURCES_AD=\ + $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad \ + $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad +!else +SOURCES_AD=\ + $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad \ + $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch).ad \ + $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad +!endif + # NOTE! If you add any files here, you must also update GENERATED_NAMES_IN_DIR # and ProjectCreatorIDEOptions in projectcreator.make. GENERATED_NAMES=\ @@ -105,7 +116,6 @@ $(ADLC) $(ADLCFLAGS) $(Platform_arch_model).ad mv $(GENERATED_NAMES) $(AdlcOutDir)/ -$(Platform_arch_model).ad: $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad +$(Platform_arch_model).ad: $(SOURCES_AD) rm -f $(Platform_arch_model).ad - cat $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad \ - $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad >$(Platform_arch_model).ad + cat $(SOURCES_AD) >$(Platform_arch_model).ad diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/sparc/vm/assembler_sparc.cpp --- a/src/cpu/sparc/vm/assembler_sparc.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/sparc/vm/assembler_sparc.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -3036,10 +3036,8 @@ Label* L_failure, Label* L_slow_path, RegisterOrConstant super_check_offset) { - int sc_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::secondary_super_cache_offset_in_bytes()); - int sco_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::super_check_offset_offset_in_bytes()); + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); bool must_load_sco = (super_check_offset.constant_or_zero() == -1); bool need_slow_path = (must_load_sco || @@ -3159,10 +3157,8 @@ assert(label_nulls <= 1, "at most one NULL in the batch"); // a couple of useful fields in sub_klass: - int ss_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::secondary_supers_offset_in_bytes()); - int sc_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::secondary_super_cache_offset_in_bytes()); + int ss_offset = in_bytes(Klass::secondary_supers_offset()); + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); // Do a linear scan of the secondary super-klass chain. // This code is rarely used, so simplicity is a virtue here. @@ -3336,7 +3332,7 @@ cmp_and_brx_short(temp_reg, markOopDesc::biased_lock_pattern, Assembler::notEqual, Assembler::pn, cas_label); load_klass(obj_reg, temp_reg); - ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); + ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg); or3(G2_thread, temp_reg, temp_reg); xor3(mark_reg, temp_reg, temp_reg); andcc(temp_reg, ~((int) markOopDesc::age_mask_in_place), temp_reg); @@ -3413,7 +3409,7 @@ // FIXME: due to a lack of registers we currently blow away the age // bits in this situation. Should attempt to preserve them. load_klass(obj_reg, temp_reg); - ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); + ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg); or3(G2_thread, temp_reg, temp_reg); casn(mark_addr.base(), mark_reg, temp_reg); // If the biasing toward our thread failed, this means that @@ -3443,7 +3439,7 @@ // FIXME: due to a lack of registers we currently blow away the age // bits in this situation. Should attempt to preserve them. load_klass(obj_reg, temp_reg); - ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); + ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg); casn(mark_addr.base(), mark_reg, temp_reg); // Fall through to the normal CAS-based lock, because no matter what // the result of the above CAS, some thread must have succeeded in diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp --- a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -302,7 +302,7 @@ assert(_obj != noreg, "must be a valid register"); assert(_oop_index >= 0, "must have oop index"); __ load_heap_oop(_obj, java_lang_Class::klass_offset_in_bytes(), G3); - __ ld_ptr(G3, instanceKlass::init_thread_offset_in_bytes() + sizeof(klassOopDesc), G3); + __ ld_ptr(G3, in_bytes(instanceKlass::init_thread_offset()), G3); __ cmp_and_brx_short(G2_thread, G3, Assembler::notEqual, Assembler::pn, call_patch); // load_klass patches may execute the patched code before it's @@ -471,7 +471,7 @@ __ load_klass(src_reg, tmp_reg); - Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset_in_bytes() + sizeof(oopDesc)); + Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset()); __ ld(ref_type_adr, tmp_reg); // _reference_type field is of type ReferenceType (enum) diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp --- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -2202,8 +2202,7 @@ } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { __ load_klass(dst, tmp); } - int lh_offset = klassOopDesc::header_size() * HeapWordSize + - Klass::layout_helper_offset_in_bytes(); + int lh_offset = in_bytes(Klass::layout_helper_offset()); __ lduw(tmp, lh_offset, tmp2); @@ -2238,12 +2237,10 @@ __ mov(length, len); __ load_klass(dst, tmp); - int ek_offset = (klassOopDesc::header_size() * HeapWordSize + - objArrayKlass::element_klass_offset_in_bytes()); + int ek_offset = in_bytes(objArrayKlass::element_klass_offset()); __ ld_ptr(tmp, ek_offset, super_k); - int sco_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::super_check_offset_offset_in_bytes()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); __ lduw(super_k, sco_offset, chk_off); __ call_VM_leaf(tmp, copyfunc_addr); @@ -2456,7 +2453,7 @@ op->klass()->as_register() == G5, "must be"); if (op->init_check()) { __ ld(op->klass()->as_register(), - instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc), + in_bytes(instanceKlass::init_state_offset()), op->tmp1()->as_register()); add_debug_info_for_null_check_here(op->stub()->info()); __ cmp(op->tmp1()->as_register(), instanceKlass::fully_initialized); @@ -2627,7 +2624,7 @@ } else { bool need_slow_path = true; if (k->is_loaded()) { - if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()) + if ((int) k->super_check_offset() != in_bytes(Klass::secondary_super_cache_offset())) need_slow_path = false; // perform the fast part of the checking logic __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, noreg, @@ -2731,7 +2728,7 @@ __ load_klass(value, klass_RInfo); // get instance klass - __ ld_ptr(Address(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)), k_RInfo); + __ ld_ptr(Address(k_RInfo, objArrayKlass::element_klass_offset()), k_RInfo); // perform the fast part of the checking logic __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, success_target, failure_target, NULL); diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp --- a/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -181,7 +181,7 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) { assert_different_registers(obj, klass, len, t1, t2); if (UseBiasedLocking && !len->is_valid()) { - ld_ptr(klass, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes(), t1); + ld_ptr(klass, in_bytes(Klass::prototype_header_offset()), t1); } else { set((intx)markOopDesc::prototype(), t1); } @@ -252,7 +252,7 @@ #ifdef ASSERT { Label ok; - ld(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(), t1); + ld(klass, in_bytes(Klass::layout_helper_offset()), t1); if (var_size_in_bytes != noreg) { cmp_and_brx_short(t1, var_size_in_bytes, Assembler::equal, Assembler::pt, ok); } else { diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/sparc/vm/c1_Runtime1_sparc.cpp --- a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -398,14 +398,14 @@ if (id == fast_new_instance_init_check_id) { // make sure the klass is initialized - __ ld(G5_klass, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc), G3_t1); + __ ld(G5_klass, in_bytes(instanceKlass::init_state_offset()), G3_t1); __ cmp_and_br_short(G3_t1, instanceKlass::fully_initialized, Assembler::notEqual, Assembler::pn, slow_path); } #ifdef ASSERT // assert object can be fast path allocated { Label ok, not_ok; - __ ld(G5_klass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc), G1_obj_size); + __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size); // make sure it's an instance (LH > 0) __ cmp_and_br_short(G1_obj_size, 0, Assembler::lessEqual, Assembler::pn, not_ok); __ btst(Klass::_lh_instance_slow_path_bit, G1_obj_size); @@ -425,7 +425,7 @@ __ bind(retry_tlab); // get the instance size - __ ld(G5_klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(), G1_obj_size); + __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size); __ tlab_allocate(O0_obj, G1_obj_size, 0, G3_t1, slow_path); @@ -437,7 +437,7 @@ __ bind(try_eden); // get the instance size - __ ld(G5_klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(), G1_obj_size); + __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size); __ eden_allocate(O0_obj, G1_obj_size, 0, G3_t1, G4_t2, slow_path); __ incr_allocated_bytes(G1_obj_size, G3_t1, G4_t2); @@ -471,8 +471,7 @@ Register G4_length = G4; // Incoming Register O0_obj = O0; // Outgoing - Address klass_lh(G5_klass, ((klassOopDesc::header_size() * HeapWordSize) - + Klass::layout_helper_offset_in_bytes())); + Address klass_lh(G5_klass, Klass::layout_helper_offset()); assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); assert(Klass::_lh_header_size_mask == 0xFF, "bytewise"); // Use this offset to pick out an individual byte of the layout_helper: @@ -592,7 +591,7 @@ Label register_finalizer; Register t = O1; __ load_klass(O0, t); - __ ld(t, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc), t); + __ ld(t, in_bytes(Klass::access_flags_offset()), t); __ set(JVM_ACC_HAS_FINALIZER, G3); __ andcc(G3, t, G0); __ br(Assembler::notZero, false, Assembler::pt, register_finalizer); diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/sparc/vm/cppInterpreter_sparc.cpp --- a/src/cpu/sparc/vm/cppInterpreter_sparc.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/sparc/vm/cppInterpreter_sparc.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -766,7 +766,7 @@ // get native function entry point(O0 is a good temp until the very end) ld_ptr(Address(G5_method, 0, in_bytes(methodOopDesc::native_function_offset())), O0); // for static methods insert the mirror argument - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); __ ld_ptr(Address(G5_method, 0, in_bytes(methodOopDesc:: constants_offset())), O1); __ ld_ptr(Address(O1, 0, constantPoolOopDesc::pool_holder_offset_in_bytes()), O1); @@ -1173,7 +1173,7 @@ __ btst(JVM_ACC_SYNCHRONIZED, O1); __ br( Assembler::zero, false, Assembler::pt, done); - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); __ delayed()->btst(JVM_ACC_STATIC, O1); __ ld_ptr(XXX_STATE(_locals), O1); __ br( Assembler::zero, true, Assembler::pt, got_obj); diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/sparc/vm/methodHandles_sparc.cpp --- a/src/cpu/sparc/vm/methodHandles_sparc.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/sparc/vm/methodHandles_sparc.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -1098,7 +1098,7 @@ Address G3_amh_argument ( G3_method_handle, java_lang_invoke_AdapterMethodHandle::argument_offset_in_bytes()); Address G3_amh_conversion(G3_method_handle, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes()); - const int java_mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); + const int java_mirror_offset = in_bytes(Klass::java_mirror_offset()); if (have_entry(ek)) { __ nop(); // empty stubs make SG sick diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/sparc/vm/sparc.ad --- a/src/cpu/sparc/vm/sparc.ad Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/sparc/vm/sparc.ad Thu Dec 29 11:37:50 2011 -0800 @@ -6773,6 +6773,16 @@ ins_pipe(empty); %} +instruct membar_storestore() %{ + match(MemBarStoreStore); + ins_cost(0); + + size(0); + format %{ "!MEMBAR-storestore (empty encoding)" %} + ins_encode( ); + ins_pipe(empty); +%} + //----------Register Move Instructions----------------------------------------- instruct roundDouble_nop(regD dst) %{ match(Set dst (RoundDouble dst)); diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/sparc/vm/stubGenerator_sparc.cpp --- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -3046,8 +3046,7 @@ // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 // - int lh_offset = klassOopDesc::header_size() * HeapWordSize + - Klass::layout_helper_offset_in_bytes(); + int lh_offset = in_bytes(Klass::layout_helper_offset()); // Load 32-bits signed value. Use br() instruction with it to check icc. __ lduw(G3_src_klass, lh_offset, G5_lh); @@ -3194,15 +3193,13 @@ G4_dst_klass, G3_src_klass); // Generate the type check. - int sco_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::super_check_offset_offset_in_bytes()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); __ lduw(G4_dst_klass, sco_offset, sco_temp); generate_type_check(G3_src_klass, sco_temp, G4_dst_klass, O5_temp, L_plain_copy); // Fetch destination element klass from the objArrayKlass header. - int ek_offset = (klassOopDesc::header_size() * HeapWordSize + - objArrayKlass::element_klass_offset_in_bytes()); + int ek_offset = in_bytes(objArrayKlass::element_klass_offset()); // the checkcast_copy loop needs two extra arguments: __ ld_ptr(G4_dst_klass, ek_offset, O4); // dest elem klass diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/sparc/vm/templateInterpreter_sparc.cpp --- a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -366,7 +366,7 @@ // get synchronization object to O0 { Label done; - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); __ btst(JVM_ACC_STATIC, O0); __ br( Assembler::zero, true, Assembler::pt, done); __ delayed()->ld_ptr(Llocals, Interpreter::local_offset_in_bytes(0), O0); // get receiver for not-static case @@ -984,7 +984,7 @@ // get native function entry point(O0 is a good temp until the very end) __ delayed()->ld_ptr(Lmethod, in_bytes(methodOopDesc::native_function_offset()), O0); // for static methods insert the mirror argument - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); __ ld_ptr(Lmethod, methodOopDesc:: constants_offset(), O1); __ ld_ptr(O1, constantPoolOopDesc::pool_holder_offset_in_bytes(), O1); diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/sparc/vm/templateTable_sparc.cpp --- a/src/cpu/sparc/vm/templateTable_sparc.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/sparc/vm/templateTable_sparc.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -888,7 +888,7 @@ // do fast instanceof cache test - __ ld_ptr(O4, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes(), O4); + __ ld_ptr(O4, in_bytes(objArrayKlass::element_klass_offset()), O4); assert(Otos_i == O0, "just checking"); @@ -2031,7 +2031,7 @@ __ access_local_ptr(G3_scratch, Otos_i); __ load_klass(Otos_i, O2); __ set(JVM_ACC_HAS_FINALIZER, G3); - __ ld(O2, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc), O2); + __ ld(O2, in_bytes(Klass::access_flags_offset()), O2); __ andcc(G3, O2, G0); Label skip_register_finalizer; __ br(Assembler::zero, false, Assembler::pn, skip_register_finalizer); @@ -3350,13 +3350,13 @@ __ ld_ptr(Rscratch, Roffset, RinstanceKlass); // make sure klass is fully initialized: - __ ld(RinstanceKlass, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc), G3_scratch); + __ ld(RinstanceKlass, in_bytes(instanceKlass::init_state_offset()), G3_scratch); __ cmp(G3_scratch, instanceKlass::fully_initialized); __ br(Assembler::notEqual, false, Assembler::pn, slow_case); - __ delayed()->ld(RinstanceKlass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc), Roffset); + __ delayed()->ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset); // get instance_size in instanceKlass (already aligned) - //__ ld(RinstanceKlass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc), Roffset); + //__ ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset); // make sure klass does not have has_finalizer, or is abstract, or interface or java/lang/Class __ btst(Klass::_lh_instance_slow_path_bit, Roffset); @@ -3483,7 +3483,7 @@ __ bind(initialize_header); if (UseBiasedLocking) { - __ ld_ptr(RinstanceKlass, Klass::prototype_header_offset_in_bytes() + sizeof(oopDesc), G4_scratch); + __ ld_ptr(RinstanceKlass, in_bytes(Klass::prototype_header_offset()), G4_scratch); } else { __ set((intptr_t)markOopDesc::prototype(), G4_scratch); } diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/x86/vm/assembler_x86.cpp --- a/src/cpu/x86/vm/assembler_x86.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/x86/vm/assembler_x86.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -533,6 +533,19 @@ case 0x0F: // movx..., etc. switch (0xFF & *ip++) { + case 0x3A: // pcmpestri + tail_size = 1; + case 0x38: // ptest, pmovzxbw + ip++; // skip opcode + debug_only(has_disp32 = true); // has both kinds of operands! + break; + + case 0x70: // pshufd r, r/a, #8 + debug_only(has_disp32 = true); // has both kinds of operands! + case 0x73: // psrldq r, #8 + tail_size = 1; + break; + case 0x12: // movlps case 0x28: // movaps case 0x2E: // ucomiss @@ -543,9 +556,7 @@ case 0x57: // xorps case 0x6E: // movd case 0x7E: // movd - case 0xAE: // ldmxcsr a - // 64bit side says it these have both operands but that doesn't - // appear to be true + case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush debug_only(has_disp32 = true); break; @@ -565,6 +576,12 @@ // fall out of the switch to decode the address break; + case 0xC4: // pinsrw r, a, #8 + debug_only(has_disp32 = true); + case 0xC5: // pextrw r, r, #8 + tail_size = 1; // the imm8 + break; + case 0xAC: // shrd r, a, #8 debug_only(has_disp32 = true); tail_size = 1; // the imm8 @@ -625,11 +642,44 @@ tail_size = 1; // the imm8 break; - case 0xE8: // call rdisp32 - case 0xE9: // jmp rdisp32 - if (which == end_pc_operand) return ip + 4; - assert(which == call32_operand, "call has no disp32 or imm"); - return ip; + case 0xC4: // VEX_3bytes + case 0xC5: // VEX_2bytes + assert((UseAVX > 0), "shouldn't have VEX prefix"); + assert(ip == inst+1, "no prefixes allowed"); + // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions + // but they have prefix 0x0F and processed when 0x0F processed above. + // + // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES + // instructions (these instructions are not supported in 64-bit mode). + // To distinguish them bits [7:6] are set in the VEX second byte since + // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set + // those VEX bits REX and vvvv bits are inverted. + // + // Fortunately C2 doesn't generate these instructions so we don't need + // to check for them in product version. + + // Check second byte + NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions")); + + // First byte + if ((0xFF & *inst) == VEX_3bytes) { + ip++; // third byte + is_64bit = ((VEX_W & *ip) == VEX_W); + } + ip++; // opcode + // To find the end of instruction (which == end_pc_operand). + switch (0xFF & *ip) { + case 0x61: // pcmpestri r, r/a, #8 + case 0x70: // pshufd r, r/a, #8 + case 0x73: // psrldq r, #8 + tail_size = 1; // the imm8 + break; + default: + break; + } + ip++; // skip opcode + debug_only(has_disp32 = true); // has both kinds of operands! + break; case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1 case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl @@ -643,6 +693,12 @@ debug_only(has_disp32 = true); break; + case 0xE8: // call rdisp32 + case 0xE9: // jmp rdisp32 + if (which == end_pc_operand) return ip + 4; + assert(which == call32_operand, "call has no disp32 or imm"); + return ip; + case 0xF0: // Lock assert(os::is_MP(), "only on MP"); goto again_after_prefix; @@ -918,9 +974,7 @@ void Assembler::addsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); emit_byte(0x58); emit_byte(0xC0 | encode); } @@ -928,18 +982,14 @@ void Assembler::addsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF2); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F2); emit_byte(0x58); emit_operand(dst, src); } void Assembler::addss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); emit_byte(0x58); emit_byte(0xC0 | encode); } @@ -947,13 +997,19 @@ void Assembler::addss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F3); emit_byte(0x58); emit_operand(dst, src); } +void Assembler::andl(Address dst, int32_t imm32) { + InstructionMark im(this); + prefix(dst); + emit_byte(0x81); + emit_operand(rsp, dst, 4); + emit_long(imm32); +} + void Assembler::andl(Register dst, int32_t imm32) { prefix(dst); emit_arith(0x81, 0xE0, dst, imm32); @@ -974,13 +1030,33 @@ void Assembler::andpd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_66); emit_byte(0x54); emit_operand(dst, src); } +void Assembler::andpd(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); + emit_byte(0x54); + emit_byte(0xC0 | encode); +} + +void Assembler::andps(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_NONE); + emit_byte(0x54); + emit_operand(dst, src); +} + +void Assembler::andps(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE); + emit_byte(0x54); + emit_byte(0xC0 | encode); +} + void Assembler::bsfl(Register dst, Register src) { int encode = prefix_and_encode(dst->encoding(), src->encoding()); emit_byte(0x0F); @@ -1025,19 +1101,7 @@ } void Assembler::call(Register dst) { - // This was originally using a 32bit register encoding - // and surely we want 64bit! - // this is a 32bit encoding but in 64bit mode the default - // operand size is 64bit so there is no need for the - // wide prefix. So prefix only happens if we use the - // new registers. Much like push/pop. - int x = offset(); - // this may be true but dbx disassembles it as if it - // were 32bits... - // int encode = prefix_and_encode(dst->encoding()); - // if (offset() != x) assert(dst->encoding() >= 8, "what?"); - int encode = prefixq_and_encode(dst->encoding()); - + int encode = prefix_and_encode(dst->encoding()); emit_byte(0xFF); emit_byte(0xD0 | encode); } @@ -1157,87 +1221,119 @@ // NOTE: dbx seems to decode this as comiss even though the // 0x66 is there. Strangly ucomisd comes out correct NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0x66); - comiss(dst, src); + InstructionMark im(this); + simd_prefix(dst, src, VEX_SIMD_66); + emit_byte(0x2F); + emit_operand(dst, src); +} + +void Assembler::comisd(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); + emit_byte(0x2F); + emit_byte(0xC0 | encode); } void Assembler::comiss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - - InstructionMark im(this); - prefix(src, dst); - emit_byte(0x0F); + InstructionMark im(this); + simd_prefix(dst, src, VEX_SIMD_NONE); emit_byte(0x2F); emit_operand(dst, src); } +void Assembler::comiss(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); + emit_byte(0x2F); + emit_byte(0xC0 | encode); +} + void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); emit_byte(0xE6); emit_byte(0xC0 | encode); } void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); emit_byte(0x5B); emit_byte(0xC0 | encode); } void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); emit_byte(0x5A); emit_byte(0xC0 | encode); } +void Assembler::cvtsd2ss(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_F2); + emit_byte(0x5A); + emit_operand(dst, src); +} + void Assembler::cvtsi2sdl(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); emit_byte(0x2A); emit_byte(0xC0 | encode); } +void Assembler::cvtsi2sdl(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_F2); + emit_byte(0x2A); + emit_operand(dst, src); +} + void Assembler::cvtsi2ssl(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); emit_byte(0x2A); emit_byte(0xC0 | encode); } +void Assembler::cvtsi2ssl(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_F3); + emit_byte(0x2A); + emit_operand(dst, src); +} + void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); emit_byte(0x5A); emit_byte(0xC0 | encode); } +void Assembler::cvtss2sd(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_F3); + emit_byte(0x5A); + emit_operand(dst, src); +} + + void Assembler::cvttsd2sil(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2); emit_byte(0x2C); emit_byte(0xC0 | encode); } void Assembler::cvttss2sil(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); emit_byte(0x2C); emit_byte(0xC0 | encode); } @@ -1253,18 +1349,14 @@ void Assembler::divsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF2); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F2); emit_byte(0x5E); emit_operand(dst, src); } void Assembler::divsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); emit_byte(0x5E); emit_byte(0xC0 | encode); } @@ -1272,18 +1364,14 @@ void Assembler::divss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F3); emit_byte(0x5E); emit_operand(dst, src); } void Assembler::divss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); emit_byte(0x5E); emit_byte(0xC0 | encode); } @@ -1377,8 +1465,14 @@ if (L.is_bound()) { const int short_size = 2; address entry = target(L); - assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)), - "Dispacement too large for a short jmp"); +#ifdef ASSERT + intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size); + intptr_t delta = short_branch_delta(); + if (delta != 0) { + dist += (dist < 0 ? (-delta) :delta); + } + assert(is8bit(dist), "Dispacement too large for a short jmp"); +#endif intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos; // 0111 tttn #8-bit disp emit_byte(0x70 | cc); @@ -1444,9 +1538,15 @@ if (L.is_bound()) { const int short_size = 2; address entry = target(L); - assert(is8bit((entry - _code_pos) + short_size), - "Dispacement too large for a short jmp"); assert(entry != NULL, "jmp most probably wrong"); +#ifdef ASSERT + intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size); + intptr_t delta = short_branch_delta(); + if (delta != 0) { + dist += (dist < 0 ? (-delta) :delta); + } + assert(is8bit(dist), "Dispacement too large for a short jmp"); +#endif intptr_t offs = entry - _code_pos; emit_byte(0xEB); emit_byte((offs - short_size) & 0xFF); @@ -1509,49 +1609,16 @@ void Assembler::movapd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int dstenc = dst->encoding(); - int srcenc = src->encoding(); - emit_byte(0x66); - if (dstenc < 8) { - if (srcenc >= 8) { - prefix(REX_B); - srcenc -= 8; - } - } else { - if (srcenc < 8) { - prefix(REX_R); - } else { - prefix(REX_RB); - srcenc -= 8; - } - dstenc -= 8; - } - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); emit_byte(0x28); - emit_byte(0xC0 | dstenc << 3 | srcenc); + emit_byte(0xC0 | encode); } void Assembler::movaps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int dstenc = dst->encoding(); - int srcenc = src->encoding(); - if (dstenc < 8) { - if (srcenc >= 8) { - prefix(REX_B); - srcenc -= 8; - } - } else { - if (srcenc < 8) { - prefix(REX_R); - } else { - prefix(REX_RB); - srcenc -= 8; - } - dstenc -= 8; - } - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); emit_byte(0x28); - emit_byte(0xC0 | dstenc << 3 | srcenc); + emit_byte(0xC0 | encode); } void Assembler::movb(Register dst, Address src) { @@ -1582,19 +1649,15 @@ void Assembler::movdl(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0x66); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); emit_byte(0x6E); emit_byte(0xC0 | encode); } void Assembler::movdl(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0x66); // swap src/dst to get correct prefix - int encode = prefix_and_encode(src->encoding(), dst->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66); emit_byte(0x7E); emit_byte(0xC0 | encode); } @@ -1602,58 +1665,29 @@ void Assembler::movdl(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_66); emit_byte(0x6E); emit_operand(dst, src); } - -void Assembler::movdqa(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); - emit_byte(0x6F); - emit_operand(dst, src); -} - void Assembler::movdqa(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0x66); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); emit_byte(0x6F); emit_byte(0xC0 | encode); } -void Assembler::movdqa(Address dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - emit_byte(0x66); - prefix(dst, src); - emit_byte(0x0F); - emit_byte(0x7F); - emit_operand(src, dst); -} - void Assembler::movdqu(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_F3); emit_byte(0x6F); emit_operand(dst, src); } void Assembler::movdqu(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF3); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); emit_byte(0x6F); emit_byte(0xC0 | encode); } @@ -1661,9 +1695,7 @@ void Assembler::movdqu(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(dst, src); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_F3); emit_byte(0x7F); emit_operand(src, dst); } @@ -1710,9 +1742,7 @@ void Assembler::movlpd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_66); emit_byte(0x12); emit_operand(dst, src); } @@ -1740,9 +1770,7 @@ void Assembler::movq(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_F3); emit_byte(0x7E); emit_operand(dst, src); } @@ -1750,9 +1778,7 @@ void Assembler::movq(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0x66); - prefix(dst, src); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_66); emit_byte(0xD6); emit_operand(src, dst); } @@ -1775,9 +1801,7 @@ void Assembler::movsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); emit_byte(0x10); emit_byte(0xC0 | encode); } @@ -1785,9 +1809,7 @@ void Assembler::movsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF2); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_F2); emit_byte(0x10); emit_operand(dst, src); } @@ -1795,18 +1817,14 @@ void Assembler::movsd(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF2); - prefix(dst, src); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_F2); emit_byte(0x11); emit_operand(src, dst); } void Assembler::movss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); emit_byte(0x10); emit_byte(0xC0 | encode); } @@ -1814,9 +1832,7 @@ void Assembler::movss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_F3); emit_byte(0x10); emit_operand(dst, src); } @@ -1824,9 +1840,7 @@ void Assembler::movss(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(dst, src); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_F3); emit_byte(0x11); emit_operand(src, dst); } @@ -1919,18 +1933,14 @@ void Assembler::mulsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF2); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F2); emit_byte(0x59); emit_operand(dst, src); } void Assembler::mulsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); emit_byte(0x59); emit_byte(0xC0 | encode); } @@ -1938,18 +1948,14 @@ void Assembler::mulss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F3); emit_byte(0x59); emit_operand(dst, src); } void Assembler::mulss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); emit_byte(0x59); emit_byte(0xC0 | encode); } @@ -2237,14 +2243,26 @@ emit_arith(0x0B, 0xC0, dst, src); } +void Assembler::packuswb(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_66); + emit_byte(0x67); + emit_operand(dst, src); +} + +void Assembler::packuswb(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); + emit_byte(0x67); + emit_byte(0xC0 | encode); +} + void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { assert(VM_Version::supports_sse4_2(), ""); - - InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); - emit_byte(0x3A); + InstructionMark im(this); + simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); emit_byte(0x61); emit_operand(dst, src); emit_byte(imm8); @@ -2252,16 +2270,27 @@ void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { assert(VM_Version::supports_sse4_2(), ""); - - emit_byte(0x66); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - emit_byte(0x3A); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); emit_byte(0x61); emit_byte(0xC0 | encode); emit_byte(imm8); } +void Assembler::pmovzxbw(XMMRegister dst, Address src) { + assert(VM_Version::supports_sse4_1(), ""); + InstructionMark im(this); + simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + emit_byte(0x30); + emit_operand(dst, src); +} + +void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse4_1(), ""); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + emit_byte(0x30); + emit_byte(0xC0 | encode); +} + // generic void Assembler::pop(Register dst) { int encode = prefix_and_encode(dst->encoding()); @@ -2360,22 +2389,24 @@ void Assembler::por(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - - emit_byte(0x66); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); emit_byte(0xEB); emit_byte(0xC0 | encode); } +void Assembler::por(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_66); + emit_byte(0xEB); + emit_operand(dst, src); +} + void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); - - emit_byte(0x66); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); emit_byte(0x70); emit_byte(0xC0 | encode); emit_byte(mode & 0xFF); @@ -2385,11 +2416,9 @@ void Assembler::pshufd(XMMRegister dst, Address src, int mode) { assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); - - InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); + InstructionMark im(this); + simd_prefix(dst, src, VEX_SIMD_66); emit_byte(0x70); emit_operand(dst, src); emit_byte(mode & 0xFF); @@ -2398,10 +2427,7 @@ void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); - - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2); emit_byte(0x70); emit_byte(0xC0 | encode); emit_byte(mode & 0xFF); @@ -2410,11 +2436,9 @@ void Assembler::pshuflw(XMMRegister dst, Address src, int mode) { assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); - - InstructionMark im(this); - emit_byte(0xF2); - prefix(src, dst); // QQ new - emit_byte(0x0F); + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); + InstructionMark im(this); + simd_prefix(dst, src, VEX_SIMD_F2); emit_byte(0x70); emit_operand(dst, src); emit_byte(mode & 0xFF); @@ -2425,11 +2449,8 @@ // HMM Table D-1 says sse2 or mmx. // Do not confuse it with psrldq SSE2 instruction which // shifts 128 bit value in xmm register by number of bytes. - NOT_LP64(assert(VM_Version::supports_sse(), "")); - - int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding()); - emit_byte(0x66); - emit_byte(0x0F); + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); emit_byte(0x73); emit_byte(0xC0 | encode); emit_byte(shift); @@ -2438,10 +2459,7 @@ void Assembler::psrldq(XMMRegister dst, int shift) { // Shift 128 bit value in xmm register by number of bytes. NOT_LP64(assert(VM_Version::supports_sse2(), "")); - - int encode = prefixq_and_encode(xmm3->encoding(), dst->encoding()); - emit_byte(0x66); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66); emit_byte(0x73); emit_byte(0xC0 | encode); emit_byte(shift); @@ -2449,36 +2467,52 @@ void Assembler::ptest(XMMRegister dst, Address src) { assert(VM_Version::supports_sse4_1(), ""); - - InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); - emit_byte(0x38); + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); + InstructionMark im(this); + simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); emit_byte(0x17); emit_operand(dst, src); } void Assembler::ptest(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_sse4_1(), ""); - - emit_byte(0x66); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - emit_byte(0x38); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); emit_byte(0x17); emit_byte(0xC0 | encode); } +void Assembler::punpcklbw(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_66); + emit_byte(0x60); + emit_operand(dst, src); +} + void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0x66); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); emit_byte(0x60); emit_byte(0xC0 | encode); } +void Assembler::punpckldq(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_66); + emit_byte(0x62); + emit_operand(dst, src); +} + +void Assembler::punpckldq(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); + emit_byte(0x62); + emit_byte(0xC0 | encode); +} + void Assembler::push(int32_t imm32) { // in 64bits we push 64bits onto the stack but only // take a 32bit immediate @@ -2508,20 +2542,16 @@ void Assembler::pxor(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_66); emit_byte(0xEF); emit_operand(dst, src); } void Assembler::pxor(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - emit_byte(0x66); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); emit_byte(0xEF); emit_byte(0xC0 | encode); } @@ -2683,12 +2713,8 @@ } void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { - // HMM Table D-1 says sse2 - // NOT_LP64(assert(VM_Version::supports_sse(), "")); NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); emit_byte(0x51); emit_byte(0xC0 | encode); } @@ -2696,30 +2722,22 @@ void Assembler::sqrtsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF2); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F2); emit_byte(0x51); emit_operand(dst, src); } void Assembler::sqrtss(XMMRegister dst, XMMRegister src) { - // HMM Table D-1 says sse2 - // NOT_LP64(assert(VM_Version::supports_sse(), "")); - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + NOT_LP64(assert(VM_Version::supports_sse(), "")); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); emit_byte(0x51); emit_byte(0xC0 | encode); } void Assembler::sqrtss(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - emit_byte(0xF3); - prefix(src, dst); - emit_byte(0x0F); + NOT_LP64(assert(VM_Version::supports_sse(), "")); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_F3); emit_byte(0x51); emit_operand(dst, src); } @@ -2765,9 +2783,7 @@ void Assembler::subsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); emit_byte(0x5C); emit_byte(0xC0 | encode); } @@ -2775,18 +2791,14 @@ void Assembler::subsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF2); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F2); emit_byte(0x5C); emit_operand(dst, src); } void Assembler::subss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); emit_byte(0x5C); emit_byte(0xC0 | encode); } @@ -2794,9 +2806,7 @@ void Assembler::subss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F3); emit_byte(0x5C); emit_operand(dst, src); } @@ -2836,30 +2846,30 @@ void Assembler::ucomisd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0x66); - ucomiss(dst, src); + InstructionMark im(this); + simd_prefix(dst, src, VEX_SIMD_66); + emit_byte(0x2E); + emit_operand(dst, src); } void Assembler::ucomisd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0x66); - ucomiss(dst, src); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); + emit_byte(0x2E); + emit_byte(0xC0 | encode); } void Assembler::ucomiss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - - InstructionMark im(this); - prefix(src, dst); - emit_byte(0x0F); + InstructionMark im(this); + simd_prefix(dst, src, VEX_SIMD_NONE); emit_byte(0x2E); emit_operand(dst, src); } void Assembler::ucomiss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); emit_byte(0x2E); emit_byte(0xC0 | encode); } @@ -2905,16 +2915,15 @@ void Assembler::xorpd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0x66); - xorps(dst, src); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); + emit_byte(0x57); + emit_byte(0xC0 | encode); } void Assembler::xorpd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_66); emit_byte(0x57); emit_operand(dst, src); } @@ -2922,8 +2931,7 @@ void Assembler::xorps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE); emit_byte(0x57); emit_byte(0xC0 | encode); } @@ -2931,12 +2939,166 @@ void Assembler::xorps(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_NONE); emit_byte(0x57); emit_operand(dst, src); } +// AVX 3-operands non destructive source instructions (encoded with VEX prefix) + +void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark im(this); + vex_prefix(dst, nds, src, VEX_SIMD_F2); + emit_byte(0x58); + emit_operand(dst, src); +} + +void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { + assert(VM_Version::supports_avx(), ""); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); + emit_byte(0x58); + emit_byte(0xC0 | encode); +} + +void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark im(this); + vex_prefix(dst, nds, src, VEX_SIMD_F3); + emit_byte(0x58); + emit_operand(dst, src); +} + +void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { + assert(VM_Version::supports_avx(), ""); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); + emit_byte(0x58); + emit_byte(0xC0 | encode); +} + +void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark im(this); + vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector + emit_byte(0x54); + emit_operand(dst, src); +} + +void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark im(this); + vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector + emit_byte(0x54); + emit_operand(dst, src); +} + +void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark im(this); + vex_prefix(dst, nds, src, VEX_SIMD_F2); + emit_byte(0x5E); + emit_operand(dst, src); +} + +void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { + assert(VM_Version::supports_avx(), ""); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); + emit_byte(0x5E); + emit_byte(0xC0 | encode); +} + +void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark im(this); + vex_prefix(dst, nds, src, VEX_SIMD_F3); + emit_byte(0x5E); + emit_operand(dst, src); +} + +void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { + assert(VM_Version::supports_avx(), ""); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); + emit_byte(0x5E); + emit_byte(0xC0 | encode); +} + +void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark im(this); + vex_prefix(dst, nds, src, VEX_SIMD_F2); + emit_byte(0x59); + emit_operand(dst, src); +} + +void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { + assert(VM_Version::supports_avx(), ""); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); + emit_byte(0x59); + emit_byte(0xC0 | encode); +} + +void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) { + InstructionMark im(this); + vex_prefix(dst, nds, src, VEX_SIMD_F3); + emit_byte(0x59); + emit_operand(dst, src); +} + +void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { + assert(VM_Version::supports_avx(), ""); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); + emit_byte(0x59); + emit_byte(0xC0 | encode); +} + + +void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark im(this); + vex_prefix(dst, nds, src, VEX_SIMD_F2); + emit_byte(0x5C); + emit_operand(dst, src); +} + +void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { + assert(VM_Version::supports_avx(), ""); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); + emit_byte(0x5C); + emit_byte(0xC0 | encode); +} + +void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark im(this); + vex_prefix(dst, nds, src, VEX_SIMD_F3); + emit_byte(0x5C); + emit_operand(dst, src); +} + +void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { + assert(VM_Version::supports_avx(), ""); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); + emit_byte(0x5C); + emit_byte(0xC0 | encode); +} + +void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark im(this); + vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector + emit_byte(0x57); + emit_operand(dst, src); +} + +void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark im(this); + vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector + emit_byte(0x57); + emit_operand(dst, src); +} + + #ifndef _LP64 // 32bit only pieces of the assembler @@ -3394,12 +3556,114 @@ emit_byte(0xF1); } +// SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding. +static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 }; +// SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding. +static int simd_opc[4] = { 0, 0, 0x38, 0x3A }; + +// Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding. +void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) { + if (pre > 0) { + emit_byte(simd_pre[pre]); + } + if (rex_w) { + prefixq(adr, xreg); + } else { + prefix(adr, xreg); + } + if (opc > 0) { + emit_byte(0x0F); + int opc2 = simd_opc[opc]; + if (opc2 > 0) { + emit_byte(opc2); + } + } +} + +int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) { + if (pre > 0) { + emit_byte(simd_pre[pre]); + } + int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : + prefix_and_encode(dst_enc, src_enc); + if (opc > 0) { + emit_byte(0x0F); + int opc2 = simd_opc[opc]; + if (opc2 > 0) { + emit_byte(opc2); + } + } + return encode; +} + + +void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) { + if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) { + prefix(VEX_3bytes); + + int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0); + byte1 = (~byte1) & 0xE0; + byte1 |= opc; + a_byte(byte1); + + int byte2 = ((~nds_enc) & 0xf) << 3; + byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre; + emit_byte(byte2); + } else { + prefix(VEX_2bytes); + + int byte1 = vex_r ? VEX_R : 0; + byte1 = (~byte1) & 0x80; + byte1 |= ((~nds_enc) & 0xf) << 3; + byte1 |= (vector256 ? 4 : 0) | pre; + emit_byte(byte1); + } +} + +void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){ + bool vex_r = (xreg_enc >= 8); + bool vex_b = adr.base_needs_rex(); + bool vex_x = adr.index_needs_rex(); + vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256); +} + +int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) { + bool vex_r = (dst_enc >= 8); + bool vex_b = (src_enc >= 8); + bool vex_x = false; + vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256); + return (((dst_enc & 7) << 3) | (src_enc & 7)); +} + + +void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) { + if (UseAVX > 0) { + int xreg_enc = xreg->encoding(); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256); + } else { + assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding"); + rex_prefix(adr, xreg, pre, opc, rex_w); + } +} + +int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) { + int dst_enc = dst->encoding(); + int src_enc = src->encoding(); + if (UseAVX > 0) { + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256); + } else { + assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding"); + return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w); + } +} #ifndef _LP64 void Assembler::incl(Register dst) { // Don't use it directly. Use MacroAssembler::incrementl() instead. - emit_byte(0x40 | dst->encoding()); + emit_byte(0x40 | dst->encoding()); } void Assembler::lea(Register dst, Address src) { @@ -3756,6 +4020,38 @@ } } +void Assembler::prefixq(Address adr, XMMRegister src) { + if (src->encoding() < 8) { + if (adr.base_needs_rex()) { + if (adr.index_needs_rex()) { + prefix(REX_WXB); + } else { + prefix(REX_WB); + } + } else { + if (adr.index_needs_rex()) { + prefix(REX_WX); + } else { + prefix(REX_W); + } + } + } else { + if (adr.base_needs_rex()) { + if (adr.index_needs_rex()) { + prefix(REX_WRXB); + } else { + prefix(REX_WRB); + } + } else { + if (adr.index_needs_rex()) { + prefix(REX_WRX); + } else { + prefix(REX_WR); + } + } + } +} + void Assembler::adcq(Register dst, int32_t imm32) { (void) prefixq_and_encode(dst->encoding()); emit_arith(0x81, 0xD0, dst, imm32); @@ -3918,36 +4214,44 @@ void Assembler::cvtsi2sdq(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2); emit_byte(0x2A); emit_byte(0xC0 | encode); } +void Assembler::cvtsi2sdq(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionMark im(this); + simd_prefix_q(dst, dst, src, VEX_SIMD_F2); + emit_byte(0x2A); + emit_operand(dst, src); +} + void Assembler::cvtsi2ssq(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3); emit_byte(0x2A); emit_byte(0xC0 | encode); } +void Assembler::cvtsi2ssq(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + InstructionMark im(this); + simd_prefix_q(dst, dst, src, VEX_SIMD_F3); + emit_byte(0x2A); + emit_operand(dst, src); +} + void Assembler::cvttsd2siq(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2); emit_byte(0x2C); emit_byte(0xC0 | encode); } void Assembler::cvttss2siq(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3); emit_byte(0x2C); emit_byte(0xC0 | encode); } @@ -4107,21 +4411,17 @@ void Assembler::movdq(XMMRegister dst, Register src) { // table D-1 says MMX/SSE2 - NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), "")); - emit_byte(0x66); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66); emit_byte(0x6E); emit_byte(0xC0 | encode); } void Assembler::movdq(Register dst, XMMRegister src) { // table D-1 says MMX/SSE2 - NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), "")); - emit_byte(0x66); + NOT_LP64(assert(VM_Version::supports_sse2(), "")); // swap src/dst to get correct prefix - int encode = prefixq_and_encode(src->encoding(), dst->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66); emit_byte(0x7E); emit_byte(0xC0 | encode); } @@ -4632,7 +4932,7 @@ null_check_offset = offset(); } movl(tmp_reg, klass_addr); - xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); + xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset())); andl(swap_reg, ~((int) markOopDesc::age_mask_in_place)); if (need_tmp_reg) { pop(tmp_reg); @@ -4719,7 +5019,7 @@ } get_thread(tmp_reg); movl(swap_reg, klass_addr); - orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); + orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset())); movl(swap_reg, saved_mark_addr); if (os::is_MP()) { lock(); @@ -4757,7 +5057,7 @@ push(tmp_reg); } movl(tmp_reg, klass_addr); - movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); + movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset())); if (os::is_MP()) { lock(); } @@ -5680,6 +5980,24 @@ LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); } +void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::addsd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::addsd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + addss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + addss(dst, Address(rscratch1, 0)); + } +} + void MacroAssembler::align(int modulus) { if (offset() % modulus != 0) { nop(modulus - (offset() % modulus)); @@ -5687,11 +6005,24 @@ } void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) { + // Used in sign-masking with aligned address. + assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); if (reachable(src)) { - andpd(dst, as_Address(src)); + Assembler::andpd(dst, as_Address(src)); } else { lea(rscratch1, src); - andpd(dst, Address(rscratch1, 0)); + Assembler::andpd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) { + // Used in sign-masking with aligned address. + assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); + if (reachable(src)) { + Assembler::andps(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::andps(dst, Address(rscratch1, 0)); } } @@ -6270,19 +6601,19 @@ void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) { if (reachable(src)) { - comisd(dst, as_Address(src)); + Assembler::comisd(dst, as_Address(src)); } else { lea(rscratch1, src); - comisd(dst, Address(rscratch1, 0)); + Assembler::comisd(dst, Address(rscratch1, 0)); } } void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) { if (reachable(src)) { - comiss(dst, as_Address(src)); + Assembler::comiss(dst, as_Address(src)); } else { lea(rscratch1, src); - comiss(dst, Address(rscratch1, 0)); + Assembler::comiss(dst, Address(rscratch1, 0)); } } @@ -6366,6 +6697,24 @@ sarl(reg, shift_value); } +void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::divsd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::divsd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::divss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::divss(dst, Address(rscratch1, 0)); + } +} + // !defined(COMPILER2) is because of stupid core builds #if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2) void MacroAssembler::empty_FPU_stack() { @@ -6805,12 +7154,39 @@ LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); } +void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::movsd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::movsd(dst, Address(rscratch1, 0)); + } +} + void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) { if (reachable(src)) { - movss(dst, as_Address(src)); + Assembler::movss(dst, as_Address(src)); } else { lea(rscratch1, src); - movss(dst, Address(rscratch1, 0)); + Assembler::movss(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::mulsd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::mulsd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::mulss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::mulss(dst, Address(rscratch1, 0)); } } @@ -6992,6 +7368,193 @@ testl(dst, as_Address(src)); } +void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::sqrtsd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::sqrtsd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::sqrtss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::sqrtss(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::subsd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::subsd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::subss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::subss(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::ucomisd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::ucomisd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::ucomiss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::ucomiss(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { + // Used in sign-bit flipping with aligned address. + assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); + if (reachable(src)) { + Assembler::xorpd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::xorpd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { + // Used in sign-bit flipping with aligned address. + assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); + if (reachable(src)) { + Assembler::xorps(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::xorps(dst, Address(rscratch1, 0)); + } +} + +// AVX 3-operands instructions + +void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vaddsd(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vaddsd(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vaddss(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vaddss(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vandpd(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vandpd(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vandps(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vandps(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vdivsd(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vdivsd(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vdivss(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vdivss(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vmulsd(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vmulsd(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vmulss(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vmulss(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vsubsd(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vsubsd(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vsubss(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vsubss(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vxorpd(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vxorpd(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vxorps(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vxorps(dst, nds, Address(rscratch1, 0)); + } +} + + ////////////////////////////////////////////////////////////////////////////////// #ifndef SERIALGC @@ -7685,10 +8248,8 @@ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } assert(label_nulls <= 1, "at most one NULL in the batch"); - int sc_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::secondary_super_cache_offset_in_bytes()); - int sco_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::super_check_offset_offset_in_bytes()); + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); Address super_check_offset_addr(super_klass, sco_offset); // Hacked jcc, which "knows" that L_fallthrough, at least, is in @@ -7786,10 +8347,8 @@ assert(label_nulls <= 1, "at most one NULL in the batch"); // a couple of useful fields in sub_klass: - int ss_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::secondary_supers_offset_in_bytes()); - int sc_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::secondary_super_cache_offset_in_bytes()); + int ss_offset = in_bytes(Klass::secondary_supers_offset()); + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); Address secondary_supers_addr(sub_klass, ss_offset); Address super_cache_addr( sub_klass, sc_offset); @@ -7876,32 +8435,6 @@ } -void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { - ucomisd(dst, as_Address(src)); -} - -void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { - ucomiss(dst, as_Address(src)); -} - -void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - xorpd(dst, as_Address(src)); - } else { - lea(rscratch1, src); - xorpd(dst, Address(rscratch1, 0)); - } -} - -void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - xorps(dst, as_Address(src)); - } else { - lea(rscratch1, src); - xorps(dst, Address(rscratch1, 0)); - } -} - void MacroAssembler::cmov32(Condition cc, Register dst, Address src) { if (VM_Version::supports_cmov()) { cmovl(cc, dst, src); @@ -8487,20 +9020,20 @@ if (Universe::narrow_oop_shift() != 0) { assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); if (LogMinObjAlignmentInBytes == Address::times_8) { - movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); + movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset())); } else { // OK to use shift since we don't need to preserve flags. shlq(dst, LogMinObjAlignmentInBytes); - movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); + movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset())); } } else { - movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); + movq(dst, Address(dst, Klass::prototype_header_offset())); } } else #endif { movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); - movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); + movptr(dst, Address(dst, Klass::prototype_header_offset())); } } @@ -8761,6 +9294,7 @@ Register cnt1, Register cnt2, int int_cnt2, Register result, XMMRegister vec, Register tmp) { + ShortBranchVerifier sbv(this); assert(UseSSE42Intrinsics, "SSE4.2 is required"); // This method uses pcmpestri inxtruction with bound registers @@ -8890,9 +9424,9 @@ pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d); } // Need to reload strings pointers if not matched whole vector - jccb(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 + jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 addptr(cnt2, 8); - jccb(Assembler::negative, SCAN_SUBSTR); + jcc(Assembler::negative, SCAN_SUBSTR); // Fall through if found full substring } // (int_cnt2 > 8) @@ -8911,6 +9445,7 @@ Register cnt1, Register cnt2, int int_cnt2, Register result, XMMRegister vec, Register tmp) { + ShortBranchVerifier sbv(this); assert(UseSSE42Intrinsics, "SSE4.2 is required"); // // int_cnt2 is length of small (< 8 chars) constant substring @@ -9172,6 +9707,7 @@ void MacroAssembler::string_compare(Register str1, Register str2, Register cnt1, Register cnt2, Register result, XMMRegister vec1) { + ShortBranchVerifier sbv(this); Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL; // Compute the minimum of the string lengths and the @@ -9308,6 +9844,7 @@ void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2, Register limit, Register result, Register chr, XMMRegister vec1, XMMRegister vec2) { + ShortBranchVerifier sbv(this); Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR; int length_offset = arrayOopDesc::length_offset_in_bytes(); @@ -9427,6 +9964,7 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned, Register to, Register value, Register count, Register rtmp, XMMRegister xtmp) { + ShortBranchVerifier sbv(this); assert_different_registers(to, value, count, rtmp); Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte; Label L_fill_2_bytes, L_fill_4_bytes; diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/x86/vm/assembler_x86.hpp --- a/src/cpu/x86/vm/assembler_x86.hpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/x86/vm/assembler_x86.hpp Thu Dec 29 11:37:50 2011 -0800 @@ -503,7 +503,31 @@ REX_WR = 0x4C, REX_WRB = 0x4D, REX_WRX = 0x4E, - REX_WRXB = 0x4F + REX_WRXB = 0x4F, + + VEX_3bytes = 0xC4, + VEX_2bytes = 0xC5 + }; + + enum VexPrefix { + VEX_B = 0x20, + VEX_X = 0x40, + VEX_R = 0x80, + VEX_W = 0x80 + }; + + enum VexSimdPrefix { + VEX_SIMD_NONE = 0x0, + VEX_SIMD_66 = 0x1, + VEX_SIMD_F3 = 0x2, + VEX_SIMD_F2 = 0x3 + }; + + enum VexOpcode { + VEX_OPCODE_NONE = 0x0, + VEX_OPCODE_0F = 0x1, + VEX_OPCODE_0F_38 = 0x2, + VEX_OPCODE_0F_3A = 0x3 }; enum WhichOperand { @@ -546,12 +570,99 @@ void prefixq(Address adr); void prefix(Address adr, Register reg, bool byteinst = false); + void prefix(Address adr, XMMRegister reg); void prefixq(Address adr, Register reg); - - void prefix(Address adr, XMMRegister reg); + void prefixq(Address adr, XMMRegister reg); void prefetch_prefix(Address src); + void rex_prefix(Address adr, XMMRegister xreg, + VexSimdPrefix pre, VexOpcode opc, bool rex_w); + int rex_prefix_and_encode(int dst_enc, int src_enc, + VexSimdPrefix pre, VexOpcode opc, bool rex_w); + + void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, + int nds_enc, VexSimdPrefix pre, VexOpcode opc, + bool vector256); + + void vex_prefix(Address adr, int nds_enc, int xreg_enc, + VexSimdPrefix pre, VexOpcode opc, + bool vex_w, bool vector256); + + void vex_prefix(XMMRegister dst, XMMRegister nds, Address src, + VexSimdPrefix pre, bool vector256 = false) { + vex_prefix(src, nds->encoding(), dst->encoding(), + pre, VEX_OPCODE_0F, false, vector256); + } + + int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, + VexSimdPrefix pre, VexOpcode opc, + bool vex_w, bool vector256); + + int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, + VexSimdPrefix pre, bool vector256 = false) { + return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), + pre, VEX_OPCODE_0F, false, vector256); + } + + void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, + VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F, + bool rex_w = false, bool vector256 = false); + + void simd_prefix(XMMRegister dst, Address src, + VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { + simd_prefix(dst, xnoreg, src, pre, opc); + } + void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) { + simd_prefix(src, dst, pre); + } + void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src, + VexSimdPrefix pre) { + bool rex_w = true; + simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w); + } + + + int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, + VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F, + bool rex_w = false, bool vector256 = false); + + int simd_prefix_and_encode(XMMRegister dst, XMMRegister src, + VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { + return simd_prefix_and_encode(dst, xnoreg, src, pre, opc); + } + + // Move/convert 32-bit integer value. + int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src, + VexSimdPrefix pre) { + // It is OK to cast from Register to XMMRegister to pass argument here + // since only encoding is used in simd_prefix_and_encode() and number of + // Gen and Xmm registers are the same. + return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre); + } + int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre) { + return simd_prefix_and_encode(dst, xnoreg, src, pre); + } + int simd_prefix_and_encode(Register dst, XMMRegister src, + VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { + return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc); + } + + // Move/convert 64-bit integer value. + int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src, + VexSimdPrefix pre) { + bool rex_w = true; + return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, VEX_OPCODE_0F, rex_w); + } + int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre) { + return simd_prefix_and_encode_q(dst, xnoreg, src, pre); + } + int simd_prefix_and_encode_q(Register dst, XMMRegister src, + VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { + bool rex_w = true; + return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc, rex_w); + } + // Helper functions for groups of instructions void emit_arith_b(int op1, int op2, Register dst, int imm8); @@ -764,6 +875,7 @@ void addss(XMMRegister dst, Address src); void addss(XMMRegister dst, XMMRegister src); + void andl(Address dst, int32_t imm32); void andl(Register dst, int32_t imm32); void andl(Register dst, Address src); void andl(Register dst, Register src); @@ -774,9 +886,11 @@ void andq(Register dst, Register src); // Bitwise Logical AND of Packed Double-Precision Floating-Point Values - void andpd(XMMRegister dst, Address src); void andpd(XMMRegister dst, XMMRegister src); + // Bitwise Logical AND of Packed Single-Precision Floating-Point Values + void andps(XMMRegister dst, XMMRegister src); + void bsfl(Register dst, Register src); void bsrl(Register dst, Register src); @@ -837,9 +951,11 @@ // Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS void comisd(XMMRegister dst, Address src); + void comisd(XMMRegister dst, XMMRegister src); // Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS void comiss(XMMRegister dst, Address src); + void comiss(XMMRegister dst, XMMRegister src); // Identify processor type and features void cpuid() { @@ -849,14 +965,19 @@ // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value void cvtsd2ss(XMMRegister dst, XMMRegister src); + void cvtsd2ss(XMMRegister dst, Address src); // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value void cvtsi2sdl(XMMRegister dst, Register src); + void cvtsi2sdl(XMMRegister dst, Address src); void cvtsi2sdq(XMMRegister dst, Register src); + void cvtsi2sdq(XMMRegister dst, Address src); // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value void cvtsi2ssl(XMMRegister dst, Register src); + void cvtsi2ssl(XMMRegister dst, Address src); void cvtsi2ssq(XMMRegister dst, Register src); + void cvtsi2ssq(XMMRegister dst, Address src); // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value void cvtdq2pd(XMMRegister dst, XMMRegister src); @@ -866,6 +987,7 @@ // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value void cvtss2sd(XMMRegister dst, XMMRegister src); + void cvtss2sd(XMMRegister dst, Address src); // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer void cvttsd2sil(Register dst, Address src); @@ -1140,8 +1262,6 @@ void movdq(Register dst, XMMRegister src); // Move Aligned Double Quadword - void movdqa(Address dst, XMMRegister src); - void movdqa(XMMRegister dst, Address src); void movdqa(XMMRegister dst, XMMRegister src); // Move Unaligned Double Quadword @@ -1261,10 +1381,18 @@ void orq(Register dst, Address src); void orq(Register dst, Register src); + // Pack with unsigned saturation + void packuswb(XMMRegister dst, XMMRegister src); + void packuswb(XMMRegister dst, Address src); + // SSE4.2 string instructions void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8); void pcmpestri(XMMRegister xmm1, Address src, int imm8); + // SSE4.1 packed move + void pmovzxbw(XMMRegister dst, XMMRegister src); + void pmovzxbw(XMMRegister dst, Address src); + #ifndef _LP64 // no 32bit push/pop on amd64 void popl(Address dst); #endif @@ -1292,6 +1420,7 @@ // POR - Bitwise logical OR void por(XMMRegister dst, XMMRegister src); + void por(XMMRegister dst, Address src); // Shuffle Packed Doublewords void pshufd(XMMRegister dst, XMMRegister src, int mode); @@ -1313,6 +1442,11 @@ // Interleave Low Bytes void punpcklbw(XMMRegister dst, XMMRegister src); + void punpcklbw(XMMRegister dst, Address src); + + // Interleave Low Doublewords + void punpckldq(XMMRegister dst, XMMRegister src); + void punpckldq(XMMRegister dst, Address src); #ifndef _LP64 // no 32bit push/pop on amd64 void pushl(Address src); @@ -1429,6 +1563,13 @@ void xchgq(Register reg, Address adr); void xchgq(Register dst, Register src); + // Get Value of Extended Control Register + void xgetbv() { + emit_byte(0x0F); + emit_byte(0x01); + emit_byte(0xD0); + } + void xorl(Register dst, int32_t imm32); void xorl(Register dst, Address src); void xorl(Register dst, Register src); @@ -1437,14 +1578,44 @@ void xorq(Register dst, Register src); // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values - void xorpd(XMMRegister dst, Address src); void xorpd(XMMRegister dst, XMMRegister src); // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values - void xorps(XMMRegister dst, Address src); void xorps(XMMRegister dst, XMMRegister src); void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0 + + // AVX 3-operands instructions (encoded with VEX prefix) + void vaddsd(XMMRegister dst, XMMRegister nds, Address src); + void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src); + void vaddss(XMMRegister dst, XMMRegister nds, Address src); + void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src); + void vandpd(XMMRegister dst, XMMRegister nds, Address src); + void vandps(XMMRegister dst, XMMRegister nds, Address src); + void vdivsd(XMMRegister dst, XMMRegister nds, Address src); + void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src); + void vdivss(XMMRegister dst, XMMRegister nds, Address src); + void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src); + void vmulsd(XMMRegister dst, XMMRegister nds, Address src); + void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src); + void vmulss(XMMRegister dst, XMMRegister nds, Address src); + void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src); + void vsubsd(XMMRegister dst, XMMRegister nds, Address src); + void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src); + void vsubss(XMMRegister dst, XMMRegister nds, Address src); + void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src); + void vxorpd(XMMRegister dst, XMMRegister nds, Address src); + void vxorps(XMMRegister dst, XMMRegister nds, Address src); + + + protected: + // Next instructions require address alignment 16 bytes SSE mode. + // They should be called only from corresponding MacroAssembler instructions. + void andpd(XMMRegister dst, Address src); + void andps(XMMRegister dst, Address src); + void xorpd(XMMRegister dst, Address src); + void xorps(XMMRegister dst, Address src); + }; @@ -2175,9 +2346,15 @@ void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); } void andpd(XMMRegister dst, AddressLiteral src); + void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); } + void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); } + void andps(XMMRegister dst, AddressLiteral src); + + void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); } void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); } void comiss(XMMRegister dst, AddressLiteral src); + void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); } void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); } void comisd(XMMRegister dst, AddressLiteral src); @@ -2211,62 +2388,62 @@ void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); } void movss(XMMRegister dst, AddressLiteral src); - void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); } + void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); } void movlpd(XMMRegister dst, AddressLiteral src); public: void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); } void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); } - void addsd(XMMRegister dst, AddressLiteral src) { Assembler::addsd(dst, as_Address(src)); } + void addsd(XMMRegister dst, AddressLiteral src); void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); } void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); } - void addss(XMMRegister dst, AddressLiteral src) { Assembler::addss(dst, as_Address(src)); } + void addss(XMMRegister dst, AddressLiteral src); void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); } void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); } - void divsd(XMMRegister dst, AddressLiteral src) { Assembler::divsd(dst, as_Address(src)); } + void divsd(XMMRegister dst, AddressLiteral src); void divss(XMMRegister dst, XMMRegister src) { Assembler::divss(dst, src); } void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); } - void divss(XMMRegister dst, AddressLiteral src) { Assembler::divss(dst, as_Address(src)); } + void divss(XMMRegister dst, AddressLiteral src); void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); } void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); } void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); } - void movsd(XMMRegister dst, AddressLiteral src) { Assembler::movsd(dst, as_Address(src)); } + void movsd(XMMRegister dst, AddressLiteral src); void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); } void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); } - void mulsd(XMMRegister dst, AddressLiteral src) { Assembler::mulsd(dst, as_Address(src)); } + void mulsd(XMMRegister dst, AddressLiteral src); void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); } void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); } - void mulss(XMMRegister dst, AddressLiteral src) { Assembler::mulss(dst, as_Address(src)); } + void mulss(XMMRegister dst, AddressLiteral src); void sqrtsd(XMMRegister dst, XMMRegister src) { Assembler::sqrtsd(dst, src); } void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); } - void sqrtsd(XMMRegister dst, AddressLiteral src) { Assembler::sqrtsd(dst, as_Address(src)); } + void sqrtsd(XMMRegister dst, AddressLiteral src); void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); } void sqrtss(XMMRegister dst, Address src) { Assembler::sqrtss(dst, src); } - void sqrtss(XMMRegister dst, AddressLiteral src) { Assembler::sqrtss(dst, as_Address(src)); } + void sqrtss(XMMRegister dst, AddressLiteral src); void subsd(XMMRegister dst, XMMRegister src) { Assembler::subsd(dst, src); } void subsd(XMMRegister dst, Address src) { Assembler::subsd(dst, src); } - void subsd(XMMRegister dst, AddressLiteral src) { Assembler::subsd(dst, as_Address(src)); } + void subsd(XMMRegister dst, AddressLiteral src); void subss(XMMRegister dst, XMMRegister src) { Assembler::subss(dst, src); } void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); } - void subss(XMMRegister dst, AddressLiteral src) { Assembler::subss(dst, as_Address(src)); } + void subss(XMMRegister dst, AddressLiteral src); void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); } - void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); } + void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); } void ucomiss(XMMRegister dst, AddressLiteral src); void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); } - void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); } + void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); } void ucomisd(XMMRegister dst, AddressLiteral src); // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values @@ -2279,6 +2456,53 @@ void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); } void xorps(XMMRegister dst, AddressLiteral src); + // AVX 3-operands instructions + + void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); } + void vaddsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddsd(dst, nds, src); } + void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); } + void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); } + void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vandpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vandpd(dst, nds, src); } + void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vandps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vandps(dst, nds, src); } + void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); } + void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); } + void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); } + void vdivss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivss(dst, nds, src); } + void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); } + void vmulsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulsd(dst, nds, src); } + void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); } + void vmulss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulss(dst, nds, src); } + void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); } + void vsubsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubsd(dst, nds, src); } + void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); } + void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); } + void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vxorpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorpd(dst, nds, src); } + void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vxorps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorps(dst, nds, src); } + void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + // Data void cmov32( Condition cc, Register dst, Address src); diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/x86/vm/assembler_x86.inline.hpp --- a/src/cpu/x86/vm/assembler_x86.inline.hpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/x86/vm/assembler_x86.inline.hpp Thu Dec 29 11:37:50 2011 -0800 @@ -86,6 +86,7 @@ inline void Assembler::prefixq(Address adr, Register reg) {} inline void Assembler::prefix(Address adr, XMMRegister reg) {} +inline void Assembler::prefixq(Address adr, XMMRegister reg) {} #else inline void Assembler::emit_long64(jlong x) { *(jlong*) _code_pos = x; diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/x86/vm/c1_CodeStubs_x86.cpp --- a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -320,7 +320,7 @@ // begin_initialized_entry_offset has to fit in a byte. Also, we know it's not null. __ load_heap_oop_not_null(tmp2, Address(_obj, java_lang_Class::klass_offset_in_bytes())); __ get_thread(tmp); - __ cmpptr(tmp, Address(tmp2, instanceKlass::init_thread_offset_in_bytes() + sizeof(klassOopDesc))); + __ cmpptr(tmp, Address(tmp2, instanceKlass::init_thread_offset())); __ pop(tmp2); __ pop(tmp); __ jcc(Assembler::notEqual, call_patch); @@ -519,7 +519,7 @@ __ load_klass(tmp_reg, src_reg); - Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset_in_bytes() + sizeof(oopDesc)); + Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset()); __ cmpl(ref_type_adr, REF_NONE); __ jcc(Assembler::equal, _continuation); diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/x86/vm/c1_LIRAssembler_x86.cpp --- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -1558,7 +1558,7 @@ void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { if (op->init_check()) { __ cmpl(Address(op->klass()->as_register(), - instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)), + instanceKlass::init_state_offset()), instanceKlass::fully_initialized); add_debug_info_for_null_check_here(op->stub()->info()); __ jcc(Assembler::notEqual, *op->stub()->entry()); @@ -1730,7 +1730,7 @@ #else __ cmpoop(Address(klass_RInfo, k->super_check_offset()), k->constant_encoding()); #endif // _LP64 - if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) { + if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) { __ jcc(Assembler::notEqual, *failure_target); // successful cast, fall through to profile or jump } else { @@ -1842,7 +1842,7 @@ __ load_klass(klass_RInfo, value); // get instance klass (it's already uncompressed) - __ movptr(k_RInfo, Address(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc))); + __ movptr(k_RInfo, Address(k_RInfo, objArrayKlass::element_klass_offset())); // perform the fast part of the checking logic __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); // call out-of-line instance of __ check_klass_subtype_slow_path(...): @@ -3289,8 +3289,7 @@ } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { __ load_klass(tmp, dst); } - int lh_offset = klassOopDesc::header_size() * HeapWordSize + - Klass::layout_helper_offset_in_bytes(); + int lh_offset = in_bytes(Klass::layout_helper_offset()); Address klass_lh_addr(tmp, lh_offset); jint objArray_lh = Klass::array_layout_helper(T_OBJECT); __ cmpl(klass_lh_addr, objArray_lh); @@ -3307,9 +3306,9 @@ #ifndef _LP64 __ movptr(tmp, dst_klass_addr); - __ movptr(tmp, Address(tmp, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc))); + __ movptr(tmp, Address(tmp, objArrayKlass::element_klass_offset())); __ push(tmp); - __ movl(tmp, Address(tmp, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc))); + __ movl(tmp, Address(tmp, Klass::super_check_offset_offset())); __ push(tmp); __ push(length); __ lea(tmp, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); @@ -3333,15 +3332,15 @@ // Allocate abi space for args but be sure to keep stack aligned __ subptr(rsp, 6*wordSize); __ load_klass(c_rarg3, dst); - __ movptr(c_rarg3, Address(c_rarg3, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc))); + __ movptr(c_rarg3, Address(c_rarg3, objArrayKlass::element_klass_offset())); store_parameter(c_rarg3, 4); - __ movl(c_rarg3, Address(c_rarg3, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc))); + __ movl(c_rarg3, Address(c_rarg3, Klass::super_check_offset_offset())); __ call(RuntimeAddress(copyfunc_addr)); __ addptr(rsp, 6*wordSize); #else __ load_klass(c_rarg4, dst); - __ movptr(c_rarg4, Address(c_rarg4, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc))); - __ movl(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc))); + __ movptr(c_rarg4, Address(c_rarg4, objArrayKlass::element_klass_offset())); + __ movl(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset())); __ call(RuntimeAddress(copyfunc_addr)); #endif diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/x86/vm/c1_MacroAssembler_x86.cpp --- a/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -150,7 +150,7 @@ assert_different_registers(obj, klass, len); if (UseBiasedLocking && !len->is_valid()) { assert_different_registers(obj, klass, len, t1, t2); - movptr(t1, Address(klass, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); + movptr(t1, Address(klass, Klass::prototype_header_offset())); movptr(Address(obj, oopDesc::mark_offset_in_bytes()), t1); } else { // This assumes that all prototype bits fit in an int32_t diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/x86/vm/c1_Runtime1_x86.cpp --- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -1011,7 +1011,7 @@ if (id == fast_new_instance_init_check_id) { // make sure the klass is initialized - __ cmpl(Address(klass, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)), instanceKlass::fully_initialized); + __ cmpl(Address(klass, instanceKlass::init_state_offset()), instanceKlass::fully_initialized); __ jcc(Assembler::notEqual, slow_path); } @@ -1019,7 +1019,7 @@ // assert object can be fast path allocated { Label ok, not_ok; - __ movl(obj_size, Address(klass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc))); + __ movl(obj_size, Address(klass, Klass::layout_helper_offset())); __ cmpl(obj_size, 0); // make sure it's an instance (LH > 0) __ jcc(Assembler::lessEqual, not_ok); __ testl(obj_size, Klass::_lh_instance_slow_path_bit); @@ -1040,7 +1040,7 @@ __ bind(retry_tlab); // get the instance size (size is postive so movl is fine for 64bit) - __ movl(obj_size, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes())); + __ movl(obj_size, Address(klass, Klass::layout_helper_offset())); __ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path); @@ -1052,7 +1052,7 @@ __ bind(try_eden); // get the instance size (size is postive so movl is fine for 64bit) - __ movl(obj_size, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes())); + __ movl(obj_size, Address(klass, Klass::layout_helper_offset())); __ eden_allocate(obj, obj_size, 0, t1, slow_path); __ incr_allocated_bytes(thread, obj_size, 0); @@ -1119,7 +1119,7 @@ { Label ok; Register t0 = obj; - __ movl(t0, Address(klass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc))); + __ movl(t0, Address(klass, Klass::layout_helper_offset())); __ sarl(t0, Klass::_lh_array_tag_shift); int tag = ((id == new_type_array_id) ? Klass::_lh_array_tag_type_value @@ -1153,7 +1153,7 @@ // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) // since size is positive movl does right thing on 64bit - __ movl(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes())); + __ movl(t1, Address(klass, Klass::layout_helper_offset())); // since size is postive movl does right thing on 64bit __ movl(arr_size, length); assert(t1 == rcx, "fixed register usage"); @@ -1167,7 +1167,7 @@ __ tlab_allocate(obj, arr_size, 0, t1, t2, slow_path); // preserves arr_size __ initialize_header(obj, klass, length, t1, t2); - __ movb(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes() + (Klass::_lh_header_size_shift / BitsPerByte))); + __ movb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte))); assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); __ andptr(t1, Klass::_lh_header_size_mask); @@ -1180,7 +1180,7 @@ __ bind(try_eden); // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) // since size is positive movl does right thing on 64bit - __ movl(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes())); + __ movl(t1, Address(klass, Klass::layout_helper_offset())); // since size is postive movl does right thing on 64bit __ movl(arr_size, length); assert(t1 == rcx, "fixed register usage"); @@ -1195,7 +1195,7 @@ __ incr_allocated_bytes(thread, arr_size, 0); __ initialize_header(obj, klass, length, t1, t2); - __ movb(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes() + (Klass::_lh_header_size_shift / BitsPerByte))); + __ movb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte))); assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); __ andptr(t1, Klass::_lh_header_size_mask); @@ -1267,7 +1267,7 @@ Label register_finalizer; Register t = rsi; __ load_klass(t, rax); - __ movl(t, Address(t, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc))); + __ movl(t, Address(t, Klass::access_flags_offset())); __ testl(t, JVM_ACC_HAS_FINALIZER); __ jcc(Assembler::notZero, register_finalizer); __ ret(0); diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/x86/vm/cppInterpreter_x86.cpp --- a/src/cpu/x86/vm/cppInterpreter_x86.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/x86/vm/cppInterpreter_x86.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -511,7 +511,7 @@ // get synchronization object Label done; - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); __ movl(rax, access_flags); __ testl(rax, JVM_ACC_STATIC); __ movptr(rax, Address(locals, 0)); // get receiver (assume this is frequent case) @@ -763,7 +763,7 @@ #endif // ASSERT // get synchronization object { Label done; - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); __ movl(rax, access_flags); __ movptr(rdi, STATE(_locals)); // prepare to get receiver (assume common case) __ testl(rax, JVM_ACC_STATIC); @@ -1180,7 +1180,7 @@ // pass mirror handle if static call { Label L; - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); __ movl(t, Address(method, methodOopDesc::access_flags_offset())); __ testl(t, JVM_ACC_STATIC); __ jcc(Assembler::zero, L); diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/x86/vm/methodHandles_x86.cpp --- a/src/cpu/x86/vm/methodHandles_x86.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/x86/vm/methodHandles_x86.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -1160,7 +1160,7 @@ Address rcx_amh_conversion( rcx_recv, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes() ); Address vmarg; // __ argument_address(vmargslot) - const int java_mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); + const int java_mirror_offset = in_bytes(Klass::java_mirror_offset()); if (have_entry(ek)) { __ nop(); // empty stubs make SG sick diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/x86/vm/nativeInst_x86.cpp --- a/src/cpu/x86/vm/nativeInst_x86.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/x86/vm/nativeInst_x86.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -237,9 +237,21 @@ int off = 0; u_char instr_0 = ubyte_at(off); + // See comment in Assembler::locate_operand() about VEX prefixes. + if (instr_0 == instruction_VEX_prefix_2bytes) { + assert((UseAVX > 0), "shouldn't have VEX prefix"); + NOT_LP64(assert((0xC0 & ubyte_at(1)) == 0xC0, "shouldn't have LDS and LES instructions")); + return 2; + } + if (instr_0 == instruction_VEX_prefix_3bytes) { + assert((UseAVX > 0), "shouldn't have VEX prefix"); + NOT_LP64(assert((0xC0 & ubyte_at(1)) == 0xC0, "shouldn't have LDS and LES instructions")); + return 3; + } + // First check to see if we have a (prefixed or not) xor - if ( instr_0 >= instruction_prefix_wide_lo && // 0x40 - instr_0 <= instruction_prefix_wide_hi) { // 0x4f + if (instr_0 >= instruction_prefix_wide_lo && // 0x40 + instr_0 <= instruction_prefix_wide_hi) { // 0x4f off++; instr_0 = ubyte_at(off); } @@ -256,13 +268,13 @@ instr_0 = ubyte_at(off); } - if ( instr_0 == instruction_code_xmm_ss_prefix || // 0xf3 + if ( instr_0 == instruction_code_xmm_ss_prefix || // 0xf3 instr_0 == instruction_code_xmm_sd_prefix) { // 0xf2 off++; instr_0 = ubyte_at(off); } - if ( instr_0 >= instruction_prefix_wide_lo && // 0x40 + if ( instr_0 >= instruction_prefix_wide_lo && // 0x40 instr_0 <= instruction_prefix_wide_hi) { // 0x4f off++; instr_0 = ubyte_at(off); diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/x86/vm/nativeInst_x86.hpp --- a/src/cpu/x86/vm/nativeInst_x86.hpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/x86/vm/nativeInst_x86.hpp Thu Dec 29 11:37:50 2011 -0800 @@ -287,6 +287,9 @@ instruction_code_xmm_store = 0x11, instruction_code_xmm_lpd = 0x12, + instruction_VEX_prefix_2bytes = Assembler::VEX_2bytes, + instruction_VEX_prefix_3bytes = Assembler::VEX_3bytes, + instruction_size = 4, instruction_offset = 0, data_offset = 2, diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/x86/vm/register_definitions_x86.cpp --- a/src/cpu/x86/vm/register_definitions_x86.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/x86/vm/register_definitions_x86.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -53,6 +53,7 @@ REGISTER_DEFINITION(Register, r15); #endif // AMD64 +REGISTER_DEFINITION(XMMRegister, xnoreg); REGISTER_DEFINITION(XMMRegister, xmm0 ); REGISTER_DEFINITION(XMMRegister, xmm1 ); REGISTER_DEFINITION(XMMRegister, xmm2 ); @@ -115,6 +116,7 @@ REGISTER_DEFINITION(Register, r15_thread); #endif // AMD64 +REGISTER_DEFINITION(MMXRegister, mnoreg ); REGISTER_DEFINITION(MMXRegister, mmx0 ); REGISTER_DEFINITION(MMXRegister, mmx1 ); REGISTER_DEFINITION(MMXRegister, mmx2 ); diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/x86/vm/stubGenerator_x86_32.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -1374,8 +1374,7 @@ // L_success, L_failure, NULL); assert_different_registers(sub_klass, temp); - int sc_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::secondary_super_cache_offset_in_bytes()); + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); // if the pointers are equal, we are done (e.g., String[] elements) __ cmpptr(sub_klass, super_klass_addr); @@ -1787,8 +1786,7 @@ // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 // - int lh_offset = klassOopDesc::header_size() * HeapWordSize + - Klass::layout_helper_offset_in_bytes(); + int lh_offset = in_bytes(Klass::layout_helper_offset()); Address src_klass_lh_addr(rcx_src_klass, lh_offset); // Handle objArrays completely differently... @@ -1914,10 +1912,8 @@ // live at this point: rcx_src_klass, dst[_pos], src[_pos] { // Handy offsets: - int ek_offset = (klassOopDesc::header_size() * HeapWordSize + - objArrayKlass::element_klass_offset_in_bytes()); - int sco_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::super_check_offset_offset_in_bytes()); + int ek_offset = in_bytes(objArrayKlass::element_klass_offset()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); Register rsi_dst_klass = rsi; Register rdi_temp = rdi; diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/x86/vm/stubGenerator_x86_64.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -2261,8 +2261,7 @@ // The ckoff and ckval must be mutually consistent, // even though caller generates both. { Label L; - int sco_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::super_check_offset_offset_in_bytes()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); __ cmpl(ckoff, Address(ckval, sco_offset)); __ jcc(Assembler::equal, L); __ stop("super_check_offset inconsistent"); @@ -2572,8 +2571,7 @@ // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 // - const int lh_offset = klassOopDesc::header_size() * HeapWordSize + - Klass::layout_helper_offset_in_bytes(); + const int lh_offset = in_bytes(Klass::layout_helper_offset()); // Handle objArrays completely differently... const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); @@ -2722,15 +2720,13 @@ assert_clean_int(count, sco_temp); // Generate the type check. - const int sco_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::super_check_offset_offset_in_bytes()); + const int sco_offset = in_bytes(Klass::super_check_offset_offset()); __ movl(sco_temp, Address(r11_dst_klass, sco_offset)); assert_clean_int(sco_temp, rax); generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy); // Fetch destination element klass from the objArrayKlass header. - int ek_offset = (klassOopDesc::header_size() * HeapWordSize + - objArrayKlass::element_klass_offset_in_bytes()); + int ek_offset = in_bytes(objArrayKlass::element_klass_offset()); __ movptr(r11_dst_klass, Address(r11_dst_klass, ek_offset)); __ movl( sco_temp, Address(r11_dst_klass, sco_offset)); assert_clean_int(sco_temp, rax); diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/x86/vm/templateInterpreter_x86_32.cpp --- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -552,7 +552,7 @@ #endif // ASSERT // get synchronization object { Label done; - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); __ movl(rax, access_flags); __ testl(rax, JVM_ACC_STATIC); __ movptr(rax, Address(rdi, Interpreter::local_offset_in_bytes(0))); // get receiver (assume this is frequent case) @@ -1012,7 +1012,7 @@ // pass mirror handle if static call { Label L; - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); __ movl(t, Address(method, methodOopDesc::access_flags_offset())); __ testl(t, JVM_ACC_STATIC); __ jcc(Assembler::zero, L); diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/x86/vm/templateInterpreter_x86_64.cpp --- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -505,8 +505,7 @@ // get synchronization object { - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + - Klass::java_mirror_offset_in_bytes(); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); Label done; __ movl(rax, access_flags); __ testl(rax, JVM_ACC_STATIC); @@ -1006,8 +1005,7 @@ // pass mirror handle if static call { Label L; - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + - Klass::java_mirror_offset_in_bytes(); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); __ movl(t, Address(method, methodOopDesc::access_flags_offset())); __ testl(t, JVM_ACC_STATIC); __ jcc(Assembler::zero, L); diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/x86/vm/templateTable_x86_32.cpp --- a/src/cpu/x86/vm/templateTable_x86_32.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/x86/vm/templateTable_x86_32.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -980,7 +980,7 @@ __ load_klass(rbx, rax); // Move superklass into EAX __ load_klass(rax, rdx); - __ movptr(rax, Address(rax, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes())); + __ movptr(rax, Address(rax, objArrayKlass::element_klass_offset())); // Compress array+index*wordSize+12 into a single register. Frees ECX. __ lea(rdx, element_address); @@ -2033,7 +2033,7 @@ assert(state == vtos, "only valid state"); __ movptr(rax, aaddress(0)); __ load_klass(rdi, rax); - __ movl(rdi, Address(rdi, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc))); + __ movl(rdi, Address(rdi, Klass::access_flags_offset())); __ testl(rdi, JVM_ACC_HAS_FINALIZER); Label skip_register_finalizer; __ jcc(Assembler::zero, skip_register_finalizer); @@ -3188,11 +3188,11 @@ // make sure klass is initialized & doesn't have finalizer // make sure klass is fully initialized - __ cmpl(Address(rcx, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)), instanceKlass::fully_initialized); + __ cmpl(Address(rcx, instanceKlass::init_state_offset()), instanceKlass::fully_initialized); __ jcc(Assembler::notEqual, slow_case); // get instance_size in instanceKlass (scaled to a count of bytes) - __ movl(rdx, Address(rcx, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc))); + __ movl(rdx, Address(rcx, Klass::layout_helper_offset())); // test to see if it has a finalizer or is malformed in some way __ testl(rdx, Klass::_lh_instance_slow_path_bit); __ jcc(Assembler::notZero, slow_case); @@ -3293,7 +3293,7 @@ __ bind(initialize_header); if (UseBiasedLocking) { __ pop(rcx); // get saved klass back in the register. - __ movptr(rbx, Address(rcx, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); + __ movptr(rbx, Address(rcx, Klass::prototype_header_offset())); __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()), rbx); } else { __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()), diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/x86/vm/templateTable_x86_64.cpp --- a/src/cpu/x86/vm/templateTable_x86_64.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/x86/vm/templateTable_x86_64.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -1004,8 +1004,7 @@ // Move superklass into rax __ load_klass(rax, rdx); __ movptr(rax, Address(rax, - sizeof(oopDesc) + - objArrayKlass::element_klass_offset_in_bytes())); + objArrayKlass::element_klass_offset())); // Compress array + index*oopSize + 12 into a single register. Frees rcx. __ lea(rdx, element_address); @@ -2067,7 +2066,7 @@ assert(state == vtos, "only valid state"); __ movptr(c_rarg1, aaddress(0)); __ load_klass(rdi, c_rarg1); - __ movl(rdi, Address(rdi, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc))); + __ movl(rdi, Address(rdi, Klass::access_flags_offset())); __ testl(rdi, JVM_ACC_HAS_FINALIZER); Label skip_register_finalizer; __ jcc(Assembler::zero, skip_register_finalizer); @@ -3236,15 +3235,14 @@ // make sure klass is initialized & doesn't have finalizer // make sure klass is fully initialized __ cmpl(Address(rsi, - instanceKlass::init_state_offset_in_bytes() + - sizeof(oopDesc)), + instanceKlass::init_state_offset()), instanceKlass::fully_initialized); __ jcc(Assembler::notEqual, slow_case); // get instance_size in instanceKlass (scaled to a count of bytes) __ movl(rdx, Address(rsi, - Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc))); + Klass::layout_helper_offset())); // test to see if it has a finalizer or is malformed in some way __ testl(rdx, Klass::_lh_instance_slow_path_bit); __ jcc(Assembler::notZero, slow_case); @@ -3337,7 +3335,7 @@ // initialize object header only. __ bind(initialize_header); if (UseBiasedLocking) { - __ movptr(rscratch1, Address(rsi, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); + __ movptr(rscratch1, Address(rsi, Klass::prototype_header_offset())); __ movptr(Address(rax, oopDesc::mark_offset_in_bytes()), rscratch1); } else { __ movptr(Address(rax, oopDesc::mark_offset_in_bytes()), diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/x86/vm/vm_version_x86.cpp --- a/src/cpu/x86/vm/vm_version_x86.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -50,7 +50,7 @@ VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; static BufferBlob* stub_blob; -static const int stub_size = 400; +static const int stub_size = 500; extern "C" { typedef void (*getPsrInfo_stub_t)(void*); @@ -73,7 +73,7 @@ const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4; - Label ext_cpuid1, ext_cpuid5, done; + Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, done; StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); # define __ _masm-> @@ -229,6 +229,41 @@ __ movl(Address(rsi, 8), rcx); __ movl(Address(rsi,12), rdx); + // + // Check if OS has enabled XGETBV instruction to access XCR0 + // (OSXSAVE feature flag) and CPU supports AVX + // + __ andl(rcx, 0x18000000); + __ cmpl(rcx, 0x18000000); + __ jccb(Assembler::notEqual, sef_cpuid); + + // + // XCR0, XFEATURE_ENABLED_MASK register + // + __ xorl(rcx, rcx); // zero for XCR0 register + __ xgetbv(); + __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); + __ movl(Address(rsi, 0), rax); + __ movl(Address(rsi, 4), rdx); + + // + // cpuid(0x7) Structured Extended Features + // + __ bind(sef_cpuid); + __ movl(rax, 7); + __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported? + __ jccb(Assembler::greater, ext_cpuid); + + __ xorl(rcx, rcx); + __ cpuid(); + __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); + __ movl(Address(rsi, 0), rax); + __ movl(Address(rsi, 4), rbx); + + // + // Extended cpuid(0x80000000) + // + __ bind(ext_cpuid); __ movl(rax, 0x80000000); __ cpuid(); __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? @@ -359,13 +394,19 @@ if (UseSSE < 1) _cpuFeatures &= ~CPU_SSE; + if (UseAVX < 2) + _cpuFeatures &= ~CPU_AVX2; + + if (UseAVX < 1) + _cpuFeatures &= ~CPU_AVX; + if (logical_processors_per_package() == 1) { // HT processor could be installed on a system which doesn't support HT. _cpuFeatures &= ~CPU_HT; } char buf[256]; - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", cores_per_cpu(), threads_per_core(), cpu_family(), _model, _stepping, (supports_cmov() ? ", cmov" : ""), @@ -379,6 +420,8 @@ (supports_sse4_1() ? ", sse4.1" : ""), (supports_sse4_2() ? ", sse4.2" : ""), (supports_popcnt() ? ", popcnt" : ""), + (supports_avx() ? ", avx" : ""), + (supports_avx2() ? ", avx2" : ""), (supports_mmx_ext() ? ", mmxext" : ""), (supports_3dnow_prefetch() ? ", 3dnowpref" : ""), (supports_lzcnt() ? ", lzcnt": ""), @@ -389,17 +432,24 @@ // UseSSE is set to the smaller of what hardware supports and what // the command line requires. I.e., you cannot set UseSSE to 2 on // older Pentiums which do not support it. - if( UseSSE > 4 ) UseSSE=4; - if( UseSSE < 0 ) UseSSE=0; - if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support + if (UseSSE > 4) UseSSE=4; + if (UseSSE < 0) UseSSE=0; + if (!supports_sse4_1()) // Drop to 3 if no SSE4 support UseSSE = MIN2((intx)3,UseSSE); - if( !supports_sse3() ) // Drop to 2 if no SSE3 support + if (!supports_sse3()) // Drop to 2 if no SSE3 support UseSSE = MIN2((intx)2,UseSSE); - if( !supports_sse2() ) // Drop to 1 if no SSE2 support + if (!supports_sse2()) // Drop to 1 if no SSE2 support UseSSE = MIN2((intx)1,UseSSE); - if( !supports_sse () ) // Drop to 0 if no SSE support + if (!supports_sse ()) // Drop to 0 if no SSE support UseSSE = 0; + if (UseAVX > 2) UseAVX=2; + if (UseAVX < 0) UseAVX=0; + if (!supports_avx2()) // Drop to 1 if no AVX2 support + UseAVX = MIN2((intx)1,UseAVX); + if (!supports_avx ()) // Drop to 0 if no AVX support + UseAVX = 0; + // On new cpus instructions which update whole XMM register should be used // to prevent partial register stall due to dependencies on high half. // @@ -534,6 +584,9 @@ if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { UsePopCountInstruction = true; } + } else if (UsePopCountInstruction) { + warning("POPCNT instruction is not available on this CPU"); + FLAG_SET_DEFAULT(UsePopCountInstruction, false); } #ifdef COMPILER2 @@ -605,7 +658,11 @@ if (PrintMiscellaneous && Verbose) { tty->print_cr("Logical CPUs per core: %u", logical_processors_per_package()); - tty->print_cr("UseSSE=%d",UseSSE); + tty->print("UseSSE=%d",UseSSE); + if (UseAVX > 0) { + tty->print(" UseAVX=%d",UseAVX); + } + tty->cr(); tty->print("Allocation"); if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) { tty->print_cr(": no prefetching"); diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/x86/vm/vm_version_x86.hpp --- a/src/cpu/x86/vm/vm_version_x86.hpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/x86/vm/vm_version_x86.hpp Thu Dec 29 11:37:50 2011 -0800 @@ -78,7 +78,10 @@ sse4_2 : 1, : 2, popcnt : 1, - : 8; + : 3, + osxsave : 1, + avx : 1, + : 3; } bits; }; @@ -176,6 +179,34 @@ } bits; }; + union SefCpuid7Eax { + uint32_t value; + }; + + union SefCpuid7Ebx { + uint32_t value; + struct { + uint32_t fsgsbase : 1, + : 2, + bmi1 : 1, + : 1, + avx2 : 1, + : 2, + bmi2 : 1, + : 23; + } bits; + }; + + union XemXcr0Eax { + uint32_t value; + struct { + uint32_t x87 : 1, + sse : 1, + ymm : 1, + : 29; + } bits; + }; + protected: static int _cpu; static int _model; @@ -200,7 +231,9 @@ CPU_SSE4_1 = (1 << 11), CPU_SSE4_2 = (1 << 12), CPU_POPCNT = (1 << 13), - CPU_LZCNT = (1 << 14) + CPU_LZCNT = (1 << 14), + CPU_AVX = (1 << 15), + CPU_AVX2 = (1 << 16) } cpuFeatureFlags; // cpuid information block. All info derived from executing cpuid with @@ -228,6 +261,12 @@ uint32_t dcp_cpuid4_ecx; // unused currently uint32_t dcp_cpuid4_edx; // unused currently + // cpuid function 7 (structured extended features) + SefCpuid7Eax sef_cpuid7_eax; + SefCpuid7Ebx sef_cpuid7_ebx; + uint32_t sef_cpuid7_ecx; // unused currently + uint32_t sef_cpuid7_edx; // unused currently + // cpuid function 0xB (processor topology) // ecx = 0 uint32_t tpl_cpuidB0_eax; @@ -275,6 +314,10 @@ uint32_t ext_cpuid8_ebx; // reserved ExtCpuid8Ecx ext_cpuid8_ecx; uint32_t ext_cpuid8_edx; // reserved + + // extended control register XCR0 (the XFEATURE_ENABLED_MASK register) + XemXcr0Eax xem_xcr0_eax; + uint32_t xem_xcr0_edx; // reserved }; // The actual cpuid info block @@ -328,6 +371,14 @@ result |= CPU_SSE4_2; if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0) result |= CPU_POPCNT; + if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 && + _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 && + _cpuid_info.xem_xcr0_eax.bits.sse != 0 && + _cpuid_info.xem_xcr0_eax.bits.ymm != 0) { + result |= CPU_AVX; + if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0) + result |= CPU_AVX2; + } // AMD features. if (is_amd()) { @@ -350,12 +401,14 @@ static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); } static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); } + static ByteSize sef_cpuid7_offset() { return byte_offset_of(CpuidInfo, sef_cpuid7_eax); } static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); } static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); } static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); } static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); } static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); } static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); } + static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); } // Initialization static void initialize(); @@ -447,6 +500,8 @@ static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } static bool supports_popcnt() { return (_cpuFeatures & CPU_POPCNT) != 0; } + static bool supports_avx() { return (_cpuFeatures & CPU_AVX) != 0; } + static bool supports_avx2() { return (_cpuFeatures & CPU_AVX2) != 0; } // // AMD features // diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/x86/vm/x86.ad --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/x86/vm/x86.ad Thu Dec 29 11:37:50 2011 -0800 @@ -0,0 +1,777 @@ +// +// Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +// X86 Common Architecture Description File + +source %{ + // Float masks come from different places depending on platform. +#ifdef _LP64 + static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } + static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } + static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } + static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } +#else + static address float_signmask() { return (address)float_signmask_pool; } + static address float_signflip() { return (address)float_signflip_pool; } + static address double_signmask() { return (address)double_signmask_pool; } + static address double_signflip() { return (address)double_signflip_pool; } +#endif +%} + +// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) + +instruct addF_reg(regF dst, regF src) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (AddF dst src)); + + format %{ "addss $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ addss($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct addF_mem(regF dst, memory src) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (AddF dst (LoadF src))); + + format %{ "addss $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ addss($dst$$XMMRegister, $src$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct addF_imm(regF dst, immF con) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (AddF dst con)); + format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} + ins_cost(150); + ins_encode %{ + __ addss($dst$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct vaddF_reg(regF dst, regF src1, regF src2) %{ + predicate(UseAVX > 0); + match(Set dst (AddF src1 src2)); + + format %{ "vaddss $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct vaddF_mem(regF dst, regF src1, memory src2) %{ + predicate(UseAVX > 0); + match(Set dst (AddF src1 (LoadF src2))); + + format %{ "vaddss $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct vaddF_imm(regF dst, regF src, immF con) %{ + predicate(UseAVX > 0); + match(Set dst (AddF src con)); + + format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} + ins_cost(150); + ins_encode %{ + __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct addD_reg(regD dst, regD src) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (AddD dst src)); + + format %{ "addsd $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ addsd($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct addD_mem(regD dst, memory src) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (AddD dst (LoadD src))); + + format %{ "addsd $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ addsd($dst$$XMMRegister, $src$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct addD_imm(regD dst, immD con) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (AddD dst con)); + format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} + ins_cost(150); + ins_encode %{ + __ addsd($dst$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct vaddD_reg(regD dst, regD src1, regD src2) %{ + predicate(UseAVX > 0); + match(Set dst (AddD src1 src2)); + + format %{ "vaddsd $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct vaddD_mem(regD dst, regD src1, memory src2) %{ + predicate(UseAVX > 0); + match(Set dst (AddD src1 (LoadD src2))); + + format %{ "vaddsd $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct vaddD_imm(regD dst, regD src, immD con) %{ + predicate(UseAVX > 0); + match(Set dst (AddD src con)); + + format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} + ins_cost(150); + ins_encode %{ + __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct subF_reg(regF dst, regF src) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (SubF dst src)); + + format %{ "subss $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ subss($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct subF_mem(regF dst, memory src) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (SubF dst (LoadF src))); + + format %{ "subss $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ subss($dst$$XMMRegister, $src$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct subF_imm(regF dst, immF con) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (SubF dst con)); + format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} + ins_cost(150); + ins_encode %{ + __ subss($dst$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct vsubF_reg(regF dst, regF src1, regF src2) %{ + predicate(UseAVX > 0); + match(Set dst (SubF src1 src2)); + + format %{ "vsubss $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct vsubF_mem(regF dst, regF src1, memory src2) %{ + predicate(UseAVX > 0); + match(Set dst (SubF src1 (LoadF src2))); + + format %{ "vsubss $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct vsubF_imm(regF dst, regF src, immF con) %{ + predicate(UseAVX > 0); + match(Set dst (SubF src con)); + + format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} + ins_cost(150); + ins_encode %{ + __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct subD_reg(regD dst, regD src) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (SubD dst src)); + + format %{ "subsd $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ subsd($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct subD_mem(regD dst, memory src) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (SubD dst (LoadD src))); + + format %{ "subsd $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ subsd($dst$$XMMRegister, $src$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct subD_imm(regD dst, immD con) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (SubD dst con)); + format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} + ins_cost(150); + ins_encode %{ + __ subsd($dst$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct vsubD_reg(regD dst, regD src1, regD src2) %{ + predicate(UseAVX > 0); + match(Set dst (SubD src1 src2)); + + format %{ "vsubsd $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct vsubD_mem(regD dst, regD src1, memory src2) %{ + predicate(UseAVX > 0); + match(Set dst (SubD src1 (LoadD src2))); + + format %{ "vsubsd $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct vsubD_imm(regD dst, regD src, immD con) %{ + predicate(UseAVX > 0); + match(Set dst (SubD src con)); + + format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} + ins_cost(150); + ins_encode %{ + __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct mulF_reg(regF dst, regF src) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (MulF dst src)); + + format %{ "mulss $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ mulss($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct mulF_mem(regF dst, memory src) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (MulF dst (LoadF src))); + + format %{ "mulss $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ mulss($dst$$XMMRegister, $src$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct mulF_imm(regF dst, immF con) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (MulF dst con)); + format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} + ins_cost(150); + ins_encode %{ + __ mulss($dst$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulF_reg(regF dst, regF src1, regF src2) %{ + predicate(UseAVX > 0); + match(Set dst (MulF src1 src2)); + + format %{ "vmulss $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulF_mem(regF dst, regF src1, memory src2) %{ + predicate(UseAVX > 0); + match(Set dst (MulF src1 (LoadF src2))); + + format %{ "vmulss $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulF_imm(regF dst, regF src, immF con) %{ + predicate(UseAVX > 0); + match(Set dst (MulF src con)); + + format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} + ins_cost(150); + ins_encode %{ + __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct mulD_reg(regD dst, regD src) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (MulD dst src)); + + format %{ "mulsd $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ mulsd($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct mulD_mem(regD dst, memory src) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (MulD dst (LoadD src))); + + format %{ "mulsd $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ mulsd($dst$$XMMRegister, $src$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct mulD_imm(regD dst, immD con) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (MulD dst con)); + format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} + ins_cost(150); + ins_encode %{ + __ mulsd($dst$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulD_reg(regD dst, regD src1, regD src2) %{ + predicate(UseAVX > 0); + match(Set dst (MulD src1 src2)); + + format %{ "vmulsd $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulD_mem(regD dst, regD src1, memory src2) %{ + predicate(UseAVX > 0); + match(Set dst (MulD src1 (LoadD src2))); + + format %{ "vmulsd $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulD_imm(regD dst, regD src, immD con) %{ + predicate(UseAVX > 0); + match(Set dst (MulD src con)); + + format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} + ins_cost(150); + ins_encode %{ + __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct divF_reg(regF dst, regF src) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (DivF dst src)); + + format %{ "divss $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ divss($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct divF_mem(regF dst, memory src) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (DivF dst (LoadF src))); + + format %{ "divss $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ divss($dst$$XMMRegister, $src$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct divF_imm(regF dst, immF con) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (DivF dst con)); + format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} + ins_cost(150); + ins_encode %{ + __ divss($dst$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct vdivF_reg(regF dst, regF src1, regF src2) %{ + predicate(UseAVX > 0); + match(Set dst (DivF src1 src2)); + + format %{ "vdivss $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct vdivF_mem(regF dst, regF src1, memory src2) %{ + predicate(UseAVX > 0); + match(Set dst (DivF src1 (LoadF src2))); + + format %{ "vdivss $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct vdivF_imm(regF dst, regF src, immF con) %{ + predicate(UseAVX > 0); + match(Set dst (DivF src con)); + + format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} + ins_cost(150); + ins_encode %{ + __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct divD_reg(regD dst, regD src) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (DivD dst src)); + + format %{ "divsd $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ divsd($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct divD_mem(regD dst, memory src) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (DivD dst (LoadD src))); + + format %{ "divsd $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ divsd($dst$$XMMRegister, $src$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct divD_imm(regD dst, immD con) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (DivD dst con)); + format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} + ins_cost(150); + ins_encode %{ + __ divsd($dst$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct vdivD_reg(regD dst, regD src1, regD src2) %{ + predicate(UseAVX > 0); + match(Set dst (DivD src1 src2)); + + format %{ "vdivsd $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct vdivD_mem(regD dst, regD src1, memory src2) %{ + predicate(UseAVX > 0); + match(Set dst (DivD src1 (LoadD src2))); + + format %{ "vdivsd $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct vdivD_imm(regD dst, regD src, immD con) %{ + predicate(UseAVX > 0); + match(Set dst (DivD src con)); + + format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} + ins_cost(150); + ins_encode %{ + __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct absF_reg(regF dst) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (AbsF dst)); + ins_cost(150); + format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} + ins_encode %{ + __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); + %} + ins_pipe(pipe_slow); +%} + +instruct vabsF_reg(regF dst, regF src) %{ + predicate(UseAVX > 0); + match(Set dst (AbsF src)); + ins_cost(150); + format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} + ins_encode %{ + __ vandps($dst$$XMMRegister, $src$$XMMRegister, + ExternalAddress(float_signmask())); + %} + ins_pipe(pipe_slow); +%} + +instruct absD_reg(regD dst) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (AbsD dst)); + ins_cost(150); + format %{ "andpd $dst, [0x7fffffffffffffff]\t" + "# abs double by sign masking" %} + ins_encode %{ + __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); + %} + ins_pipe(pipe_slow); +%} + +instruct vabsD_reg(regD dst, regD src) %{ + predicate(UseAVX > 0); + match(Set dst (AbsD src)); + ins_cost(150); + format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" + "# abs double by sign masking" %} + ins_encode %{ + __ vandpd($dst$$XMMRegister, $src$$XMMRegister, + ExternalAddress(double_signmask())); + %} + ins_pipe(pipe_slow); +%} + +instruct negF_reg(regF dst) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (NegF dst)); + ins_cost(150); + format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} + ins_encode %{ + __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); + %} + ins_pipe(pipe_slow); +%} + +instruct vnegF_reg(regF dst, regF src) %{ + predicate(UseAVX > 0); + match(Set dst (NegF src)); + ins_cost(150); + format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %} + ins_encode %{ + __ vxorps($dst$$XMMRegister, $src$$XMMRegister, + ExternalAddress(float_signflip())); + %} + ins_pipe(pipe_slow); +%} + +instruct negD_reg(regD dst) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (NegD dst)); + ins_cost(150); + format %{ "xorpd $dst, [0x8000000000000000]\t" + "# neg double by sign flipping" %} + ins_encode %{ + __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); + %} + ins_pipe(pipe_slow); +%} + +instruct vnegD_reg(regD dst, regD src) %{ + predicate(UseAVX > 0); + match(Set dst (NegD src)); + ins_cost(150); + format %{ "vxorpd $dst, $src, [0x8000000000000000]\t" + "# neg double by sign flipping" %} + ins_encode %{ + __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, + ExternalAddress(double_signflip())); + %} + ins_pipe(pipe_slow); +%} + +instruct sqrtF_reg(regF dst, regF src) %{ + predicate(UseSSE>=1); + match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); + + format %{ "sqrtss $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct sqrtF_mem(regF dst, memory src) %{ + predicate(UseSSE>=1); + match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); + + format %{ "sqrtss $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ sqrtss($dst$$XMMRegister, $src$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct sqrtF_imm(regF dst, immF con) %{ + predicate(UseSSE>=1); + match(Set dst (ConvD2F (SqrtD (ConvF2D con)))); + format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} + ins_cost(150); + ins_encode %{ + __ sqrtss($dst$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct sqrtD_reg(regD dst, regD src) %{ + predicate(UseSSE>=2); + match(Set dst (SqrtD src)); + + format %{ "sqrtsd $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct sqrtD_mem(regD dst, memory src) %{ + predicate(UseSSE>=2); + match(Set dst (SqrtD (LoadD src))); + + format %{ "sqrtsd $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ sqrtsd($dst$$XMMRegister, $src$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct sqrtD_imm(regD dst, immD con) %{ + predicate(UseSSE>=2); + match(Set dst (SqrtD con)); + format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} + ins_cost(150); + ins_encode %{ + __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/x86/vm/x86_32.ad --- a/src/cpu/x86/vm/x86_32.ad Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/x86/vm/x86_32.ad Thu Dec 29 11:37:50 2011 -0800 @@ -281,7 +281,7 @@ } static int preserve_SP_size() { - return LP64_ONLY(1 +) 2; // [rex,] op, rm(reg/reg) + return 2; // op, rm(reg/reg) } // !!!!! Special hack to get all type of calls to specify the byte offset @@ -495,14 +495,34 @@ } } -void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) { - if( dst_encoding == src_encoding ) { - // reg-reg copy, use an empty encoding - } else { - MacroAssembler _masm(&cbuf); - - __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding)); - } +void emit_cmpfp_fixup(MacroAssembler& _masm) { + Label exit; + __ jccb(Assembler::noParity, exit); + __ pushf(); + // + // comiss/ucomiss instructions set ZF,PF,CF flags and + // zero OF,AF,SF for NaN values. + // Fixup flags by zeroing ZF,PF so that compare of NaN + // values returns 'less than' result (CF is set). + // Leave the rest of flags unchanged. + // + // 7 6 5 4 3 2 1 0 + // |S|Z|r|A|r|P|r|C| (r - reserved bit) + // 0 0 1 0 1 0 1 1 (0x2B) + // + __ andl(Address(rsp, 0), 0xffffff2b); + __ popf(); + __ bind(exit); +} + +void emit_cmpfp3(MacroAssembler& _masm, Register dst) { + Label done; + __ movl(dst, -1); + __ jcc(Assembler::parity, done); + __ jcc(Assembler::below, done); + __ setb(Assembler::notEqual, dst); + __ movzbl(dst, dst); + __ bind(done); } @@ -792,92 +812,88 @@ // Helper for XMM registers. Extra opcode bits, limited syntax. static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg_lo, int reg_hi, int size, outputStream* st ) { - if( cbuf ) { - if( reg_lo+1 == reg_hi ) { // double move? - if( is_load && !UseXmmLoadAndClearUpper ) - emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load - else - emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise + if (cbuf) { + MacroAssembler _masm(cbuf); + if (reg_lo+1 == reg_hi) { // double move? + if (is_load) { + __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); + } else { + __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); + } } else { - emit_opcode(*cbuf, 0xF3 ); + if (is_load) { + __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); + } else { + __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); + } } - emit_opcode(*cbuf, 0x0F ); - if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper ) - emit_opcode(*cbuf, 0x12 ); // use 'movlpd' for load - else - emit_opcode(*cbuf, is_load ? 0x10 : 0x11 ); - encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false); #ifndef PRODUCT - } else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); - if( reg_lo+1 == reg_hi ) { // double move? - if( is_load ) st->print("%s %s,[ESP + #%d]", - UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD", - Matcher::regName[reg_lo], offset); - else st->print("MOVSD [ESP + #%d],%s", - offset, Matcher::regName[reg_lo]); + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + if (reg_lo+1 == reg_hi) { // double move? + if (is_load) st->print("%s %s,[ESP + #%d]", + UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD", + Matcher::regName[reg_lo], offset); + else st->print("MOVSD [ESP + #%d],%s", + offset, Matcher::regName[reg_lo]); } else { - if( is_load ) st->print("MOVSS %s,[ESP + #%d]", - Matcher::regName[reg_lo], offset); - else st->print("MOVSS [ESP + #%d],%s", - offset, Matcher::regName[reg_lo]); + if (is_load) st->print("MOVSS %s,[ESP + #%d]", + Matcher::regName[reg_lo], offset); + else st->print("MOVSS [ESP + #%d],%s", + offset, Matcher::regName[reg_lo]); } #endif } int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); + // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes. return size+5+offset_size; } static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, int src_hi, int dst_hi, int size, outputStream* st ) { - if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers - if( cbuf ) { - if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) { - emit_opcode(*cbuf, 0x66 ); - } - emit_opcode(*cbuf, 0x0F ); - emit_opcode(*cbuf, 0x28 ); - emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] ); + if (cbuf) { + MacroAssembler _masm(cbuf); + if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move? + __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]), + as_XMMRegister(Matcher::_regEncode[src_lo])); + } else { + __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]), + as_XMMRegister(Matcher::_regEncode[src_lo])); + } #ifndef PRODUCT - } else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); - if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move? + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers + if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move? st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); } else { st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); } -#endif - } - return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3); - } else { - if( cbuf ) { - emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 ); - emit_opcode(*cbuf, 0x0F ); - emit_opcode(*cbuf, 0x10 ); - emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] ); -#ifndef PRODUCT - } else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); + } else { if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move? st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); } else { st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); } + } #endif - } - return size+4; } + // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes. + // Only MOVAPS SSE prefix uses 1 byte. + int sz = 4; + if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) && + UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3; + return size + sz; } static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, int src_hi, int dst_hi, int size, outputStream* st ) { // 32-bit if (cbuf) { - emit_opcode(*cbuf, 0x66); - emit_opcode(*cbuf, 0x0F); - emit_opcode(*cbuf, 0x6E); - emit_rm(*cbuf, 0x3, Matcher::_regEncode[dst_lo] & 7, Matcher::_regEncode[src_lo] & 7); + MacroAssembler _masm(cbuf); + __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]), + as_Register(Matcher::_regEncode[src_lo])); #ifndef PRODUCT } else if (!do_size) { st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); @@ -891,10 +907,9 @@ int src_hi, int dst_hi, int size, outputStream* st ) { // 32-bit if (cbuf) { - emit_opcode(*cbuf, 0x66); - emit_opcode(*cbuf, 0x0F); - emit_opcode(*cbuf, 0x7E); - emit_rm(*cbuf, 0x3, Matcher::_regEncode[src_lo] & 7, Matcher::_regEncode[dst_lo] & 7); + MacroAssembler _masm(cbuf); + __ movdl(as_Register(Matcher::_regEncode[dst_lo]), + as_XMMRegister(Matcher::_regEncode[src_lo])); #ifndef PRODUCT } else if (!do_size) { st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); @@ -1760,7 +1775,7 @@ emit_cc(cbuf, $secondary, $cop$$cmpcode); %} - enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV + enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); emit_d8(cbuf, op >> 8 ); emit_d8(cbuf, op & 255); @@ -1931,11 +1946,6 @@ %} - enc_class Xor_Reg (eRegI dst) %{ - emit_opcode(cbuf, 0x33); - emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg); - %} - // Following encoding is no longer used, but may be restored if calling // convention changes significantly. // Became: Xor_Reg(EBP), Java_To_Runtime( labl ) @@ -2013,64 +2023,6 @@ %} - enc_class MovI2X_reg(regX dst, eRegI src) %{ - emit_opcode(cbuf, 0x66 ); // MOVD dst,src - emit_opcode(cbuf, 0x0F ); - emit_opcode(cbuf, 0x6E ); - emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); - %} - - enc_class MovX2I_reg(eRegI dst, regX src) %{ - emit_opcode(cbuf, 0x66 ); // MOVD dst,src - emit_opcode(cbuf, 0x0F ); - emit_opcode(cbuf, 0x7E ); - emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg); - %} - - enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{ - { // MOVD $dst,$src.lo - emit_opcode(cbuf,0x66); - emit_opcode(cbuf,0x0F); - emit_opcode(cbuf,0x6E); - emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); - } - { // MOVD $tmp,$src.hi - emit_opcode(cbuf,0x66); - emit_opcode(cbuf,0x0F); - emit_opcode(cbuf,0x6E); - emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); - } - { // PUNPCKLDQ $dst,$tmp - emit_opcode(cbuf,0x66); - emit_opcode(cbuf,0x0F); - emit_opcode(cbuf,0x62); - emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg); - } - %} - - enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{ - { // MOVD $dst.lo,$src - emit_opcode(cbuf,0x66); - emit_opcode(cbuf,0x0F); - emit_opcode(cbuf,0x7E); - emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg); - } - { // PSHUFLW $tmp,$src,0x4E (01001110b) - emit_opcode(cbuf,0xF2); - emit_opcode(cbuf,0x0F); - emit_opcode(cbuf,0x70); - emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); - emit_d8(cbuf, 0x4E); - } - { // MOVD $dst.hi,$tmp - emit_opcode(cbuf,0x66); - emit_opcode(cbuf,0x0F); - emit_opcode(cbuf,0x7E); - emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg)); - } - %} - - // Encode a reg-reg copy. If it is useless, then empty encoding. enc_class enc_Copy( eRegI dst, eRegI src ) %{ encode_Copy( cbuf, $dst$$reg, $src$$reg ); @@ -2080,11 +2032,6 @@ encode_Copy( cbuf, $dst$$reg, $src$$reg ); %} - // Encode xmm reg-reg copy. If it is useless, then empty encoding. - enc_class enc_CopyXD( RegXD dst, RegXD src ) %{ - encode_CopyXD( cbuf, $dst$$reg, $src$$reg ); - %} - enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many) emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); %} @@ -2116,14 +2063,14 @@ $$$emit32$src$$constant; %} - enc_class Con32F_as_bits(immF src) %{ // storeF_imm + enc_class Con32FPR_as_bits(immFPR src) %{ // storeF_imm // Output Float immediate bits jfloat jf = $src$$constant; int jf_as_bits = jint_cast( jf ); emit_d32(cbuf, jf_as_bits); %} - enc_class Con32XF_as_bits(immXF src) %{ // storeX_imm + enc_class Con32F_as_bits(immF src) %{ // storeX_imm // Output Float immediate bits jfloat jf = $src$$constant; int jf_as_bits = jint_cast( jf ); @@ -2336,7 +2283,7 @@ emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); %} - enc_class enc_FP_store(memory mem, regD src) %{ + enc_class enc_FPR_store(memory mem, regDPR src) %{ // If src is FPR1, we can just FST to store it. // Else we need to FLD it to FPR1, then FSTP to store/pop it. int reg_encoding = 0x2; // Just store @@ -2485,7 +2432,7 @@ // ----------------- Encodings for floating point unit ----------------- // May leave result in FPU-TOS or FPU reg depending on opcodes - enc_class OpcReg_F (regF src) %{ // FMUL, FDIV + enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV $$$emit8$primary; emit_rm(cbuf, 0x3, $secondary, $src$$reg ); %} @@ -2497,17 +2444,17 @@ %} // !!!!! equivalent to Pop_Reg_F - enc_class Pop_Reg_D( regD dst ) %{ + enc_class Pop_Reg_DPR( regDPR dst ) %{ emit_opcode( cbuf, 0xDD ); // FSTP ST(i) emit_d8( cbuf, 0xD8+$dst$$reg ); %} - enc_class Push_Reg_D( regD dst ) %{ + enc_class Push_Reg_DPR( regDPR dst ) %{ emit_opcode( cbuf, 0xD9 ); emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1) %} - enc_class strictfp_bias1( regD dst ) %{ + enc_class strictfp_bias1( regDPR dst ) %{ emit_opcode( cbuf, 0xDB ); // FLD m80real emit_opcode( cbuf, 0x2D ); emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() ); @@ -2515,7 +2462,7 @@ emit_opcode( cbuf, 0xC8+$dst$$reg ); %} - enc_class strictfp_bias2( regD dst ) %{ + enc_class strictfp_bias2( regDPR dst ) %{ emit_opcode( cbuf, 0xDB ); // FLD m80real emit_opcode( cbuf, 0x2D ); emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() ); @@ -2541,39 +2488,29 @@ store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); %} - // Push the float in stackSlot 'src' onto FP-stack - enc_class Push_Mem_F( memory src ) %{ // FLD_S [ESP+src] - store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp ); - %} - - // Push the double in stackSlot 'src' onto FP-stack - enc_class Push_Mem_D( memory src ) %{ // FLD_D [ESP+src] - store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp ); - %} - // Push FPU's TOS float to a stack-slot, and pop FPU-stack - enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst] + enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); %} // Same as Pop_Mem_F except for opcode // Push FPU's TOS double to a stack-slot, and pop FPU-stack - enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst] + enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); %} - enc_class Pop_Reg_F( regF dst ) %{ + enc_class Pop_Reg_FPR( regFPR dst ) %{ emit_opcode( cbuf, 0xDD ); // FSTP ST(i) emit_d8( cbuf, 0xD8+$dst$$reg ); %} - enc_class Push_Reg_F( regF dst ) %{ + enc_class Push_Reg_FPR( regFPR dst ) %{ emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) emit_d8( cbuf, 0xC0-1+$dst$$reg ); %} // Push FPU's float to a stack-slot, and pop FPU-stack - enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{ + enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ int pop = 0x02; if ($src$$reg != FPR1L_enc) { emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) @@ -2584,7 +2521,7 @@ %} // Push FPU's double to a stack-slot, and pop FPU-stack - enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{ + enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ int pop = 0x02; if ($src$$reg != FPR1L_enc) { emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) @@ -2595,7 +2532,7 @@ %} // Push FPU's double to a FPU-stack-slot, and pop FPU-stack - enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{ + enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ int pop = 0xD0 - 1; // -1 since we skip FLD if ($src$$reg != FPR1L_enc) { emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1) @@ -2607,16 +2544,7 @@ %} - enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{ - MacroAssembler masm(&cbuf); - masm.fld_s( $src1$$reg-1); // nothing at TOS, load TOS from src1.reg - masm.fmul( $src2$$reg+0); // value at TOS - masm.fadd( $src$$reg+0); // value at TOS - masm.fstp_d( $dst$$reg+0); // value at TOS, popped off after store - %} - - - enc_class Push_Reg_Mod_D( regD dst, regD src) %{ + enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ // load dst in FPR0 emit_opcode( cbuf, 0xD9 ); emit_d8( cbuf, 0xC0-1+$dst$$reg ); @@ -2634,116 +2562,59 @@ } %} - enc_class Push_ModD_encoding( regXD src0, regXD src1) %{ - // Allocate a word - emit_opcode(cbuf,0x83); // SUB ESP,8 - emit_opcode(cbuf,0xEC); - emit_d8(cbuf,0x08); - - emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src1 - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, 0x11 ); - encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xDD ); // FLD_D [ESP] - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); - - emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src0 - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, 0x11 ); - encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xDD ); // FLD_D [ESP] - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); - - %} - - enc_class Push_ModX_encoding( regX src0, regX src1) %{ - // Allocate a word - emit_opcode(cbuf,0x83); // SUB ESP,4 - emit_opcode(cbuf,0xEC); - emit_d8(cbuf,0x04); - - emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src1 - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, 0x11 ); - encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xD9 ); // FLD [ESP] - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); - - emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src0 - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, 0x11 ); - encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xD9 ); // FLD [ESP] - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); - - %} - - enc_class Push_ResultXD(regXD dst) %{ - store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP] - - // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp] - emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66); - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12); - encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0x83); // ADD ESP,8 - emit_opcode(cbuf,0xC4); - emit_d8(cbuf,0x08); - %} - - enc_class Push_ResultX(regX dst, immI d8) %{ - store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP] - - emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP] - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, 0x10 ); - encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0x83); // ADD ESP,d8 (4 or 8) - emit_opcode(cbuf,0xC4); - emit_d8(cbuf,$d8$$constant); - %} - - enc_class Push_SrcXD(regXD src) %{ - // Allocate a word - emit_opcode(cbuf,0x83); // SUB ESP,8 - emit_opcode(cbuf,0xEC); - emit_d8(cbuf,0x08); - - emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, 0x11 ); - encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xDD ); // FLD_D [ESP] - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); + enc_class Push_ModD_encoding(regD src0, regD src1) %{ + MacroAssembler _masm(&cbuf); + __ subptr(rsp, 8); + __ movdbl(Address(rsp, 0), $src1$$XMMRegister); + __ fld_d(Address(rsp, 0)); + __ movdbl(Address(rsp, 0), $src0$$XMMRegister); + __ fld_d(Address(rsp, 0)); + %} + + enc_class Push_ModF_encoding(regF src0, regF src1) %{ + MacroAssembler _masm(&cbuf); + __ subptr(rsp, 4); + __ movflt(Address(rsp, 0), $src1$$XMMRegister); + __ fld_s(Address(rsp, 0)); + __ movflt(Address(rsp, 0), $src0$$XMMRegister); + __ fld_s(Address(rsp, 0)); + %} + + enc_class Push_ResultD(regD dst) %{ + MacroAssembler _masm(&cbuf); + __ fstp_d(Address(rsp, 0)); + __ movdbl($dst$$XMMRegister, Address(rsp, 0)); + __ addptr(rsp, 8); + %} + + enc_class Push_ResultF(regF dst, immI d8) %{ + MacroAssembler _masm(&cbuf); + __ fstp_s(Address(rsp, 0)); + __ movflt($dst$$XMMRegister, Address(rsp, 0)); + __ addptr(rsp, $d8$$constant); + %} + + enc_class Push_SrcD(regD src) %{ + MacroAssembler _masm(&cbuf); + __ subptr(rsp, 8); + __ movdbl(Address(rsp, 0), $src$$XMMRegister); + __ fld_d(Address(rsp, 0)); %} enc_class push_stack_temp_qword() %{ - emit_opcode(cbuf,0x83); // SUB ESP,8 - emit_opcode(cbuf,0xEC); - emit_d8 (cbuf,0x08); + MacroAssembler _masm(&cbuf); + __ subptr(rsp, 8); %} enc_class pop_stack_temp_qword() %{ - emit_opcode(cbuf,0x83); // ADD ESP,8 - emit_opcode(cbuf,0xC4); - emit_d8 (cbuf,0x08); - %} - - enc_class push_xmm_to_fpr1( regXD xmm_src ) %{ - emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], xmm_src - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, 0x11 ); - encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xDD ); // FLD_D [ESP] - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); + MacroAssembler _masm(&cbuf); + __ addptr(rsp, 8); + %} + + enc_class push_xmm_to_fpr1(regD src) %{ + MacroAssembler _masm(&cbuf); + __ movdbl(Address(rsp, 0), $src$$XMMRegister); + __ fld_d(Address(rsp, 0)); %} // Compute X^Y using Intel's fast hardware instructions, if possible. @@ -2785,10 +2656,7 @@ encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false); %} -// enc_class Pop_Reg_Mod_D( regD dst, regD src) -// was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X() - - enc_class Push_Result_Mod_D( regD src) %{ + enc_class Push_Result_Mod_DPR( regDPR src) %{ if ($src$$reg != FPR1L_enc) { // fincstp emit_opcode (cbuf, 0xD9); @@ -2817,7 +2685,7 @@ emit_opcode( cbuf, 0x05 ); %} - enc_class emitModD() %{ + enc_class emitModDPR() %{ // fprem must be iterative // :: loop // fprem @@ -2922,24 +2790,6 @@ %} - // XMM version of CmpF_Result. Because the XMM compare - // instructions set the EFLAGS directly. It becomes simpler than - // the float version above. - enc_class CmpX_Result(eRegI dst) %{ - MacroAssembler _masm(&cbuf); - Label nan, inc, done; - - __ jccb(Assembler::parity, nan); - __ jccb(Assembler::equal, done); - __ jccb(Assembler::above, inc); - __ bind(nan); - __ decrement(as_Register($dst$$reg)); // NO L qqq - __ jmpb(done); - __ bind(inc); - __ increment(as_Register($dst$$reg)); // NO L qqq - __ bind(done); - %} - // Compare the longs and set flags // BROKEN! Do Not use as-is enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ @@ -3162,48 +3012,6 @@ emit_d8 (cbuf,0 ); %} - enc_class movq_ld(regXD dst, memory mem) %{ - MacroAssembler _masm(&cbuf); - __ movq($dst$$XMMRegister, $mem$$Address); - %} - - enc_class movq_st(memory mem, regXD src) %{ - MacroAssembler _masm(&cbuf); - __ movq($mem$$Address, $src$$XMMRegister); - %} - - enc_class pshufd_8x8(regX dst, regX src) %{ - MacroAssembler _masm(&cbuf); - - encode_CopyXD(cbuf, $dst$$reg, $src$$reg); - __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg)); - __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00); - %} - - enc_class pshufd_4x16(regX dst, regX src) %{ - MacroAssembler _masm(&cbuf); - - __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00); - %} - - enc_class pshufd(regXD dst, regXD src, int mode) %{ - MacroAssembler _masm(&cbuf); - - __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode); - %} - - enc_class pxor(regXD dst, regXD src) %{ - MacroAssembler _masm(&cbuf); - - __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg)); - %} - - enc_class mov_i2x(regXD dst, eRegI src) %{ - MacroAssembler _masm(&cbuf); - - __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg)); - %} - // Because the transitions from emitted code to the runtime // monitorenter/exit helper stubs are so slow it's critical that @@ -3757,7 +3565,7 @@ // 'zero', store the darned double down as an int, and reset the // rounding mode to 'nearest'. The hardware throws an exception which // patches up the correct value directly to the stack. - enc_class D2I_encoding( regD src ) %{ + enc_class DPR2I_encoding( regDPR src ) %{ // Flip to round-to-zero mode. We attempted to allow invalid-op // exceptions here, so that a NAN or other corner-case value will // thrown an exception (but normal values get converted at full speed). @@ -3800,7 +3608,7 @@ // Carry on here... %} - enc_class D2L_encoding( regD src ) %{ + enc_class DPR2L_encoding( regDPR src ) %{ emit_opcode(cbuf,0xD9); // FLDCW trunc emit_opcode(cbuf,0x2D); emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); @@ -3842,294 +3650,27 @@ // Carry on here... %} - enc_class X2L_encoding( regX src ) %{ - // Allocate a word - emit_opcode(cbuf,0x83); // SUB ESP,8 - emit_opcode(cbuf,0xEC); - emit_d8(cbuf,0x08); - - emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, 0x11 ); - encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xD9 ); // FLD_S [ESP] - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xD9); // FLDCW trunc - emit_opcode(cbuf,0x2D); - emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); - - // Encoding assumes a double has been pushed into FPR0. - // Store down the double as a long, popping the FPU stack - emit_opcode(cbuf,0xDF); // FISTP [ESP] - emit_opcode(cbuf,0x3C); - emit_d8(cbuf,0x24); - - // Restore the rounding mode; mask the exception - emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode - emit_opcode(cbuf,0x2D); - emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() - ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() - : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); - - // Load the converted int; adjust CPU stack - emit_opcode(cbuf,0x58); // POP EAX - - emit_opcode(cbuf,0x5A); // POP EDX - - emit_opcode(cbuf,0x81); // CMP EDX,imm - emit_d8 (cbuf,0xFA); // rdx - emit_d32 (cbuf,0x80000000);// 0x80000000 - - emit_opcode(cbuf,0x75); // JNE around_slow_call - emit_d8 (cbuf,0x13+4); // Size of slow_call - - emit_opcode(cbuf,0x85); // TEST EAX,EAX - emit_opcode(cbuf,0xC0); // 2/rax,/rax, - - emit_opcode(cbuf,0x75); // JNE around_slow_call - emit_d8 (cbuf,0x13); // Size of slow_call - - // Allocate a word - emit_opcode(cbuf,0x83); // SUB ESP,4 - emit_opcode(cbuf,0xEC); - emit_d8(cbuf,0x04); - - emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, 0x11 ); - encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xD9 ); // FLD_S [ESP] - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0x83); // ADD ESP,4 - emit_opcode(cbuf,0xC4); - emit_d8(cbuf,0x04); - - // CALL directly to the runtime - cbuf.set_insts_mark(); - emit_opcode(cbuf,0xE8); // Call into runtime - emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); - // Carry on here... - %} - - enc_class XD2L_encoding( regXD src ) %{ - // Allocate a word - emit_opcode(cbuf,0x83); // SUB ESP,8 - emit_opcode(cbuf,0xEC); - emit_d8(cbuf,0x08); - - emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, 0x11 ); - encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xDD ); // FLD_D [ESP] - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xD9); // FLDCW trunc - emit_opcode(cbuf,0x2D); - emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); - - // Encoding assumes a double has been pushed into FPR0. - // Store down the double as a long, popping the FPU stack - emit_opcode(cbuf,0xDF); // FISTP [ESP] - emit_opcode(cbuf,0x3C); - emit_d8(cbuf,0x24); - - // Restore the rounding mode; mask the exception - emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode - emit_opcode(cbuf,0x2D); - emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() - ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() - : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); - - // Load the converted int; adjust CPU stack - emit_opcode(cbuf,0x58); // POP EAX - - emit_opcode(cbuf,0x5A); // POP EDX - - emit_opcode(cbuf,0x81); // CMP EDX,imm - emit_d8 (cbuf,0xFA); // rdx - emit_d32 (cbuf,0x80000000); // 0x80000000 - - emit_opcode(cbuf,0x75); // JNE around_slow_call - emit_d8 (cbuf,0x13+4); // Size of slow_call - - emit_opcode(cbuf,0x85); // TEST EAX,EAX - emit_opcode(cbuf,0xC0); // 2/rax,/rax, - - emit_opcode(cbuf,0x75); // JNE around_slow_call - emit_d8 (cbuf,0x13); // Size of slow_call - - // Push src onto stack slow-path - // Allocate a word - emit_opcode(cbuf,0x83); // SUB ESP,8 - emit_opcode(cbuf,0xEC); - emit_d8(cbuf,0x08); - - emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, 0x11 ); - encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xDD ); // FLD_D [ESP] - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0x83); // ADD ESP,8 - emit_opcode(cbuf,0xC4); - emit_d8(cbuf,0x08); - - // CALL directly to the runtime - cbuf.set_insts_mark(); - emit_opcode(cbuf,0xE8); // Call into runtime - emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); - // Carry on here... - %} - - enc_class D2X_encoding( regX dst, regD src ) %{ - // Allocate a word - emit_opcode(cbuf,0x83); // SUB ESP,4 - emit_opcode(cbuf,0xEC); - emit_d8(cbuf,0x04); - int pop = 0x02; - if ($src$$reg != FPR1L_enc) { - emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) - emit_d8( cbuf, 0xC0-1+$src$$reg ); - pop = 0x03; - } - store_to_stackslot( cbuf, 0xD9, pop, 0 ); // FST

_S [ESP] - - emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP] - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, 0x10 ); - encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0x83); // ADD ESP,4 - emit_opcode(cbuf,0xC4); - emit_d8(cbuf,0x04); - // Carry on here... - %} - - enc_class FX2I_encoding( regX src, eRegI dst ) %{ - emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); - - // Compare the result to see if we need to go to the slow path - emit_opcode(cbuf,0x81); // CMP dst,imm - emit_rm (cbuf,0x3,0x7,$dst$$reg); - emit_d32 (cbuf,0x80000000); // 0x80000000 - - emit_opcode(cbuf,0x75); // JNE around_slow_call - emit_d8 (cbuf,0x13); // Size of slow_call - // Store xmm to a temp memory - // location and push it onto stack. - - emit_opcode(cbuf,0x83); // SUB ESP,4 - emit_opcode(cbuf,0xEC); - emit_d8(cbuf, $primary ? 0x8 : 0x4); - - emit_opcode (cbuf, $primary ? 0xF2 : 0xF3 ); // MOVSS [ESP], xmm - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, 0x11 ); - encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf, $primary ? 0xDD : 0xD9 ); // FLD [ESP] - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0x83); // ADD ESP,4 - emit_opcode(cbuf,0xC4); - emit_d8(cbuf, $primary ? 0x8 : 0x4); - - // CALL directly to the runtime - cbuf.set_insts_mark(); - emit_opcode(cbuf,0xE8); // Call into runtime - emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); - - // Carry on here... - %} - - enc_class X2D_encoding( regD dst, regX src ) %{ - // Allocate a word - emit_opcode(cbuf,0x83); // SUB ESP,4 - emit_opcode(cbuf,0xEC); - emit_d8(cbuf,0x04); - - emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], xmm - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, 0x11 ); - encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xD9 ); // FLD_S [ESP] - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0x83); // ADD ESP,4 - emit_opcode(cbuf,0xC4); - emit_d8(cbuf,0x04); - - // Carry on here... - %} - - enc_class AbsXF_encoding(regX dst) %{ - address signmask_address=(address)float_signmask_pool; - // andpd:\tANDPS $dst,[signconst] - emit_opcode(cbuf, 0x0F); - emit_opcode(cbuf, 0x54); - emit_rm(cbuf, 0x0, $dst$$reg, 0x5); - emit_d32(cbuf, (int)signmask_address); - %} - - enc_class AbsXD_encoding(regXD dst) %{ - address signmask_address=(address)double_signmask_pool; - // andpd:\tANDPD $dst,[signconst] - emit_opcode(cbuf, 0x66); - emit_opcode(cbuf, 0x0F); - emit_opcode(cbuf, 0x54); - emit_rm(cbuf, 0x0, $dst$$reg, 0x5); - emit_d32(cbuf, (int)signmask_address); - %} - - enc_class NegXF_encoding(regX dst) %{ - address signmask_address=(address)float_signflip_pool; - // andpd:\tXORPS $dst,[signconst] - emit_opcode(cbuf, 0x0F); - emit_opcode(cbuf, 0x57); - emit_rm(cbuf, 0x0, $dst$$reg, 0x5); - emit_d32(cbuf, (int)signmask_address); - %} - - enc_class NegXD_encoding(regXD dst) %{ - address signmask_address=(address)double_signflip_pool; - // andpd:\tXORPD $dst,[signconst] - emit_opcode(cbuf, 0x66); - emit_opcode(cbuf, 0x0F); - emit_opcode(cbuf, 0x57); - emit_rm(cbuf, 0x0, $dst$$reg, 0x5); - emit_d32(cbuf, (int)signmask_address); - %} - - enc_class FMul_ST_reg( eRegF src1 ) %{ + enc_class FMul_ST_reg( eRegFPR src1 ) %{ // Operand was loaded from memory into fp ST (stack top) // FMUL ST,$src /* D8 C8+i */ emit_opcode(cbuf, 0xD8); emit_opcode(cbuf, 0xC8 + $src1$$reg); %} - enc_class FAdd_ST_reg( eRegF src2 ) %{ + enc_class FAdd_ST_reg( eRegFPR src2 ) %{ // FADDP ST,src2 /* D8 C0+i */ emit_opcode(cbuf, 0xD8); emit_opcode(cbuf, 0xC0 + $src2$$reg); //could use FADDP src2,fpST /* DE C0+i */ %} - enc_class FAddP_reg_ST( eRegF src2 ) %{ + enc_class FAddP_reg_ST( eRegFPR src2 ) %{ // FADDP src2,ST /* DE C0+i */ emit_opcode(cbuf, 0xDE); emit_opcode(cbuf, 0xC0 + $src2$$reg); %} - enc_class subF_divF_encode( eRegF src1, eRegF src2) %{ + enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ // Operand has been loaded into fp ST (stack top) // FSUB ST,$src1 emit_opcode(cbuf, 0xD8); @@ -4140,7 +3681,7 @@ emit_opcode(cbuf, 0xF0 + $src2$$reg); %} - enc_class MulFAddF (eRegF src1, eRegF src2) %{ + enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ // Operand was loaded from memory into fp ST (stack top) // FADD ST,$src /* D8 C0+i */ emit_opcode(cbuf, 0xD8); @@ -4152,7 +3693,7 @@ %} - enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{ + enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ // Operand was loaded from memory into fp ST (stack top) // FADD ST,$src /* D8 C0+i */ emit_opcode(cbuf, 0xD8); @@ -4176,66 +3717,6 @@ store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); %} - enc_class enc_loadLX_volatile( memory mem, stackSlotL dst, regXD tmp ) %{ - { // Atomic long load - // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem - emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66); - emit_opcode(cbuf,0x0F); - emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12); - int base = $mem$$base; - int index = $mem$$index; - int scale = $mem$$scale; - int displace = $mem$$disp; - bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals - encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); - } - { // MOVSD $dst,$tmp ! atomic long store - emit_opcode(cbuf,0xF2); - emit_opcode(cbuf,0x0F); - emit_opcode(cbuf,0x11); - int base = $dst$$base; - int index = $dst$$index; - int scale = $dst$$scale; - int displace = $dst$$disp; - bool disp_is_oop = $dst->disp_is_oop(); // disp-as-oop when working with static globals - encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); - } - %} - - enc_class enc_loadLX_reg_volatile( memory mem, eRegL dst, regXD tmp ) %{ - { // Atomic long load - // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem - emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66); - emit_opcode(cbuf,0x0F); - emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12); - int base = $mem$$base; - int index = $mem$$index; - int scale = $mem$$scale; - int displace = $mem$$disp; - bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals - encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); - } - { // MOVD $dst.lo,$tmp - emit_opcode(cbuf,0x66); - emit_opcode(cbuf,0x0F); - emit_opcode(cbuf,0x7E); - emit_rm(cbuf, 0x3, $tmp$$reg, $dst$$reg); - } - { // PSRLQ $tmp,32 - emit_opcode(cbuf,0x66); - emit_opcode(cbuf,0x0F); - emit_opcode(cbuf,0x73); - emit_rm(cbuf, 0x3, 0x02, $tmp$$reg); - emit_d8(cbuf, 0x20); - } - { // MOVD $dst.hi,$tmp - emit_opcode(cbuf,0x66); - emit_opcode(cbuf,0x0F); - emit_opcode(cbuf,0x7E); - emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg)); - } - %} - // Volatile Store Long. Must be atomic, so move it into // the FP TOS and then do a 64-bit FIST. Has to probe the // target address before the store (for null-ptr checks) @@ -4253,66 +3734,6 @@ encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop); %} - enc_class enc_storeLX_volatile( memory mem, stackSlotL src, regXD tmp) %{ - { // Atomic long load - // UseXmmLoadAndClearUpper ? movsd $tmp,[$src] : movlpd $tmp,[$src] - emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66); - emit_opcode(cbuf,0x0F); - emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12); - int base = $src$$base; - int index = $src$$index; - int scale = $src$$scale; - int displace = $src$$disp; - bool disp_is_oop = $src->disp_is_oop(); // disp-as-oop when working with static globals - encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); - } - cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop - { // MOVSD $mem,$tmp ! atomic long store - emit_opcode(cbuf,0xF2); - emit_opcode(cbuf,0x0F); - emit_opcode(cbuf,0x11); - int base = $mem$$base; - int index = $mem$$index; - int scale = $mem$$scale; - int displace = $mem$$disp; - bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals - encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); - } - %} - - enc_class enc_storeLX_reg_volatile( memory mem, eRegL src, regXD tmp, regXD tmp2) %{ - { // MOVD $tmp,$src.lo - emit_opcode(cbuf,0x66); - emit_opcode(cbuf,0x0F); - emit_opcode(cbuf,0x6E); - emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); - } - { // MOVD $tmp2,$src.hi - emit_opcode(cbuf,0x66); - emit_opcode(cbuf,0x0F); - emit_opcode(cbuf,0x6E); - emit_rm(cbuf, 0x3, $tmp2$$reg, HIGH_FROM_LOW($src$$reg)); - } - { // PUNPCKLDQ $tmp,$tmp2 - emit_opcode(cbuf,0x66); - emit_opcode(cbuf,0x0F); - emit_opcode(cbuf,0x62); - emit_rm(cbuf, 0x3, $tmp$$reg, $tmp2$$reg); - } - cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop - { // MOVSD $mem,$tmp ! atomic long store - emit_opcode(cbuf,0xF2); - emit_opcode(cbuf,0x0F); - emit_opcode(cbuf,0x11); - int base = $mem$$base; - int index = $mem$$index; - int scale = $mem$$scale; - int displace = $mem$$disp; - bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals - encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); - } - %} - // Safepoint Poll. This polls the safepoint page, and causes an // exception if it is not readable. Unfortunately, it kills the condition code // in the process @@ -4705,7 +4126,7 @@ %} //Double Immediate zero -operand immD0() %{ +operand immDPR0() %{ // Do additional (and counter-intuitive) test against NaN to work around VC++ // bug that generates code such that NaNs compare equal to 0.0 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) ); @@ -4717,7 +4138,7 @@ %} // Double Immediate one -operand immD1() %{ +operand immDPR1() %{ predicate( UseSSE<=1 && n->getd() == 1.0 ); match(ConD); @@ -4727,7 +4148,7 @@ %} // Double Immediate -operand immD() %{ +operand immDPR() %{ predicate(UseSSE<=1); match(ConD); @@ -4736,7 +4157,7 @@ interface(CONST_INTER); %} -operand immXD() %{ +operand immD() %{ predicate(UseSSE>=2); match(ConD); @@ -4746,7 +4167,7 @@ %} // Double Immediate zero -operand immXD0() %{ +operand immD0() %{ // Do additional (and counter-intuitive) test against NaN to work around VC++ // bug that generates code such that NaNs compare equal to 0.0 AND do not // compare equal to -0.0. @@ -4758,7 +4179,7 @@ %} // Float Immediate zero -operand immF0() %{ +operand immFPR0() %{ predicate(UseSSE == 0 && n->getf() == 0.0F); match(ConF); @@ -4768,7 +4189,7 @@ %} // Float Immediate one -operand immF1() %{ +operand immFPR1() %{ predicate(UseSSE == 0 && n->getf() == 1.0F); match(ConF); @@ -4778,17 +4199,17 @@ %} // Float Immediate +operand immFPR() %{ + predicate( UseSSE == 0 ); + match(ConF); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// Float Immediate operand immF() %{ - predicate( UseSSE == 0 ); - match(ConF); - - op_cost(5); - format %{ %} - interface(CONST_INTER); -%} - -// Float Immediate -operand immXF() %{ predicate(UseSSE >= 1); match(ConF); @@ -4798,7 +4219,7 @@ %} // Float Immediate zero. Zero and not -0.0 -operand immXF0() %{ +operand immF0() %{ predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 ); match(ConF); @@ -5174,7 +4595,7 @@ %} // Float register operands -operand regD() %{ +operand regDPR() %{ predicate( UseSSE < 2 ); constraint(ALLOC_IN_RC(dbl_reg)); match(RegD); @@ -5184,7 +4605,7 @@ interface(REG_INTER); %} -operand regDPR1(regD reg) %{ +operand regDPR1(regDPR reg) %{ predicate( UseSSE < 2 ); constraint(ALLOC_IN_RC(dbl_reg0)); match(reg); @@ -5192,7 +4613,7 @@ interface(REG_INTER); %} -operand regDPR2(regD reg) %{ +operand regDPR2(regDPR reg) %{ predicate( UseSSE < 2 ); constraint(ALLOC_IN_RC(dbl_reg1)); match(reg); @@ -5200,7 +4621,7 @@ interface(REG_INTER); %} -operand regnotDPR1(regD reg) %{ +operand regnotDPR1(regDPR reg) %{ predicate( UseSSE < 2 ); constraint(ALLOC_IN_RC(dbl_notreg0)); match(reg); @@ -5209,18 +4630,18 @@ %} // XMM Double register operands -operand regXD() %{ +operand regD() %{ predicate( UseSSE>=2 ); constraint(ALLOC_IN_RC(xdb_reg)); match(RegD); - match(regXD6); - match(regXD7); + match(regD6); + match(regD7); format %{ %} interface(REG_INTER); %} // XMM6 double register operands -operand regXD6(regXD reg) %{ +operand regD6(regD reg) %{ predicate( UseSSE>=2 ); constraint(ALLOC_IN_RC(xdb_reg6)); match(reg); @@ -5229,7 +4650,7 @@ %} // XMM7 double register operands -operand regXD7(regXD reg) %{ +operand regD7(regD reg) %{ predicate( UseSSE>=2 ); constraint(ALLOC_IN_RC(xdb_reg7)); match(reg); @@ -5238,7 +4659,7 @@ %} // Float register operands -operand regF() %{ +operand regFPR() %{ predicate( UseSSE < 2 ); constraint(ALLOC_IN_RC(flt_reg)); match(RegF); @@ -5248,7 +4669,7 @@ %} // Float register operands -operand regFPR1(regF reg) %{ +operand regFPR1(regFPR reg) %{ predicate( UseSSE < 2 ); constraint(ALLOC_IN_RC(flt_reg0)); match(reg); @@ -5257,7 +4678,7 @@ %} // XMM register operands -operand regX() %{ +operand regF() %{ predicate( UseSSE>=1 ); constraint(ALLOC_IN_RC(xmm_reg)); match(RegF); @@ -6001,7 +5422,7 @@ %} // Conditional move double reg-reg -pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{ +pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ single_instruction; dst : S4(write); src : S3(read); @@ -6010,7 +5431,7 @@ %} // Float reg-reg operation -pipe_class fpu_reg(regD dst) %{ +pipe_class fpu_reg(regDPR dst) %{ instruction_count(2); dst : S3(read); DECODE : S0(2); // any 2 decoders @@ -6018,7 +5439,7 @@ %} // Float reg-reg operation -pipe_class fpu_reg_reg(regD dst, regD src) %{ +pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ instruction_count(2); dst : S4(write); src : S3(read); @@ -6027,7 +5448,7 @@ %} // Float reg-reg operation -pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{ +pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ instruction_count(3); dst : S4(write); src1 : S3(read); @@ -6037,7 +5458,7 @@ %} // Float reg-reg operation -pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ +pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ instruction_count(4); dst : S4(write); src1 : S3(read); @@ -6048,7 +5469,7 @@ %} // Float reg-reg operation -pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{ +pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ instruction_count(4); dst : S4(write); src1 : S3(read); @@ -6061,7 +5482,7 @@ %} // Float reg-mem operation -pipe_class fpu_reg_mem(regD dst, memory mem) %{ +pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ instruction_count(2); dst : S5(write); mem : S3(read); @@ -6072,7 +5493,7 @@ %} // Float reg-mem operation -pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{ +pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ instruction_count(3); dst : S5(write); src1 : S3(read); @@ -6084,7 +5505,7 @@ %} // Float mem-reg operation -pipe_class fpu_mem_reg(memory mem, regD src) %{ +pipe_class fpu_mem_reg(memory mem, regDPR src) %{ instruction_count(2); src : S5(read); mem : S3(read); @@ -6094,7 +5515,7 @@ MEM : S3; // any mem %} -pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{ +pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ instruction_count(3); src1 : S3(read); src2 : S3(read); @@ -6105,7 +5526,7 @@ MEM : S3; // any mem %} -pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{ +pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ instruction_count(3); src1 : S3(read); src2 : S3(read); @@ -6134,7 +5555,7 @@ MEM : S3(3); // any mem %} -pipe_class fpu_mem_reg_con(memory mem, regD src1) %{ +pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ instruction_count(3); src1 : S4(read); mem : S4(read); @@ -6145,7 +5566,7 @@ %} // Float load constant -pipe_class fpu_reg_con(regD dst) %{ +pipe_class fpu_reg_con(regDPR dst) %{ instruction_count(2); dst : S5(write); D0 : S0; // big decoder only for the load @@ -6155,7 +5576,7 @@ %} // Float load constant -pipe_class fpu_reg_reg_con(regD dst, regD src) %{ +pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ instruction_count(3); dst : S5(write); src : S3(read); @@ -6870,18 +6291,21 @@ ins_pipe( fpu_reg_mem ); %} -instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{ +instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{ predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); match(Set dst (LoadL mem)); effect(TEMP tmp); ins_cost(180); format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" "MOVSD $dst,$tmp" %} - ins_encode(enc_loadLX_volatile(mem, dst, tmp)); - ins_pipe( pipe_slow ); -%} - -instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{ + ins_encode %{ + __ movdbl($tmp$$XMMRegister, $mem$$Address); + __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{ predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); match(Set dst (LoadL mem)); effect(TEMP tmp); @@ -6890,7 +6314,12 @@ "MOVD $dst.lo,$tmp\n\t" "PSRLQ $tmp,32\n\t" "MOVD $dst.hi,$tmp" %} - ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp)); + ins_encode %{ + __ movdbl($tmp$$XMMRegister, $mem$$Address); + __ movdl($dst$$Register, $tmp$$XMMRegister); + __ psrlq($tmp$$XMMRegister, 32); + __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); + %} ins_pipe( pipe_slow ); %} @@ -6929,7 +6358,7 @@ %} // Load Double -instruct loadD(regD dst, memory mem) %{ +instruct loadDPR(regDPR dst, memory mem) %{ predicate(UseSSE<=1); match(Set dst (LoadD mem)); @@ -6938,42 +6367,48 @@ "FSTP $dst" %} opcode(0xDD); /* DD /0 */ ins_encode( OpcP, RMopc_Mem(0x00,mem), - Pop_Reg_D(dst) ); + Pop_Reg_DPR(dst) ); ins_pipe( fpu_reg_mem ); %} // Load Double to XMM -instruct loadXD(regXD dst, memory mem) %{ +instruct loadD(regD dst, memory mem) %{ predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); match(Set dst (LoadD mem)); ins_cost(145); format %{ "MOVSD $dst,$mem" %} - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem)); - ins_pipe( pipe_slow ); -%} - -instruct loadXD_partial(regXD dst, memory mem) %{ + ins_encode %{ + __ movdbl ($dst$$XMMRegister, $mem$$Address); + %} + ins_pipe( pipe_slow ); +%} + +instruct loadD_partial(regD dst, memory mem) %{ predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); match(Set dst (LoadD mem)); ins_cost(145); format %{ "MOVLPD $dst,$mem" %} - ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,mem)); + ins_encode %{ + __ movdbl ($dst$$XMMRegister, $mem$$Address); + %} ins_pipe( pipe_slow ); %} // Load to XMM register (single-precision floating point) // MOVSS instruction -instruct loadX(regX dst, memory mem) %{ +instruct loadF(regF dst, memory mem) %{ predicate(UseSSE>=1); match(Set dst (LoadF mem)); ins_cost(145); format %{ "MOVSS $dst,$mem" %} - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem)); + ins_encode %{ + __ movflt ($dst$$XMMRegister, $mem$$Address); + %} ins_pipe( pipe_slow ); %} // Load Float -instruct loadF(regF dst, memory mem) %{ +instruct loadFPR(regFPR dst, memory mem) %{ predicate(UseSSE==0); match(Set dst (LoadF mem)); @@ -6982,57 +6417,67 @@ "FSTP $dst" %} opcode(0xD9); /* D9 /0 */ ins_encode( OpcP, RMopc_Mem(0x00,mem), - Pop_Reg_F(dst) ); + Pop_Reg_FPR(dst) ); ins_pipe( fpu_reg_mem ); %} // Load Aligned Packed Byte to XMM register -instruct loadA8B(regXD dst, memory mem) %{ +instruct loadA8B(regD dst, memory mem) %{ predicate(UseSSE>=1); match(Set dst (Load8B mem)); ins_cost(125); format %{ "MOVQ $dst,$mem\t! packed8B" %} - ins_encode( movq_ld(dst, mem)); + ins_encode %{ + __ movq($dst$$XMMRegister, $mem$$Address); + %} ins_pipe( pipe_slow ); %} // Load Aligned Packed Short to XMM register -instruct loadA4S(regXD dst, memory mem) %{ +instruct loadA4S(regD dst, memory mem) %{ predicate(UseSSE>=1); match(Set dst (Load4S mem)); ins_cost(125); format %{ "MOVQ $dst,$mem\t! packed4S" %} - ins_encode( movq_ld(dst, mem)); + ins_encode %{ + __ movq($dst$$XMMRegister, $mem$$Address); + %} ins_pipe( pipe_slow ); %} // Load Aligned Packed Char to XMM register -instruct loadA4C(regXD dst, memory mem) %{ +instruct loadA4C(regD dst, memory mem) %{ predicate(UseSSE>=1); match(Set dst (Load4C mem)); ins_cost(125); format %{ "MOVQ $dst,$mem\t! packed4C" %} - ins_encode( movq_ld(dst, mem)); + ins_encode %{ + __ movq($dst$$XMMRegister, $mem$$Address); + %} ins_pipe( pipe_slow ); %} // Load Aligned Packed Integer to XMM register -instruct load2IU(regXD dst, memory mem) %{ +instruct load2IU(regD dst, memory mem) %{ predicate(UseSSE>=1); match(Set dst (Load2I mem)); ins_cost(125); format %{ "MOVQ $dst,$mem\t! packed2I" %} - ins_encode( movq_ld(dst, mem)); + ins_encode %{ + __ movq($dst$$XMMRegister, $mem$$Address); + %} ins_pipe( pipe_slow ); %} // Load Aligned Packed Single to XMM -instruct loadA2F(regXD dst, memory mem) %{ +instruct loadA2F(regD dst, memory mem) %{ predicate(UseSSE>=1); match(Set dst (Load2F mem)); ins_cost(145); format %{ "MOVQ $dst,$mem\t! packed2F" %} - ins_encode( movq_ld(dst, mem)); + ins_encode %{ + __ movq($dst$$XMMRegister, $mem$$Address); + %} ins_pipe( pipe_slow ); %} @@ -7139,8 +6584,8 @@ ins_pipe( ialu_reg_long ); %} -// The instruction usage is guarded by predicate in operand immF(). -instruct loadConF(regF dst, immF con) %{ +// The instruction usage is guarded by predicate in operand immFPR(). +instruct loadConFPR(regFPR dst, immFPR con) %{ match(Set dst con); ins_cost(125); format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" @@ -7152,8 +6597,8 @@ ins_pipe(fpu_reg_con); %} -// The instruction usage is guarded by predicate in operand immF0(). -instruct loadConF0(regF dst, immF0 con) %{ +// The instruction usage is guarded by predicate in operand immFPR0(). +instruct loadConFPR0(regFPR dst, immFPR0 con) %{ match(Set dst con); ins_cost(125); format %{ "FLDZ ST\n\t" @@ -7165,8 +6610,8 @@ ins_pipe(fpu_reg_con); %} -// The instruction usage is guarded by predicate in operand immF1(). -instruct loadConF1(regF dst, immF1 con) %{ +// The instruction usage is guarded by predicate in operand immFPR1(). +instruct loadConFPR1(regFPR dst, immFPR1 con) %{ match(Set dst con); ins_cost(125); format %{ "FLD1 ST\n\t" @@ -7178,8 +6623,8 @@ ins_pipe(fpu_reg_con); %} -// The instruction usage is guarded by predicate in operand immXF(). -instruct loadConX(regX dst, immXF con) %{ +// The instruction usage is guarded by predicate in operand immF(). +instruct loadConF(regF dst, immF con) %{ match(Set dst con); ins_cost(125); format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} @@ -7189,8 +6634,8 @@ ins_pipe(pipe_slow); %} -// The instruction usage is guarded by predicate in operand immXF0(). -instruct loadConX0(regX dst, immXF0 src) %{ +// The instruction usage is guarded by predicate in operand immF0(). +instruct loadConF0(regF dst, immF0 src) %{ match(Set dst src); ins_cost(100); format %{ "XORPS $dst,$dst\t# float 0.0" %} @@ -7200,8 +6645,8 @@ ins_pipe(pipe_slow); %} -// The instruction usage is guarded by predicate in operand immD(). -instruct loadConD(regD dst, immD con) %{ +// The instruction usage is guarded by predicate in operand immDPR(). +instruct loadConDPR(regDPR dst, immDPR con) %{ match(Set dst con); ins_cost(125); @@ -7214,8 +6659,8 @@ ins_pipe(fpu_reg_con); %} -// The instruction usage is guarded by predicate in operand immD0(). -instruct loadConD0(regD dst, immD0 con) %{ +// The instruction usage is guarded by predicate in operand immDPR0(). +instruct loadConDPR0(regDPR dst, immDPR0 con) %{ match(Set dst con); ins_cost(125); @@ -7228,8 +6673,8 @@ ins_pipe(fpu_reg_con); %} -// The instruction usage is guarded by predicate in operand immD1(). -instruct loadConD1(regD dst, immD1 con) %{ +// The instruction usage is guarded by predicate in operand immDPR1(). +instruct loadConDPR1(regDPR dst, immDPR1 con) %{ match(Set dst con); ins_cost(125); @@ -7242,8 +6687,8 @@ ins_pipe(fpu_reg_con); %} -// The instruction usage is guarded by predicate in operand immXD(). -instruct loadConXD(regXD dst, immXD con) %{ +// The instruction usage is guarded by predicate in operand immD(). +instruct loadConD(regD dst, immD con) %{ match(Set dst con); ins_cost(125); format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} @@ -7253,12 +6698,14 @@ ins_pipe(pipe_slow); %} -// The instruction usage is guarded by predicate in operand immXD0(). -instruct loadConXD0(regXD dst, immXD0 src) %{ +// The instruction usage is guarded by predicate in operand immD0(). +instruct loadConD0(regD dst, immD0 src) %{ match(Set dst src); ins_cost(100); format %{ "XORPD $dst,$dst\t# double 0.0" %} - ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x57), RegReg(dst,dst)); + ins_encode %{ + __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister); + %} ins_pipe( pipe_slow ); %} @@ -7296,7 +6743,7 @@ %} // Load Stack Slot -instruct loadSSF(regF dst, stackSlotF src) %{ +instruct loadSSF(regFPR dst, stackSlotF src) %{ match(Set dst src); ins_cost(125); @@ -7304,12 +6751,12 @@ "FSTP $dst" %} opcode(0xD9); /* D9 /0, FLD m32real */ ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), - Pop_Reg_F(dst) ); + Pop_Reg_FPR(dst) ); ins_pipe( fpu_reg_mem ); %} // Load Stack Slot -instruct loadSSD(regD dst, stackSlotD src) %{ +instruct loadSSD(regDPR dst, stackSlotD src) %{ match(Set dst src); ins_cost(125); @@ -7317,7 +6764,7 @@ "FSTP $dst" %} opcode(0xDD); /* DD /0, FLD m64real */ ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), - Pop_Reg_D(dst) ); + Pop_Reg_DPR(dst) ); ins_pipe( fpu_reg_mem ); %} @@ -7552,7 +6999,7 @@ ins_pipe( fpu_reg_mem ); %} -instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{ +instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{ predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); match(Set mem (StoreL mem src)); effect( TEMP tmp, KILL cr ); @@ -7560,12 +7007,15 @@ format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" "MOVSD $tmp,$src\n\t" "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} - opcode(0x3B); - ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_volatile(mem, src, tmp)); - ins_pipe( pipe_slow ); -%} - -instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{ + ins_encode %{ + __ cmpl(rax, $mem$$Address); + __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp)); + __ movdbl($mem$$Address, $tmp$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{ predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); match(Set mem (StoreL mem src)); effect( TEMP tmp2 , TEMP tmp, KILL cr ); @@ -7575,8 +7025,13 @@ "MOVD $tmp2,$src.hi\n\t" "PUNPCKLDQ $tmp,$tmp2\n\t" "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} - opcode(0x3B); - ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_reg_volatile(mem, src, tmp, tmp2)); + ins_encode %{ + __ cmpl(rax, $mem$$Address); + __ movdl($tmp$$XMMRegister, $src$$Register); + __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register)); + __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister); + __ movdbl($mem$$Address, $tmp$$XMMRegister); + %} ins_pipe( pipe_slow ); %} @@ -7638,32 +7093,38 @@ %} // Store Aligned Packed Byte XMM register to memory -instruct storeA8B(memory mem, regXD src) %{ +instruct storeA8B(memory mem, regD src) %{ predicate(UseSSE>=1); match(Set mem (Store8B mem src)); ins_cost(145); format %{ "MOVQ $mem,$src\t! packed8B" %} - ins_encode( movq_st(mem, src)); + ins_encode %{ + __ movq($mem$$Address, $src$$XMMRegister); + %} ins_pipe( pipe_slow ); %} // Store Aligned Packed Char/Short XMM register to memory -instruct storeA4C(memory mem, regXD src) %{ +instruct storeA4C(memory mem, regD src) %{ predicate(UseSSE>=1); match(Set mem (Store4C mem src)); ins_cost(145); format %{ "MOVQ $mem,$src\t! packed4C" %} - ins_encode( movq_st(mem, src)); + ins_encode %{ + __ movq($mem$$Address, $src$$XMMRegister); + %} ins_pipe( pipe_slow ); %} // Store Aligned Packed Integer XMM register to memory -instruct storeA2I(memory mem, regXD src) %{ +instruct storeA2I(memory mem, regD src) %{ predicate(UseSSE>=1); match(Set mem (Store2I mem src)); ins_cost(145); format %{ "MOVQ $mem,$src\t! packed2I" %} - ins_encode( movq_st(mem, src)); + ins_encode %{ + __ movq($mem$$Address, $src$$XMMRegister); + %} ins_pipe( pipe_slow ); %} @@ -7679,98 +7140,116 @@ %} // Store Double -instruct storeD( memory mem, regDPR1 src) %{ +instruct storeDPR( memory mem, regDPR1 src) %{ predicate(UseSSE<=1); match(Set mem (StoreD mem src)); ins_cost(100); format %{ "FST_D $mem,$src" %} opcode(0xDD); /* DD /2 */ - ins_encode( enc_FP_store(mem,src) ); + ins_encode( enc_FPR_store(mem,src) ); ins_pipe( fpu_mem_reg ); %} // Store double does rounding on x86 -instruct storeD_rounded( memory mem, regDPR1 src) %{ +instruct storeDPR_rounded( memory mem, regDPR1 src) %{ predicate(UseSSE<=1); match(Set mem (StoreD mem (RoundDouble src))); ins_cost(100); format %{ "FST_D $mem,$src\t# round" %} opcode(0xDD); /* DD /2 */ - ins_encode( enc_FP_store(mem,src) ); + ins_encode( enc_FPR_store(mem,src) ); ins_pipe( fpu_mem_reg ); %} // Store XMM register to memory (double-precision floating points) // MOVSD instruction -instruct storeXD(memory mem, regXD src) %{ +instruct storeD(memory mem, regD src) %{ predicate(UseSSE>=2); match(Set mem (StoreD mem src)); ins_cost(95); format %{ "MOVSD $mem,$src" %} - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem)); + ins_encode %{ + __ movdbl($mem$$Address, $src$$XMMRegister); + %} ins_pipe( pipe_slow ); %} // Store XMM register to memory (single-precision floating point) // MOVSS instruction -instruct storeX(memory mem, regX src) %{ +instruct storeF(memory mem, regF src) %{ predicate(UseSSE>=1); match(Set mem (StoreF mem src)); ins_cost(95); format %{ "MOVSS $mem,$src" %} - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem)); + ins_encode %{ + __ movflt($mem$$Address, $src$$XMMRegister); + %} ins_pipe( pipe_slow ); %} // Store Aligned Packed Single Float XMM register to memory -instruct storeA2F(memory mem, regXD src) %{ +instruct storeA2F(memory mem, regD src) %{ predicate(UseSSE>=1); match(Set mem (Store2F mem src)); ins_cost(145); format %{ "MOVQ $mem,$src\t! packed2F" %} - ins_encode( movq_st(mem, src)); + ins_encode %{ + __ movq($mem$$Address, $src$$XMMRegister); + %} ins_pipe( pipe_slow ); %} // Store Float -instruct storeF( memory mem, regFPR1 src) %{ +instruct storeFPR( memory mem, regFPR1 src) %{ predicate(UseSSE==0); match(Set mem (StoreF mem src)); ins_cost(100); format %{ "FST_S $mem,$src" %} opcode(0xD9); /* D9 /2 */ - ins_encode( enc_FP_store(mem,src) ); + ins_encode( enc_FPR_store(mem,src) ); ins_pipe( fpu_mem_reg ); %} // Store Float does rounding on x86 -instruct storeF_rounded( memory mem, regFPR1 src) %{ +instruct storeFPR_rounded( memory mem, regFPR1 src) %{ predicate(UseSSE==0); match(Set mem (StoreF mem (RoundFloat src))); ins_cost(100); format %{ "FST_S $mem,$src\t# round" %} opcode(0xD9); /* D9 /2 */ - ins_encode( enc_FP_store(mem,src) ); + ins_encode( enc_FPR_store(mem,src) ); ins_pipe( fpu_mem_reg ); %} // Store Float does rounding on x86 -instruct storeF_Drounded( memory mem, regDPR1 src) %{ +instruct storeFPR_Drounded( memory mem, regDPR1 src) %{ predicate(UseSSE<=1); match(Set mem (StoreF mem (ConvD2F src))); ins_cost(100); format %{ "FST_S $mem,$src\t# D-round" %} opcode(0xD9); /* D9 /2 */ - ins_encode( enc_FP_store(mem,src) ); + ins_encode( enc_FPR_store(mem,src) ); ins_pipe( fpu_mem_reg ); %} // Store immediate Float value (it is faster than store from FPU register) +// The instruction usage is guarded by predicate in operand immFPR(). +instruct storeFPR_imm( memory mem, immFPR src) %{ + match(Set mem (StoreF mem src)); + + ins_cost(50); + format %{ "MOV $mem,$src\t# store float" %} + opcode(0xC7); /* C7 /0 */ + ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src )); + ins_pipe( ialu_mem_imm ); +%} + +// Store immediate Float value (it is faster than store from XMM register) // The instruction usage is guarded by predicate in operand immF(). instruct storeF_imm( memory mem, immF src) %{ match(Set mem (StoreF mem src)); @@ -7782,18 +7261,6 @@ ins_pipe( ialu_mem_imm ); %} -// Store immediate Float value (it is faster than store from XMM register) -// The instruction usage is guarded by predicate in operand immXF(). -instruct storeX_imm( memory mem, immXF src) %{ - match(Set mem (StoreF mem src)); - - ins_cost(50); - format %{ "MOV $mem,$src\t# store float" %} - opcode(0xC7); /* C7 /0 */ - ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32XF_as_bits( src )); - ins_pipe( ialu_mem_imm ); -%} - // Store Integer to stack slot instruct storeSSI(stackSlotI dst, eRegI src) %{ match(Set dst src); @@ -7901,6 +7368,16 @@ ins_pipe(empty); %} +instruct membar_storestore() %{ + match(MemBarStoreStore); + ins_cost(0); + + size(0); + format %{ "MEMBAR-storestore (empty encoding)" %} + ins_encode( ); + ins_pipe(empty); +%} + //----------Move Instructions-------------------------------------------------- instruct castX2P(eAXRegP dst, eAXRegI src) %{ match(Set dst (CastX2P src)); @@ -8088,29 +7565,29 @@ //%} // Conditional move -instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{ +instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{ predicate(UseSSE<=1); match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); ins_cost(200); format %{ "FCMOV$cop $dst,$src\t# double" %} opcode(0xDA); - ins_encode( enc_cmov_d(cop,src) ); - ins_pipe( pipe_cmovD_reg ); + ins_encode( enc_cmov_dpr(cop,src) ); + ins_pipe( pipe_cmovDPR_reg ); %} // Conditional move -instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{ +instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{ predicate(UseSSE==0); match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); ins_cost(200); format %{ "FCMOV$cop $dst,$src\t# float" %} opcode(0xDA); - ins_encode( enc_cmov_d(cop,src) ); - ins_pipe( pipe_cmovD_reg ); + ins_encode( enc_cmov_dpr(cop,src) ); + ins_pipe( pipe_cmovDPR_reg ); %} // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. -instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ +instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{ predicate(UseSSE<=1); match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); ins_cost(200); @@ -8118,12 +7595,12 @@ "MOV $dst,$src\t# double\n" "skip:" %} opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ - ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) ); - ins_pipe( pipe_cmovD_reg ); + ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) ); + ins_pipe( pipe_cmovDPR_reg ); %} // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. -instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ +instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{ predicate(UseSSE==0); match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); ins_cost(200); @@ -8131,12 +7608,12 @@ "MOV $dst,$src\t# float\n" "skip:" %} opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ - ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) ); - ins_pipe( pipe_cmovD_reg ); + ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) ); + ins_pipe( pipe_cmovDPR_reg ); %} // No CMOVE with SSE/SSE2 -instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{ +instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ predicate (UseSSE>=1); match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); ins_cost(200); @@ -8154,7 +7631,7 @@ %} // No CMOVE with SSE/SSE2 -instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{ +instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ predicate (UseSSE>=2); match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); ins_cost(200); @@ -8172,7 +7649,7 @@ %} // unsigned version -instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{ +instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{ predicate (UseSSE>=1); match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); ins_cost(200); @@ -8189,17 +7666,17 @@ ins_pipe( pipe_slow ); %} -instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{ +instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{ predicate (UseSSE>=1); match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); ins_cost(200); expand %{ - fcmovX_regU(cop, cr, dst, src); + fcmovF_regU(cop, cr, dst, src); %} %} // unsigned version -instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{ +instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{ predicate (UseSSE>=2); match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); ins_cost(200); @@ -8216,12 +7693,12 @@ ins_pipe( pipe_slow ); %} -instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{ +instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{ predicate (UseSSE>=2); match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); ins_cost(200); expand %{ - fcmovXD_regU(cop, cr, dst, src); + fcmovD_regU(cop, cr, dst, src); %} %} @@ -8440,7 +7917,7 @@ %} // LoadLong-locked - same as a volatile long load when used with compare-swap -instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{ +instruct loadLLocked(stackSlotL dst, memory mem) %{ predicate(UseSSE<=1); match(Set dst (LoadLLocked mem)); @@ -8451,18 +7928,21 @@ ins_pipe( fpu_reg_mem ); %} -instruct loadLX_Locked(stackSlotL dst, load_long_memory mem, regXD tmp) %{ +instruct loadLX_Locked(stackSlotL dst, memory mem, regD tmp) %{ predicate(UseSSE>=2); match(Set dst (LoadLLocked mem)); effect(TEMP tmp); ins_cost(180); format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" "MOVSD $dst,$tmp" %} - ins_encode(enc_loadLX_volatile(mem, dst, tmp)); - ins_pipe( pipe_slow ); -%} - -instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{ + ins_encode %{ + __ movdbl($tmp$$XMMRegister, $mem$$Address); + __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct loadLX_reg_Locked(eRegL dst, memory mem, regD tmp) %{ predicate(UseSSE>=2); match(Set dst (LoadLLocked mem)); effect(TEMP tmp); @@ -8471,7 +7951,12 @@ "MOVD $dst.lo,$tmp\n\t" "PSRLQ $tmp,32\n\t" "MOVD $dst.hi,$tmp" %} - ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp)); + ins_encode %{ + __ movdbl($tmp$$XMMRegister, $mem$$Address); + __ movdl($dst$$Register, $tmp$$XMMRegister); + __ psrlq($tmp$$XMMRegister, 32); + __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); + %} ins_pipe( pipe_slow ); %} @@ -10054,7 +9539,7 @@ // Compare & branch // P6 version of float compare, sets condition codes in EFLAGS -instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{ +instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ predicate(VM_Version::supports_cmov() && UseSSE <=1); match(Set cr (CmpD src1 src2)); effect(KILL rax); @@ -10066,26 +9551,26 @@ "SAHF\n" "exit:\tNOP // avoid branch to branch" %} opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ - ins_encode( Push_Reg_D(src1), + ins_encode( Push_Reg_DPR(src1), OpcP, RegOpc(src2), cmpF_P6_fixup ); ins_pipe( pipe_slow ); %} -instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{ +instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ predicate(VM_Version::supports_cmov() && UseSSE <=1); match(Set cr (CmpD src1 src2)); ins_cost(150); format %{ "FLD $src1\n\t" "FUCOMIP ST,$src2 // P6 instruction" %} opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ - ins_encode( Push_Reg_D(src1), + ins_encode( Push_Reg_DPR(src1), OpcP, RegOpc(src2)); ins_pipe( pipe_slow ); %} // Compare & branch -instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{ +instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ predicate(UseSSE<=1); match(Set cr (CmpD src1 src2)); effect(KILL rax); @@ -10098,138 +9583,140 @@ "MOV AH,1\t# unordered treat as LT\n" "flags:\tSAHF" %} opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ - ins_encode( Push_Reg_D(src1), + ins_encode( Push_Reg_DPR(src1), OpcP, RegOpc(src2), fpu_flags); ins_pipe( pipe_slow ); %} // Compare vs zero into -1,0,1 -instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{ +instruct cmpDPR_0(eRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ predicate(UseSSE<=1); match(Set dst (CmpD3 src1 zero)); effect(KILL cr, KILL rax); ins_cost(280); format %{ "FTSTD $dst,$src1" %} opcode(0xE4, 0xD9); - ins_encode( Push_Reg_D(src1), + ins_encode( Push_Reg_DPR(src1), OpcS, OpcP, PopFPU, CmpF_Result(dst)); ins_pipe( pipe_slow ); %} // Compare into -1,0,1 -instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{ +instruct cmpDPR_reg(eRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ predicate(UseSSE<=1); match(Set dst (CmpD3 src1 src2)); effect(KILL cr, KILL rax); ins_cost(300); format %{ "FCMPD $dst,$src1,$src2" %} opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ - ins_encode( Push_Reg_D(src1), + ins_encode( Push_Reg_DPR(src1), OpcP, RegOpc(src2), CmpF_Result(dst)); ins_pipe( pipe_slow ); %} // float compare and set condition codes in EFLAGS by XMM regs -instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI rax) %{ +instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ predicate(UseSSE>=2); - match(Set cr (CmpD dst src)); - effect(KILL rax); - ins_cost(125); - format %{ "COMISD $dst,$src\n" - "\tJNP exit\n" - "\tMOV ah,1 // saw a NaN, set CF\n" - "\tSAHF\n" - "exit:\tNOP // avoid branch to branch" %} - opcode(0x66, 0x0F, 0x2F); - ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup); - ins_pipe( pipe_slow ); -%} - -instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD dst, regXD src) %{ + match(Set cr (CmpD src1 src2)); + ins_cost(145); + format %{ "UCOMISD $src1,$src2\n\t" + "JNP,s exit\n\t" + "PUSHF\t# saw NaN, set CF\n\t" + "AND [rsp], #0xffffff2b\n\t" + "POPF\n" + "exit:" %} + ins_encode %{ + __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); + emit_cmpfp_fixup(_masm); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ predicate(UseSSE>=2); - match(Set cr (CmpD dst src)); + match(Set cr (CmpD src1 src2)); ins_cost(100); - format %{ "COMISD $dst,$src" %} - opcode(0x66, 0x0F, 0x2F); - ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); + format %{ "UCOMISD $src1,$src2" %} + ins_encode %{ + __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); + %} ins_pipe( pipe_slow ); %} // float compare and set condition codes in EFLAGS by XMM regs -instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{ +instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ predicate(UseSSE>=2); - match(Set cr (CmpD dst (LoadD src))); - effect(KILL rax); + match(Set cr (CmpD src1 (LoadD src2))); ins_cost(145); - format %{ "COMISD $dst,$src\n" - "\tJNP exit\n" - "\tMOV ah,1 // saw a NaN, set CF\n" - "\tSAHF\n" - "exit:\tNOP // avoid branch to branch" %} - opcode(0x66, 0x0F, 0x2F); - ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup); - ins_pipe( pipe_slow ); -%} - -instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD dst, memory src) %{ + format %{ "UCOMISD $src1,$src2\n\t" + "JNP,s exit\n\t" + "PUSHF\t# saw NaN, set CF\n\t" + "AND [rsp], #0xffffff2b\n\t" + "POPF\n" + "exit:" %} + ins_encode %{ + __ ucomisd($src1$$XMMRegister, $src2$$Address); + emit_cmpfp_fixup(_masm); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ predicate(UseSSE>=2); - match(Set cr (CmpD dst (LoadD src))); + match(Set cr (CmpD src1 (LoadD src2))); ins_cost(100); - format %{ "COMISD $dst,$src" %} - opcode(0x66, 0x0F, 0x2F); - ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src)); + format %{ "UCOMISD $src1,$src2" %} + ins_encode %{ + __ ucomisd($src1$$XMMRegister, $src2$$Address); + %} ins_pipe( pipe_slow ); %} // Compare into -1,0,1 in XMM -instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{ +instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ predicate(UseSSE>=2); match(Set dst (CmpD3 src1 src2)); effect(KILL cr); ins_cost(255); - format %{ "XOR $dst,$dst\n" - "\tCOMISD $src1,$src2\n" - "\tJP,s nan\n" - "\tJEQ,s exit\n" - "\tJA,s inc\n" - "nan:\tDEC $dst\n" - "\tJMP,s exit\n" - "inc:\tINC $dst\n" - "exit:" - %} - opcode(0x66, 0x0F, 0x2F); - ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2), - CmpX_Result(dst)); + format %{ "UCOMISD $src1, $src2\n\t" + "MOV $dst, #-1\n\t" + "JP,s done\n\t" + "JB,s done\n\t" + "SETNE $dst\n\t" + "MOVZB $dst, $dst\n" + "done:" %} + ins_encode %{ + __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); + emit_cmpfp3(_masm, $dst$$Register); + %} ins_pipe( pipe_slow ); %} // Compare into -1,0,1 in XMM and memory -instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{ +instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ predicate(UseSSE>=2); - match(Set dst (CmpD3 src1 (LoadD mem))); + match(Set dst (CmpD3 src1 (LoadD src2))); effect(KILL cr); ins_cost(275); - format %{ "COMISD $src1,$mem\n" - "\tMOV $dst,0\t\t# do not blow flags\n" - "\tJP,s nan\n" - "\tJEQ,s exit\n" - "\tJA,s inc\n" - "nan:\tDEC $dst\n" - "\tJMP,s exit\n" - "inc:\tINC $dst\n" - "exit:" - %} - opcode(0x66, 0x0F, 0x2F); - ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem), - LdImmI(dst,0x0), CmpX_Result(dst)); - ins_pipe( pipe_slow ); -%} - - -instruct subD_reg(regD dst, regD src) %{ + format %{ "UCOMISD $src1, $src2\n\t" + "MOV $dst, #-1\n\t" + "JP,s done\n\t" + "JB,s done\n\t" + "SETNE $dst\n\t" + "MOVZB $dst, $dst\n" + "done:" %} + ins_encode %{ + __ ucomisd($src1$$XMMRegister, $src2$$Address); + emit_cmpfp3(_masm, $dst$$Register); + %} + ins_pipe( pipe_slow ); +%} + + +instruct subDPR_reg(regDPR dst, regDPR src) %{ predicate (UseSSE <=1); match(Set dst (SubD dst src)); @@ -10237,12 +9724,12 @@ "DSUBp $dst,ST" %} opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ ins_cost(150); - ins_encode( Push_Reg_D(src), + ins_encode( Push_Reg_DPR(src), OpcP, RegOpc(dst) ); ins_pipe( fpu_reg_reg ); %} -instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{ +instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ predicate (UseSSE <=1); match(Set dst (RoundDouble (SubD src1 src2))); ins_cost(250); @@ -10251,13 +9738,13 @@ "DSUB ST,$src1\n\t" "FSTP_D $dst\t# D-round" %} opcode(0xD8, 0x5); - ins_encode( Push_Reg_D(src2), - OpcP, RegOpc(src1), Pop_Mem_D(dst) ); + ins_encode( Push_Reg_DPR(src2), + OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); ins_pipe( fpu_mem_reg_reg ); %} -instruct subD_reg_mem(regD dst, memory src) %{ +instruct subDPR_reg_mem(regDPR dst, memory src) %{ predicate (UseSSE <=1); match(Set dst (SubD dst (LoadD src))); ins_cost(150); @@ -10270,7 +9757,7 @@ ins_pipe( fpu_reg_mem ); %} -instruct absD_reg(regDPR1 dst, regDPR1 src) %{ +instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ predicate (UseSSE<=1); match(Set dst (AbsD src)); ins_cost(100); @@ -10280,15 +9767,7 @@ ins_pipe( fpu_reg_reg ); %} -instruct absXD_reg( regXD dst ) %{ - predicate(UseSSE>=2); - match(Set dst (AbsD dst)); - format %{ "ANDPD $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %} - ins_encode( AbsXD_encoding(dst)); - ins_pipe( pipe_slow ); -%} - -instruct negD_reg(regDPR1 dst, regDPR1 src) %{ +instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ predicate(UseSSE<=1); match(Set dst (NegD src)); ins_cost(100); @@ -10298,18 +9777,7 @@ ins_pipe( fpu_reg_reg ); %} -instruct negXD_reg( regXD dst ) %{ - predicate(UseSSE>=2); - match(Set dst (NegD dst)); - format %{ "XORPD $dst,[0x8000000000000000]\t# CHS D by sign flipping" %} - ins_encode %{ - __ xorpd($dst$$XMMRegister, - ExternalAddress((address)double_signflip_pool)); - %} - ins_pipe( pipe_slow ); -%} - -instruct addD_reg(regD dst, regD src) %{ +instruct addDPR_reg(regDPR dst, regDPR src) %{ predicate(UseSSE<=1); match(Set dst (AddD dst src)); format %{ "FLD $src\n\t" @@ -10317,13 +9785,13 @@ size(4); ins_cost(150); opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ - ins_encode( Push_Reg_D(src), + ins_encode( Push_Reg_DPR(src), OpcP, RegOpc(dst) ); ins_pipe( fpu_reg_reg ); %} -instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{ +instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ predicate(UseSSE<=1); match(Set dst (RoundDouble (AddD src1 src2))); ins_cost(250); @@ -10332,13 +9800,13 @@ "DADD ST,$src1\n\t" "FSTP_D $dst\t# D-round" %} opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ - ins_encode( Push_Reg_D(src2), - OpcP, RegOpc(src1), Pop_Mem_D(dst) ); + ins_encode( Push_Reg_DPR(src2), + OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); ins_pipe( fpu_mem_reg_reg ); %} -instruct addD_reg_mem(regD dst, memory src) %{ +instruct addDPR_reg_mem(regDPR dst, memory src) %{ predicate(UseSSE<=1); match(Set dst (AddD dst (LoadD src))); ins_cost(150); @@ -10352,7 +9820,7 @@ %} // add-to-memory -instruct addD_mem_reg(memory dst, regD src) %{ +instruct addDPR_mem_reg(memory dst, regDPR src) %{ predicate(UseSSE<=1); match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); ins_cost(150); @@ -10368,7 +9836,7 @@ ins_pipe( fpu_reg_mem ); %} -instruct addD_reg_imm1(regD dst, immD1 con) %{ +instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ predicate(UseSSE<=1); match(Set dst (AddD dst con)); ins_cost(125); @@ -10381,7 +9849,7 @@ ins_pipe(fpu_reg); %} -instruct addD_reg_imm(regD dst, immD con) %{ +instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); match(Set dst (AddD dst con)); ins_cost(200); @@ -10394,7 +9862,7 @@ ins_pipe(fpu_reg_mem); %} -instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{ +instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); match(Set dst (RoundDouble (AddD src con))); ins_cost(200); @@ -10409,124 +9877,14 @@ ins_pipe(fpu_mem_reg_con); %} -// Add two double precision floating point values in xmm -instruct addXD_reg(regXD dst, regXD src) %{ - predicate(UseSSE>=2); - match(Set dst (AddD dst src)); - format %{ "ADDSD $dst,$src" %} - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegReg(dst, src)); - ins_pipe( pipe_slow ); -%} - -instruct addXD_imm(regXD dst, immXD con) %{ - predicate(UseSSE>=2); - match(Set dst (AddD dst con)); - format %{ "ADDSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} - ins_encode %{ - __ addsd($dst$$XMMRegister, $constantaddress($con)); - %} - ins_pipe(pipe_slow); -%} - -instruct addXD_mem(regXD dst, memory mem) %{ - predicate(UseSSE>=2); - match(Set dst (AddD dst (LoadD mem))); - format %{ "ADDSD $dst,$mem" %} - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegMem(dst,mem)); - ins_pipe( pipe_slow ); -%} - -// Sub two double precision floating point values in xmm -instruct subXD_reg(regXD dst, regXD src) %{ - predicate(UseSSE>=2); - match(Set dst (SubD dst src)); - format %{ "SUBSD $dst,$src" %} - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src)); - ins_pipe( pipe_slow ); -%} - -instruct subXD_imm(regXD dst, immXD con) %{ - predicate(UseSSE>=2); - match(Set dst (SubD dst con)); - format %{ "SUBSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} - ins_encode %{ - __ subsd($dst$$XMMRegister, $constantaddress($con)); - %} - ins_pipe(pipe_slow); -%} - -instruct subXD_mem(regXD dst, memory mem) %{ - predicate(UseSSE>=2); - match(Set dst (SubD dst (LoadD mem))); - format %{ "SUBSD $dst,$mem" %} - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem)); - ins_pipe( pipe_slow ); -%} - -// Mul two double precision floating point values in xmm -instruct mulXD_reg(regXD dst, regXD src) %{ - predicate(UseSSE>=2); - match(Set dst (MulD dst src)); - format %{ "MULSD $dst,$src" %} - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src)); - ins_pipe( pipe_slow ); -%} - -instruct mulXD_imm(regXD dst, immXD con) %{ - predicate(UseSSE>=2); - match(Set dst (MulD dst con)); - format %{ "MULSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} - ins_encode %{ - __ mulsd($dst$$XMMRegister, $constantaddress($con)); - %} - ins_pipe(pipe_slow); -%} - -instruct mulXD_mem(regXD dst, memory mem) %{ - predicate(UseSSE>=2); - match(Set dst (MulD dst (LoadD mem))); - format %{ "MULSD $dst,$mem" %} - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem)); - ins_pipe( pipe_slow ); -%} - -// Div two double precision floating point values in xmm -instruct divXD_reg(regXD dst, regXD src) %{ - predicate(UseSSE>=2); - match(Set dst (DivD dst src)); - format %{ "DIVSD $dst,$src" %} - opcode(0xF2, 0x0F, 0x5E); - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src)); - ins_pipe( pipe_slow ); -%} - -instruct divXD_imm(regXD dst, immXD con) %{ - predicate(UseSSE>=2); - match(Set dst (DivD dst con)); - format %{ "DIVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} - ins_encode %{ - __ divsd($dst$$XMMRegister, $constantaddress($con)); - %} - ins_pipe(pipe_slow); -%} - -instruct divXD_mem(regXD dst, memory mem) %{ - predicate(UseSSE>=2); - match(Set dst (DivD dst (LoadD mem))); - format %{ "DIVSD $dst,$mem" %} - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem)); - ins_pipe( pipe_slow ); -%} - - -instruct mulD_reg(regD dst, regD src) %{ +instruct mulDPR_reg(regDPR dst, regDPR src) %{ predicate(UseSSE<=1); match(Set dst (MulD dst src)); format %{ "FLD $src\n\t" "DMULp $dst,ST" %} opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ ins_cost(150); - ins_encode( Push_Reg_D(src), + ins_encode( Push_Reg_DPR(src), OpcP, RegOpc(dst) ); ins_pipe( fpu_reg_reg ); %} @@ -10539,7 +9897,7 @@ // multiply scaled arg1 by arg2 // rescale product by 2^(15360) // -instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{ +instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); match(Set dst (MulD dst src)); ins_cost(1); // Select this instruction for all strict FP double multiplies @@ -10552,13 +9910,13 @@ "DMULp $dst,ST\n\t" %} opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ ins_encode( strictfp_bias1(dst), - Push_Reg_D(src), + Push_Reg_DPR(src), OpcP, RegOpc(dst), strictfp_bias2(dst) ); ins_pipe( fpu_reg_reg ); %} -instruct mulD_reg_imm(regD dst, immD con) %{ +instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); match(Set dst (MulD dst con)); ins_cost(200); @@ -10572,7 +9930,7 @@ %} -instruct mulD_reg_mem(regD dst, memory src) %{ +instruct mulDPR_reg_mem(regDPR dst, memory src) %{ predicate( UseSSE<=1 ); match(Set dst (MulD dst (LoadD src))); ins_cost(200); @@ -10586,7 +9944,7 @@ // // Cisc-alternate to reg-reg multiply -instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{ +instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{ predicate( UseSSE<=1 ); match(Set dst (MulD src (LoadD mem))); ins_cost(250); @@ -10595,17 +9953,17 @@ "FSTP_D $dst" %} opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), - OpcReg_F(src), - Pop_Reg_D(dst) ); + OpcReg_FPR(src), + Pop_Reg_DPR(dst) ); ins_pipe( fpu_reg_reg_mem ); %} -// MACRO3 -- addD a mulD +// MACRO3 -- addDPR a mulDPR // This instruction is a '2-address' instruction in that the result goes // back to src2. This eliminates a move from the macro; possibly the // register allocator will have to add it back (and maybe not). -instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{ +instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ predicate( UseSSE<=1 ); match(Set src2 (AddD (MulD src0 src1) src2)); format %{ "FLD $src0\t# ===MACRO3d===\n\t" @@ -10613,29 +9971,29 @@ "DADDp $src2,ST" %} ins_cost(250); opcode(0xDD); /* LoadD DD /0 */ - ins_encode( Push_Reg_F(src0), + ins_encode( Push_Reg_FPR(src0), FMul_ST_reg(src1), FAddP_reg_ST(src2) ); ins_pipe( fpu_reg_reg_reg ); %} -// MACRO3 -- subD a mulD -instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{ +// MACRO3 -- subDPR a mulDPR +instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ predicate( UseSSE<=1 ); match(Set src2 (SubD (MulD src0 src1) src2)); format %{ "FLD $src0\t# ===MACRO3d===\n\t" "DMUL ST,$src1\n\t" "DSUBRp $src2,ST" %} ins_cost(250); - ins_encode( Push_Reg_F(src0), + ins_encode( Push_Reg_FPR(src0), FMul_ST_reg(src1), Opcode(0xDE), Opc_plus(0xE0,src2)); ins_pipe( fpu_reg_reg_reg ); %} -instruct divD_reg(regD dst, regD src) %{ +instruct divDPR_reg(regDPR dst, regDPR src) %{ predicate( UseSSE<=1 ); match(Set dst (DivD dst src)); @@ -10643,7 +10001,7 @@ "FDIVp $dst,ST" %} opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ ins_cost(150); - ins_encode( Push_Reg_D(src), + ins_encode( Push_Reg_DPR(src), OpcP, RegOpc(dst) ); ins_pipe( fpu_reg_reg ); %} @@ -10656,7 +10014,7 @@ // divide scaled dividend by divisor // rescale quotient by 2^(15360) // -instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{ +instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{ predicate (UseSSE<=1); match(Set dst (DivD dst src)); predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); @@ -10670,13 +10028,13 @@ "DMULp $dst,ST\n\t" %} opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ ins_encode( strictfp_bias1(dst), - Push_Reg_D(src), + Push_Reg_DPR(src), OpcP, RegOpc(dst), strictfp_bias2(dst) ); ins_pipe( fpu_reg_reg ); %} -instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{ +instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) ); match(Set dst (RoundDouble (DivD src1 src2))); @@ -10684,27 +10042,27 @@ "FDIV ST,$src2\n\t" "FSTP_D $dst\t# D-round" %} opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ - ins_encode( Push_Reg_D(src1), - OpcP, RegOpc(src2), Pop_Mem_D(dst) ); + ins_encode( Push_Reg_DPR(src1), + OpcP, RegOpc(src2), Pop_Mem_DPR(dst) ); ins_pipe( fpu_mem_reg_reg ); %} -instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{ +instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{ predicate(UseSSE<=1); match(Set dst (ModD dst src)); - effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS + effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS format %{ "DMOD $dst,$src" %} ins_cost(250); - ins_encode(Push_Reg_Mod_D(dst, src), - emitModD(), - Push_Result_Mod_D(src), - Pop_Reg_D(dst)); - ins_pipe( pipe_slow ); -%} - -instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{ + ins_encode(Push_Reg_Mod_DPR(dst, src), + emitModDPR(), + Push_Result_Mod_DPR(src), + Pop_Reg_DPR(dst)); + ins_pipe( pipe_slow ); +%} + +instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{ predicate(UseSSE>=2); match(Set dst (ModD src0 src1)); effect(KILL rax, KILL cr); @@ -10725,11 +10083,11 @@ "\tFSTP ST0\t # Restore FPU Stack" %} ins_cost(250); - ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU); - ins_pipe( pipe_slow ); -%} - -instruct sinD_reg(regDPR1 dst, regDPR1 src) %{ + ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); + ins_pipe( pipe_slow ); +%} + +instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{ predicate (UseSSE<=1); match(Set dst (SinD src)); ins_cost(1800); @@ -10739,18 +10097,18 @@ ins_pipe( pipe_slow ); %} -instruct sinXD_reg(regXD dst, eFlagsReg cr) %{ +instruct sinD_reg(regD dst, eFlagsReg cr) %{ predicate (UseSSE>=2); match(Set dst (SinD dst)); - effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" + effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" ins_cost(1800); format %{ "DSIN $dst" %} opcode(0xD9, 0xFE); - ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); - ins_pipe( pipe_slow ); -%} - -instruct cosD_reg(regDPR1 dst, regDPR1 src) %{ + ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); + ins_pipe( pipe_slow ); +%} + +instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{ predicate (UseSSE<=1); match(Set dst (CosD src)); ins_cost(1800); @@ -10760,18 +10118,18 @@ ins_pipe( pipe_slow ); %} -instruct cosXD_reg(regXD dst, eFlagsReg cr) %{ +instruct cosD_reg(regD dst, eFlagsReg cr) %{ predicate (UseSSE>=2); match(Set dst (CosD dst)); - effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" + effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" ins_cost(1800); format %{ "DCOS $dst" %} opcode(0xD9, 0xFF); - ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); - ins_pipe( pipe_slow ); -%} - -instruct tanD_reg(regDPR1 dst, regDPR1 src) %{ + ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); + ins_pipe( pipe_slow ); +%} + +instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{ predicate (UseSSE<=1); match(Set dst(TanD src)); format %{ "DTAN $dst" %} @@ -10780,50 +10138,50 @@ ins_pipe( pipe_slow ); %} -instruct tanXD_reg(regXD dst, eFlagsReg cr) %{ +instruct tanD_reg(regD dst, eFlagsReg cr) %{ predicate (UseSSE>=2); match(Set dst(TanD dst)); - effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" + effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" format %{ "DTAN $dst" %} - ins_encode( Push_SrcXD(dst), + ins_encode( Push_SrcD(dst), Opcode(0xD9), Opcode(0xF2), // fptan Opcode(0xDD), Opcode(0xD8), // fstp st - Push_ResultXD(dst) ); - ins_pipe( pipe_slow ); -%} - -instruct atanD_reg(regD dst, regD src) %{ + Push_ResultD(dst) ); + ins_pipe( pipe_slow ); +%} + +instruct atanDPR_reg(regDPR dst, regDPR src) %{ predicate (UseSSE<=1); match(Set dst(AtanD dst src)); format %{ "DATA $dst,$src" %} opcode(0xD9, 0xF3); - ins_encode( Push_Reg_D(src), + ins_encode( Push_Reg_DPR(src), OpcP, OpcS, RegOpc(dst) ); ins_pipe( pipe_slow ); %} -instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{ +instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ predicate (UseSSE>=2); match(Set dst(AtanD dst src)); - effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" + effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" format %{ "DATA $dst,$src" %} opcode(0xD9, 0xF3); - ins_encode( Push_SrcXD(src), - OpcP, OpcS, Push_ResultXD(dst) ); - ins_pipe( pipe_slow ); -%} - -instruct sqrtD_reg(regD dst, regD src) %{ + ins_encode( Push_SrcD(src), + OpcP, OpcS, Push_ResultD(dst) ); + ins_pipe( pipe_slow ); +%} + +instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ predicate (UseSSE<=1); match(Set dst (SqrtD src)); format %{ "DSQRT $dst,$src" %} opcode(0xFA, 0xD9); - ins_encode( Push_Reg_D(src), - OpcS, OpcP, Pop_Reg_D(dst) ); - ins_pipe( pipe_slow ); -%} - -instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ + ins_encode( Push_Reg_DPR(src), + OpcS, OpcP, Pop_Reg_DPR(dst) ); + ins_pipe( pipe_slow ); +%} + +instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ predicate (UseSSE<=1); match(Set Y (PowD X Y)); // Raise X to the Yth power effect(KILL rax, KILL rbx, KILL rcx); @@ -10852,14 +10210,14 @@ "ADD ESP,8" %} ins_encode( push_stack_temp_qword, - Push_Reg_D(X), + Push_Reg_DPR(X), Opcode(0xD9), Opcode(0xF1), // fyl2x pow_exp_core_encoding, pop_stack_temp_qword); ins_pipe( pipe_slow ); %} -instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{ +instruct powD_reg(regD dst, regD src0, regD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{ predicate (UseSSE>=2); match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx ); @@ -10897,12 +10255,12 @@ push_xmm_to_fpr1(src0), Opcode(0xD9), Opcode(0xF1), // fyl2x pow_exp_core_encoding, - Push_ResultXD(dst) ); - ins_pipe( pipe_slow ); -%} - - -instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ + Push_ResultD(dst) ); + ins_pipe( pipe_slow ); +%} + + +instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ predicate (UseSSE<=1); match(Set dpr1 (ExpD dpr1)); effect(KILL rax, KILL rbx, KILL rcx); @@ -10938,7 +10296,7 @@ ins_pipe( pipe_slow ); %} -instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ +instruct expD_reg(regD dst, regD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ predicate (UseSSE>=2); match(Set dst (ExpD src)); effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx); @@ -10969,17 +10327,17 @@ "MOVSD $dst,[ESP]\n\t" "ADD ESP,8" %} - ins_encode( Push_SrcXD(src), + ins_encode( Push_SrcD(src), Opcode(0xD9), Opcode(0xEA), // fldl2e Opcode(0xDE), Opcode(0xC9), // fmulp pow_exp_core_encoding, - Push_ResultXD(dst) ); - ins_pipe( pipe_slow ); -%} - - - -instruct log10D_reg(regDPR1 dst, regDPR1 src) %{ + Push_ResultD(dst) ); + ins_pipe( pipe_slow ); +%} + + + +instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{ predicate (UseSSE<=1); // The source Double operand on FPU stack match(Set dst (Log10D src)); @@ -10997,7 +10355,7 @@ ins_pipe( pipe_slow ); %} -instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{ +instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{ predicate (UseSSE>=2); effect(KILL cr); match(Set dst (Log10D src)); @@ -11007,14 +10365,14 @@ "FYL2X \t\t\t# Q=Log10*Log_2(x)" %} ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 - Push_SrcXD(src), + Push_SrcD(src), Opcode(0xD9), Opcode(0xF1), // fyl2x - Push_ResultXD(dst)); - - ins_pipe( pipe_slow ); -%} - -instruct logD_reg(regDPR1 dst, regDPR1 src) %{ + Push_ResultD(dst)); + + ins_pipe( pipe_slow ); +%} + +instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{ predicate (UseSSE<=1); // The source Double operand on FPU stack match(Set dst (LogD src)); @@ -11032,7 +10390,7 @@ ins_pipe( pipe_slow ); %} -instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{ +instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{ predicate (UseSSE>=2); effect(KILL cr); // The source and result Double operands in XMM registers @@ -11043,9 +10401,9 @@ "FYL2X \t\t\t# Q=Log_e*Log_2(x)" %} ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 - Push_SrcXD(src), + Push_SrcD(src), Opcode(0xD9), Opcode(0xF1), // fyl2x - Push_ResultXD(dst)); + Push_ResultD(dst)); ins_pipe( pipe_slow ); %} @@ -11066,7 +10424,7 @@ // exit: // P6 version of float compare, sets condition codes in EFLAGS -instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{ +instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ predicate(VM_Version::supports_cmov() && UseSSE == 0); match(Set cr (CmpF src1 src2)); effect(KILL rax); @@ -11078,27 +10436,27 @@ "SAHF\n" "exit:\tNOP // avoid branch to branch" %} opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ - ins_encode( Push_Reg_D(src1), + ins_encode( Push_Reg_DPR(src1), OpcP, RegOpc(src2), cmpF_P6_fixup ); ins_pipe( pipe_slow ); %} -instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{ +instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ predicate(VM_Version::supports_cmov() && UseSSE == 0); match(Set cr (CmpF src1 src2)); ins_cost(100); format %{ "FLD $src1\n\t" "FUCOMIP ST,$src2 // P6 instruction" %} opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ - ins_encode( Push_Reg_D(src1), + ins_encode( Push_Reg_DPR(src1), OpcP, RegOpc(src2)); ins_pipe( pipe_slow ); %} // Compare & branch -instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{ +instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ predicate(UseSSE == 0); match(Set cr (CmpF src1 src2)); effect(KILL rax); @@ -11111,328 +10469,190 @@ "MOV AH,1\t# unordered treat as LT\n" "flags:\tSAHF" %} opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ - ins_encode( Push_Reg_D(src1), + ins_encode( Push_Reg_DPR(src1), OpcP, RegOpc(src2), fpu_flags); ins_pipe( pipe_slow ); %} // Compare vs zero into -1,0,1 -instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{ +instruct cmpFPR_0(eRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ predicate(UseSSE == 0); match(Set dst (CmpF3 src1 zero)); effect(KILL cr, KILL rax); ins_cost(280); format %{ "FTSTF $dst,$src1" %} opcode(0xE4, 0xD9); - ins_encode( Push_Reg_D(src1), + ins_encode( Push_Reg_DPR(src1), OpcS, OpcP, PopFPU, CmpF_Result(dst)); ins_pipe( pipe_slow ); %} // Compare into -1,0,1 -instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{ +instruct cmpFPR_reg(eRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ predicate(UseSSE == 0); match(Set dst (CmpF3 src1 src2)); effect(KILL cr, KILL rax); ins_cost(300); format %{ "FCMPF $dst,$src1,$src2" %} opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ - ins_encode( Push_Reg_D(src1), + ins_encode( Push_Reg_DPR(src1), OpcP, RegOpc(src2), CmpF_Result(dst)); ins_pipe( pipe_slow ); %} // float compare and set condition codes in EFLAGS by XMM regs -instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI rax) %{ +instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ predicate(UseSSE>=1); - match(Set cr (CmpF dst src)); - effect(KILL rax); + match(Set cr (CmpF src1 src2)); ins_cost(145); - format %{ "COMISS $dst,$src\n" - "\tJNP exit\n" - "\tMOV ah,1 // saw a NaN, set CF\n" - "\tSAHF\n" - "exit:\tNOP // avoid branch to branch" %} - opcode(0x0F, 0x2F); - ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup); - ins_pipe( pipe_slow ); -%} - -instruct cmpX_ccCF(eFlagsRegUCF cr, regX dst, regX src) %{ + format %{ "UCOMISS $src1,$src2\n\t" + "JNP,s exit\n\t" + "PUSHF\t# saw NaN, set CF\n\t" + "AND [rsp], #0xffffff2b\n\t" + "POPF\n" + "exit:" %} + ins_encode %{ + __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); + emit_cmpfp_fixup(_masm); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ predicate(UseSSE>=1); - match(Set cr (CmpF dst src)); + match(Set cr (CmpF src1 src2)); ins_cost(100); - format %{ "COMISS $dst,$src" %} - opcode(0x0F, 0x2F); - ins_encode(OpcP, OpcS, RegReg(dst, src)); + format %{ "UCOMISS $src1,$src2" %} + ins_encode %{ + __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); + %} ins_pipe( pipe_slow ); %} // float compare and set condition codes in EFLAGS by XMM regs -instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{ +instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ predicate(UseSSE>=1); - match(Set cr (CmpF dst (LoadF src))); - effect(KILL rax); + match(Set cr (CmpF src1 (LoadF src2))); ins_cost(165); - format %{ "COMISS $dst,$src\n" - "\tJNP exit\n" - "\tMOV ah,1 // saw a NaN, set CF\n" - "\tSAHF\n" - "exit:\tNOP // avoid branch to branch" %} - opcode(0x0F, 0x2F); - ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup); - ins_pipe( pipe_slow ); -%} - -instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX dst, memory src) %{ + format %{ "UCOMISS $src1,$src2\n\t" + "JNP,s exit\n\t" + "PUSHF\t# saw NaN, set CF\n\t" + "AND [rsp], #0xffffff2b\n\t" + "POPF\n" + "exit:" %} + ins_encode %{ + __ ucomiss($src1$$XMMRegister, $src2$$Address); + emit_cmpfp_fixup(_masm); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ predicate(UseSSE>=1); - match(Set cr (CmpF dst (LoadF src))); + match(Set cr (CmpF src1 (LoadF src2))); ins_cost(100); - format %{ "COMISS $dst,$src" %} - opcode(0x0F, 0x2F); - ins_encode(OpcP, OpcS, RegMem(dst, src)); + format %{ "UCOMISS $src1,$src2" %} + ins_encode %{ + __ ucomiss($src1$$XMMRegister, $src2$$Address); + %} ins_pipe( pipe_slow ); %} // Compare into -1,0,1 in XMM -instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{ +instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ predicate(UseSSE>=1); match(Set dst (CmpF3 src1 src2)); effect(KILL cr); ins_cost(255); - format %{ "XOR $dst,$dst\n" - "\tCOMISS $src1,$src2\n" - "\tJP,s nan\n" - "\tJEQ,s exit\n" - "\tJA,s inc\n" - "nan:\tDEC $dst\n" - "\tJMP,s exit\n" - "inc:\tINC $dst\n" - "exit:" - %} - opcode(0x0F, 0x2F); - ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), CmpX_Result(dst)); + format %{ "UCOMISS $src1, $src2\n\t" + "MOV $dst, #-1\n\t" + "JP,s done\n\t" + "JB,s done\n\t" + "SETNE $dst\n\t" + "MOVZB $dst, $dst\n" + "done:" %} + ins_encode %{ + __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); + emit_cmpfp3(_masm, $dst$$Register); + %} ins_pipe( pipe_slow ); %} // Compare into -1,0,1 in XMM and memory -instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{ +instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ predicate(UseSSE>=1); - match(Set dst (CmpF3 src1 (LoadF mem))); + match(Set dst (CmpF3 src1 (LoadF src2))); effect(KILL cr); ins_cost(275); - format %{ "COMISS $src1,$mem\n" - "\tMOV $dst,0\t\t# do not blow flags\n" - "\tJP,s nan\n" - "\tJEQ,s exit\n" - "\tJA,s inc\n" - "nan:\tDEC $dst\n" - "\tJMP,s exit\n" - "inc:\tINC $dst\n" - "exit:" - %} - opcode(0x0F, 0x2F); - ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst)); + format %{ "UCOMISS $src1, $src2\n\t" + "MOV $dst, #-1\n\t" + "JP,s done\n\t" + "JB,s done\n\t" + "SETNE $dst\n\t" + "MOVZB $dst, $dst\n" + "done:" %} + ins_encode %{ + __ ucomiss($src1$$XMMRegister, $src2$$Address); + emit_cmpfp3(_masm, $dst$$Register); + %} ins_pipe( pipe_slow ); %} // Spill to obtain 24-bit precision -instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{ +instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (SubF src1 src2)); format %{ "FSUB $dst,$src1 - $src2" %} opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ - ins_encode( Push_Reg_F(src1), - OpcReg_F(src2), - Pop_Mem_F(dst) ); + ins_encode( Push_Reg_FPR(src1), + OpcReg_FPR(src2), + Pop_Mem_FPR(dst) ); ins_pipe( fpu_mem_reg_reg ); %} // // This instruction does not round to 24-bits -instruct subF_reg(regF dst, regF src) %{ +instruct subFPR_reg(regFPR dst, regFPR src) %{ predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set dst (SubF dst src)); format %{ "FSUB $dst,$src" %} opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ - ins_encode( Push_Reg_F(src), + ins_encode( Push_Reg_FPR(src), OpcP, RegOpc(dst) ); ins_pipe( fpu_reg_reg ); %} // Spill to obtain 24-bit precision -instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{ +instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (AddF src1 src2)); format %{ "FADD $dst,$src1,$src2" %} opcode(0xD8, 0x0); /* D8 C0+i */ - ins_encode( Push_Reg_F(src2), - OpcReg_F(src1), - Pop_Mem_F(dst) ); + ins_encode( Push_Reg_FPR(src2), + OpcReg_FPR(src1), + Pop_Mem_FPR(dst) ); ins_pipe( fpu_mem_reg_reg ); %} // // This instruction does not round to 24-bits -instruct addF_reg(regF dst, regF src) %{ +instruct addFPR_reg(regFPR dst, regFPR src) %{ predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set dst (AddF dst src)); format %{ "FLD $src\n\t" "FADDp $dst,ST" %} opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ - ins_encode( Push_Reg_F(src), + ins_encode( Push_Reg_FPR(src), OpcP, RegOpc(dst) ); ins_pipe( fpu_reg_reg ); %} -// Add two single precision floating point values in xmm -instruct addX_reg(regX dst, regX src) %{ - predicate(UseSSE>=1); - match(Set dst (AddF dst src)); - format %{ "ADDSS $dst,$src" %} - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src)); - ins_pipe( pipe_slow ); -%} - -instruct addX_imm(regX dst, immXF con) %{ - predicate(UseSSE>=1); - match(Set dst (AddF dst con)); - format %{ "ADDSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} - ins_encode %{ - __ addss($dst$$XMMRegister, $constantaddress($con)); - %} - ins_pipe(pipe_slow); -%} - -instruct addX_mem(regX dst, memory mem) %{ - predicate(UseSSE>=1); - match(Set dst (AddF dst (LoadF mem))); - format %{ "ADDSS $dst,$mem" %} - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem)); - ins_pipe( pipe_slow ); -%} - -// Subtract two single precision floating point values in xmm -instruct subX_reg(regX dst, regX src) %{ - predicate(UseSSE>=1); - match(Set dst (SubF dst src)); - format %{ "SUBSS $dst,$src" %} - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src)); - ins_pipe( pipe_slow ); -%} - -instruct subX_imm(regX dst, immXF con) %{ - predicate(UseSSE>=1); - match(Set dst (SubF dst con)); - format %{ "SUBSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} - ins_encode %{ - __ subss($dst$$XMMRegister, $constantaddress($con)); - %} - ins_pipe(pipe_slow); -%} - -instruct subX_mem(regX dst, memory mem) %{ - predicate(UseSSE>=1); - match(Set dst (SubF dst (LoadF mem))); - format %{ "SUBSS $dst,$mem" %} - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem)); - ins_pipe( pipe_slow ); -%} - -// Multiply two single precision floating point values in xmm -instruct mulX_reg(regX dst, regX src) %{ - predicate(UseSSE>=1); - match(Set dst (MulF dst src)); - format %{ "MULSS $dst,$src" %} - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src)); - ins_pipe( pipe_slow ); -%} - -instruct mulX_imm(regX dst, immXF con) %{ - predicate(UseSSE>=1); - match(Set dst (MulF dst con)); - format %{ "MULSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} - ins_encode %{ - __ mulss($dst$$XMMRegister, $constantaddress($con)); - %} - ins_pipe(pipe_slow); -%} - -instruct mulX_mem(regX dst, memory mem) %{ - predicate(UseSSE>=1); - match(Set dst (MulF dst (LoadF mem))); - format %{ "MULSS $dst,$mem" %} - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem)); - ins_pipe( pipe_slow ); -%} - -// Divide two single precision floating point values in xmm -instruct divX_reg(regX dst, regX src) %{ - predicate(UseSSE>=1); - match(Set dst (DivF dst src)); - format %{ "DIVSS $dst,$src" %} - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src)); - ins_pipe( pipe_slow ); -%} - -instruct divX_imm(regX dst, immXF con) %{ - predicate(UseSSE>=1); - match(Set dst (DivF dst con)); - format %{ "DIVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} - ins_encode %{ - __ divss($dst$$XMMRegister, $constantaddress($con)); - %} - ins_pipe(pipe_slow); -%} - -instruct divX_mem(regX dst, memory mem) %{ - predicate(UseSSE>=1); - match(Set dst (DivF dst (LoadF mem))); - format %{ "DIVSS $dst,$mem" %} - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem)); - ins_pipe( pipe_slow ); -%} - -// Get the square root of a single precision floating point values in xmm -instruct sqrtX_reg(regX dst, regX src) %{ - predicate(UseSSE>=1); - match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); - format %{ "SQRTSS $dst,$src" %} - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegReg(dst, src)); - ins_pipe( pipe_slow ); -%} - -instruct sqrtX_mem(regX dst, memory mem) %{ - predicate(UseSSE>=1); - match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem))))); - format %{ "SQRTSS $dst,$mem" %} - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem)); - ins_pipe( pipe_slow ); -%} - -// Get the square root of a double precision floating point values in xmm -instruct sqrtXD_reg(regXD dst, regXD src) %{ - predicate(UseSSE>=2); - match(Set dst (SqrtD src)); - format %{ "SQRTSD $dst,$src" %} - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src)); - ins_pipe( pipe_slow ); -%} - -instruct sqrtXD_mem(regXD dst, memory mem) %{ - predicate(UseSSE>=2); - match(Set dst (SqrtD (LoadD mem))); - format %{ "SQRTSD $dst,$mem" %} - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem)); - ins_pipe( pipe_slow ); -%} - -instruct absF_reg(regFPR1 dst, regFPR1 src) %{ +instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ predicate(UseSSE==0); match(Set dst (AbsF src)); ins_cost(100); @@ -11442,15 +10662,7 @@ ins_pipe( fpu_reg_reg ); %} -instruct absX_reg(regX dst ) %{ - predicate(UseSSE>=1); - match(Set dst (AbsF dst)); - format %{ "ANDPS $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %} - ins_encode( AbsXF_encoding(dst)); - ins_pipe( pipe_slow ); -%} - -instruct negF_reg(regFPR1 dst, regFPR1 src) %{ +instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ predicate(UseSSE==0); match(Set dst (NegF src)); ins_cost(100); @@ -11460,17 +10672,9 @@ ins_pipe( fpu_reg_reg ); %} -instruct negX_reg( regX dst ) %{ - predicate(UseSSE>=1); - match(Set dst (NegF dst)); - format %{ "XORPS $dst,[0x80000000]\t# CHS F by sign flipping" %} - ins_encode( NegXF_encoding(dst)); - ins_pipe( pipe_slow ); -%} - -// Cisc-alternate to addF_reg +// Cisc-alternate to addFPR_reg // Spill to obtain 24-bit precision -instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{ +instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (AddF src1 (LoadF src2))); @@ -11479,14 +10683,14 @@ "FSTP_S $dst" %} opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), - OpcReg_F(src1), - Pop_Mem_F(dst) ); + OpcReg_FPR(src1), + Pop_Mem_FPR(dst) ); ins_pipe( fpu_mem_reg_mem ); %} // -// Cisc-alternate to addF_reg +// Cisc-alternate to addFPR_reg // This instruction does not round to 24-bits -instruct addF_reg_mem(regF dst, memory src) %{ +instruct addFPR_reg_mem(regFPR dst, memory src) %{ predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set dst (AddF dst (LoadF src))); @@ -11499,21 +10703,21 @@ // // Following two instructions for _222_mpegaudio // Spill to obtain 24-bit precision -instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{ +instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (AddF src1 src2)); format %{ "FADD $dst,$src1,$src2" %} opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), - OpcReg_F(src2), - Pop_Mem_F(dst) ); + OpcReg_FPR(src2), + Pop_Mem_FPR(dst) ); ins_pipe( fpu_mem_reg_mem ); %} // Cisc-spill variant // Spill to obtain 24-bit precision -instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ +instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (AddF src1 (LoadF src2))); @@ -11522,12 +10726,12 @@ ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), set_instruction_start, OpcP, RMopc_Mem(secondary,src1), - Pop_Mem_F(dst) ); + Pop_Mem_FPR(dst) ); ins_pipe( fpu_mem_mem_mem ); %} // Spill to obtain 24-bit precision -instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ +instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (AddF src1 src2)); @@ -11536,13 +10740,13 @@ ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), set_instruction_start, OpcP, RMopc_Mem(secondary,src1), - Pop_Mem_F(dst) ); + Pop_Mem_FPR(dst) ); ins_pipe( fpu_mem_mem_mem ); %} // Spill to obtain 24-bit precision -instruct addF24_reg_imm(stackSlotF dst, regF src, immF con) %{ +instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (AddF src con)); format %{ "FLD $src\n\t" @@ -11557,7 +10761,7 @@ %} // // This instruction does not round to 24-bits -instruct addF_reg_imm(regF dst, regF src, immF con) %{ +instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set dst (AddF src con)); format %{ "FLD $src\n\t" @@ -11572,7 +10776,7 @@ %} // Spill to obtain 24-bit precision -instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{ +instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (MulF src1 src2)); @@ -11580,14 +10784,14 @@ "FMUL $src2\n\t" "FSTP_S $dst" %} opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ - ins_encode( Push_Reg_F(src1), - OpcReg_F(src2), - Pop_Mem_F(dst) ); + ins_encode( Push_Reg_FPR(src1), + OpcReg_FPR(src2), + Pop_Mem_FPR(dst) ); ins_pipe( fpu_mem_reg_reg ); %} // // This instruction does not round to 24-bits -instruct mulF_reg(regF dst, regF src1, regF src2) %{ +instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set dst (MulF src1 src2)); @@ -11595,16 +10799,16 @@ "FMUL $src2\n\t" "FSTP_S $dst" %} opcode(0xD8, 0x1); /* D8 C8+i */ - ins_encode( Push_Reg_F(src2), - OpcReg_F(src1), - Pop_Reg_F(dst) ); + ins_encode( Push_Reg_FPR(src2), + OpcReg_FPR(src1), + Pop_Reg_FPR(dst) ); ins_pipe( fpu_reg_reg_reg ); %} // Spill to obtain 24-bit precision // Cisc-alternate to reg-reg multiply -instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{ +instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (MulF src1 (LoadF src2))); @@ -11613,27 +10817,27 @@ "FSTP_S $dst" %} opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), - OpcReg_F(src1), - Pop_Mem_F(dst) ); + OpcReg_FPR(src1), + Pop_Mem_FPR(dst) ); ins_pipe( fpu_mem_reg_mem ); %} // // This instruction does not round to 24-bits // Cisc-alternate to reg-reg multiply -instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ +instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set dst (MulF src1 (LoadF src2))); format %{ "FMUL $dst,$src1,$src2" %} opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), - OpcReg_F(src1), - Pop_Reg_F(dst) ); + OpcReg_FPR(src1), + Pop_Reg_FPR(dst) ); ins_pipe( fpu_reg_reg_mem ); %} // Spill to obtain 24-bit precision -instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ +instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (MulF src1 src2)); @@ -11642,12 +10846,12 @@ ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), set_instruction_start, OpcP, RMopc_Mem(secondary,src1), - Pop_Mem_F(dst) ); + Pop_Mem_FPR(dst) ); ins_pipe( fpu_mem_mem_mem ); %} // Spill to obtain 24-bit precision -instruct mulF24_reg_imm(stackSlotF dst, regF src, immF con) %{ +instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (MulF src con)); @@ -11663,7 +10867,7 @@ %} // // This instruction does not round to 24-bits -instruct mulF_reg_imm(regF dst, regF src, immF con) %{ +instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set dst (MulF src con)); @@ -11680,9 +10884,9 @@ // -// MACRO1 -- subsume unshared load into mulF +// MACRO1 -- subsume unshared load into mulFPR // This instruction does not round to 24-bits -instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{ +instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set dst (MulF (LoadF mem1) src)); @@ -11691,36 +10895,36 @@ "FSTP $dst" %} opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), - OpcReg_F(src), - Pop_Reg_F(dst) ); + OpcReg_FPR(src), + Pop_Reg_FPR(dst) ); ins_pipe( fpu_reg_reg_mem ); %} // -// MACRO2 -- addF a mulF which subsumed an unshared load +// MACRO2 -- addFPR a mulFPR which subsumed an unshared load // This instruction does not round to 24-bits -instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{ +instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); ins_cost(95); format %{ "FLD $mem1 ===MACRO2===\n\t" - "FMUL ST,$src1 subsume mulF left load\n\t" + "FMUL ST,$src1 subsume mulFPR left load\n\t" "FADD ST,$src2\n\t" "FSTP $dst" %} opcode(0xD9); /* LoadF D9 /0 */ ins_encode( OpcP, RMopc_Mem(0x00,mem1), FMul_ST_reg(src1), FAdd_ST_reg(src2), - Pop_Reg_F(dst) ); + Pop_Reg_FPR(dst) ); ins_pipe( fpu_reg_mem_reg_reg ); %} -// MACRO3 -- addF a mulF +// MACRO3 -- addFPR a mulFPR // This instruction does not round to 24-bits. It is a '2-address' // instruction in that the result goes back to src2. This eliminates // a move from the macro; possibly the register allocator will have // to add it back (and maybe not). -instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{ +instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{ predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set src2 (AddF (MulF src0 src1) src2)); @@ -11728,15 +10932,15 @@ "FMUL ST,$src1\n\t" "FADDP $src2,ST" %} opcode(0xD9); /* LoadF D9 /0 */ - ins_encode( Push_Reg_F(src0), + ins_encode( Push_Reg_FPR(src0), FMul_ST_reg(src1), FAddP_reg_ST(src2) ); ins_pipe( fpu_reg_reg_reg ); %} -// MACRO4 -- divF subF +// MACRO4 -- divFPR subFPR // This instruction does not round to 24-bits -instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{ +instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{ predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set dst (DivF (SubF src2 src1) src3)); @@ -11745,67 +10949,67 @@ "FDIV ST,$src3\n\t" "FSTP $dst" %} opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ - ins_encode( Push_Reg_F(src2), - subF_divF_encode(src1,src3), - Pop_Reg_F(dst) ); + ins_encode( Push_Reg_FPR(src2), + subFPR_divFPR_encode(src1,src3), + Pop_Reg_FPR(dst) ); ins_pipe( fpu_reg_reg_reg_reg ); %} // Spill to obtain 24-bit precision -instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{ +instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (DivF src1 src2)); format %{ "FDIV $dst,$src1,$src2" %} opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/ - ins_encode( Push_Reg_F(src1), - OpcReg_F(src2), - Pop_Mem_F(dst) ); + ins_encode( Push_Reg_FPR(src1), + OpcReg_FPR(src2), + Pop_Mem_FPR(dst) ); ins_pipe( fpu_mem_reg_reg ); %} // // This instruction does not round to 24-bits -instruct divF_reg(regF dst, regF src) %{ +instruct divFPR_reg(regFPR dst, regFPR src) %{ predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set dst (DivF dst src)); format %{ "FDIV $dst,$src" %} opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ - ins_encode( Push_Reg_F(src), + ins_encode( Push_Reg_FPR(src), OpcP, RegOpc(dst) ); ins_pipe( fpu_reg_reg ); %} // Spill to obtain 24-bit precision -instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{ +instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (ModF src1 src2)); - effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS + effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS format %{ "FMOD $dst,$src1,$src2" %} - ins_encode( Push_Reg_Mod_D(src1, src2), - emitModD(), - Push_Result_Mod_D(src2), - Pop_Mem_F(dst)); + ins_encode( Push_Reg_Mod_DPR(src1, src2), + emitModDPR(), + Push_Result_Mod_DPR(src2), + Pop_Mem_FPR(dst)); ins_pipe( pipe_slow ); %} // // This instruction does not round to 24-bits -instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{ +instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{ predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set dst (ModF dst src)); - effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS + effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS format %{ "FMOD $dst,$src" %} - ins_encode(Push_Reg_Mod_D(dst, src), - emitModD(), - Push_Result_Mod_D(src), - Pop_Reg_F(dst)); - ins_pipe( pipe_slow ); -%} - -instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{ + ins_encode(Push_Reg_Mod_DPR(dst, src), + emitModDPR(), + Push_Result_Mod_DPR(src), + Pop_Reg_FPR(dst)); + ins_pipe( pipe_slow ); +%} + +instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{ predicate(UseSSE>=1); match(Set dst (ModF src0 src1)); effect(KILL rax, KILL cr); @@ -11825,7 +11029,7 @@ "\tFSTP ST0\t # Restore FPU Stack" %} ins_cost(250); - ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU); + ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU); ins_pipe( pipe_slow ); %} @@ -11833,26 +11037,26 @@ //----------Arithmetic Conversion Instructions--------------------------------- // The conversions operations are all Alpha sorted. Please keep it that way! -instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{ +instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{ predicate(UseSSE==0); match(Set dst (RoundFloat src)); ins_cost(125); format %{ "FST_S $dst,$src\t# F-round" %} - ins_encode( Pop_Mem_Reg_F(dst, src) ); + ins_encode( Pop_Mem_Reg_FPR(dst, src) ); ins_pipe( fpu_mem_reg ); %} -instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{ +instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{ predicate(UseSSE<=1); match(Set dst (RoundDouble src)); ins_cost(125); format %{ "FST_D $dst,$src\t# D-round" %} - ins_encode( Pop_Mem_Reg_D(dst, src) ); + ins_encode( Pop_Mem_Reg_DPR(dst, src) ); ins_pipe( fpu_mem_reg ); %} // Force rounding to 24-bit precision and 6-bit exponent -instruct convD2F_reg(stackSlotF dst, regD src) %{ +instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{ predicate(UseSSE==0); match(Set dst (ConvD2F src)); format %{ "FST_S $dst,$src\t# F-round" %} @@ -11862,7 +11066,7 @@ %} // Force rounding to 24-bit precision and 6-bit exponent -instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{ +instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{ predicate(UseSSE==1); match(Set dst (ConvD2F src)); effect( KILL cr ); @@ -11870,29 +11074,40 @@ "FST_S [ESP],$src\t# F-round\n\t" "MOVSS $dst,[ESP]\n\t" "ADD ESP,4" %} - ins_encode( D2X_encoding(dst, src) ); + ins_encode %{ + __ subptr(rsp, 4); + if ($src$$reg != FPR1L_enc) { + __ fld_s($src$$reg-1); + __ fstp_s(Address(rsp, 0)); + } else { + __ fst_s(Address(rsp, 0)); + } + __ movflt($dst$$XMMRegister, Address(rsp, 0)); + __ addptr(rsp, 4); + %} ins_pipe( pipe_slow ); %} // Force rounding double precision to single precision -instruct convXD2X_reg(regX dst, regXD src) %{ +instruct convD2F_reg(regF dst, regD src) %{ predicate(UseSSE>=2); match(Set dst (ConvD2F src)); format %{ "CVTSD2SS $dst,$src\t# F-round" %} - opcode(0xF2, 0x0F, 0x5A); - ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); - ins_pipe( pipe_slow ); -%} - -instruct convF2D_reg_reg(regD dst, regF src) %{ + ins_encode %{ + __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{ predicate(UseSSE==0); match(Set dst (ConvF2D src)); format %{ "FST_S $dst,$src\t# D-round" %} - ins_encode( Pop_Reg_Reg_D(dst, src)); + ins_encode( Pop_Reg_Reg_DPR(dst, src)); ins_pipe( fpu_reg_reg ); %} -instruct convF2D_reg(stackSlotD dst, regF src) %{ +instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{ predicate(UseSSE==1); match(Set dst (ConvF2D src)); format %{ "FST_D $dst,$src\t# D-round" %} @@ -11901,7 +11116,7 @@ %} %} -instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{ +instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{ predicate(UseSSE==1); match(Set dst (ConvF2D src)); effect( KILL cr ); @@ -11910,21 +11125,28 @@ "FLD_S [ESP]\n\t" "ADD ESP,4\n\t" "FSTP $dst\t# D-round" %} - ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst)); - ins_pipe( pipe_slow ); -%} - -instruct convX2XD_reg(regXD dst, regX src) %{ + ins_encode %{ + __ subptr(rsp, 4); + __ movflt(Address(rsp, 0), $src$$XMMRegister); + __ fld_s(Address(rsp, 0)); + __ addptr(rsp, 4); + __ fstp_d($dst$$reg); + %} + ins_pipe( pipe_slow ); +%} + +instruct convF2D_reg(regD dst, regF src) %{ predicate(UseSSE>=2); match(Set dst (ConvF2D src)); format %{ "CVTSS2SD $dst,$src\t# D-round" %} - opcode(0xF3, 0x0F, 0x5A); - ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); + ins_encode %{ + __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister); + %} ins_pipe( pipe_slow ); %} // Convert a double to an int. If the double is a NAN, stuff a zero in instead. -instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ +instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{ predicate(UseSSE<=1); match(Set dst (ConvD2I src)); effect( KILL tmp, KILL cr ); @@ -11939,12 +11161,12 @@ "FLD_D $src\n\t" "CALL d2i_wrapper\n" "fast:" %} - ins_encode( Push_Reg_D(src), D2I_encoding(src) ); + ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) ); ins_pipe( pipe_slow ); %} // Convert a double to an int. If the double is a NAN, stuff a zero in instead. -instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{ +instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ predicate(UseSSE>=2); match(Set dst (ConvD2I src)); effect( KILL tmp, KILL cr ); @@ -11957,12 +11179,22 @@ "ADD ESP, 8\n\t" "CALL d2i_wrapper\n" "fast:" %} - opcode(0x1); // double-precision conversion - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst)); - ins_pipe( pipe_slow ); -%} - -instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ + ins_encode %{ + Label fast; + __ cvttsd2sil($dst$$Register, $src$$XMMRegister); + __ cmpl($dst$$Register, 0x80000000); + __ jccb(Assembler::notEqual, fast); + __ subptr(rsp, 8); + __ movdbl(Address(rsp, 0), $src$$XMMRegister); + __ fld_d(Address(rsp, 0)); + __ addptr(rsp, 8); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper()))); + __ bind(fast); + %} + ins_pipe( pipe_slow ); +%} + +instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{ predicate(UseSSE<=1); match(Set dst (ConvD2L src)); effect( KILL cr ); @@ -11980,12 +11212,12 @@ "FLD $src\n\t" "CALL d2l_wrapper\n" "fast:" %} - ins_encode( Push_Reg_D(src), D2L_encoding(src) ); + ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) ); ins_pipe( pipe_slow ); %} // XMM lacks a float/double->long conversion, so use the old FPU stack. -instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{ +instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ predicate (UseSSE>=2); match(Set dst (ConvD2L src)); effect( KILL cr ); @@ -12004,9 +11236,36 @@ "SUB ESP,8\n\t" "MOVSD [ESP],$src\n\t" "FLD_D [ESP]\n\t" + "ADD ESP,8\n\t" "CALL d2l_wrapper\n" "fast:" %} - ins_encode( XD2L_encoding(src) ); + ins_encode %{ + Label fast; + __ subptr(rsp, 8); + __ movdbl(Address(rsp, 0), $src$$XMMRegister); + __ fld_d(Address(rsp, 0)); + __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc())); + __ fistp_d(Address(rsp, 0)); + // Restore the rounding mode, mask the exception + if (Compile::current()->in_24_bit_fp_mode()) { + __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); + } else { + __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); + } + // Load the converted long, adjust CPU stack + __ pop(rax); + __ pop(rdx); + __ cmpl(rdx, 0x80000000); + __ jccb(Assembler::notEqual, fast); + __ testl(rax, rax); + __ jccb(Assembler::notEqual, fast); + __ subptr(rsp, 8); + __ movdbl(Address(rsp, 0), $src$$XMMRegister); + __ fld_d(Address(rsp, 0)); + __ addptr(rsp, 8); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper()))); + __ bind(fast); + %} ins_pipe( pipe_slow ); %} @@ -12016,7 +11275,7 @@ // rounding mode to 'nearest'. The hardware stores a flag value down // if we would overflow or converted a NAN; we check for this and // and go the slow path if needed. -instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ +instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{ predicate(UseSSE==0); match(Set dst (ConvF2I src)); effect( KILL tmp, KILL cr ); @@ -12031,13 +11290,13 @@ "FLD $src\n\t" "CALL d2i_wrapper\n" "fast:" %} - // D2I_encoding works for F2I - ins_encode( Push_Reg_F(src), D2I_encoding(src) ); + // DPR2I_encoding works for FPR2I + ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) ); ins_pipe( pipe_slow ); %} // Convert a float in xmm to an int reg. -instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{ +instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ predicate(UseSSE>=1); match(Set dst (ConvF2I src)); effect( KILL tmp, KILL cr ); @@ -12050,12 +11309,22 @@ "ADD ESP, 4\n\t" "CALL d2i_wrapper\n" "fast:" %} - opcode(0x0); // single-precision conversion - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst)); - ins_pipe( pipe_slow ); -%} - -instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ + ins_encode %{ + Label fast; + __ cvttss2sil($dst$$Register, $src$$XMMRegister); + __ cmpl($dst$$Register, 0x80000000); + __ jccb(Assembler::notEqual, fast); + __ subptr(rsp, 4); + __ movflt(Address(rsp, 0), $src$$XMMRegister); + __ fld_s(Address(rsp, 0)); + __ addptr(rsp, 4); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper()))); + __ bind(fast); + %} + ins_pipe( pipe_slow ); +%} + +instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{ predicate(UseSSE==0); match(Set dst (ConvF2L src)); effect( KILL cr ); @@ -12073,13 +11342,13 @@ "FLD $src\n\t" "CALL d2l_wrapper\n" "fast:" %} - // D2L_encoding works for F2L - ins_encode( Push_Reg_F(src), D2L_encoding(src) ); + // DPR2L_encoding works for FPR2L + ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) ); ins_pipe( pipe_slow ); %} // XMM lacks a float/double->long conversion, so use the old FPU stack. -instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{ +instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ predicate (UseSSE>=1); match(Set dst (ConvF2L src)); effect( KILL cr ); @@ -12101,39 +11370,67 @@ "ADD ESP,4\n\t" "CALL d2l_wrapper\n" "fast:" %} - ins_encode( X2L_encoding(src) ); - ins_pipe( pipe_slow ); -%} - -instruct convI2D_reg(regD dst, stackSlotI src) %{ + ins_encode %{ + Label fast; + __ subptr(rsp, 8); + __ movflt(Address(rsp, 0), $src$$XMMRegister); + __ fld_s(Address(rsp, 0)); + __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc())); + __ fistp_d(Address(rsp, 0)); + // Restore the rounding mode, mask the exception + if (Compile::current()->in_24_bit_fp_mode()) { + __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); + } else { + __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); + } + // Load the converted long, adjust CPU stack + __ pop(rax); + __ pop(rdx); + __ cmpl(rdx, 0x80000000); + __ jccb(Assembler::notEqual, fast); + __ testl(rax, rax); + __ jccb(Assembler::notEqual, fast); + __ subptr(rsp, 4); + __ movflt(Address(rsp, 0), $src$$XMMRegister); + __ fld_s(Address(rsp, 0)); + __ addptr(rsp, 4); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper()))); + __ bind(fast); + %} + ins_pipe( pipe_slow ); +%} + +instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{ predicate( UseSSE<=1 ); match(Set dst (ConvI2D src)); format %{ "FILD $src\n\t" "FSTP $dst" %} opcode(0xDB, 0x0); /* DB /0 */ - ins_encode(Push_Mem_I(src), Pop_Reg_D(dst)); + ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst)); ins_pipe( fpu_reg_mem ); %} -instruct convI2XD_reg(regXD dst, eRegI src) %{ +instruct convI2D_reg(regD dst, eRegI src) %{ predicate( UseSSE>=2 && !UseXmmI2D ); match(Set dst (ConvI2D src)); format %{ "CVTSI2SD $dst,$src" %} - opcode(0xF2, 0x0F, 0x2A); - ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); - ins_pipe( pipe_slow ); -%} - -instruct convI2XD_mem(regXD dst, memory mem) %{ + ins_encode %{ + __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct convI2D_mem(regD dst, memory mem) %{ predicate( UseSSE>=2 ); match(Set dst (ConvI2D (LoadI mem))); format %{ "CVTSI2SD $dst,$mem" %} - opcode(0xF2, 0x0F, 0x2A); - ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem)); - ins_pipe( pipe_slow ); -%} - -instruct convXI2XD_reg(regXD dst, eRegI src) + ins_encode %{ + __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address); + %} + ins_pipe( pipe_slow ); +%} + +instruct convXI2D_reg(regD dst, eRegI src) %{ predicate( UseSSE>=2 && UseXmmI2D ); match(Set dst (ConvI2D src)); @@ -12147,31 +11444,31 @@ ins_pipe(pipe_slow); // XXX %} -instruct convI2D_mem(regD dst, memory mem) %{ +instruct convI2DPR_mem(regDPR dst, memory mem) %{ predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); match(Set dst (ConvI2D (LoadI mem))); format %{ "FILD $mem\n\t" "FSTP $dst" %} opcode(0xDB); /* DB /0 */ ins_encode( OpcP, RMopc_Mem(0x00,mem), - Pop_Reg_D(dst)); + Pop_Reg_DPR(dst)); ins_pipe( fpu_reg_mem ); %} // Convert a byte to a float; no rounding step needed. -instruct conv24I2F_reg(regF dst, stackSlotI src) %{ +instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{ predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 ); match(Set dst (ConvI2F src)); format %{ "FILD $src\n\t" "FSTP $dst" %} opcode(0xDB, 0x0); /* DB /0 */ - ins_encode(Push_Mem_I(src), Pop_Reg_F(dst)); + ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst)); ins_pipe( fpu_reg_mem ); %} // In 24-bit mode, force exponent rounding by storing back out -instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{ +instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{ predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (ConvI2F src)); ins_cost(200); @@ -12179,12 +11476,12 @@ "FSTP_S $dst" %} opcode(0xDB, 0x0); /* DB /0 */ ins_encode( Push_Mem_I(src), - Pop_Mem_F(dst)); + Pop_Mem_FPR(dst)); ins_pipe( fpu_mem_mem ); %} // In 24-bit mode, force exponent rounding by storing back out -instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{ +instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{ predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (ConvI2F (LoadI mem))); ins_cost(200); @@ -12192,46 +11489,46 @@ "FSTP_S $dst" %} opcode(0xDB); /* DB /0 */ ins_encode( OpcP, RMopc_Mem(0x00,mem), - Pop_Mem_F(dst)); + Pop_Mem_FPR(dst)); ins_pipe( fpu_mem_mem ); %} // This instruction does not round to 24-bits -instruct convI2F_reg(regF dst, stackSlotI src) %{ +instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{ predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set dst (ConvI2F src)); format %{ "FILD $src\n\t" "FSTP $dst" %} opcode(0xDB, 0x0); /* DB /0 */ ins_encode( Push_Mem_I(src), - Pop_Reg_F(dst)); + Pop_Reg_FPR(dst)); ins_pipe( fpu_reg_mem ); %} // This instruction does not round to 24-bits -instruct convI2F_mem(regF dst, memory mem) %{ +instruct convI2FPR_mem(regFPR dst, memory mem) %{ predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set dst (ConvI2F (LoadI mem))); format %{ "FILD $mem\n\t" "FSTP $dst" %} opcode(0xDB); /* DB /0 */ ins_encode( OpcP, RMopc_Mem(0x00,mem), - Pop_Reg_F(dst)); + Pop_Reg_FPR(dst)); ins_pipe( fpu_reg_mem ); %} // Convert an int to a float in xmm; no rounding step needed. -instruct convI2X_reg(regX dst, eRegI src) %{ +instruct convI2F_reg(regF dst, eRegI src) %{ predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F ); match(Set dst (ConvI2F src)); format %{ "CVTSI2SS $dst, $src" %} - - opcode(0xF3, 0x0F, 0x2A); /* F3 0F 2A /r */ - ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); - ins_pipe( pipe_slow ); -%} - - instruct convXI2X_reg(regX dst, eRegI src) + ins_encode %{ + __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register); + %} + ins_pipe( pipe_slow ); +%} + + instruct convXI2F_reg(regF dst, eRegI src) %{ predicate( UseSSE>=2 && UseXmmI2F ); match(Set dst (ConvI2F src)); @@ -12280,7 +11577,7 @@ ins_pipe( ialu_reg_reg_long ); %} -instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{ +instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{ predicate (UseSSE<=1); match(Set dst (ConvL2D src)); effect( KILL cr ); @@ -12290,11 +11587,11 @@ "ADD ESP,8\n\t" "FSTP_D $dst\t# D-round" %} opcode(0xDF, 0x5); /* DF /5 */ - ins_encode(convert_long_double(src), Pop_Mem_D(dst)); - ins_pipe( pipe_slow ); -%} - -instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{ + ins_encode(convert_long_double(src), Pop_Mem_DPR(dst)); + ins_pipe( pipe_slow ); +%} + +instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{ predicate (UseSSE>=2); match(Set dst (ConvL2D src)); effect( KILL cr ); @@ -12305,11 +11602,11 @@ "MOVSD $dst,[ESP]\n\t" "ADD ESP,8" %} opcode(0xDF, 0x5); /* DF /5 */ - ins_encode(convert_long_double2(src), Push_ResultXD(dst)); - ins_pipe( pipe_slow ); -%} - -instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{ + ins_encode(convert_long_double2(src), Push_ResultD(dst)); + ins_pipe( pipe_slow ); +%} + +instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{ predicate (UseSSE>=1); match(Set dst (ConvL2F src)); effect( KILL cr ); @@ -12320,11 +11617,11 @@ "MOVSS $dst,[ESP]\n\t" "ADD ESP,8" %} opcode(0xDF, 0x5); /* DF /5 */ - ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8)); - ins_pipe( pipe_slow ); -%} - -instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{ + ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8)); + ins_pipe( pipe_slow ); +%} + +instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{ match(Set dst (ConvL2F src)); effect( KILL cr ); format %{ "PUSH $src.hi\t# Convert long to single float\n\t" @@ -12333,7 +11630,7 @@ "ADD ESP,8\n\t" "FSTP_S $dst\t# F-round" %} opcode(0xDF, 0x5); /* DF /5 */ - ins_encode(convert_long_double(src), Pop_Mem_F(dst)); + ins_encode(convert_long_double(src), Pop_Mem_FPR(dst)); ins_pipe( pipe_slow ); %} @@ -12351,40 +11648,45 @@ effect( DEF dst, USE src ); ins_cost(100); format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %} - opcode(0x8B); - ins_encode( OpcP, RegMem(dst,src)); + ins_encode %{ + __ movl($dst$$Register, Address(rsp, $src$$disp)); + %} ins_pipe( ialu_reg_mem ); %} -instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{ +instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{ predicate(UseSSE==0); match(Set dst (MoveF2I src)); effect( DEF dst, USE src ); ins_cost(125); format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %} - ins_encode( Pop_Mem_Reg_F(dst, src) ); + ins_encode( Pop_Mem_Reg_FPR(dst, src) ); ins_pipe( fpu_mem_reg ); %} -instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{ +instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{ predicate(UseSSE>=1); match(Set dst (MoveF2I src)); effect( DEF dst, USE src ); ins_cost(95); format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %} - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst)); - ins_pipe( pipe_slow ); -%} - -instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{ + ins_encode %{ + __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveF2I_reg_reg_sse(eRegI dst, regF src) %{ predicate(UseSSE>=2); match(Set dst (MoveF2I src)); effect( DEF dst, USE src ); ins_cost(85); format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %} - ins_encode( MovX2I_reg(dst, src)); + ins_encode %{ + __ movdl($dst$$Register, $src$$XMMRegister); + %} ins_pipe( pipe_slow ); %} @@ -12394,13 +11696,14 @@ ins_cost(100); format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %} - opcode(0x89); - ins_encode( OpcPRegSS( dst, src ) ); + ins_encode %{ + __ movl(Address(rsp, $dst$$disp), $src$$Register); + %} ins_pipe( ialu_mem_reg ); %} -instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{ +instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ predicate(UseSSE==0); match(Set dst (MoveI2F src)); effect(DEF dst, USE src); @@ -12410,29 +11713,33 @@ "FSTP $dst\t# MoveI2F_stack_reg" %} opcode(0xD9); /* D9 /0, FLD m32real */ ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), - Pop_Reg_F(dst) ); + Pop_Reg_FPR(dst) ); ins_pipe( fpu_reg_mem ); %} -instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{ +instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ predicate(UseSSE>=1); match(Set dst (MoveI2F src)); effect( DEF dst, USE src ); ins_cost(95); format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src)); - ins_pipe( pipe_slow ); -%} - -instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{ + ins_encode %{ + __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveI2F_reg_reg_sse(regF dst, eRegI src) %{ predicate(UseSSE>=2); match(Set dst (MoveI2F src)); effect( DEF dst, USE src ); ins_cost(85); format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} - ins_encode( MovI2X_reg(dst, src) ); + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + %} ins_pipe( pipe_slow ); %} @@ -12448,29 +11755,30 @@ ins_pipe( ialu_mem_long_reg ); %} -instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{ +instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ predicate(UseSSE<=1); match(Set dst (MoveD2L src)); effect(DEF dst, USE src); ins_cost(125); format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} - ins_encode( Pop_Mem_Reg_D(dst, src) ); + ins_encode( Pop_Mem_Reg_DPR(dst, src) ); ins_pipe( fpu_mem_reg ); %} -instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{ +instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ predicate(UseSSE>=2); match(Set dst (MoveD2L src)); effect(DEF dst, USE src); ins_cost(95); - format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst)); - ins_pipe( pipe_slow ); -%} - -instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{ + ins_encode %{ + __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ predicate(UseSSE>=2); match(Set dst (MoveD2L src)); effect(DEF dst, USE src, TEMP tmp); @@ -12478,7 +11786,11 @@ format %{ "MOVD $dst.lo,$src\n\t" "PSHUFLW $tmp,$src,0x4E\n\t" "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} - ins_encode( MovXD2L_reg(dst, src, tmp) ); + ins_encode %{ + __ movdl($dst$$Register, $src$$XMMRegister); + __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); + __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); + %} ins_pipe( pipe_slow ); %} @@ -12495,7 +11807,7 @@ %} -instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{ +instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ predicate(UseSSE<=1); match(Set dst (MoveL2D src)); effect(DEF dst, USE src); @@ -12505,34 +11817,38 @@ "FSTP $dst\t# MoveL2D_stack_reg" %} opcode(0xDD); /* DD /0, FLD m64real */ ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), - Pop_Reg_D(dst) ); + Pop_Reg_DPR(dst) ); ins_pipe( fpu_reg_mem ); %} -instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{ +instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); match(Set dst (MoveL2D src)); effect(DEF dst, USE src); ins_cost(95); format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src)); - ins_pipe( pipe_slow ); -%} - -instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{ + ins_encode %{ + __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); match(Set dst (MoveL2D src)); effect(DEF dst, USE src); ins_cost(95); format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} - ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,src)); - ins_pipe( pipe_slow ); -%} - -instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{ + ins_encode %{ + __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ predicate(UseSSE>=2); match(Set dst (MoveL2D src)); effect(TEMP dst, USE src, TEMP tmp); @@ -12540,149 +11856,192 @@ format %{ "MOVD $dst,$src.lo\n\t" "MOVD $tmp,$src.hi\n\t" "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} - ins_encode( MovL2XD_reg(dst, src, tmp) ); + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); + __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); + %} ins_pipe( pipe_slow ); %} // Replicate scalar to packed byte (1 byte) values in xmm -instruct Repl8B_reg(regXD dst, regXD src) %{ +instruct Repl8B_reg(regD dst, regD src) %{ predicate(UseSSE>=2); match(Set dst (Replicate8B src)); format %{ "MOVDQA $dst,$src\n\t" "PUNPCKLBW $dst,$dst\n\t" "PSHUFLW $dst,$dst,0x00\t! replicate8B" %} - ins_encode( pshufd_8x8(dst, src)); + ins_encode %{ + if ($dst$$reg != $src$$reg) { + __ movdqa($dst$$XMMRegister, $src$$XMMRegister); + } + __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} ins_pipe( pipe_slow ); %} // Replicate scalar to packed byte (1 byte) values in xmm -instruct Repl8B_eRegI(regXD dst, eRegI src) %{ +instruct Repl8B_eRegI(regD dst, eRegI src) %{ predicate(UseSSE>=2); match(Set dst (Replicate8B src)); format %{ "MOVD $dst,$src\n\t" "PUNPCKLBW $dst,$dst\n\t" "PSHUFLW $dst,$dst,0x00\t! replicate8B" %} - ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst)); + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} ins_pipe( pipe_slow ); %} // Replicate scalar zero to packed byte (1 byte) values in xmm -instruct Repl8B_immI0(regXD dst, immI0 zero) %{ +instruct Repl8B_immI0(regD dst, immI0 zero) %{ predicate(UseSSE>=2); match(Set dst (Replicate8B zero)); format %{ "PXOR $dst,$dst\t! replicate8B" %} - ins_encode( pxor(dst, dst)); + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} ins_pipe( fpu_reg_reg ); %} // Replicate scalar to packed shore (2 byte) values in xmm -instruct Repl4S_reg(regXD dst, regXD src) %{ +instruct Repl4S_reg(regD dst, regD src) %{ predicate(UseSSE>=2); match(Set dst (Replicate4S src)); format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %} - ins_encode( pshufd_4x16(dst, src)); + ins_encode %{ + __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00); + %} ins_pipe( fpu_reg_reg ); %} // Replicate scalar to packed shore (2 byte) values in xmm -instruct Repl4S_eRegI(regXD dst, eRegI src) %{ +instruct Repl4S_eRegI(regD dst, eRegI src) %{ predicate(UseSSE>=2); match(Set dst (Replicate4S src)); format %{ "MOVD $dst,$src\n\t" "PSHUFLW $dst,$dst,0x00\t! replicate4S" %} - ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst)); + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} ins_pipe( fpu_reg_reg ); %} // Replicate scalar zero to packed short (2 byte) values in xmm -instruct Repl4S_immI0(regXD dst, immI0 zero) %{ +instruct Repl4S_immI0(regD dst, immI0 zero) %{ predicate(UseSSE>=2); match(Set dst (Replicate4S zero)); format %{ "PXOR $dst,$dst\t! replicate4S" %} - ins_encode( pxor(dst, dst)); + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} ins_pipe( fpu_reg_reg ); %} // Replicate scalar to packed char (2 byte) values in xmm -instruct Repl4C_reg(regXD dst, regXD src) %{ +instruct Repl4C_reg(regD dst, regD src) %{ predicate(UseSSE>=2); match(Set dst (Replicate4C src)); format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %} - ins_encode( pshufd_4x16(dst, src)); + ins_encode %{ + __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00); + %} ins_pipe( fpu_reg_reg ); %} // Replicate scalar to packed char (2 byte) values in xmm -instruct Repl4C_eRegI(regXD dst, eRegI src) %{ +instruct Repl4C_eRegI(regD dst, eRegI src) %{ predicate(UseSSE>=2); match(Set dst (Replicate4C src)); format %{ "MOVD $dst,$src\n\t" "PSHUFLW $dst,$dst,0x00\t! replicate4C" %} - ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst)); + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} ins_pipe( fpu_reg_reg ); %} // Replicate scalar zero to packed char (2 byte) values in xmm -instruct Repl4C_immI0(regXD dst, immI0 zero) %{ +instruct Repl4C_immI0(regD dst, immI0 zero) %{ predicate(UseSSE>=2); match(Set dst (Replicate4C zero)); format %{ "PXOR $dst,$dst\t! replicate4C" %} - ins_encode( pxor(dst, dst)); + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} ins_pipe( fpu_reg_reg ); %} // Replicate scalar to packed integer (4 byte) values in xmm -instruct Repl2I_reg(regXD dst, regXD src) %{ +instruct Repl2I_reg(regD dst, regD src) %{ predicate(UseSSE>=2); match(Set dst (Replicate2I src)); format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %} - ins_encode( pshufd(dst, src, 0x00)); + ins_encode %{ + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); + %} ins_pipe( fpu_reg_reg ); %} // Replicate scalar to packed integer (4 byte) values in xmm -instruct Repl2I_eRegI(regXD dst, eRegI src) %{ +instruct Repl2I_eRegI(regD dst, eRegI src) %{ predicate(UseSSE>=2); match(Set dst (Replicate2I src)); format %{ "MOVD $dst,$src\n\t" "PSHUFD $dst,$dst,0x00\t! replicate2I" %} - ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00)); + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} ins_pipe( fpu_reg_reg ); %} // Replicate scalar zero to packed integer (2 byte) values in xmm -instruct Repl2I_immI0(regXD dst, immI0 zero) %{ +instruct Repl2I_immI0(regD dst, immI0 zero) %{ predicate(UseSSE>=2); match(Set dst (Replicate2I zero)); format %{ "PXOR $dst,$dst\t! replicate2I" %} - ins_encode( pxor(dst, dst)); + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} ins_pipe( fpu_reg_reg ); %} // Replicate scalar to packed single precision floating point values in xmm -instruct Repl2F_reg(regXD dst, regXD src) %{ +instruct Repl2F_reg(regD dst, regD src) %{ predicate(UseSSE>=2); match(Set dst (Replicate2F src)); format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} - ins_encode( pshufd(dst, src, 0xe0)); + ins_encode %{ + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0); + %} ins_pipe( fpu_reg_reg ); %} // Replicate scalar to packed single precision floating point values in xmm -instruct Repl2F_regX(regXD dst, regX src) %{ +instruct Repl2F_regF(regD dst, regF src) %{ predicate(UseSSE>=2); match(Set dst (Replicate2F src)); format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} - ins_encode( pshufd(dst, src, 0xe0)); + ins_encode %{ + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0); + %} ins_pipe( fpu_reg_reg ); %} // Replicate scalar to packed single precision floating point values in xmm -instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{ +instruct Repl2F_immF0(regD dst, immF0 zero) %{ predicate(UseSSE>=2); match(Set dst (Replicate2F zero)); format %{ "PXOR $dst,$dst\t! replicate2F" %} - ins_encode( pxor(dst, dst)); + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} ins_pipe( fpu_reg_reg ); %} @@ -12702,7 +12061,7 @@ %} instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, - eAXRegI result, regXD tmp1, eFlagsReg cr) %{ + eAXRegI result, regD tmp1, eFlagsReg cr) %{ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); @@ -12717,7 +12076,7 @@ // fast string equals instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, - regXD tmp1, regXD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ + regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ match(Set result (StrEquals (Binary str1 str2) cnt)); effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); @@ -12732,7 +12091,7 @@ // fast search of substring with known size. instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, - eBXRegI result, regXD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ + eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ predicate(UseSSE42Intrinsics); match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); @@ -12759,7 +12118,7 @@ %} instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, - eBXRegI result, regXD vec, eCXRegI tmp, eFlagsReg cr) %{ + eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ predicate(UseSSE42Intrinsics); match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); @@ -12776,7 +12135,7 @@ // fast array equals instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result, - regXD tmp1, regXD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) + regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) %{ match(Set result (AryEq ary1 ary2)); effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); @@ -13602,27 +12961,36 @@ %} // Compare 2 longs and CMOVE doubles -instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{ +instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{ predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); ins_cost(200); expand %{ - fcmovD_regS(cmp,flags,dst,src); + fcmovDPR_regS(cmp,flags,dst,src); %} %} // Compare 2 longs and CMOVE doubles -instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{ +instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{ predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); ins_cost(200); expand %{ - fcmovXD_regS(cmp,flags,dst,src); + fcmovD_regS(cmp,flags,dst,src); + %} +%} + +instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{ + predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); + match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); + ins_cost(200); + expand %{ + fcmovFPR_regS(cmp,flags,dst,src); %} %} instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{ - predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); + predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); ins_cost(200); expand %{ @@ -13630,15 +12998,6 @@ %} %} -instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{ - predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); - match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovX_regS(cmp,flags,dst,src); - %} -%} - //====== // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares. instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{ @@ -13730,27 +13089,36 @@ %} // Compare 2 longs and CMOVE doubles -instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{ +instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{ predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); ins_cost(200); expand %{ - fcmovD_regS(cmp,flags,dst,src); + fcmovDPR_regS(cmp,flags,dst,src); %} %} // Compare 2 longs and CMOVE doubles -instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{ +instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{ predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); ins_cost(200); expand %{ - fcmovXD_regS(cmp,flags,dst,src); + fcmovD_regS(cmp,flags,dst,src); + %} +%} + +instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{ + predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); + match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); + ins_cost(200); + expand %{ + fcmovFPR_regS(cmp,flags,dst,src); %} %} instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{ - predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); + predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); ins_cost(200); expand %{ @@ -13758,15 +13126,6 @@ %} %} -instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{ - predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); - match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovX_regS(cmp,flags,dst,src); - %} -%} - //====== // Manifest a CmpL result in the normal flags. Only good for LE or GT compares. // Same as cmpL_reg_flags_LEGT except must negate src @@ -13863,27 +13222,37 @@ %} // Compare 2 longs and CMOVE doubles -instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{ +instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{ predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); ins_cost(200); expand %{ - fcmovD_regS(cmp,flags,dst,src); + fcmovDPR_regS(cmp,flags,dst,src); %} %} // Compare 2 longs and CMOVE doubles -instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{ +instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{ predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); ins_cost(200); expand %{ - fcmovXD_regS(cmp,flags,dst,src); - %} -%} + fcmovD_regS(cmp,flags,dst,src); + %} +%} + +instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{ + predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); + match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); + ins_cost(200); + expand %{ + fcmovFPR_regS(cmp,flags,dst,src); + %} +%} + instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{ - predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); + predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); ins_cost(200); expand %{ @@ -13892,16 +13261,6 @@ %} -instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{ - predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); - match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovX_regS(cmp,flags,dst,src); - %} -%} - - // ============================================================================ // Procedure Call/Return Instructions // Call Java Static Instruction diff -r 7faca6dfa2ed -r 8940fd98d540 src/cpu/x86/vm/x86_64.ad --- a/src/cpu/x86/vm/x86_64.ad Tue Dec 27 12:38:49 2011 -0800 +++ b/src/cpu/x86/vm/x86_64.ad Thu Dec 29 11:37:50 2011 -0800 @@ -552,7 +552,7 @@ #define __ _masm. static int preserve_SP_size() { - return LP64_ONLY(1 +) 2; // [rex,] op, rm(reg/reg) + return 3; // rex.w, op, rm(reg/reg) } // !!!!! Special hack to get all types of calls to specify the byte offset @@ -797,48 +797,35 @@ } } -void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc) -{ - if (dstenc != srcenc) { - if (dstenc < 8) { - if (srcenc >= 8) { - emit_opcode(cbuf, Assembler::REX_B); - srcenc -= 8; - } - } else { - if (srcenc < 8) { - emit_opcode(cbuf, Assembler::REX_R); - } else { - emit_opcode(cbuf, Assembler::REX_RB); - srcenc -= 8; - } - dstenc -= 8; - } - - emit_opcode(cbuf, 0x8B); - emit_rm(cbuf, 0x3, dstenc, srcenc); - } -} - -void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) { - if( dst_encoding == src_encoding ) { - // reg-reg copy, use an empty encoding - } else { - MacroAssembler _masm(&cbuf); - - __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding)); - } -} - // This could be in MacroAssembler but it's fairly C2 specific void emit_cmpfp_fixup(MacroAssembler& _masm) { Label exit; __ jccb(Assembler::noParity, exit); __ pushf(); + // + // comiss/ucomiss instructions set ZF,PF,CF flags and + // zero OF,AF,SF for NaN values. + // Fixup flags by zeroing ZF,PF so that compare of NaN + // values returns 'less than' result (CF is set). + // Leave the rest of flags unchanged. + // + // 7 6 5 4 3 2 1 0 + // |S|Z|r|A|r|P|r|C| (r - reserved bit) + // 0 0 1 0 1 0 1 1 (0x2B) + // __ andq(Address(rsp, 0), 0xffffff2b); __ popf(); __ bind(exit); - __ nop(); // (target for branch to avoid branch to branch) +} + +void emit_cmpfp3(MacroAssembler& _masm, Register dst) { + Label done; + __ movl(dst, -1); + __ jcc(Assembler::parity, done); + __ jcc(Assembler::below, done); + __ setb(Assembler::notEqual, dst); + __ movzbl(dst, dst); + __ bind(done); } @@ -1274,16 +1261,8 @@ // 64-bit int offset = ra_->reg2offset(src_first); if (cbuf) { - emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66); - if (Matcher::_regEncode[dst_first] >= 8) { - emit_opcode(*cbuf, Assembler::REX_R); - } - emit_opcode(*cbuf, 0x0F); - emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12); - encode_RegMem(*cbuf, - Matcher::_regEncode[dst_first], - RSP_enc, 0x4, 0, offset, - false); + MacroAssembler _masm(cbuf); + __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset)); #ifndef PRODUCT } else if (!do_size) { st->print("%s %s, [rsp + #%d]\t# spill", @@ -1294,25 +1273,17 @@ } return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + - ((Matcher::_regEncode[dst_first] < 8) - ? 5 - : 6); // REX + ((Matcher::_regEncode[dst_first] >= 8) + ? 6 + : (5 + ((UseAVX>0)?1:0))); // REX } else { // 32-bit assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); int offset = ra_->reg2offset(src_first); if (cbuf) { - emit_opcode(*cbuf, 0xF3); - if (Matcher::_regEncode[dst_first] >= 8) { - emit_opcode(*cbuf, Assembler::REX_R); - } - emit_opcode(*cbuf, 0x0F); - emit_opcode(*cbuf, 0x10); - encode_RegMem(*cbuf, - Matcher::_regEncode[dst_first], - RSP_enc, 0x4, 0, offset, - false); + MacroAssembler _masm(cbuf); + __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset)); #ifndef PRODUCT } else if (!do_size) { st->print("movss %s, [rsp + #%d]\t# spill", @@ -1322,9 +1293,9 @@ } return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + - ((Matcher::_regEncode[dst_first] < 8) - ? 5 - : 6); // REX + ((Matcher::_regEncode[dst_first] >= 8) + ? 6 + : (5 + ((UseAVX>0)?1:0))); // REX } } } else if (src_first_rc == rc_int) { @@ -1450,25 +1421,8 @@ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { // 64-bit if (cbuf) { - emit_opcode(*cbuf, 0x66); - if (Matcher::_regEncode[dst_first] < 8) { - if (Matcher::_regEncode[src_first] < 8) { - emit_opcode(*cbuf, Assembler::REX_W); - } else { - emit_opcode(*cbuf, Assembler::REX_WB); - } - } else { - if (Matcher::_regEncode[src_first] < 8) { - emit_opcode(*cbuf, Assembler::REX_WR); - } else { - emit_opcode(*cbuf, Assembler::REX_WRB); - } - } - emit_opcode(*cbuf, 0x0F); - emit_opcode(*cbuf, 0x6E); - emit_rm(*cbuf, 0x3, - Matcher::_regEncode[dst_first] & 7, - Matcher::_regEncode[src_first] & 7); + MacroAssembler _masm(cbuf); + __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); #ifndef PRODUCT } else if (!do_size) { st->print("movdq %s, %s\t# spill", @@ -1482,23 +1436,8 @@ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); if (cbuf) { - emit_opcode(*cbuf, 0x66); - if (Matcher::_regEncode[dst_first] < 8) { - if (Matcher::_regEncode[src_first] >= 8) { - emit_opcode(*cbuf, Assembler::REX_B); - } - } else { - if (Matcher::_regEncode[src_first] < 8) { - emit_opcode(*cbuf, Assembler::REX_R); - } else { - emit_opcode(*cbuf, Assembler::REX_RB); - } - } - emit_opcode(*cbuf, 0x0F); - emit_opcode(*cbuf, 0x6E); - emit_rm(*cbuf, 0x3, - Matcher::_regEncode[dst_first] & 7, - Matcher::_regEncode[src_first] & 7); + MacroAssembler _masm(cbuf); + __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); #ifndef PRODUCT } else if (!do_size) { st->print("movdl %s, %s\t# spill", @@ -1507,9 +1446,9 @@ #endif } return - (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8) - ? 4 - : 5; // REX + (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8) + ? 5 + : (4 + ((UseAVX>0)?1:0)); // REX } } } else if (src_first_rc == rc_float) { @@ -1521,16 +1460,8 @@ // 64-bit int offset = ra_->reg2offset(dst_first); if (cbuf) { - emit_opcode(*cbuf, 0xF2); - if (Matcher::_regEncode[src_first] >= 8) { - emit_opcode(*cbuf, Assembler::REX_R); - } - emit_opcode(*cbuf, 0x0F); - emit_opcode(*cbuf, 0x11); - encode_RegMem(*cbuf, - Matcher::_regEncode[src_first], - RSP_enc, 0x4, 0, offset, - false); + MacroAssembler _masm(cbuf); + __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first])); #ifndef PRODUCT } else if (!do_size) { st->print("movsd [rsp + #%d], %s\t# spill", @@ -1540,25 +1471,17 @@ } return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + - ((Matcher::_regEncode[src_first] < 8) - ? 5 - : 6); // REX + ((Matcher::_regEncode[src_first] >= 8) + ? 6 + : (5 + ((UseAVX>0)?1:0))); // REX } else { // 32-bit assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); int offset = ra_->reg2offset(dst_first); if (cbuf) { - emit_opcode(*cbuf, 0xF3); - if (Matcher::_regEncode[src_first] >= 8) { - emit_opcode(*cbuf, Assembler::REX_R); - } - emit_opcode(*cbuf, 0x0F); - emit_opcode(*cbuf, 0x11); - encode_RegMem(*cbuf, - Matcher::_regEncode[src_first], - RSP_enc, 0x4, 0, offset, - false); + MacroAssembler _masm(cbuf); + __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first])); #ifndef PRODUCT } else if (!do_size) { st->print("movss [rsp + #%d], %s\t# spill", @@ -1568,9 +1491,9 @@ } return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + - ((Matcher::_regEncode[src_first] < 8) - ? 5 - : 6); // REX + ((Matcher::_regEncode[src_first] >=8) + ? 6 + : (5 + ((UseAVX>0)?1:0))); // REX } } else if (dst_first_rc == rc_int) { // xmm -> gpr @@ -1578,25 +1501,8 @@ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { // 64-bit if (cbuf) { - emit_opcode(*cbuf, 0x66); - if (Matcher::_regEncode[dst_first] < 8) { - if (Matcher::_regEncode[src_first] < 8) { - emit_opcode(*cbuf, Assembler::REX_W); - } else { - emit_opcode(*cbuf, Assembler::REX_WR); // attention! - } - } else { - if (Matcher::_regEncode[src_first] < 8) { - emit_opcode(*cbuf, Assembler::REX_WB); // attention! - } else { - emit_opcode(*cbuf, Assembler::REX_WRB); - } - } - emit_opcode(*cbuf, 0x0F); - emit_opcode(*cbuf, 0x7E); - emit_rm(*cbuf, 0x3, - Matcher::_regEncode[src_first] & 7, - Matcher::_regEncode[dst_first] & 7); + MacroAssembler _masm(cbuf); + __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first])); #ifndef PRODUCT } else if (!do_size) { st->print("movdq %s, %s\t# spill", @@ -1610,23 +1516,8 @@ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); if (cbuf) { - emit_opcode(*cbuf, 0x66); - if (Matcher::_regEncode[dst_first] < 8) { - if (Matcher::_regEncode[src_first] >= 8) { - emit_opcode(*cbuf, Assembler::REX_R); // attention! - } - } else { - if (Matcher::_regEncode[src_first] < 8) { - emit_opcode(*cbuf, Assembler::REX_B); // attention! - } else { - emit_opcode(*cbuf, Assembler::REX_RB); - } - } - emit_opcode(*cbuf, 0x0F); - emit_opcode(*cbuf, 0x7E); - emit_rm(*cbuf, 0x3, - Matcher::_regEncode[src_first] & 7, - Matcher::_regEncode[dst_first] & 7); + MacroAssembler _masm(cbuf); + __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first])); #ifndef PRODUCT } else if (!do_size) { st->print("movdl %s, %s\t# spill", @@ -1635,9 +1526,9 @@ #endif } return - (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8) - ? 4 - : 5; // REX + (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8) + ? 5 + : (4 + ((UseAVX>0)?1:0)); // REX } } else if (dst_first_rc == rc_float) { // xmm -> xmm @@ -1645,23 +1536,8 @@ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { // 64-bit if (cbuf) { - emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2); - if (Matcher::_regEncode[dst_first] < 8) { - if (Matcher::_regEncode[src_first] >= 8) { - emit_opcode(*cbuf, Assembler::REX_B); - } - } else { - if (Matcher::_regEncode[src_first] < 8) { - emit_opcode(*cbuf, Assembler::REX_R); - } else { - emit_opcode(*cbuf, Assembler::REX_RB); - } - } - emit_opcode(*cbuf, 0x0F); - emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10); - emit_rm(*cbuf, 0x3, - Matcher::_regEncode[dst_first] & 7, - Matcher::_regEncode[src_first] & 7); + MacroAssembler _masm(cbuf); + __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first])); #ifndef PRODUCT } else if (!do_size) { st->print("%s %s, %s\t# spill", @@ -1671,32 +1547,16 @@ #endif } return - (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8) - ? 4 - : 5; // REX + (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8) + ? 5 + : (4 + ((UseAVX>0)?1:0)); // REX } else { // 32-bit assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); if (cbuf) { - if (!UseXmmRegToRegMoveAll) - emit_opcode(*cbuf, 0xF3); - if (Matcher::_regEncode[dst_first] < 8) { - if (Matcher::_regEncode[src_first] >= 8) { - emit_opcode(*cbuf, Assembler::REX_B); - } - } else { - if (Matcher::_regEncode[src_first] < 8) { - emit_opcode(*cbuf, Assembler::REX_R); - } else { - emit_opcode(*cbuf, Assembler::REX_RB); - } - } - emit_opcode(*cbuf, 0x0F); - emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10); - emit_rm(*cbuf, 0x3, - Matcher::_regEncode[dst_first] & 7, - Matcher::_regEncode[src_first] & 7); + MacroAssembler _masm(cbuf); + __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first])); #ifndef PRODUCT } else if (!do_size) { st->print("%s %s, %s\t# spill", @@ -1705,10 +1565,10 @@ Matcher::regName[src_first]); #endif } - return - (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8) - ? (UseXmmRegToRegMoveAll ? 3 : 4) - : (UseXmmRegToRegMoveAll ? 4 : 5); // REX + return ((UseAVX>0) ? 5: + ((Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8) + ? (UseXmmRegToRegMoveAll ? 4 : 5) + : (UseXmmRegToRegMoveAll ? 3 : 4))); // REX } } } @@ -2205,47 +2065,6 @@ emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7); %} - enc_class cmpfp_fixup() %{ - MacroAssembler _masm(&cbuf); - emit_cmpfp_fixup(_masm); - %} - - enc_class cmpfp3(rRegI dst) - %{ - int dstenc = $dst$$reg; - - // movl $dst, -1 - if (dstenc >= 8) { - emit_opcode(cbuf, Assembler::REX_B); - } - emit_opcode(cbuf, 0xB8 | (dstenc & 7)); - emit_d32(cbuf, -1); - - // jp,s done - emit_opcode(cbuf, 0x7A); - emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A); - - // jb,s done - emit_opcode(cbuf, 0x72); - emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08); - - // setne $dst - if (dstenc >= 4) { - emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B); - } - emit_opcode(cbuf, 0x0F); - emit_opcode(cbuf, 0x95); - emit_opcode(cbuf, 0xC0 | (dstenc & 7)); - - // movzbl $dst, $dst - if (dstenc >= 4) { - emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB); - } - emit_opcode(cbuf, 0x0F); - emit_opcode(cbuf, 0xB6); - emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7); - %} - enc_class cdql_enc(no_rax_rdx_RegI div) %{ // Full implementation of Java idiv and irem; checks for @@ -2472,55 +2291,6 @@ emit_cc(cbuf, $secondary, $cop$$cmpcode); %} - enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src) - %{ - // Invert sense of branch from sense of cmov - emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1); - emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8) - ? (UseXmmRegToRegMoveAll ? 3 : 4) - : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX - // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src) - if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3); - if ($dst$$reg < 8) { - if ($src$$reg >= 8) { - emit_opcode(cbuf, Assembler::REX_B); - } - } else { - if ($src$$reg < 8) { - emit_opcode(cbuf, Assembler::REX_R); - } else { - emit_opcode(cbuf, Assembler::REX_RB); - } - } - emit_opcode(cbuf, 0x0F); - emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10); - emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7); - %} - - enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src) - %{ - // Invert sense of branch from sense of cmov - emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1); - emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX - - // UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src) - emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2); - if ($dst$$reg < 8) { - if ($src$$reg >= 8) { - emit_opcode(cbuf, Assembler::REX_B); - } - } else { - if ($src$$reg < 8) { - emit_opcode(cbuf, Assembler::REX_R); - } else { - emit_opcode(cbuf, Assembler::REX_RB); - } - } - emit_opcode(cbuf, 0x0F); - emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10); - emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7); - %} - enc_class enc_PartialSubtypeCheck() %{ Register Rrdi = as_Register(RDI_enc); // result register @@ -2751,68 +2521,6 @@ } %} - // Encode a reg-reg copy. If it is useless, then empty encoding. - enc_class enc_copy(rRegI dst, rRegI src) - %{ - encode_copy(cbuf, $dst$$reg, $src$$reg); - %} - - // Encode xmm reg-reg copy. If it is useless, then empty encoding. - enc_class enc_CopyXD( RegD dst, RegD src ) %{ - encode_CopyXD( cbuf, $dst$$reg, $src$$reg ); - %} - - enc_class enc_copy_always(rRegI dst, rRegI src) - %{ - int srcenc = $src$$reg; - int dstenc = $dst$$reg; - - if (dstenc < 8) { - if (srcenc >= 8) { - emit_opcode(cbuf, Assembler::REX_B); - srcenc -= 8; - } - } else { - if (srcenc < 8) { - emit_opcode(cbuf, Assembler::REX_R); - } else { - emit_opcode(cbuf, Assembler::REX_RB); - srcenc -= 8; - } - dstenc -= 8; - } - - emit_opcode(cbuf, 0x8B); - emit_rm(cbuf, 0x3, dstenc, srcenc); - %} - - enc_class enc_copy_wide(rRegL dst, rRegL src) - %{ - int srcenc = $src$$reg; - int dstenc = $dst$$reg; - - if (dstenc != srcenc) { - if (dstenc < 8) { - if (srcenc < 8) { - emit_opcode(cbuf, Assembler::REX_W); - } else { - emit_opcode(cbuf, Assembler::REX_WB); - srcenc -= 8; - } - } else { - if (srcenc < 8) { - emit_opcode(cbuf, Assembler::REX_WR); - } else { - emit_opcode(cbuf, Assembler::REX_WRB); - srcenc -= 8; - } - dstenc -= 8; - } - emit_opcode(cbuf, 0x8B); - emit_rm(cbuf, 0x3, dstenc, srcenc); - } - %} - enc_class Con32(immI src) %{ // Output immediate @@ -3212,92 +2920,19 @@ %} enc_class Push_ResultXD(regD dst) %{ - int dstenc = $dst$$reg; - - store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP] - - // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp] - emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66); - if (dstenc >= 8) { - emit_opcode(cbuf, Assembler::REX_R); - } - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 ); - encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false); - - // add rsp,8 - emit_opcode(cbuf, Assembler::REX_W); - emit_opcode(cbuf,0x83); - emit_rm(cbuf,0x3, 0x0, RSP_enc); - emit_d8(cbuf,0x08); + MacroAssembler _masm(&cbuf); + __ fstp_d(Address(rsp, 0)); + __ movdbl($dst$$XMMRegister, Address(rsp, 0)); + __ addptr(rsp, 8); %} enc_class Push_SrcXD(regD src) %{ - int srcenc = $src$$reg; - - // subq rsp,#8 - emit_opcode(cbuf, Assembler::REX_W); - emit_opcode(cbuf, 0x83); - emit_rm(cbuf, 0x3, 0x5, RSP_enc); - emit_d8(cbuf, 0x8); - - // movsd [rsp],src - emit_opcode(cbuf, 0xF2); - if (srcenc >= 8) { - emit_opcode(cbuf, Assembler::REX_R); - } - emit_opcode(cbuf, 0x0F); - emit_opcode(cbuf, 0x11); - encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); - - // fldd [rsp] - emit_opcode(cbuf, 0x66); - emit_opcode(cbuf, 0xDD); - encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false); - %} - - - enc_class movq_ld(regD dst, memory mem) %{ MacroAssembler _masm(&cbuf); - __ movq($dst$$XMMRegister, $mem$$Address); - %} - - enc_class movq_st(memory mem, regD src) %{ - MacroAssembler _masm(&cbuf); - __ movq($mem$$Address, $src$$XMMRegister); - %} - - enc_class pshufd_8x8(regF dst, regF src) %{ - MacroAssembler _masm(&cbuf); - - encode_CopyXD(cbuf, $dst$$reg, $src$$reg); - __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg)); - __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00); - %} - - enc_class pshufd_4x16(regF dst, regF src) %{ - MacroAssembler _masm(&cbuf); - - __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00); - %} - - enc_class pshufd(regD dst, regD src, int mode) %{ - MacroAssembler _masm(&cbuf); - - __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode); - %} - - enc_class pxor(regD dst, regD src) %{ - MacroAssembler _masm(&cbuf); - - __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg)); - %} - - enc_class mov_i2x(regD dst, rRegI src) %{ - MacroAssembler _masm(&cbuf); - - __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg)); - %} + __ subptr(rsp, 8); + __ movdbl(Address(rsp, 0), $src$$XMMRegister); + __ fld_d(Address(rsp, 0)); + %} + // obj: object to lock // box: box address (header location) -- killed @@ -3534,303 +3169,6 @@ RELOC_DISP32); %} - enc_class absF_encoding(regF dst) - %{ - int dstenc = $dst$$reg; - address signmask_address = (address) StubRoutines::x86::float_sign_mask(); - - cbuf.set_insts_mark(); - if (dstenc >= 8) { - emit_opcode(cbuf, Assembler::REX_R); - dstenc -= 8; - } - // XXX reg_mem doesn't support RIP-relative addressing yet - emit_opcode(cbuf, 0x0F); - emit_opcode(cbuf, 0x54); - emit_rm(cbuf, 0x0, dstenc, 0x5); // 00 reg 101 - emit_d32_reloc(cbuf, signmask_address); - %} - - enc_class absD_encoding(regD dst) - %{ - int dstenc = $dst$$reg; - address signmask_address = (address) StubRoutines::x86::double_sign_mask(); - - cbuf.set_insts_mark(); - emit_opcode(cbuf, 0x66); - if (dstenc >= 8) { - emit_opcode(cbuf, Assembler::REX_R); - dstenc -= 8; - } - // XXX reg_mem doesn't support RIP-relative addressing yet - emit_opcode(cbuf, 0x0F); - emit_opcode(cbuf, 0x54); - emit_rm(cbuf, 0x0, dstenc, 0x5); // 00 reg 101 - emit_d32_reloc(cbuf, signmask_address); - %} - - enc_class negF_encoding(regF dst) - %{ - int dstenc = $dst$$reg; - address signflip_address = (address) StubRoutines::x86::float_sign_flip(); - - cbuf.set_insts_mark(); - if (dstenc >= 8) { - emit_opcode(cbuf, Assembler::REX_R); - dstenc -= 8; - } - // XXX reg_mem doesn't support RIP-relative addressing yet - emit_opcode(cbuf, 0x0F); - emit_opcode(cbuf, 0x57); - emit_rm(cbuf, 0x0, dstenc, 0x5); // 00 reg 101 - emit_d32_reloc(cbuf, signflip_address); - %} - - enc_class negD_encoding(regD dst) - %{ - int dstenc = $dst$$reg; - address signflip_address = (address) StubRoutines::x86::double_sign_flip(); - - cbuf.set_insts_mark(); - emit_opcode(cbuf, 0x66); - if (dstenc >= 8) { - emit_opcode(cbuf, Assembler::REX_R); - dstenc -= 8; - } - // XXX reg_mem doesn't support RIP-relative addressing yet - emit_opcode(cbuf, 0x0F); - emit_opcode(cbuf, 0x57); - emit_rm(cbuf, 0x0, dstenc, 0x5); // 00 reg 101 - emit_d32_reloc(cbuf, signflip_address); - %} - - enc_class f2i_fixup(rRegI dst, regF src) - %{ - int dstenc = $dst$$reg; - int srcenc = $src$$reg; - - // cmpl $dst, #0x80000000 - if (dstenc >= 8) { - emit_opcode(cbuf, Assembler::REX_B); - } - emit_opcode(cbuf, 0x81); - emit_rm(cbuf, 0x3, 0x7, dstenc & 7); - emit_d32(cbuf, 0x80000000); - - // jne,s done - emit_opcode(cbuf, 0x75); - if (srcenc < 8 && dstenc < 8) { - emit_d8(cbuf, 0xF); - } else if (srcenc >= 8 && dstenc >= 8) { - emit_d8(cbuf, 0x11); - } else { - emit_d8(cbuf, 0x10); - } - - // subq rsp, #8 - emit_opcode(cbuf, Assembler::REX_W); - emit_opcode(cbuf, 0x83); - emit_rm(cbuf, 0x3, 0x5, RSP_enc); - emit_d8(cbuf, 8); - - // movss [rsp], $src - emit_opcode(cbuf, 0xF3); - if (srcenc >= 8) { - emit_opcode(cbuf, Assembler::REX_R); - } - emit_opcode(cbuf, 0x0F); - emit_opcode(cbuf, 0x11); - encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes - - // call f2i_fixup - cbuf.set_insts_mark(); - emit_opcode(cbuf, 0xE8); - emit_d32_reloc(cbuf, - (int) - (StubRoutines::x86::f2i_fixup() - cbuf.insts_end() - 4), - runtime_call_Relocation::spec(), - RELOC_DISP32); - - // popq $dst - if (dstenc >= 8) { - emit_opcode(cbuf, Assembler::REX_B); - } - emit_opcode(cbuf, 0x58 | (dstenc & 7)); - - // done: - %} - - enc_class f2l_fixup(rRegL dst, regF src) - %{ - int dstenc = $dst$$reg; - int srcenc = $src$$reg; - address const_address = (address) StubRoutines::x86::double_sign_flip(); - - // cmpq $dst, [0x8000000000000000] - cbuf.set_insts_mark(); - emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR); - emit_opcode(cbuf, 0x39); - // XXX reg_mem doesn't support RIP-relative addressing yet - emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101 - emit_d32_reloc(cbuf, const_address); - - - // jne,s done - emit_opcode(cbuf, 0x75); - if (srcenc < 8 && dstenc < 8) { - emit_d8(cbuf, 0xF); - } else if (srcenc >= 8 && dstenc >= 8) { - emit_d8(cbuf, 0x11); - } else { - emit_d8(cbuf, 0x10); - } - - // subq rsp, #8 - emit_opcode(cbuf, Assembler::REX_W); - emit_opcode(cbuf, 0x83); - emit_rm(cbuf, 0x3, 0x5, RSP_enc); - emit_d8(cbuf, 8); - - // movss [rsp], $src - emit_opcode(cbuf, 0xF3); - if (srcenc >= 8) { - emit_opcode(cbuf, Assembler::REX_R); - } - emit_opcode(cbuf, 0x0F); - emit_opcode(cbuf, 0x11); - encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes - - // call f2l_fixup - cbuf.set_insts_mark(); - emit_opcode(cbuf, 0xE8); - emit_d32_reloc(cbuf, - (int) - (StubRoutines::x86::f2l_fixup() - cbuf.insts_end() - 4), - runtime_call_Relocation::spec(), - RELOC_DISP32); - - // popq $dst - if (dstenc >= 8) { - emit_opcode(cbuf, Assembler::REX_B); - } - emit_opcode(cbuf, 0x58 | (dstenc & 7)); - - // done: - %} - - enc_class d2i_fixup(rRegI dst, regD src) - %{ - int dstenc = $dst$$reg; - int srcenc = $src$$reg; - - // cmpl $dst, #0x80000000 - if (dstenc >= 8) { - emit_opcode(cbuf, Assembler::REX_B); - } - emit_opcode(cbuf, 0x81); - emit_rm(cbuf, 0x3, 0x7, dstenc & 7); - emit_d32(cbuf, 0x80000000); - - // jne,s done - emit_opcode(cbuf, 0x75); - if (srcenc < 8 && dstenc < 8) { - emit_d8(cbuf, 0xF); - } else if (srcenc >= 8 && dstenc >= 8) { - emit_d8(cbuf, 0x11); - } else { - emit_d8(cbuf, 0x10); - } - - // subq rsp, #8 - emit_opcode(cbuf, Assembler::REX_W); - emit_opcode(cbuf, 0x83); - emit_rm(cbuf, 0x3, 0x5, RSP_enc); - emit_d8(cbuf, 8); - - // movsd [rsp], $src - emit_opcode(cbuf, 0xF2); - if (srcenc >= 8) { - emit_opcode(cbuf, Assembler::REX_R); - } - emit_opcode(cbuf, 0x0F); - emit_opcode(cbuf, 0x11); - encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes - - // call d2i_fixup - cbuf.set_insts_mark(); - emit_opcode(cbuf, 0xE8); - emit_d32_reloc(cbuf, - (int) - (StubRoutines::x86::d2i_fixup() - cbuf.insts_end() - 4), - runtime_call_Relocation::spec(), - RELOC_DISP32); - - // popq $dst - if (dstenc >= 8) { - emit_opcode(cbuf, Assembler::REX_B); - } - emit_opcode(cbuf, 0x58 | (dstenc & 7)); - - // done: - %} - - enc_class d2l_fixup(rRegL dst, regD src) - %{ - int dstenc = $dst$$reg; - int srcenc = $src$$reg; - address const_address = (address) StubRoutines::x86::double_sign_flip(); - - // cmpq $dst, [0x8000000000000000] - cbuf.set_insts_mark(); - emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR); - emit_opcode(cbuf, 0x39); - // XXX reg_mem doesn't support RIP-relative addressing yet - emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101 - emit_d32_reloc(cbuf, const_address); - - - // jne,s done - emit_opcode(cbuf, 0x75); - if (srcenc < 8 && dstenc < 8) { - emit_d8(cbuf, 0xF); - } else if (srcenc >= 8 && dstenc >= 8) { - emit_d8(cbuf, 0x11); - } else { - emit_d8(cbuf, 0x10); - } - - // subq rsp, #8 - emit_opcode(cbuf, Assembler::REX_W); - emit_opcode(cbuf, 0x83); - emit_rm(cbuf, 0x3, 0x5, RSP_enc); - emit_d8(cbuf, 8); - - // movsd [rsp], $src - emit_opcode(cbuf, 0xF2); - if (srcenc >= 8) { - emit_opcode(cbuf, Assembler::REX_R); - } - emit_opcode(cbuf, 0x0F); - emit_opcode(cbuf, 0x11); - encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes - - // call d2l_fixup - cbuf.set_insts_mark(); - emit_opcode(cbuf, 0xE8); - emit_d32_reloc(cbuf, - (int) - (StubRoutines::x86::d2l_fixup() - cbuf.insts_end() - 4), - runtime_call_Relocation::spec(), - RELOC_DISP32); - - // popq $dst - if (dstenc >= 8) { - emit_opcode(cbuf, Assembler::REX_B); - } - emit_opcode(cbuf, 0x58 | (dstenc & 7)); - - // done: - %} %} @@ -6156,8 +5494,9 @@ ins_cost(145); // XXX format %{ "movss $dst, $mem\t# float" %} - opcode(0xF3, 0x0F, 0x10); - ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem)); + ins_encode %{ + __ movflt($dst$$XMMRegister, $mem$$Address); + %} ins_pipe(pipe_slow); // XXX %} @@ -6169,8 +5508,9 @@ ins_cost(145); // XXX format %{ "movlpd $dst, $mem\t# double" %} - opcode(0x66, 0x0F, 0x12); - ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem)); + ins_encode %{ + __ movdbl($dst$$XMMRegister, $mem$$Address); + %} ins_pipe(pipe_slow); // XXX %} @@ -6181,8 +5521,9 @@ ins_cost(145); // XXX format %{ "movsd $dst, $mem\t# double" %} - opcode(0xF2, 0x0F, 0x10); - ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem)); + ins_encode %{ + __ movdbl($dst$$XMMRegister, $mem$$Address); + %} ins_pipe(pipe_slow); // XXX %} @@ -6191,7 +5532,9 @@ match(Set dst (Load8B mem)); ins_cost(125); format %{ "MOVQ $dst,$mem\t! packed8B" %} - ins_encode( movq_ld(dst, mem)); + ins_encode %{ + __ movq($dst$$XMMRegister, $mem$$Address); + %} ins_pipe( pipe_slow ); %} @@ -6200,7 +5543,9 @@ match(Set dst (Load4S mem)); ins_cost(125); format %{ "MOVQ $dst,$mem\t! packed4S" %} - ins_encode( movq_ld(dst, mem)); + ins_encode %{ + __ movq($dst$$XMMRegister, $mem$$Address); + %} ins_pipe( pipe_slow ); %} @@ -6209,7 +5554,9 @@ match(Set dst (Load4C mem)); ins_cost(125); format %{ "MOVQ $dst,$mem\t! packed4C" %} - ins_encode( movq_ld(dst, mem)); + ins_encode %{ + __ movq($dst$$XMMRegister, $mem$$Address); + %} ins_pipe( pipe_slow ); %} @@ -6218,16 +5565,20 @@ match(Set dst (Load2I mem)); ins_cost(125); format %{ "MOVQ $dst,$mem\t! packed2I" %} - ins_encode( movq_ld(dst, mem)); + ins_encode %{ + __ movq($dst$$XMMRegister, $mem$$Address); + %} ins_pipe( pipe_slow ); %} // Load Aligned Packed Single to XMM instruct loadA2F(regD dst, memory mem) %{ match(Set dst (Load2F mem)); - ins_cost(145); + ins_cost(125); format %{ "MOVQ $dst,$mem\t! packed2F" %} - ins_encode( movq_ld(dst, mem)); + ins_encode %{ + __ movq($dst$$XMMRegister, $mem$$Address); + %} ins_pipe( pipe_slow ); %} @@ -6540,8 +5891,9 @@ ins_cost(100); format %{ "xorps $dst, $dst\t# float 0.0" %} - opcode(0x0F, 0x57); - ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst)); + ins_encode %{ + __ xorps($dst$$XMMRegister, $dst$$XMMRegister); + %} ins_pipe(pipe_slow); %} @@ -6562,8 +5914,9 @@ ins_cost(100); format %{ "xorpd $dst, $dst\t# double 0.0" %} - opcode(0x66, 0x0F, 0x57); - ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst)); + ins_encode %{ + __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister); + %} ins_pipe(pipe_slow); %} @@ -6606,8 +5959,9 @@ ins_cost(125); format %{ "movss $dst, $src\t# float stk" %} - opcode(0xF3, 0x0F, 0x10); - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); + ins_encode %{ + __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); + %} ins_pipe(pipe_slow); // XXX %} @@ -6972,7 +6326,9 @@ match(Set mem (Store8B mem src)); ins_cost(145); format %{ "MOVQ $mem,$src\t! packed8B" %} - ins_encode( movq_st(mem, src)); + ins_encode %{ + __ movq($mem$$Address, $src$$XMMRegister); + %} ins_pipe( pipe_slow ); %} @@ -6981,7 +6337,9 @@ match(Set mem (Store4C mem src)); ins_cost(145); format %{ "MOVQ $mem,$src\t! packed4C" %} - ins_encode( movq_st(mem, src)); + ins_encode %{ + __ movq($mem$$Address, $src$$XMMRegister); + %} ins_pipe( pipe_slow ); %} @@ -6990,7 +6348,9 @@ match(Set mem (Store2I mem src)); ins_cost(145); format %{ "MOVQ $mem,$src\t! packed2I" %} - ins_encode( movq_st(mem, src)); + ins_encode %{ + __ movq($mem$$Address, $src$$XMMRegister); + %} ins_pipe( pipe_slow ); %} @@ -7024,7 +6384,9 @@ match(Set mem (Store2F mem src)); ins_cost(145); format %{ "MOVQ $mem,$src\t! packed2F" %} - ins_encode( movq_st(mem, src)); + ins_encode %{ + __ movq($mem$$Address, $src$$XMMRegister); + %} ins_pipe( pipe_slow ); %} @@ -7035,8 +6397,9 @@ ins_cost(95); // XXX format %{ "movss $mem, $src\t# float" %} - opcode(0xF3, 0x0F, 0x11); - ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem)); + ins_encode %{ + __ movflt($mem$$Address, $src$$XMMRegister); + %} ins_pipe(pipe_slow); // XXX %} @@ -7072,8 +6435,9 @@ ins_cost(95); // XXX format %{ "movsd $mem, $src\t# double" %} - opcode(0xF2, 0x0F, 0x11); - ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem)); + ins_encode %{ + __ movdbl($mem$$Address, $src$$XMMRegister); + %} ins_pipe(pipe_slow); // XXX %} @@ -7142,8 +6506,9 @@ ins_cost(95); // XXX format %{ "movss $dst, $src\t# float stk" %} - opcode(0xF3, 0x0F, 0x11); - ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst)); + ins_encode %{ + __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister); + %} ins_pipe(pipe_slow); // XXX %} @@ -7153,8 +6518,9 @@ ins_cost(95); // XXX format %{ "movsd $dst, $src\t# double stk" %} - opcode(0xF2, 0x0F, 0x11); - ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst)); + ins_encode %{ + __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); + %} ins_pipe(pipe_slow); // XXX %} @@ -7444,6 +6810,16 @@ ins_pipe(empty); %} +instruct membar_storestore() %{ + match(MemBarStoreStore); + ins_cost(0); + + size(0); + format %{ "MEMBAR-storestore (empty encoding)" %} + ins_encode( ); + ins_pipe(empty); +%} + //----------Move Instructions-------------------------------------------------- instruct castX2P(rRegP dst, rRegL src) @@ -7451,7 +6827,11 @@ match(Set dst (CastX2P src)); format %{ "movq $dst, $src\t# long->ptr" %} - ins_encode(enc_copy_wide(dst, src)); + ins_encode %{ + if ($dst$$reg != $src$$reg) { + __ movptr($dst$$Register, $src$$Register); + } + %} ins_pipe(ialu_reg_reg); // XXX %} @@ -7460,7 +6840,11 @@ match(Set dst (CastP2X src)); format %{ "movq $dst, $src\t# ptr -> long" %} - ins_encode(enc_copy_wide(dst, src)); + ins_encode %{ + if ($dst$$reg != $src$$reg) { + __ movptr($dst$$Register, $src$$Register); + } + %} ins_pipe(ialu_reg_reg); // XXX %} @@ -7813,7 +7197,13 @@ format %{ "jn$cop skip\t# signed cmove float\n\t" "movss $dst, $src\n" "skip:" %} - ins_encode(enc_cmovf_branch(cop, dst, src)); + ins_encode %{ + Label Lskip; + // Invert sense of branch from sense of CMOV + __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); + __ movflt($dst$$XMMRegister, $src$$XMMRegister); + __ bind(Lskip); + %} ins_pipe(pipe_slow); %} @@ -7837,7 +7227,13 @@ format %{ "jn$cop skip\t# unsigned cmove float\n\t" "movss $dst, $src\n" "skip:" %} - ins_encode(enc_cmovf_branch(cop, dst, src)); + ins_encode %{ + Label Lskip; + // Invert sense of branch from sense of CMOV + __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); + __ movflt($dst$$XMMRegister, $src$$XMMRegister); + __ bind(Lskip); + %} ins_pipe(pipe_slow); %} @@ -7857,7 +7253,13 @@ format %{ "jn$cop skip\t# signed cmove double\n\t" "movsd $dst, $src\n" "skip:" %} - ins_encode(enc_cmovd_branch(cop, dst, src)); + ins_encode %{ + Label Lskip; + // Invert sense of branch from sense of CMOV + __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); + __ movdbl($dst$$XMMRegister, $src$$XMMRegister); + __ bind(Lskip); + %} ins_pipe(pipe_slow); %} @@ -7869,7 +7271,13 @@ format %{ "jn$cop skip\t# unsigned cmove double\n\t" "movsd $dst, $src\n" "skip:" %} - ins_encode(enc_cmovd_branch(cop, dst, src)); + ins_encode %{ + Label Lskip; + // Invert sense of branch from sense of CMOV + __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); + __ movdbl($dst$$XMMRegister, $src$$XMMRegister); + __ bind(Lskip); + %} ins_pipe(pipe_slow); %} @@ -10191,17 +9599,18 @@ "pushfq\t# saw NaN, set CF\n\t" "andq [rsp], #0xffffff2b\n\t" "popfq\n" - "exit: nop\t# avoid branch to branch" %} - opcode(0x0F, 0x2E); - ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2), - cmpfp_fixup); + "exit:" %} + ins_encode %{ + __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); + emit_cmpfp_fixup(_masm); + %} ins_pipe(pipe_slow); %} instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{ match(Set cr (CmpF src1 src2)); - ins_cost(145); + ins_cost(100); format %{ "ucomiss $src1, $src2" %} ins_encode %{ __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); @@ -10219,10 +9628,11 @@ "pushfq\t# saw NaN, set CF\n\t" "andq [rsp], #0xffffff2b\n\t" "popfq\n" - "exit: nop\t# avoid branch to branch" %} - opcode(0x0F, 0x2E); - ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2), - cmpfp_fixup); + "exit:" %} + ins_encode %{ + __ ucomiss($src1$$XMMRegister, $src2$$Address); + emit_cmpfp_fixup(_masm); + %} ins_pipe(pipe_slow); %} @@ -10231,8 +9641,9 @@ ins_cost(100); format %{ "ucomiss $src1, $src2" %} - opcode(0x0F, 0x2E); - ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2)); + ins_encode %{ + __ ucomiss($src1$$XMMRegister, $src2$$Address); + %} ins_pipe(pipe_slow); %} @@ -10245,7 +9656,7 @@ "pushfq\t# saw NaN, set CF\n\t" "andq [rsp], #0xffffff2b\n\t" "popfq\n" - "exit: nop\t# avoid branch to branch" %} + "exit:" %} ins_encode %{ __ ucomiss($src$$XMMRegister, $constantaddress($con)); emit_cmpfp_fixup(_masm); @@ -10273,10 +9684,11 @@ "pushfq\t# saw NaN, set CF\n\t" "andq [rsp], #0xffffff2b\n\t" "popfq\n" - "exit: nop\t# avoid branch to branch" %} - opcode(0x66, 0x0F, 0x2E); - ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2), - cmpfp_fixup); + "exit:" %} + ins_encode %{ + __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); + emit_cmpfp_fixup(_masm); + %} ins_pipe(pipe_slow); %} @@ -10301,10 +9713,11 @@ "pushfq\t# saw NaN, set CF\n\t" "andq [rsp], #0xffffff2b\n\t" "popfq\n" - "exit: nop\t# avoid branch to branch" %} - opcode(0x66, 0x0F, 0x2E); - ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2), - cmpfp_fixup); + "exit:" %} + ins_encode %{ + __ ucomisd($src1$$XMMRegister, $src2$$Address); + emit_cmpfp_fixup(_masm); + %} ins_pipe(pipe_slow); %} @@ -10313,8 +9726,9 @@ ins_cost(100); format %{ "ucomisd $src1, $src2" %} - opcode(0x66, 0x0F, 0x2E); - ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2)); + ins_encode %{ + __ ucomisd($src1$$XMMRegister, $src2$$Address); + %} ins_pipe(pipe_slow); %} @@ -10327,7 +9741,7 @@ "pushfq\t# saw NaN, set CF\n\t" "andq [rsp], #0xffffff2b\n\t" "popfq\n" - "exit: nop\t# avoid branch to branch" %} + "exit:" %} ins_encode %{ __ ucomisd($src$$XMMRegister, $constantaddress($con)); emit_cmpfp_fixup(_masm); @@ -10359,10 +9773,10 @@ "setne $dst\n\t" "movzbl $dst, $dst\n" "done:" %} - - opcode(0x0F, 0x2E); - ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2), - cmpfp3(dst)); + ins_encode %{ + __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); + emit_cmpfp3(_masm, $dst$$Register); + %} ins_pipe(pipe_slow); %} @@ -10380,10 +9794,10 @@ "setne $dst\n\t" "movzbl $dst, $dst\n" "done:" %} - - opcode(0x0F, 0x2E); - ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2), - cmpfp3(dst)); + ins_encode %{ + __ ucomiss($src1$$XMMRegister, $src2$$Address); + emit_cmpfp3(_masm, $dst$$Register); + %} ins_pipe(pipe_slow); %} @@ -10401,15 +9815,8 @@ "movzbl $dst, $dst\n" "done:" %} ins_encode %{ - Label L_done; - Register Rdst = $dst$$Register; __ ucomiss($src$$XMMRegister, $constantaddress($con)); - __ movl(Rdst, -1); - __ jcc(Assembler::parity, L_done); - __ jcc(Assembler::below, L_done); - __ setb(Assembler::notEqual, Rdst); - __ movzbl(Rdst, Rdst); - __ bind(L_done); + emit_cmpfp3(_masm, $dst$$Register); %} ins_pipe(pipe_slow); %} @@ -10428,10 +9835,10 @@ "setne $dst\n\t" "movzbl $dst, $dst\n" "done:" %} - - opcode(0x66, 0x0F, 0x2E); - ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2), - cmpfp3(dst)); + ins_encode %{ + __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); + emit_cmpfp3(_masm, $dst$$Register); + %} ins_pipe(pipe_slow); %} @@ -10449,10 +9856,10 @@ "setne $dst\n\t" "movzbl $dst, $dst\n" "done:" %} - - opcode(0x66, 0x0F, 0x2E); - ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2), - cmpfp3(dst)); + ins_encode %{ + __ ucomisd($src1$$XMMRegister, $src2$$Address); + emit_cmpfp3(_masm, $dst$$Register); + %} ins_pipe(pipe_slow); %} @@ -10470,374 +9877,9 @@ "movzbl $dst, $dst\n" "done:" %} ins_encode %{ - Register Rdst = $dst$$Register; - Label L_done; __ ucomisd($src$$XMMRegister, $constantaddress($con)); - __ movl(Rdst, -1); - __ jcc(Assembler::parity, L_done); - __ jcc(Assembler::below, L_done); - __ setb(Assembler::notEqual, Rdst); - __ movzbl(Rdst, Rdst); - __ bind(L_done); - %} - ins_pipe(pipe_slow); -%} - -instruct addF_reg(regF dst, regF src) -%{ - match(Set dst (AddF dst src)); - - format %{ "addss $dst, $src" %} - ins_cost(150); // XXX - opcode(0xF3, 0x0F, 0x58); - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); - ins_pipe(pipe_slow); -%} - -instruct addF_mem(regF dst, memory src) -%{ - match(Set dst (AddF dst (LoadF src))); - - format %{ "addss $dst, $src" %} - ins_cost(150); // XXX - opcode(0xF3, 0x0F, 0x58); - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); - ins_pipe(pipe_slow); -%} - -instruct addF_imm(regF dst, immF con) %{ - match(Set dst (AddF dst con)); - format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} - ins_cost(150); // XXX - ins_encode %{ - __ addss($dst$$XMMRegister, $constantaddress($con)); - %} - ins_pipe(pipe_slow); -%} - -instruct addD_reg(regD dst, regD src) -%{ - match(Set dst (AddD dst src)); - - format %{ "addsd $dst, $src" %} - ins_cost(150); // XXX - opcode(0xF2, 0x0F, 0x58); - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); - ins_pipe(pipe_slow); -%} - -instruct addD_mem(regD dst, memory src) -%{ - match(Set dst (AddD dst (LoadD src))); - - format %{ "addsd $dst, $src" %} - ins_cost(150); // XXX - opcode(0xF2, 0x0F, 0x58); - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); - ins_pipe(pipe_slow); -%} - -instruct addD_imm(regD dst, immD con) %{ - match(Set dst (AddD dst con)); - format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} - ins_cost(150); // XXX - ins_encode %{ - __ addsd($dst$$XMMRegister, $constantaddress($con)); - %} - ins_pipe(pipe_slow); -%} - -instruct subF_reg(regF dst, regF src) -%{ - match(Set dst (SubF dst src)); - - format %{ "subss $dst, $src" %} - ins_cost(150); // XXX - opcode(0xF3, 0x0F, 0x5C); - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); - ins_pipe(pipe_slow); -%} - -instruct subF_mem(regF dst, memory src) -%{ - match(Set dst (SubF dst (LoadF src))); - - format %{ "subss $dst, $src" %} - ins_cost(150); // XXX - opcode(0xF3, 0x0F, 0x5C); - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); - ins_pipe(pipe_slow); -%} - -instruct subF_imm(regF dst, immF con) %{ - match(Set dst (SubF dst con)); - format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} - ins_cost(150); // XXX - ins_encode %{ - __ subss($dst$$XMMRegister, $constantaddress($con)); - %} - ins_pipe(pipe_slow); -%} - -instruct subD_reg(regD dst, regD src) -%{ - match(Set dst (SubD dst src)); - - format %{ "subsd $dst, $src" %} - ins_cost(150); // XXX - opcode(0xF2, 0x0F, 0x5C); - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); - ins_pipe(pipe_slow); -%} - -instruct subD_mem(regD dst, memory src) -%{ - match(Set dst (SubD dst (LoadD src))); - - format %{ "subsd $dst, $src" %} - ins_cost(150); // XXX - opcode(0xF2, 0x0F, 0x5C); - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); - ins_pipe(pipe_slow); -%} - -instruct subD_imm(regD dst, immD con) %{ - match(Set dst (SubD dst con)); - format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} - ins_cost(150); // XXX - ins_encode %{ - __ subsd($dst$$XMMRegister, $constantaddress($con)); - %} - ins_pipe(pipe_slow); -%} - -instruct mulF_reg(regF dst, regF src) -%{ - match(Set dst (MulF dst src)); - - format %{ "mulss $dst, $src" %} - ins_cost(150); // XXX - opcode(0xF3, 0x0F, 0x59); - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); - ins_pipe(pipe_slow); -%} - -instruct mulF_mem(regF dst, memory src) -%{ - match(Set dst (MulF dst (LoadF src))); - - format %{ "mulss $dst, $src" %} - ins_cost(150); // XXX - opcode(0xF3, 0x0F, 0x59); - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); - ins_pipe(pipe_slow); -%} - -instruct mulF_imm(regF dst, immF con) %{ - match(Set dst (MulF dst con)); - format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} - ins_cost(150); // XXX - ins_encode %{ - __ mulss($dst$$XMMRegister, $constantaddress($con)); - %} - ins_pipe(pipe_slow); -%} - -instruct mulD_reg(regD dst, regD src) -%{ - match(Set dst (MulD dst src)); - - format %{ "mulsd $dst, $src" %} - ins_cost(150); // XXX - opcode(0xF2, 0x0F, 0x59); - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); - ins_pipe(pipe_slow); -%} - -instruct mulD_mem(regD dst, memory src) -%{ - match(Set dst (MulD dst (LoadD src))); - - format %{ "mulsd $dst, $src" %} - ins_cost(150); // XXX - opcode(0xF2, 0x0F, 0x59); - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); - ins_pipe(pipe_slow); -%} - -instruct mulD_imm(regD dst, immD con) %{ - match(Set dst (MulD dst con)); - format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} - ins_cost(150); // XXX - ins_encode %{ - __ mulsd($dst$$XMMRegister, $constantaddress($con)); - %} - ins_pipe(pipe_slow); -%} - -instruct divF_reg(regF dst, regF src) -%{ - match(Set dst (DivF dst src)); - - format %{ "divss $dst, $src" %} - ins_cost(150); // XXX - opcode(0xF3, 0x0F, 0x5E); - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); - ins_pipe(pipe_slow); -%} - -instruct divF_mem(regF dst, memory src) -%{ - match(Set dst (DivF dst (LoadF src))); - - format %{ "divss $dst, $src" %} - ins_cost(150); // XXX - opcode(0xF3, 0x0F, 0x5E); - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); - ins_pipe(pipe_slow); -%} - -instruct divF_imm(regF dst, immF con) %{ - match(Set dst (DivF dst con)); - format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} - ins_cost(150); // XXX - ins_encode %{ - __ divss($dst$$XMMRegister, $constantaddress($con)); - %} - ins_pipe(pipe_slow); -%} - -instruct divD_reg(regD dst, regD src) -%{ - match(Set dst (DivD dst src)); - - format %{ "divsd $dst, $src" %} - ins_cost(150); // XXX - opcode(0xF2, 0x0F, 0x5E); - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); - ins_pipe(pipe_slow); -%} - -instruct divD_mem(regD dst, memory src) -%{ - match(Set dst (DivD dst (LoadD src))); - - format %{ "divsd $dst, $src" %} - ins_cost(150); // XXX - opcode(0xF2, 0x0F, 0x5E); - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); - ins_pipe(pipe_slow); -%} - -instruct divD_imm(regD dst, immD con) %{ - match(Set dst (DivD dst con)); - format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} - ins_cost(150); // XXX - ins_encode %{ - __ divsd($dst$$XMMRegister, $constantaddress($con)); - %} - ins_pipe(pipe_slow); -%} - -instruct sqrtF_reg(regF dst, regF src) -%{ - match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); - - format %{ "sqrtss $dst, $src" %} - ins_cost(150); // XXX - opcode(0xF3, 0x0F, 0x51); - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); - ins_pipe(pipe_slow); -%} - -instruct sqrtF_mem(regF dst, memory src) -%{ - match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); - - format %{ "sqrtss $dst, $src" %} - ins_cost(150); // XXX - opcode(0xF3, 0x0F, 0x51); - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); - ins_pipe(pipe_slow); -%} - -instruct sqrtF_imm(regF dst, immF con) %{ - match(Set dst (ConvD2F (SqrtD (ConvF2D con)))); - format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} - ins_cost(150); // XXX - ins_encode %{ - __ sqrtss($dst$$XMMRegister, $constantaddress($con)); - %} - ins_pipe(pipe_slow); -%} - -instruct sqrtD_reg(regD dst, regD src) -%{ - match(Set dst (SqrtD src)); - - format %{ "sqrtsd $dst, $src" %} - ins_cost(150); // XXX - opcode(0xF2, 0x0F, 0x51); - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); - ins_pipe(pipe_slow); -%} - -instruct sqrtD_mem(regD dst, memory src) -%{ - match(Set dst (SqrtD (LoadD src))); - - format %{ "sqrtsd $dst, $src" %} - ins_cost(150); // XXX - opcode(0xF2, 0x0F, 0x51); - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); - ins_pipe(pipe_slow); -%} - -instruct sqrtD_imm(regD dst, immD con) %{ - match(Set dst (SqrtD con)); - format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} - ins_cost(150); // XXX - ins_encode %{ - __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); - %} - ins_pipe(pipe_slow); -%} - -instruct absF_reg(regF dst) -%{ - match(Set dst (AbsF dst)); - - format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} - ins_encode(absF_encoding(dst)); - ins_pipe(pipe_slow); -%} - -instruct absD_reg(regD dst) -%{ - match(Set dst (AbsD dst)); - - format %{ "andpd $dst, [0x7fffffffffffffff]\t" - "# abs double by sign masking" %} - ins_encode(absD_encoding(dst)); - ins_pipe(pipe_slow); -%} - -instruct negF_reg(regF dst) -%{ - match(Set dst (NegF dst)); - - format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} - ins_encode(negF_encoding(dst)); - ins_pipe(pipe_slow); -%} - -instruct negD_reg(regD dst) -%{ - match(Set dst (NegD dst)); - - format %{ "xorpd $dst, [0x8000000000000000]\t" - "# neg double by sign flipping" %} - ins_encode(negD_encoding(dst)); + emit_cmpfp3(_masm, $dst$$Register); + %} ins_pipe(pipe_slow); %} @@ -10929,8 +9971,9 @@ match(Set dst (ConvF2D src)); format %{ "cvtss2sd $dst, $src" %} - opcode(0xF3, 0x0F, 0x5A); - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); + ins_encode %{ + __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister); + %} ins_pipe(pipe_slow); // XXX %} @@ -10939,8 +9982,9 @@ match(Set dst (ConvF2D (LoadF src))); format %{ "cvtss2sd $dst, $src" %} - opcode(0xF3, 0x0F, 0x5A); - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); + ins_encode %{ + __ cvtss2sd ($dst$$XMMRegister, $src$$Address); + %} ins_pipe(pipe_slow); // XXX %} @@ -10949,8 +9993,9 @@ match(Set dst (ConvD2F src)); format %{ "cvtsd2ss $dst, $src" %} - opcode(0xF2, 0x0F, 0x5A); - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); + ins_encode %{ + __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister); + %} ins_pipe(pipe_slow); // XXX %} @@ -10959,8 +10004,9 @@ match(Set dst (ConvD2F (LoadD src))); format %{ "cvtsd2ss $dst, $src" %} - opcode(0xF2, 0x0F, 0x5A); - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); + ins_encode %{ + __ cvtsd2ss ($dst$$XMMRegister, $src$$Address); + %} ins_pipe(pipe_slow); // XXX %} @@ -10978,9 +10024,17 @@ "call f2i_fixup\n\t" "popq $dst\n" "done: "%} - opcode(0xF3, 0x0F, 0x2C); - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src), - f2i_fixup(dst, src)); + ins_encode %{ + Label done; + __ cvttss2sil($dst$$Register, $src$$XMMRegister); + __ cmpl($dst$$Register, 0x80000000); + __ jccb(Assembler::notEqual, done); + __ subptr(rsp, 8); + __ movflt(Address(rsp, 0), $src$$XMMRegister); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup()))); + __ pop($dst$$Register); + __ bind(done); + %} ins_pipe(pipe_slow); %} @@ -10997,9 +10051,18 @@ "call f2l_fixup\n\t" "popq $dst\n" "done: "%} - opcode(0xF3, 0x0F, 0x2C); - ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src), - f2l_fixup(dst, src)); + ins_encode %{ + Label done; + __ cvttss2siq($dst$$Register, $src$$XMMRegister); + __ cmp64($dst$$Register, + ExternalAddress((address) StubRoutines::x86::double_sign_flip())); + __ jccb(Assembler::notEqual, done); + __ subptr(rsp, 8); + __ movflt(Address(rsp, 0), $src$$XMMRegister); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup()))); + __ pop($dst$$Register); + __ bind(done); + %} ins_pipe(pipe_slow); %} @@ -11016,9 +10079,17 @@ "call d2i_fixup\n\t" "popq $dst\n" "done: "%} - opcode(0xF2, 0x0F, 0x2C); - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src), - d2i_fixup(dst, src)); + ins_encode %{ + Label done; + __ cvttsd2sil($dst$$Register, $src$$XMMRegister); + __ cmpl($dst$$Register, 0x80000000); + __ jccb(Assembler::notEqual, done); + __ subptr(rsp, 8); + __ movdbl(Address(rsp, 0), $src$$XMMRegister); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup()))); + __ pop($dst$$Register); + __ bind(done); + %} ins_pipe(pipe_slow); %} @@ -11035,9 +10106,18 @@ "call d2l_fixup\n\t" "popq $dst\n" "done: "%} - opcode(0xF2, 0x0F, 0x2C); - ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src), - d2l_fixup(dst, src)); + ins_encode %{ + Label done; + __ cvttsd2siq($dst$$Register, $src$$XMMRegister); + __ cmp64($dst$$Register, + ExternalAddress((address) StubRoutines::x86::double_sign_flip())); + __ jccb(Assembler::notEqual, done); + __ subptr(rsp, 8); + __ movdbl(Address(rsp, 0), $src$$XMMRegister); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup()))); + __ pop($dst$$Register); + __ bind(done); + %} ins_pipe(pipe_slow); %} @@ -11047,8 +10127,9 @@ match(Set dst (ConvI2F src)); format %{ "cvtsi2ssl $dst, $src\t# i2f" %} - opcode(0xF3, 0x0F, 0x2A); - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); + ins_encode %{ + __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register); + %} ins_pipe(pipe_slow); // XXX %} @@ -11057,8 +10138,9 @@ match(Set dst (ConvI2F (LoadI src))); format %{ "cvtsi2ssl $dst, $src\t# i2f" %} - opcode(0xF3, 0x0F, 0x2A); - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); + ins_encode %{ + __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address); + %} ins_pipe(pipe_slow); // XXX %} @@ -11068,8 +10150,9 @@ match(Set dst (ConvI2D src)); format %{ "cvtsi2sdl $dst, $src\t# i2d" %} - opcode(0xF2, 0x0F, 0x2A); - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); + ins_encode %{ + __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register); + %} ins_pipe(pipe_slow); // XXX %} @@ -11078,8 +10161,9 @@ match(Set dst (ConvI2D (LoadI src))); format %{ "cvtsi2sdl $dst, $src\t# i2d" %} - opcode(0xF2, 0x0F, 0x2A); - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); + ins_encode %{ + __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address); + %} ins_pipe(pipe_slow); // XXX %} @@ -11116,8 +10200,9 @@ match(Set dst (ConvL2F src)); format %{ "cvtsi2ssq $dst, $src\t# l2f" %} - opcode(0xF3, 0x0F, 0x2A); - ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src)); + ins_encode %{ + __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register); + %} ins_pipe(pipe_slow); // XXX %} @@ -11126,8 +10211,9 @@ match(Set dst (ConvL2F (LoadL src))); format %{ "cvtsi2ssq $dst, $src\t# l2f" %} - opcode(0xF3, 0x0F, 0x2A); - ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src)); + ins_encode %{ + __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address); + %} ins_pipe(pipe_slow); // XXX %} @@ -11136,8 +10222,9 @@ match(Set dst (ConvL2D src)); format %{ "cvtsi2sdq $dst, $src\t# l2d" %} - opcode(0xF2, 0x0F, 0x2A); - ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src)); + ins_encode %{ + __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register); + %} ins_pipe(pipe_slow); // XXX %} @@ -11146,8 +10233,9 @@ match(Set dst (ConvL2D (LoadL src))); format %{ "cvtsi2sdq $dst, $src\t# l2d" %} - opcode(0xF2, 0x0F, 0x2A); - ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src)); + ins_encode %{ + __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address); + %} ins_pipe(pipe_slow); // XXX %} @@ -11186,7 +10274,11 @@ match(Set dst (AndL (ConvI2L src) mask)); format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %} - ins_encode(enc_copy(dst, src)); + ins_encode %{ + if ($dst$$reg != $src$$reg) { + __ movl($dst$$Register, $src$$Register); + } + %} ins_pipe(ialu_reg_reg); %} @@ -11196,8 +10288,9 @@ match(Set dst (AndL (ConvI2L (LoadI src)) mask)); format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %} - opcode(0x8B); - ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src)); + ins_encode %{ + __ movl($dst$$Register, $src$$Address); + %} ins_pipe(ialu_reg_mem); %} @@ -11206,7 +10299,9 @@ match(Set dst (AndL src mask)); format %{ "movl $dst, $src\t# zero-extend long" %} - ins_encode(enc_copy_always(dst, src)); + ins_encode %{ + __ movl($dst$$Register, $src$$Register); + %} ins_pipe(ialu_reg_reg); %} @@ -11215,7 +10310,9 @@ match(Set dst (ConvL2I src)); format %{ "movl $dst, $src\t# l2i" %} - ins_encode(enc_copy_always(dst, src)); + ins_encode %{ + __ movl($dst$$Register, $src$$Register); + %} ins_pipe(ialu_reg_reg); %} @@ -11226,8 +10323,9 @@ ins_cost(125); format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %} - opcode(0x8B); - ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src)); + ins_encode %{ + __ movl($dst$$Register, Address(rsp, $src$$disp)); + %} ins_pipe(ialu_reg_mem); %} @@ -11237,8 +10335,9 @@ ins_cost(125); format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %} - opcode(0xF3, 0x0F, 0x10); - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); + ins_encode %{ + __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); + %} ins_pipe(pipe_slow); %} @@ -11248,8 +10347,9 @@ ins_cost(125); format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %} - opcode(0x8B); - ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src)); + ins_encode %{ + __ movq($dst$$Register, Address(rsp, $src$$disp)); + %} ins_pipe(ialu_reg_mem); %} @@ -11260,8 +10360,9 @@ ins_cost(125); format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %} - opcode(0x66, 0x0F, 0x12); - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); + ins_encode %{ + __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); + %} ins_pipe(pipe_slow); %} @@ -11272,8 +10373,9 @@ ins_cost(125); format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %} - opcode(0xF2, 0x0F, 0x10); - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); + ins_encode %{ + __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); + %} ins_pipe(pipe_slow); %} @@ -11284,8 +10386,9 @@ ins_cost(95); // XXX format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %} - opcode(0xF3, 0x0F, 0x11); - ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst)); + ins_encode %{ + __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister); + %} ins_pipe(pipe_slow); %} @@ -11295,8 +10398,9 @@ ins_cost(100); format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %} - opcode(0x89); - ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); + ins_encode %{ + __ movl(Address(rsp, $dst$$disp), $src$$Register); + %} ins_pipe( ialu_mem_reg ); %} @@ -11306,8 +10410,9 @@ ins_cost(95); // XXX format %{ "movsd $dst, $src\t# MoveL2D_reg_stack" %} - opcode(0xF2, 0x0F, 0x11); - ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst)); + ins_encode %{ + __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); + %} ins_pipe(pipe_slow); %} @@ -11317,8 +10422,9 @@ ins_cost(100); format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %} - opcode(0x89); - ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); + ins_encode %{ + __ movq(Address(rsp, $dst$$disp), $src$$Register); + %} ins_pipe(ialu_mem_reg); %} @@ -11327,7 +10433,9 @@ effect(DEF dst, USE src); ins_cost(85); format %{ "movd $dst,$src\t# MoveF2I" %} - ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %} + ins_encode %{ + __ movdl($dst$$Register, $src$$XMMRegister); + %} ins_pipe( pipe_slow ); %} @@ -11336,7 +10444,9 @@ effect(DEF dst, USE src); ins_cost(85); format %{ "movd $dst,$src\t# MoveD2L" %} - ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %} + ins_encode %{ + __ movdq($dst$$Register, $src$$XMMRegister); + %} ins_pipe( pipe_slow ); %} @@ -11346,7 +10456,9 @@ effect(DEF dst, USE src); ins_cost(300); format %{ "movd $dst,$src\t# MoveI2F" %} - ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + %} ins_pipe( pipe_slow ); %} @@ -11355,7 +10467,9 @@ effect(DEF dst, USE src); ins_cost(300); format %{ "movd $dst,$src\t# MoveL2D" %} - ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %} + ins_encode %{ + __ movdq($dst$$XMMRegister, $src$$Register); + %} ins_pipe( pipe_slow ); %} @@ -11365,7 +10479,13 @@ format %{ "MOVDQA $dst,$src\n\t" "PUNPCKLBW $dst,$dst\n\t" "PSHUFLW $dst,$dst,0x00\t! replicate8B" %} - ins_encode( pshufd_8x8(dst, src)); + ins_encode %{ + if ($dst$$reg != $src$$reg) { + __ movdqa($dst$$XMMRegister, $src$$XMMRegister); + } + __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} ins_pipe( pipe_slow ); %} @@ -11375,7 +10495,11 @@ format %{ "MOVD $dst,$src\n\t" "PUNPCKLBW $dst,$dst\n\t" "PSHUFLW $dst,$dst,0x00\t! replicate8B" %} - ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst)); + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} ins_pipe( pipe_slow ); %} @@ -11383,7 +10507,9 @@ instruct Repl8B_immI0(regD dst, immI0 zero) %{ match(Set dst (Replicate8B zero)); format %{ "PXOR $dst,$dst\t! replicate8B" %} - ins_encode( pxor(dst, dst)); + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} ins_pipe( fpu_reg_reg ); %} @@ -11391,7 +10517,9 @@ instruct Repl4S_reg(regD dst, regD src) %{ match(Set dst (Replicate4S src)); format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %} - ins_encode( pshufd_4x16(dst, src)); + ins_encode %{ + __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00); + %} ins_pipe( fpu_reg_reg ); %} @@ -11400,7 +10528,10 @@ match(Set dst (Replicate4S src)); format %{ "MOVD $dst,$src\n\t" "PSHUFLW $dst,$dst,0x00\t! replicate4S" %} - ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst)); + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} ins_pipe( fpu_reg_reg ); %} @@ -11408,7 +10539,9 @@ instruct Repl4S_immI0(regD dst, immI0 zero) %{ match(Set dst (Replicate4S zero)); format %{ "PXOR $dst,$dst\t! replicate4S" %} - ins_encode( pxor(dst, dst)); + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} ins_pipe( fpu_reg_reg ); %} @@ -11416,7 +10549,9 @@ instruct Repl4C_reg(regD dst, regD src) %{ match(Set dst (Replicate4C src)); format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %} - ins_encode( pshufd_4x16(dst, src)); + ins_encode %{ + __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00); + %} ins_pipe( fpu_reg_reg ); %} @@ -11425,7 +10560,10 @@ match(Set dst (Replicate4C src)); format %{ "MOVD $dst,$src\n\t" "PSHUFLW $dst,$dst,0x00\t! replicate4C" %} - ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst)); + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} ins_pipe( fpu_reg_reg ); %} @@ -11433,7 +10571,9 @@ instruct Repl4C_immI0(regD dst, immI0 zero) %{ match(Set dst (Replicate4C zero)); format %{ "PXOR $dst,$dst\t! replicate4C" %} - ins_encode( pxor(dst, dst)); + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} ins_pipe( fpu_reg_reg ); %} @@ -11441,7 +10581,9 @@ instruct Repl2I_reg(regD dst, regD src) %{ match(Set dst (Replicate2I src)); format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %} - ins_encode( pshufd(dst, src, 0x00)); + ins_encode %{ + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); + %} ins_pipe( fpu_reg_reg ); %} @@ -11450,7 +10592,10 @@ match(Set dst (Replicate2I src)); format %{ "MOVD $dst,$src\n\t" "PSHUFD $dst,$dst,0x00\t! replicate2I" %} - ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00)); + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} ins_pipe( fpu_reg_reg ); %} @@ -11458,7 +10603,9 @@ instruct Repl2I_immI0(regD dst, immI0 zero) %{ match(Set dst (Replicate2I zero)); format %{ "PXOR $dst,$dst\t! replicate2I" %} - ins_encode( pxor(dst, dst)); + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} ins_pipe( fpu_reg_reg ); %} @@ -11466,7 +10613,9 @@ instruct Repl2F_reg(regD dst, regD src) %{ match(Set dst (Replicate2F src)); format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} - ins_encode( pshufd(dst, src, 0xe0)); + ins_encode %{ + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0); + %} ins_pipe( fpu_reg_reg ); %} @@ -11474,7 +10623,9 @@ instruct Repl2F_regF(regD dst, regF src) %{ match(Set dst (Replicate2F src)); format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} - ins_encode( pshufd(dst, src, 0xe0)); + ins_encode %{ + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0); + %} ins_pipe( fpu_reg_reg ); %} @@ -11482,7 +10633,9 @@ instruct Repl2F_immF0(regD dst, immF0 zero) %{ match(Set dst (Replicate2F zero)); format %{ "PXOR $dst,$dst\t! replicate2F" %} - ins_encode( pxor(dst, dst)); + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} ins_pipe( fpu_reg_reg ); %} @@ -12162,12 +11315,12 @@ effect(KILL rcx, KILL cr); ins_cost(1100); // slightly larger than the next version - format %{ "movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t" + format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t" "movl rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t" "addq rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t" "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t" "jne,s miss\t\t# Missed: rdi not-zero\n\t" - "movq [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t" + "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t" "xorq $result, $result\t\t Hit: rdi zero\n\t" "miss:\t" %} @@ -12185,12 +11338,12 @@ effect(KILL rcx, KILL result); ins_cost(1000); - format %{ "movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t" + format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t" "movl rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t" "addq rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t" "repne scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t" "jne,s miss\t\t# Missed: flags nz\n\t" - "movq [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t" + "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t" "miss:\t" %} opcode(0x0); // No need to XOR RDI diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/adlc/formssel.cpp --- a/src/share/vm/adlc/formssel.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/adlc/formssel.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -627,6 +627,7 @@ if( strcmp(_matrule->_opType,"MemBarAcquire") == 0 ) return true; if( strcmp(_matrule->_opType,"MemBarReleaseLock") == 0 ) return true; if( strcmp(_matrule->_opType,"MemBarAcquireLock") == 0 ) return true; + if( strcmp(_matrule->_opType,"MemBarStoreStore") == 0 ) return true; return false; } @@ -3978,7 +3979,8 @@ !strcmp(_opType,"MemBarAcquireLock") || !strcmp(_opType,"MemBarReleaseLock") || !strcmp(_opType,"MemBarVolatile" ) || - !strcmp(_opType,"MemBarCPUOrder" ) ; + !strcmp(_opType,"MemBarCPUOrder" ) || + !strcmp(_opType,"MemBarStoreStore" ); } bool MatchRule::is_ideal_loadPC() const { diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/asm/assembler.cpp --- a/src/share/vm/asm/assembler.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/asm/assembler.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -61,6 +61,7 @@ _code_limit = cs->limit(); _code_pos = cs->end(); _oop_recorder= code->oop_recorder(); + DEBUG_ONLY( _short_branch_delta = 0; ) if (_code_begin == NULL) { vm_exit_out_of_memory(0, err_msg("CodeCache: no room for %s", code->name())); diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/asm/assembler.hpp --- a/src/share/vm/asm/assembler.hpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/asm/assembler.hpp Thu Dec 29 11:37:50 2011 -0800 @@ -241,6 +241,33 @@ // Make it return true on platforms which need to verify // instruction boundaries for some operations. inline static bool pd_check_instruction_mark(); + + // Add delta to short branch distance to verify that it still fit into imm8. + int _short_branch_delta; + + int short_branch_delta() const { return _short_branch_delta; } + void set_short_branch_delta() { _short_branch_delta = 32; } + void clear_short_branch_delta() { _short_branch_delta = 0; } + + class ShortBranchVerifier: public StackObj { + private: + AbstractAssembler* _assm; + + public: + ShortBranchVerifier(AbstractAssembler* assm) : _assm(assm) { + assert(assm->short_branch_delta() == 0, "overlapping instructions"); + _assm->set_short_branch_delta(); + } + ~ShortBranchVerifier() { + _assm->clear_short_branch_delta(); + } + }; + #else + // Dummy in product. + class ShortBranchVerifier: public StackObj { + public: + ShortBranchVerifier(AbstractAssembler* assm) {} + }; #endif // Label functions diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/c1/c1_LIR.cpp --- a/src/share/vm/c1/c1_LIR.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/c1/c1_LIR.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -854,6 +854,9 @@ if (opTypeCheck->_info_for_exception) do_info(opTypeCheck->_info_for_exception); if (opTypeCheck->_info_for_patch) do_info(opTypeCheck->_info_for_patch); if (opTypeCheck->_object->is_valid()) do_input(opTypeCheck->_object); + if (op->code() == lir_store_check && opTypeCheck->_object->is_valid()) { + do_temp(opTypeCheck->_object); + } if (opTypeCheck->_array->is_valid()) do_input(opTypeCheck->_array); if (opTypeCheck->_tmp1->is_valid()) do_temp(opTypeCheck->_tmp1); if (opTypeCheck->_tmp2->is_valid()) do_temp(opTypeCheck->_tmp2); diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/c1/c1_LIRGenerator.cpp --- a/src/share/vm/c1/c1_LIRGenerator.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/c1/c1_LIRGenerator.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -1256,8 +1256,7 @@ info = state_for(x); } __ move(new LIR_Address(rcvr.result(), oopDesc::klass_offset_in_bytes(), T_OBJECT), result, info); - __ move_wide(new LIR_Address(result, Klass::java_mirror_offset_in_bytes() + - klassOopDesc::klass_part_offset_in_bytes(), T_OBJECT), result); + __ move_wide(new LIR_Address(result, in_bytes(Klass::java_mirror_offset()), T_OBJECT), result); } diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/c1/c1_Optimizer.cpp --- a/src/share/vm/c1/c1_Optimizer.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/c1/c1_Optimizer.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -122,18 +122,32 @@ if (sux != f_goto->default_sux()) return; // check if at least one word was pushed on sux_state + // inlining depths must match + ValueStack* if_state = if_->state(); ValueStack* sux_state = sux->state(); - if (sux_state->stack_size() <= if_->state()->stack_size()) return; + if (if_state->scope()->level() > sux_state->scope()->level()) { + while (sux_state->scope() != if_state->scope()) { + if_state = if_state->caller_state(); + assert(if_state != NULL, "states do not match up"); + } + } else if (if_state->scope()->level() < sux_state->scope()->level()) { + while (sux_state->scope() != if_state->scope()) { + sux_state = sux_state->caller_state(); + assert(sux_state != NULL, "states do not match up"); + } + } + + if (sux_state->stack_size() <= if_state->stack_size()) return; // check if phi function is present at end of successor stack and that // only this phi was pushed on the stack - Value sux_phi = sux_state->stack_at(if_->state()->stack_size()); + Value sux_phi = sux_state->stack_at(if_state->stack_size()); if (sux_phi == NULL || sux_phi->as_Phi() == NULL || sux_phi->as_Phi()->block() != sux) return; - if (sux_phi->type()->size() != sux_state->stack_size() - if_->state()->stack_size()) return; + if (sux_phi->type()->size() != sux_state->stack_size() - if_state->stack_size()) return; // get the values that were pushed in the true- and false-branch - Value t_value = t_goto->state()->stack_at(if_->state()->stack_size()); - Value f_value = f_goto->state()->stack_at(if_->state()->stack_size()); + Value t_value = t_goto->state()->stack_at(if_state->stack_size()); + Value f_value = f_goto->state()->stack_at(if_state->stack_size()); // backend does not support floats assert(t_value->type()->base() == f_value->type()->base(), "incompatible types"); @@ -180,11 +194,7 @@ Goto* goto_ = new Goto(sux, state_before, if_->is_safepoint() || t_goto->is_safepoint() || f_goto->is_safepoint()); // prepare state for Goto - ValueStack* goto_state = if_->state(); - while (sux_state->scope() != goto_state->scope()) { - goto_state = goto_state->caller_state(); - assert(goto_state != NULL, "states do not match up"); - } + ValueStack* goto_state = if_state; goto_state = goto_state->copy(ValueStack::StateAfter, goto_state->bci()); goto_state->push(result->type(), result); assert(goto_state->is_same(sux_state), "states must match now"); diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/oops/arrayKlass.hpp --- a/src/share/vm/oops/arrayKlass.hpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/oops/arrayKlass.hpp Thu Dec 29 11:37:50 2011 -0800 @@ -73,7 +73,7 @@ oop* adr_component_mirror() { return (oop*)&this->_component_mirror;} // Compiler/Interpreter offset - static ByteSize component_mirror_offset() { return byte_offset_of(arrayKlass, _component_mirror); } + static ByteSize component_mirror_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(arrayKlass, _component_mirror)); } virtual klassOop java_super() const;//{ return SystemDictionary::Object_klass(); } diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/oops/instanceKlass.hpp --- a/src/share/vm/oops/instanceKlass.hpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/oops/instanceKlass.hpp Thu Dec 29 11:37:50 2011 -0800 @@ -405,7 +405,7 @@ ReferenceType reference_type() const { return _reference_type; } void set_reference_type(ReferenceType t) { _reference_type = t; } - static int reference_type_offset_in_bytes() { return offset_of(instanceKlass, _reference_type); } + static ByteSize reference_type_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(instanceKlass, _reference_type)); } // find local field, returns true if found bool find_local_field(Symbol* name, Symbol* sig, fieldDescriptor* fd) const; @@ -616,8 +616,8 @@ void set_breakpoints(BreakpointInfo* bps) { _breakpoints = bps; }; // support for stub routines - static int init_state_offset_in_bytes() { return offset_of(instanceKlass, _init_state); } - static int init_thread_offset_in_bytes() { return offset_of(instanceKlass, _init_thread); } + static ByteSize init_state_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(instanceKlass, _init_state)); } + static ByteSize init_thread_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(instanceKlass, _init_thread)); } // subclass/subinterface checks bool implements_interface(klassOop k) const; diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/oops/klass.cpp --- a/src/share/vm/oops/klass.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/oops/klass.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -144,7 +144,7 @@ } kl->set_secondary_supers(NULL); oop_store_without_check((oop*) &kl->_primary_supers[0], k); - kl->set_super_check_offset(primary_supers_offset_in_bytes() + sizeof(oopDesc)); + kl->set_super_check_offset(in_bytes(primary_supers_offset())); } kl->set_java_mirror(NULL); diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/oops/klass.hpp --- a/src/share/vm/oops/klass.hpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/oops/klass.hpp Thu Dec 29 11:37:50 2011 -0800 @@ -313,7 +313,7 @@ // Can this klass be a primary super? False for interfaces and arrays of // interfaces. False also for arrays or classes with long super chains. bool can_be_primary_super() const { - const juint secondary_offset = secondary_super_cache_offset_in_bytes() + sizeof(oopDesc); + const juint secondary_offset = in_bytes(secondary_super_cache_offset()); return super_check_offset() != secondary_offset; } virtual bool can_be_primary_super_slow() const; @@ -323,7 +323,7 @@ if (!can_be_primary_super()) { return primary_super_limit(); } else { - juint d = (super_check_offset() - (primary_supers_offset_in_bytes() + sizeof(oopDesc))) / sizeof(klassOop); + juint d = (super_check_offset() - in_bytes(primary_supers_offset())) / sizeof(klassOop); assert(d < primary_super_limit(), "oob"); assert(_primary_supers[d] == as_klassOop(), "proper init"); return d; @@ -373,15 +373,15 @@ virtual void set_alloc_size(juint n) = 0; // Compiler support - static int super_offset_in_bytes() { return offset_of(Klass, _super); } - static int super_check_offset_offset_in_bytes() { return offset_of(Klass, _super_check_offset); } - static int primary_supers_offset_in_bytes(){ return offset_of(Klass, _primary_supers); } - static int secondary_super_cache_offset_in_bytes() { return offset_of(Klass, _secondary_super_cache); } - static int secondary_supers_offset_in_bytes() { return offset_of(Klass, _secondary_supers); } - static int java_mirror_offset_in_bytes() { return offset_of(Klass, _java_mirror); } - static int modifier_flags_offset_in_bytes(){ return offset_of(Klass, _modifier_flags); } - static int layout_helper_offset_in_bytes() { return offset_of(Klass, _layout_helper); } - static int access_flags_offset_in_bytes() { return offset_of(Klass, _access_flags); } + static ByteSize super_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _super)); } + static ByteSize super_check_offset_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _super_check_offset)); } + static ByteSize primary_supers_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _primary_supers)); } + static ByteSize secondary_super_cache_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _secondary_super_cache)); } + static ByteSize secondary_supers_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _secondary_supers)); } + static ByteSize java_mirror_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _java_mirror)); } + static ByteSize modifier_flags_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _modifier_flags)); } + static ByteSize layout_helper_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _layout_helper)); } + static ByteSize access_flags_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _access_flags)); } // Unpacking layout_helper: enum { @@ -478,7 +478,7 @@ bool is_subtype_of(klassOop k) const { juint off = k->klass_part()->super_check_offset(); klassOop sup = *(klassOop*)( (address)as_klassOop() + off ); - const juint secondary_offset = secondary_super_cache_offset_in_bytes() + sizeof(oopDesc); + const juint secondary_offset = in_bytes(secondary_super_cache_offset()); if (sup == k) { return true; } else if (off != secondary_offset) { @@ -674,7 +674,7 @@ // are potential problems in setting the bias pattern for // JVM-internal oops. inline void set_prototype_header(markOop header); - static int prototype_header_offset_in_bytes() { return offset_of(Klass, _prototype_header); } + static ByteSize prototype_header_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _prototype_header)); } int biased_lock_revocation_count() const { return (int) _biased_lock_revocation_count; } // Atomically increments biased_lock_revocation_count and returns updated value diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/oops/klassOop.hpp --- a/src/share/vm/oops/klassOop.hpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/oops/klassOop.hpp Thu Dec 29 11:37:50 2011 -0800 @@ -38,14 +38,8 @@ class klassOopDesc : public oopDesc { public: - // size operation - static int header_size() { return sizeof(klassOopDesc)/HeapWordSize; } - - // support for code generation - static int klass_part_offset_in_bytes() { return sizeof(klassOopDesc); } - // returns the Klass part containing dispatching behavior - Klass* klass_part() const { return (Klass*)((address)this + klass_part_offset_in_bytes()); } + Klass* klass_part() const { return (Klass*)((address)this + sizeof(klassOopDesc)); } // Convenience wrapper inline oop java_mirror() const; diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/oops/objArrayKlass.hpp --- a/src/share/vm/oops/objArrayKlass.hpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/oops/objArrayKlass.hpp Thu Dec 29 11:37:50 2011 -0800 @@ -47,7 +47,7 @@ oop* bottom_klass_addr() { return (oop*)&_bottom_klass; } // Compiler/Interpreter offset - static int element_klass_offset_in_bytes() { return offset_of(objArrayKlass, _element_klass); } + static ByteSize element_klass_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(objArrayKlass, _element_klass)); } // Dispatched operation bool can_be_primary_super_slow() const; diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/opto/callnode.hpp --- a/src/share/vm/opto/callnode.hpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/opto/callnode.hpp Thu Dec 29 11:37:50 2011 -0800 @@ -791,6 +791,10 @@ // are defined in graphKit.cpp, which sets up the bidirectional relation.) InitializeNode* initialization(); + // Return the corresponding storestore barrier (or null if none). + // Walks out edges to find it... + MemBarStoreStoreNode* storestore(); + // Convenience for initialization->maybe_set_complete(phase) bool maybe_set_complete(PhaseGVN* phase); }; diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/opto/classes.hpp --- a/src/share/vm/opto/classes.hpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/opto/classes.hpp Thu Dec 29 11:37:50 2011 -0800 @@ -166,6 +166,7 @@ macro(MemBarRelease) macro(MemBarReleaseLock) macro(MemBarVolatile) +macro(MemBarStoreStore) macro(MergeMem) macro(MinI) macro(ModD) diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/opto/compile.cpp --- a/src/share/vm/opto/compile.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/opto/compile.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -1282,12 +1282,11 @@ if( tk ) { // If we are referencing a field within a Klass, we need // to assume the worst case of an Object. Both exact and - // inexact types must flatten to the same alias class. - // Since the flattened result for a klass is defined to be - // precisely java.lang.Object, use a constant ptr. + // inexact types must flatten to the same alias class so + // use NotNull as the PTR. if ( offset == Type::OffsetBot || (offset >= 0 && (size_t)offset < sizeof(Klass)) ) { - tj = tk = TypeKlassPtr::make(TypePtr::Constant, + tj = tk = TypeKlassPtr::make(TypePtr::NotNull, TypeKlassPtr::OBJECT->klass(), offset); } @@ -1307,10 +1306,12 @@ // these 2 disparate memories into the same alias class. Since the // primary supertype array is read-only, there's no chance of confusion // where we bypass an array load and an array store. - uint off2 = offset - Klass::primary_supers_offset_in_bytes(); - if( offset == Type::OffsetBot || - off2 < Klass::primary_super_limit()*wordSize ) { - offset = sizeof(oopDesc) +Klass::secondary_super_cache_offset_in_bytes(); + int primary_supers_offset = in_bytes(Klass::primary_supers_offset()); + if (offset == Type::OffsetBot || + (offset >= primary_supers_offset && + offset < (int)(primary_supers_offset + Klass::primary_super_limit() * wordSize)) || + offset == (int)in_bytes(Klass::secondary_super_cache_offset())) { + offset = in_bytes(Klass::secondary_super_cache_offset()); tj = tk = TypeKlassPtr::make( TypePtr::NotNull, tk->klass(), offset ); } } @@ -1489,13 +1490,13 @@ alias_type(idx)->set_rewritable(false); } if (flat->isa_klassptr()) { - if (flat->offset() == Klass::super_check_offset_offset_in_bytes() + (int)sizeof(oopDesc)) + if (flat->offset() == in_bytes(Klass::super_check_offset_offset())) alias_type(idx)->set_rewritable(false); - if (flat->offset() == Klass::modifier_flags_offset_in_bytes() + (int)sizeof(oopDesc)) + if (flat->offset() == in_bytes(Klass::modifier_flags_offset())) alias_type(idx)->set_rewritable(false); - if (flat->offset() == Klass::access_flags_offset_in_bytes() + (int)sizeof(oopDesc)) + if (flat->offset() == in_bytes(Klass::access_flags_offset())) alias_type(idx)->set_rewritable(false); - if (flat->offset() == Klass::java_mirror_offset_in_bytes() + (int)sizeof(oopDesc)) + if (flat->offset() == in_bytes(Klass::java_mirror_offset())) alias_type(idx)->set_rewritable(false); } // %%% (We would like to finalize JavaThread::threadObj_offset(), @@ -2521,7 +2522,7 @@ break; } } - assert(p != NULL, "must be found"); + assert(proj != NULL, "must be found"); p->subsume_by(proj); } } diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/opto/escape.cpp --- a/src/share/vm/opto/escape.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/opto/escape.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -1595,6 +1595,7 @@ GrowableArray alloc_worklist; GrowableArray addp_worklist; GrowableArray ptr_cmp_worklist; + GrowableArray storestore_worklist; PhaseGVN* igvn = _igvn; // Push all useful nodes onto CG list and set their type. @@ -1618,6 +1619,11 @@ (n->Opcode() == Op_CmpP || n->Opcode() == Op_CmpN)) { // Compare pointers nodes ptr_cmp_worklist.append(n); + } else if (n->is_MemBarStoreStore()) { + // Collect all MemBarStoreStore nodes so that depending on the + // escape status of the associated Allocate node some of them + // may be eliminated. + storestore_worklist.append(n); } for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { Node* m = n->fast_out(i); // Get user @@ -1724,11 +1730,20 @@ uint alloc_length = alloc_worklist.length(); for (uint next = 0; next < alloc_length; ++next) { Node* n = alloc_worklist.at(next); - if (ptnode_adr(n->_idx)->escape_state() == PointsToNode::NoEscape) { + PointsToNode::EscapeState es = ptnode_adr(n->_idx)->escape_state(); + if (es == PointsToNode::NoEscape) { has_non_escaping_obj = true; if (n->is_Allocate()) { find_init_values(n, &visited, igvn); + // The object allocated by this Allocate node will never be + // seen by an other thread. Mark it so that when it is + // expanded no MemBarStoreStore is added. + n->as_Allocate()->initialization()->set_does_not_escape(); } + } else if ((es == PointsToNode::ArgEscape) && n->is_Allocate()) { + // Same as above. Mark this Allocate node so that when it is + // expanded no MemBarStoreStore is added. + n->as_Allocate()->initialization()->set_does_not_escape(); } } @@ -1874,6 +1889,25 @@ igvn->hash_delete(_pcmp_eq); } + // For MemBarStoreStore nodes added in library_call.cpp, check + // escape status of associated AllocateNode and optimize out + // MemBarStoreStore node if the allocated object never escapes. + while (storestore_worklist.length() != 0) { + Node *n = storestore_worklist.pop(); + MemBarStoreStoreNode *storestore = n ->as_MemBarStoreStore(); + Node *alloc = storestore->in(MemBarNode::Precedent)->in(0); + assert (alloc->is_Allocate(), "storestore should point to AllocateNode"); + PointsToNode::EscapeState es = ptnode_adr(alloc->_idx)->escape_state(); + if (es == PointsToNode::NoEscape || es == PointsToNode::ArgEscape) { + MemBarNode* mb = MemBarNode::make(C, Op_MemBarCPUOrder, Compile::AliasIdxBot); + mb->init_req(TypeFunc::Memory, storestore->in(TypeFunc::Memory)); + mb->init_req(TypeFunc::Control, storestore->in(TypeFunc::Control)); + + _igvn->register_new_node_with_optimizer(mb); + _igvn->replace_node(storestore, mb); + } + } + #ifndef PRODUCT if (PrintEscapeAnalysis) { dump(); // Dump ConnectionGraph diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/opto/graphKit.cpp --- a/src/share/vm/opto/graphKit.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/opto/graphKit.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -2304,9 +2304,9 @@ // will always succeed. We could leave a dependency behind to ensure this. // First load the super-klass's check-offset - Node *p1 = basic_plus_adr( superklass, superklass, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes() ); + Node *p1 = basic_plus_adr( superklass, superklass, in_bytes(Klass::super_check_offset_offset()) ); Node *chk_off = _gvn.transform( new (C, 3) LoadINode( NULL, memory(p1), p1, _gvn.type(p1)->is_ptr() ) ); - int cacheoff_con = sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes(); + int cacheoff_con = in_bytes(Klass::secondary_super_cache_offset()); bool might_be_cache = (find_int_con(chk_off, cacheoff_con) == cacheoff_con); // Load from the sub-klass's super-class display list, or a 1-word cache of @@ -2934,7 +2934,7 @@ } } constant_value = Klass::_lh_neutral_value; // put in a known value - Node* lhp = basic_plus_adr(klass_node, klass_node, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc)); + Node* lhp = basic_plus_adr(klass_node, klass_node, in_bytes(Klass::layout_helper_offset())); return make_load(NULL, lhp, TypeInt::INT, T_INT); } @@ -3337,6 +3337,19 @@ return NULL; } +// Trace Allocate -> Proj[Parm] -> MemBarStoreStore +MemBarStoreStoreNode* AllocateNode::storestore() { + ProjNode* rawoop = proj_out(AllocateNode::RawAddress); + if (rawoop == NULL) return NULL; + for (DUIterator_Fast imax, i = rawoop->fast_outs(imax); i < imax; i++) { + Node* storestore = rawoop->fast_out(i); + if (storestore->is_MemBarStoreStore()) { + return storestore->as_MemBarStoreStore(); + } + } + return NULL; +} + //----------------------------- loop predicates --------------------------- //------------------------------add_predicate_impl---------------------------- diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/opto/library_call.cpp --- a/src/share/vm/opto/library_call.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/opto/library_call.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -2165,8 +2165,7 @@ IdealKit ideal(this); #define __ ideal. - const int reference_type_offset = instanceKlass::reference_type_offset_in_bytes() + - sizeof(oopDesc); + const int reference_type_offset = in_bytes(instanceKlass::reference_type_offset()); Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset); @@ -2806,7 +2805,7 @@ // Note: The argument might still be an illegal value like // Serializable.class or Object[].class. The runtime will handle it. // But we must make an explicit check for initialization. - Node* insp = basic_plus_adr(kls, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)); + Node* insp = basic_plus_adr(kls, in_bytes(instanceKlass::init_state_offset())); Node* inst = make_load(NULL, insp, TypeInt::INT, T_INT); Node* bits = intcon(instanceKlass::fully_initialized); Node* test = _gvn.transform( new (C, 3) SubINode(inst, bits) ); @@ -2954,7 +2953,7 @@ //---------------------------load_mirror_from_klass---------------------------- // Given a klass oop, load its java mirror (a java.lang.Class oop). Node* LibraryCallKit::load_mirror_from_klass(Node* klass) { - Node* p = basic_plus_adr(klass, Klass::java_mirror_offset_in_bytes() + sizeof(oopDesc)); + Node* p = basic_plus_adr(klass, in_bytes(Klass::java_mirror_offset())); return make_load(NULL, p, TypeInstPtr::MIRROR, T_OBJECT); } @@ -2994,7 +2993,7 @@ Node* LibraryCallKit::generate_access_flags_guard(Node* kls, int modifier_mask, int modifier_bits, RegionNode* region) { // Branch around if the given klass has the given modifier bit set. // Like generate_guard, adds a new path onto the region. - Node* modp = basic_plus_adr(kls, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)); + Node* modp = basic_plus_adr(kls, in_bytes(Klass::access_flags_offset())); Node* mods = make_load(NULL, modp, TypeInt::INT, T_INT); Node* mask = intcon(modifier_mask); Node* bits = intcon(modifier_bits); @@ -3115,7 +3114,7 @@ break; case vmIntrinsics::_getModifiers: - p = basic_plus_adr(kls, Klass::modifier_flags_offset_in_bytes() + sizeof(oopDesc)); + p = basic_plus_adr(kls, in_bytes(Klass::modifier_flags_offset())); query_value = make_load(NULL, p, TypeInt::INT, T_INT); break; @@ -3155,7 +3154,7 @@ // A guard was added. If the guard is taken, it was an array. phi->add_req(makecon(TypeInstPtr::make(env()->Object_klass()->java_mirror()))); // If we fall through, it's a plain class. Get its _super. - p = basic_plus_adr(kls, Klass::super_offset_in_bytes() + sizeof(oopDesc)); + p = basic_plus_adr(kls, in_bytes(Klass::super_offset())); kls = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p, TypeRawPtr::BOTTOM, TypeKlassPtr::OBJECT_OR_NULL) ); null_ctl = top(); kls = null_check_oop(kls, &null_ctl); @@ -3173,7 +3172,7 @@ if (generate_array_guard(kls, region) != NULL) { // Be sure to pin the oop load to the guard edge just created: Node* is_array_ctrl = region->in(region->req()-1); - Node* cma = basic_plus_adr(kls, in_bytes(arrayKlass::component_mirror_offset()) + sizeof(oopDesc)); + Node* cma = basic_plus_adr(kls, in_bytes(arrayKlass::component_mirror_offset())); Node* cmo = make_load(is_array_ctrl, cma, TypeInstPtr::MIRROR, T_OBJECT); phi->add_req(cmo); } @@ -3181,7 +3180,7 @@ break; case vmIntrinsics::_getClassAccessFlags: - p = basic_plus_adr(kls, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)); + p = basic_plus_adr(kls, in_bytes(Klass::access_flags_offset())); query_value = make_load(NULL, p, TypeInt::INT, T_INT); break; @@ -4194,12 +4193,17 @@ Node* raw_obj = alloc_obj->in(1); assert(alloc_obj->is_CheckCastPP() && raw_obj->is_Proj() && raw_obj->in(0)->is_Allocate(), ""); + AllocateNode* alloc = NULL; if (ReduceBulkZeroing) { // We will be completely responsible for initializing this object - // mark Initialize node as complete. - AllocateNode* alloc = AllocateNode::Ideal_allocation(alloc_obj, &_gvn); + alloc = AllocateNode::Ideal_allocation(alloc_obj, &_gvn); // The object was just allocated - there should be no any stores! guarantee(alloc != NULL && alloc->maybe_set_complete(&_gvn), ""); + // Mark as complete_with_arraycopy so that on AllocateNode + // expansion, we know this AllocateNode is initialized by an array + // copy and a StoreStore barrier exists after the array copy. + alloc->initialization()->set_complete_with_arraycopy(); } // Copy the fastest available way. @@ -4261,7 +4265,18 @@ } // Do not let reads from the cloned object float above the arraycopy. - insert_mem_bar(Op_MemBarCPUOrder); + if (alloc != NULL) { + // Do not let stores that initialize this object be reordered with + // a subsequent store that would make this object accessible by + // other threads. + // Record what AllocateNode this StoreStore protects so that + // escape analysis can go from the MemBarStoreStoreNode to the + // AllocateNode and eliminate the MemBarStoreStoreNode if possible + // based on the escape status of the AllocateNode. + insert_mem_bar(Op_MemBarStoreStore, alloc->proj_out(AllocateNode::RawAddress)); + } else { + insert_mem_bar(Op_MemBarCPUOrder); + } } //------------------------inline_native_clone---------------------------- @@ -4857,7 +4872,7 @@ PreserveJVMState pjvms(this); set_control(not_subtype_ctrl); // (At this point we can assume disjoint_bases, since types differ.) - int ek_offset = objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc); + int ek_offset = in_bytes(objArrayKlass::element_klass_offset()); Node* p1 = basic_plus_adr(dest_klass, ek_offset); Node* n1 = LoadKlassNode::make(_gvn, immutable_memory(), p1, TypeRawPtr::BOTTOM); Node* dest_elem_klass = _gvn.transform(n1); @@ -5004,7 +5019,16 @@ // the membar also. // // Do not let reads from the cloned object float above the arraycopy. - if (InsertMemBarAfterArraycopy || alloc != NULL) + if (alloc != NULL) { + // Do not let stores that initialize this object be reordered with + // a subsequent store that would make this object accessible by + // other threads. + // Record what AllocateNode this StoreStore protects so that + // escape analysis can go from the MemBarStoreStoreNode to the + // AllocateNode and eliminate the MemBarStoreStoreNode if possible + // based on the escape status of the AllocateNode. + insert_mem_bar(Op_MemBarStoreStore, alloc->proj_out(AllocateNode::RawAddress)); + } else if (InsertMemBarAfterArraycopy) insert_mem_bar(Op_MemBarCPUOrder); } @@ -5308,7 +5332,7 @@ // for the target array. This is an optimistic check. It will // look in each non-null element's class, at the desired klass's // super_check_offset, for the desired klass. - int sco_offset = Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); Node* p3 = basic_plus_adr(dest_elem_klass, sco_offset); Node* n3 = new(C, 3) LoadINode(NULL, memory(p3), p3, _gvn.type(p3)->is_ptr()); Node* check_offset = ConvI2X(_gvn.transform(n3)); diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/opto/macro.cpp --- a/src/share/vm/opto/macro.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/opto/macro.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -1088,6 +1088,12 @@ Node* klass_node = alloc->in(AllocateNode::KlassNode); Node* initial_slow_test = alloc->in(AllocateNode::InitialTest); + Node* storestore = alloc->storestore(); + if (storestore != NULL) { + // Break this link that is no longer useful and confuses register allocation + storestore->set_req(MemBarNode::Precedent, top()); + } + assert(ctrl != NULL, "must have control"); // We need a Region and corresponding Phi's to merge the slow-path and fast-path results. // they will not be used if "always_slow" is set @@ -1289,10 +1295,66 @@ 0, new_alloc_bytes, T_LONG); } + InitializeNode* init = alloc->initialization(); fast_oop_rawmem = initialize_object(alloc, fast_oop_ctrl, fast_oop_rawmem, fast_oop, klass_node, length, size_in_bytes); + // If initialization is performed by an array copy, any required + // MemBarStoreStore was already added. If the object does not + // escape no need for a MemBarStoreStore. Otherwise we need a + // MemBarStoreStore so that stores that initialize this object + // can't be reordered with a subsequent store that makes this + // object accessible by other threads. + if (init == NULL || (!init->is_complete_with_arraycopy() && !init->does_not_escape())) { + if (init == NULL || init->req() < InitializeNode::RawStores) { + // No InitializeNode or no stores captured by zeroing + // elimination. Simply add the MemBarStoreStore after object + // initialization. + MemBarNode* mb = MemBarNode::make(C, Op_MemBarStoreStore, Compile::AliasIdxBot, fast_oop_rawmem); + transform_later(mb); + + mb->init_req(TypeFunc::Memory, fast_oop_rawmem); + mb->init_req(TypeFunc::Control, fast_oop_ctrl); + fast_oop_ctrl = new (C, 1) ProjNode(mb,TypeFunc::Control); + transform_later(fast_oop_ctrl); + fast_oop_rawmem = new (C, 1) ProjNode(mb,TypeFunc::Memory); + transform_later(fast_oop_rawmem); + } else { + // Add the MemBarStoreStore after the InitializeNode so that + // all stores performing the initialization that were moved + // before the InitializeNode happen before the storestore + // barrier. + + Node* init_ctrl = init->proj_out(TypeFunc::Control); + Node* init_mem = init->proj_out(TypeFunc::Memory); + + MemBarNode* mb = MemBarNode::make(C, Op_MemBarStoreStore, Compile::AliasIdxBot); + transform_later(mb); + + Node* ctrl = new (C, 1) ProjNode(init,TypeFunc::Control); + transform_later(ctrl); + Node* mem = new (C, 1) ProjNode(init,TypeFunc::Memory); + transform_later(mem); + + // The MemBarStoreStore depends on control and memory coming + // from the InitializeNode + mb->init_req(TypeFunc::Memory, mem); + mb->init_req(TypeFunc::Control, ctrl); + + ctrl = new (C, 1) ProjNode(mb,TypeFunc::Control); + transform_later(ctrl); + mem = new (C, 1) ProjNode(mb,TypeFunc::Memory); + transform_later(mem); + + // All nodes that depended on the InitializeNode for control + // and memory must now depend on the MemBarNode that itself + // depends on the InitializeNode + _igvn.replace_node(init_ctrl, ctrl); + _igvn.replace_node(init_mem, mem); + } + } + if (C->env()->dtrace_extended_probes()) { // Slow-path call int size = TypeFunc::Parms + 2; @@ -1326,6 +1388,7 @@ result_phi_rawmem->init_req(fast_result_path, fast_oop_rawmem); } else { slow_region = ctrl; + result_phi_i_o = i_o; // Rename it to use in the following code. } // Generate slow-path call @@ -1350,6 +1413,10 @@ copy_call_debug_info((CallNode *) alloc, call); if (!always_slow) { call->set_cnt(PROB_UNLIKELY_MAG(4)); // Same effect as RC_UNCOMMON. + } else { + // Hook i_o projection to avoid its elimination during allocation + // replacement (when only a slow call is generated). + call->set_req(TypeFunc::I_O, result_phi_i_o); } _igvn.replace_node(alloc, call); transform_later(call); @@ -1366,8 +1433,10 @@ // extract_call_projections(call); - // An allocate node has separate memory projections for the uses on the control and i_o paths - // Replace uses of the control memory projection with result_phi_rawmem (unless we are only generating a slow call) + // An allocate node has separate memory projections for the uses on + // the control and i_o paths. Replace the control memory projection with + // result_phi_rawmem (unless we are only generating a slow call when + // both memory projections are combined) if (!always_slow && _memproj_fallthrough != NULL) { for (DUIterator_Fast imax, i = _memproj_fallthrough->fast_outs(imax); i < imax; i++) { Node *use = _memproj_fallthrough->fast_out(i); @@ -1378,8 +1447,8 @@ --i; } } - // Now change uses of _memproj_catchall to use _memproj_fallthrough and delete _memproj_catchall so - // we end up with a call that has only 1 memory projection + // Now change uses of _memproj_catchall to use _memproj_fallthrough and delete + // _memproj_catchall so we end up with a call that has only 1 memory projection. if (_memproj_catchall != NULL ) { if (_memproj_fallthrough == NULL) { _memproj_fallthrough = new (C, 1) ProjNode(call, TypeFunc::Memory); @@ -1393,17 +1462,18 @@ // back up iterator --i; } + assert(_memproj_catchall->outcnt() == 0, "all uses must be deleted"); + _igvn.remove_dead_node(_memproj_catchall); } - // An allocate node has separate i_o projections for the uses on the control and i_o paths - // Replace uses of the control i_o projection with result_phi_i_o (unless we are only generating a slow call) - if (_ioproj_fallthrough == NULL) { - _ioproj_fallthrough = new (C, 1) ProjNode(call, TypeFunc::I_O); - transform_later(_ioproj_fallthrough); - } else if (!always_slow) { + // An allocate node has separate i_o projections for the uses on the control + // and i_o paths. Always replace the control i_o projection with result i_o + // otherwise incoming i_o become dead when only a slow call is generated + // (it is different from memory projections where both projections are + // combined in such case). + if (_ioproj_fallthrough != NULL) { for (DUIterator_Fast imax, i = _ioproj_fallthrough->fast_outs(imax); i < imax; i++) { Node *use = _ioproj_fallthrough->fast_out(i); - _igvn.hash_delete(use); imax -= replace_input(use, _ioproj_fallthrough, result_phi_i_o); _igvn._worklist.push(use); @@ -1411,9 +1481,13 @@ --i; } } - // Now change uses of _ioproj_catchall to use _ioproj_fallthrough and delete _ioproj_catchall so - // we end up with a call that has only 1 control projection + // Now change uses of _ioproj_catchall to use _ioproj_fallthrough and delete + // _ioproj_catchall so we end up with a call that has only 1 i_o projection. if (_ioproj_catchall != NULL ) { + if (_ioproj_fallthrough == NULL) { + _ioproj_fallthrough = new (C, 1) ProjNode(call, TypeFunc::I_O); + transform_later(_ioproj_fallthrough); + } for (DUIterator_Fast imax, i = _ioproj_catchall->fast_outs(imax); i < imax; i++) { Node *use = _ioproj_catchall->fast_out(i); _igvn.hash_delete(use); @@ -1422,11 +1496,18 @@ // back up iterator --i; } + assert(_ioproj_catchall->outcnt() == 0, "all uses must be deleted"); + _igvn.remove_dead_node(_ioproj_catchall); } // if we generated only a slow call, we are done - if (always_slow) + if (always_slow) { + // Now we can unhook i_o. + call->set_req(TypeFunc::I_O, top()); + if (result_phi_i_o->outcnt() == 0) + _igvn.remove_dead_node(result_phi_i_o); return; + } if (_fallthroughcatchproj != NULL) { @@ -1470,7 +1551,7 @@ Node* mark_node = NULL; // For now only enable fast locking for non-array types if (UseBiasedLocking && (length == NULL)) { - mark_node = make_load(control, rawmem, klass_node, Klass::prototype_header_offset_in_bytes() + sizeof(oopDesc), TypeRawPtr::BOTTOM, T_ADDRESS); + mark_node = make_load(control, rawmem, klass_node, in_bytes(Klass::prototype_header_offset()), TypeRawPtr::BOTTOM, T_ADDRESS); } else { mark_node = makecon(TypeRawPtr::make((address)markOopDesc::prototype())); } @@ -1958,7 +2039,7 @@ #endif klass_node->init_req(0, ctrl); } - Node *proto_node = make_load(ctrl, mem, klass_node, Klass::prototype_header_offset_in_bytes() + sizeof(oopDesc), TypeX_X, TypeX_X->basic_type()); + Node *proto_node = make_load(ctrl, mem, klass_node, in_bytes(Klass::prototype_header_offset()), TypeX_X, TypeX_X->basic_type()); Node* thread = transform_later(new (C, 1) ThreadLocalNode()); Node* cast_thread = transform_later(new (C, 2) CastP2XNode(ctrl, thread)); diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/opto/matcher.cpp --- a/src/share/vm/opto/matcher.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/opto/matcher.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -1365,31 +1365,36 @@ const Type *t = m->bottom_type(); - if( t->singleton() ) { + if (t->singleton()) { // Never force constants into registers. Allow them to match as // constants or registers. Copies of the same value will share // the same register. See find_shared_node. return false; } else { // Not a constant // Stop recursion if they have different Controls. - // Slot 0 of constants is not really a Control. - if( control && m->in(0) && control != m->in(0) ) { + Node* m_control = m->in(0); + // Control of load's memory can post-dominates load's control. + // So use it since load can't float above its memory. + Node* mem_control = (m->is_Load()) ? m->in(MemNode::Memory)->in(0) : NULL; + if (control && m_control && control != m_control && control != mem_control) { // Actually, we can live with the most conservative control we // find, if it post-dominates the others. This allows us to // pick up load/op/store trees where the load can float a little // above the store. Node *x = control; - const uint max_scan = 6; // Arbitrary scan cutoff + const uint max_scan = 6; // Arbitrary scan cutoff uint j; - for( j=0; jis_Region() ) // Bail out at merge points + for (j=0; jis_Region()) // Bail out at merge points return true; x = x->in(0); - if( x == m->in(0) ) // Does 'control' post-dominate + if (x == m_control) // Does 'control' post-dominate break; // m->in(0)? If so, we can use it + if (x == mem_control) // Does 'control' post-dominate + break; // mem_control? If so, we can use it } - if( j == max_scan ) // No post-domination before scan end? + if (j == max_scan) // No post-domination before scan end? return true; // Then break the match tree up } if (m->is_DecodeN() && Matcher::narrow_oop_use_complex_address()) { diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/opto/memnode.cpp --- a/src/share/vm/opto/memnode.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/opto/memnode.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -1473,19 +1473,19 @@ const Type* LoadNode::load_array_final_field(const TypeKlassPtr *tkls, ciKlass* klass) const { - if (tkls->offset() == Klass::modifier_flags_offset_in_bytes() + (int)sizeof(oopDesc)) { + if (tkls->offset() == in_bytes(Klass::modifier_flags_offset())) { // The field is Klass::_modifier_flags. Return its (constant) value. // (Folds up the 2nd indirection in aClassConstant.getModifiers().) assert(this->Opcode() == Op_LoadI, "must load an int from _modifier_flags"); return TypeInt::make(klass->modifier_flags()); } - if (tkls->offset() == Klass::access_flags_offset_in_bytes() + (int)sizeof(oopDesc)) { + if (tkls->offset() == in_bytes(Klass::access_flags_offset())) { // The field is Klass::_access_flags. Return its (constant) value. // (Folds up the 2nd indirection in Reflection.getClassAccessFlags(aClassConstant).) assert(this->Opcode() == Op_LoadI, "must load an int from _access_flags"); return TypeInt::make(klass->access_flags()); } - if (tkls->offset() == Klass::layout_helper_offset_in_bytes() + (int)sizeof(oopDesc)) { + if (tkls->offset() == in_bytes(Klass::layout_helper_offset())) { // The field is Klass::_layout_helper. Return its constant value if known. assert(this->Opcode() == Op_LoadI, "must load an int from _layout_helper"); return TypeInt::make(klass->layout_helper()); @@ -1636,14 +1636,14 @@ // We are loading a field from a Klass metaobject whose identity // is known at compile time (the type is "exact" or "precise"). // Check for fields we know are maintained as constants by the VM. - if (tkls->offset() == Klass::super_check_offset_offset_in_bytes() + (int)sizeof(oopDesc)) { + if (tkls->offset() == in_bytes(Klass::super_check_offset_offset())) { // The field is Klass::_super_check_offset. Return its (constant) value. // (Folds up type checking code.) assert(Opcode() == Op_LoadI, "must load an int from _super_check_offset"); return TypeInt::make(klass->super_check_offset()); } // Compute index into primary_supers array - juint depth = (tkls->offset() - (Klass::primary_supers_offset_in_bytes() + (int)sizeof(oopDesc))) / sizeof(klassOop); + juint depth = (tkls->offset() - in_bytes(Klass::primary_supers_offset())) / sizeof(klassOop); // Check for overflowing; use unsigned compare to handle the negative case. if( depth < ciKlass::primary_super_limit() ) { // The field is an element of Klass::_primary_supers. Return its (constant) value. @@ -1654,14 +1654,14 @@ } const Type* aift = load_array_final_field(tkls, klass); if (aift != NULL) return aift; - if (tkls->offset() == in_bytes(arrayKlass::component_mirror_offset()) + (int)sizeof(oopDesc) + if (tkls->offset() == in_bytes(arrayKlass::component_mirror_offset()) && klass->is_array_klass()) { // The field is arrayKlass::_component_mirror. Return its (constant) value. // (Folds up aClassConstant.getComponentType, common in Arrays.copyOf.) assert(Opcode() == Op_LoadP, "must load an oop from _component_mirror"); return TypeInstPtr::make(klass->as_array_klass()->component_mirror()); } - if (tkls->offset() == Klass::java_mirror_offset_in_bytes() + (int)sizeof(oopDesc)) { + if (tkls->offset() == in_bytes(Klass::java_mirror_offset())) { // The field is Klass::_java_mirror. Return its (constant) value. // (Folds up the 2nd indirection in anObjConstant.getClass().) assert(Opcode() == Op_LoadP, "must load an oop from _java_mirror"); @@ -1679,7 +1679,7 @@ if( inner->is_instance_klass() && !inner->as_instance_klass()->flags().is_interface() ) { // Compute index into primary_supers array - juint depth = (tkls->offset() - (Klass::primary_supers_offset_in_bytes() + (int)sizeof(oopDesc))) / sizeof(klassOop); + juint depth = (tkls->offset() - in_bytes(Klass::primary_supers_offset())) / sizeof(klassOop); // Check for overflowing; use unsigned compare to handle the negative case. if( depth < ciKlass::primary_super_limit() && depth <= klass->super_depth() ) { // allow self-depth checks to handle self-check case @@ -1695,7 +1695,7 @@ // If the type is enough to determine that the thing is not an array, // we can give the layout_helper a positive interval type. // This will help short-circuit some reflective code. - if (tkls->offset() == Klass::layout_helper_offset_in_bytes() + (int)sizeof(oopDesc) + if (tkls->offset() == in_bytes(Klass::layout_helper_offset()) && !klass->is_array_klass() // not directly typed as an array && !klass->is_interface() // specifically not Serializable & Cloneable && !klass->is_java_lang_Object() // not the supertype of all T[] @@ -1938,7 +1938,7 @@ if( !klass->is_loaded() ) return _type; // Bail out if not loaded if( klass->is_obj_array_klass() && - (uint)tkls->offset() == objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)) { + tkls->offset() == in_bytes(objArrayKlass::element_klass_offset())) { ciKlass* elem = klass->as_obj_array_klass()->element_klass(); // // Always returning precise element type is incorrect, // // e.g., element type could be object and array may contain strings @@ -1949,7 +1949,7 @@ return TypeKlassPtr::make(tkls->ptr(), elem, 0/*offset*/); } if( klass->is_instance_klass() && tkls->klass_is_exact() && - (uint)tkls->offset() == Klass::super_offset_in_bytes() + sizeof(oopDesc)) { + tkls->offset() == in_bytes(Klass::super_offset())) { ciKlass* sup = klass->as_instance_klass()->super(); // The field is Klass::_super. Return its (constant) value. // (Folds up the 2nd indirection in aClassConstant.getSuperClass().) @@ -2013,11 +2013,11 @@ tkls->klass()->is_array_klass()) && adr2->is_AddP() ) { - int mirror_field = Klass::java_mirror_offset_in_bytes(); + int mirror_field = in_bytes(Klass::java_mirror_offset()); if (offset == java_lang_Class::array_klass_offset_in_bytes()) { mirror_field = in_bytes(arrayKlass::component_mirror_offset()); } - if (tkls->offset() == mirror_field + (int)sizeof(oopDesc)) { + if (tkls->offset() == mirror_field) { return adr2->in(AddPNode::Base); } } @@ -2721,6 +2721,7 @@ case Op_MemBarVolatile: return new(C, len) MemBarVolatileNode(C, atp, pn); case Op_MemBarCPUOrder: return new(C, len) MemBarCPUOrderNode(C, atp, pn); case Op_Initialize: return new(C, len) InitializeNode(C, atp, pn); + case Op_MemBarStoreStore: return new(C, len) MemBarStoreStoreNode(C, atp, pn); default: ShouldNotReachHere(); return NULL; } } @@ -2870,7 +2871,7 @@ //---------------------------InitializeNode------------------------------------ InitializeNode::InitializeNode(Compile* C, int adr_type, Node* rawoop) - : _is_complete(Incomplete), + : _is_complete(Incomplete), _does_not_escape(false), MemBarNode(C, adr_type, rawoop) { init_class_id(Class_Initialize); diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/opto/memnode.hpp --- a/src/share/vm/opto/memnode.hpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/opto/memnode.hpp Thu Dec 29 11:37:50 2011 -0800 @@ -918,6 +918,15 @@ virtual int Opcode() const; }; +class MemBarStoreStoreNode: public MemBarNode { +public: + MemBarStoreStoreNode(Compile* C, int alias_idx, Node* precedent) + : MemBarNode(C, alias_idx, precedent) { + init_class_id(Class_MemBarStoreStore); + } + virtual int Opcode() const; +}; + // Ordering between a volatile store and a following volatile load. // Requires multi-CPU visibility? class MemBarVolatileNode: public MemBarNode { @@ -950,6 +959,8 @@ }; int _is_complete; + bool _does_not_escape; + public: enum { Control = TypeFunc::Control, @@ -989,6 +1000,9 @@ void set_complete(PhaseGVN* phase); void set_complete_with_arraycopy() { _is_complete = Complete | WithArraycopy; } + bool does_not_escape() { return _does_not_escape; } + void set_does_not_escape() { _does_not_escape = true; } + #ifdef ASSERT // ensure all non-degenerate stores are ordered and non-overlapping bool stores_are_sane(PhaseTransform* phase); diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/opto/node.hpp --- a/src/share/vm/opto/node.hpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/opto/node.hpp Thu Dec 29 11:37:50 2011 -0800 @@ -97,6 +97,7 @@ class MachTempNode; class Matcher; class MemBarNode; +class MemBarStoreStoreNode; class MemNode; class MergeMemNode; class MultiNode; @@ -564,7 +565,8 @@ DEFINE_CLASS_ID(NeverBranch, MultiBranch, 2) DEFINE_CLASS_ID(Start, Multi, 2) DEFINE_CLASS_ID(MemBar, Multi, 3) - DEFINE_CLASS_ID(Initialize, MemBar, 0) + DEFINE_CLASS_ID(Initialize, MemBar, 0) + DEFINE_CLASS_ID(MemBarStoreStore, MemBar, 1) DEFINE_CLASS_ID(Mach, Node, 1) DEFINE_CLASS_ID(MachReturn, Mach, 0) @@ -744,6 +746,7 @@ DEFINE_CLASS_QUERY(MachTemp) DEFINE_CLASS_QUERY(Mem) DEFINE_CLASS_QUERY(MemBar) + DEFINE_CLASS_QUERY(MemBarStoreStore) DEFINE_CLASS_QUERY(MergeMem) DEFINE_CLASS_QUERY(Multi) DEFINE_CLASS_QUERY(MultiBranch) diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/opto/parse1.cpp --- a/src/share/vm/opto/parse1.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/opto/parse1.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -1911,7 +1911,7 @@ Node* klass_addr = basic_plus_adr( receiver, receiver, oopDesc::klass_offset_in_bytes() ); Node* klass = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), klass_addr, TypeInstPtr::KLASS) ); - Node* access_flags_addr = basic_plus_adr(klass, klass, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)); + Node* access_flags_addr = basic_plus_adr(klass, klass, in_bytes(Klass::access_flags_offset())); Node* access_flags = make_load(NULL, access_flags_addr, TypeInt::INT, T_INT); Node* mask = _gvn.transform(new (C, 3) AndINode(access_flags, intcon(JVM_ACC_HAS_FINALIZER))); diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/opto/parseHelper.cpp --- a/src/share/vm/opto/parseHelper.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/opto/parseHelper.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -200,7 +200,7 @@ // Come here for polymorphic array klasses // Extract the array element class - int element_klass_offset = objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc); + int element_klass_offset = in_bytes(objArrayKlass::element_klass_offset()); Node *p2 = basic_plus_adr(array_klass, array_klass, element_klass_offset); Node *a_e_klass = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p2, tak) ); @@ -220,7 +220,7 @@ _gvn.set_type(merge, Type::CONTROL); Node* kls = makecon(TypeKlassPtr::make(klass)); - Node* init_thread_offset = _gvn.MakeConX(instanceKlass::init_thread_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()); + Node* init_thread_offset = _gvn.MakeConX(in_bytes(instanceKlass::init_thread_offset())); Node* adr_node = basic_plus_adr(kls, kls, init_thread_offset); Node* init_thread = make_load(NULL, adr_node, TypeRawPtr::BOTTOM, T_ADDRESS); Node *tst = Bool( CmpP( init_thread, cur_thread), BoolTest::eq); @@ -228,7 +228,7 @@ set_control(IfTrue(iff)); merge->set_req(1, IfFalse(iff)); - Node* init_state_offset = _gvn.MakeConX(instanceKlass::init_state_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()); + Node* init_state_offset = _gvn.MakeConX(in_bytes(instanceKlass::init_state_offset())); adr_node = basic_plus_adr(kls, kls, init_state_offset); Node* init_state = make_load(NULL, adr_node, TypeInt::INT, T_INT); Node* being_init = _gvn.intcon(instanceKlass::being_initialized); diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/runtime/advancedThresholdPolicy.cpp --- a/src/share/vm/runtime/advancedThresholdPolicy.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/runtime/advancedThresholdPolicy.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -156,20 +156,19 @@ // Called with the queue locked and with at least one element CompileTask* AdvancedThresholdPolicy::select_task(CompileQueue* compile_queue) { CompileTask *max_task = NULL; - methodOop max_method; + methodHandle max_method; jlong t = os::javaTimeMillis(); // Iterate through the queue and find a method with a maximum rate. for (CompileTask* task = compile_queue->first(); task != NULL;) { CompileTask* next_task = task->next(); - methodOop method = (methodOop)JNIHandles::resolve(task->method_handle()); - methodDataOop mdo = method->method_data(); - update_rate(t, method); + methodHandle method = (methodOop)JNIHandles::resolve(task->method_handle()); + update_rate(t, method()); if (max_task == NULL) { max_task = task; max_method = method; } else { // If a method has been stale for some time, remove it from the queue. - if (is_stale(t, TieredCompileTaskTimeout, method) && !is_old(method)) { + if (is_stale(t, TieredCompileTaskTimeout, method()) && !is_old(method())) { if (PrintTieredEvents) { print_event(REMOVE_FROM_QUEUE, method, method, task->osr_bci(), (CompLevel)task->comp_level()); } @@ -181,7 +180,7 @@ } // Select a method with a higher rate - if (compare_methods(method, max_method)) { + if (compare_methods(method(), max_method())) { max_task = task; max_method = method; } @@ -190,7 +189,7 @@ } if (max_task->comp_level() == CompLevel_full_profile && TieredStopAtLevel > CompLevel_full_profile - && is_method_profiled(max_method)) { + && is_method_profiled(max_method())) { max_task->set_comp_level(CompLevel_limited_profile); if (PrintTieredEvents) { print_event(UPDATE_IN_QUEUE, max_method, max_method, max_task->osr_bci(), (CompLevel)max_task->comp_level()); diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/runtime/arguments.cpp --- a/src/share/vm/runtime/arguments.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/runtime/arguments.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -1000,6 +1000,13 @@ UseInterpreter = false; BackgroundCompilation = false; ClipInlining = false; + // Be much more aggressive in tiered mode with -Xcomp and exercise C2 more. + // We will first compile a level 3 version (C1 with full profiling), then do one invocation of it and + // compile a level 4 (C2) and then continue executing it. + if (TieredCompilation) { + Tier3InvokeNotifyFreqLog = 0; + Tier4InvocationThreshold = 0; + } break; } } diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/runtime/globals.hpp --- a/src/share/vm/runtime/globals.hpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/runtime/globals.hpp Thu Dec 29 11:37:50 2011 -0800 @@ -527,6 +527,9 @@ product(intx, UseSSE, 99, \ "Highest supported SSE instructions set on x86/x64") \ \ + product(intx, UseAVX, 99, \ + "Highest supported AVX instructions set on x86/x64") \ + \ product(intx, UseVIS, 99, \ "Highest supported VIS instructions set on Sparc") \ \ diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/shark/sharkIntrinsics.cpp --- a/src/share/vm/shark/sharkIntrinsics.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/shark/sharkIntrinsics.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -213,17 +213,11 @@ SharkType::oop_type(), "klass"); - Value *klass_part = builder()->CreateAddressOfStructEntry( - klass, - in_ByteSize(klassOopDesc::klass_part_offset_in_bytes()), - SharkType::klass_type(), - "klass_part"); - state()->push( SharkValue::create_jobject( builder()->CreateValueOfStructEntry( - klass_part, - in_ByteSize(Klass::java_mirror_offset_in_bytes()), + klass, + Klass::java_mirror_offset(), SharkType::oop_type(), "java_mirror"), true)); diff -r 7faca6dfa2ed -r 8940fd98d540 src/share/vm/shark/sharkTopLevelBlock.cpp --- a/src/share/vm/shark/sharkTopLevelBlock.cpp Tue Dec 27 12:38:49 2011 -0800 +++ b/src/share/vm/shark/sharkTopLevelBlock.cpp Thu Dec 29 11:37:50 2011 -0800 @@ -745,15 +745,9 @@ SharkType::oop_type(), "klass"); - Value *klass_part = builder()->CreateAddressOfStructEntry( + Value *access_flags = builder()->CreateValueOfStructEntry( klass, - in_ByteSize(klassOopDesc::klass_part_offset_in_bytes()), - SharkType::klass_type(), - "klass_part"); - - Value *access_flags = builder()->CreateValueOfStructEntry( - klass_part, - in_ByteSize(Klass::access_flags_offset_in_bytes()), + Klass::access_flags_offset(), SharkType::jint_type(), "access_flags"); diff -r 7faca6dfa2ed -r 8940fd98d540 test/compiler/7123108/Test7123108.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/7123108/Test7123108.java Thu Dec 29 11:37:50 2011 -0800 @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7123108 + * @summary C1 crashes with assert(if_state != NULL) failed: states do not match up + * + * @run main/othervm -Xcomp Test7123108 + */ + +public class Test7123108 { + + static class Test_Class_0 { + final static byte var_2 = 67; + byte var_3; + } + + Object var_25 = "kgfpyhcms"; + static long var_27 = 6899666748616086528L; + + static float func_1() + { + return 0.0F; + } + + private void test() + { + "dlwq".charAt(((short)'x' > var_27 | func_1() <= (((Test_Class_0)var_25).var_3) ? true : true) ? Test_Class_0.var_2 & (short)-1.1173839E38F : 'Y'); + } + + public static void main(String[] args) + { + Test7123108 t = new Test7123108(); + try { + t.test(); + } catch (Throwable e) { } + } +}