diff src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp @ 1783:d5d065957597

6953144: Tiered compilation
Summary: Infrastructure for tiered compilation support (interpreter + c1 + c2) for 32 and 64 bit. Simple tiered policy implementation.
Reviewed-by: kvn, never, phh, twisti
author iveresov
date Fri, 03 Sep 2010 17:51:07 -0700
parents e9ff18c4ace7
children 3a294e483abc
line wrap: on
line diff
--- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Thu Sep 02 11:40:02 2010 -0700
+++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Fri Sep 03 17:51:07 2010 -0700
@@ -1625,13 +1625,18 @@
 
 void LIR_Assembler::return_op(LIR_Opr result) {
   // the poll may need a register so just pick one that isn't the return register
-#ifdef TIERED
+#if defined(TIERED) && !defined(_LP64)
   if (result->type_field() == LIR_OprDesc::long_type) {
     // Must move the result to G1
     // Must leave proper result in O0,O1 and G1 (TIERED only)
     __ sllx(I0, 32, G1);          // Shift bits into high G1
     __ srl (I1, 0, I1);           // Zero extend O1 (harmless?)
     __ or3 (I1, G1, G1);          // OR 64 bits into G1
+#ifdef ASSERT
+    // mangle it so any problems will show up
+    __ set(0xdeadbeef, I0);
+    __ set(0xdeadbeef, I1);
+#endif
   }
 #endif // TIERED
   __ set((intptr_t)os::get_polling_page(), L0);
@@ -2424,6 +2429,195 @@
 }
 
 
+void LIR_Assembler::type_profile_helper(Register mdo, int mdo_offset_bias,
+                                        ciMethodData *md, ciProfileData *data,
+                                        Register recv, Register tmp1, Label* update_done) {
+  uint i;
+  for (i = 0; i < VirtualCallData::row_limit(); i++) {
+    Label next_test;
+    // See if the receiver is receiver[n].
+    Address receiver_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)) -
+                          mdo_offset_bias);
+    __ ld_ptr(receiver_addr, tmp1);
+    __ verify_oop(tmp1);
+    __ cmp(recv, tmp1);
+    __ brx(Assembler::notEqual, false, Assembler::pt, next_test);
+    __ delayed()->nop();
+    Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)) -
+                      mdo_offset_bias);
+    __ ld_ptr(data_addr, tmp1);
+    __ add(tmp1, DataLayout::counter_increment, tmp1);
+    __ st_ptr(tmp1, data_addr);
+    __ ba(false, *update_done);
+    __ delayed()->nop();
+    __ bind(next_test);
+  }
+
+  // Didn't find receiver; find next empty slot and fill it in
+  for (i = 0; i < VirtualCallData::row_limit(); i++) {
+    Label next_test;
+    Address recv_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)) -
+                      mdo_offset_bias);
+    load(recv_addr, tmp1, T_OBJECT);
+    __ br_notnull(tmp1, false, Assembler::pt, next_test);
+    __ delayed()->nop();
+    __ st_ptr(recv, recv_addr);
+    __ set(DataLayout::counter_increment, tmp1);
+    __ st_ptr(tmp1, mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)) -
+              mdo_offset_bias);
+    __ ba(false, *update_done);
+    __ delayed()->nop();
+    __ bind(next_test);
+  }
+}
+
+void LIR_Assembler::emit_checkcast(LIR_OpTypeCheck *op) {
+  assert(op->code() == lir_checkcast, "Invalid operation");
+  // we always need a stub for the failure case.
+  CodeStub* stub = op->stub();
+  Register obj = op->object()->as_register();
+  Register k_RInfo = op->tmp1()->as_register();
+  Register klass_RInfo = op->tmp2()->as_register();
+  Register dst = op->result_opr()->as_register();
+  Register Rtmp1 = op->tmp3()->as_register();
+  ciKlass* k = op->klass();
+
+
+  if (obj == k_RInfo) {
+    k_RInfo = klass_RInfo;
+    klass_RInfo = obj;
+  }
+
+  ciMethodData* md;
+  ciProfileData* data;
+  int mdo_offset_bias = 0;
+  if (op->should_profile()) {
+    ciMethod* method = op->profiled_method();
+    assert(method != NULL, "Should have method");
+    int bci          = op->profiled_bci();
+    md = method->method_data();
+    if (md == NULL) {
+      bailout("out of memory building methodDataOop");
+      return;
+    }
+    data = md->bci_to_data(bci);
+    assert(data != NULL,       "need data for checkcast");
+    assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for checkcast");
+    if (!Assembler::is_simm13(md->byte_offset_of_slot(data, DataLayout::header_offset()) + data->size_in_bytes())) {
+      // The offset is large so bias the mdo by the base of the slot so
+      // that the ld can use simm13s to reference the slots of the data
+      mdo_offset_bias = md->byte_offset_of_slot(data, DataLayout::header_offset());
+    }
+
+    // We need two temporaries to perform this operation on SPARC,
+    // so to keep things simple we perform a redundant test here
+    Label profile_done;
+    __ br_notnull(obj, false, Assembler::pn, profile_done);
+    __ delayed()->nop();
+    Register mdo      = k_RInfo;
+    Register data_val = Rtmp1;
+    jobject2reg(md->constant_encoding(), mdo);
+    if (mdo_offset_bias > 0) {
+      __ set(mdo_offset_bias, data_val);
+      __ add(mdo, data_val, mdo);
+    }
+    Address flags_addr(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset()) - mdo_offset_bias);
+    __ ldub(flags_addr, data_val);
+    __ or3(data_val, BitData::null_seen_byte_constant(), data_val);
+    __ stb(data_val, flags_addr);
+    __ bind(profile_done);
+  }
+  Label profile_cast_failure;
+
+  Label done, done_null;
+  // Where to go in case of cast failure
+  Label *failure_target = op->should_profile() ? &profile_cast_failure : stub->entry();
+
+  // patching may screw with our temporaries on sparc,
+  // so let's do it before loading the class
+  if (k->is_loaded()) {
+    jobject2reg(k->constant_encoding(), k_RInfo);
+  } else {
+    jobject2reg_with_patching(k_RInfo, op->info_for_patch());
+  }
+  assert(obj != k_RInfo, "must be different");
+  __ br_null(obj, false, Assembler::pn, done_null);
+  __ delayed()->nop();
+
+  // get object class
+  // not a safepoint as obj null check happens earlier
+  load(obj, oopDesc::klass_offset_in_bytes(), klass_RInfo, T_OBJECT, NULL);
+  if (op->fast_check()) {
+    assert_different_registers(klass_RInfo, k_RInfo);
+    __ cmp(k_RInfo, klass_RInfo);
+    __ brx(Assembler::notEqual, false, Assembler::pt, *failure_target);
+    __ delayed()->nop();
+  } else {
+    bool need_slow_path = true;
+    if (k->is_loaded()) {
+      if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())
+        need_slow_path = false;
+      // perform the fast part of the checking logic
+      __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, noreg,
+                                       (need_slow_path ? &done : NULL),
+                                       failure_target, NULL,
+                                       RegisterOrConstant(k->super_check_offset()));
+    } else {
+      // perform the fast part of the checking logic
+      __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, &done,
+                                       failure_target, NULL);
+    }
+    if (need_slow_path) {
+      // call out-of-line instance of __ check_klass_subtype_slow_path(...):
+      assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
+      __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
+      __ delayed()->nop();
+      __ cmp(G3, 0);
+      __ br(Assembler::equal, false, Assembler::pn, *failure_target);
+      __ delayed()->nop();
+    }
+  }
+  __ bind(done);
+
+  if (op->should_profile()) {
+    Register mdo  = klass_RInfo, recv = k_RInfo, tmp1 = Rtmp1;
+    assert_different_registers(obj, mdo, recv, tmp1);
+
+    jobject2reg(md->constant_encoding(), mdo);
+    if (mdo_offset_bias > 0) {
+      __ set(mdo_offset_bias, tmp1);
+      __ add(mdo, tmp1, mdo);
+    }
+    Label update_done;
+    load(Address(obj, oopDesc::klass_offset_in_bytes()), recv, T_OBJECT);
+    type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, &update_done);
+    // Jump over the failure case
+    __ ba(false, update_done);
+    __ delayed()->nop();
+
+
+    // Cast failure case
+    __ bind(profile_cast_failure);
+    jobject2reg(md->constant_encoding(), mdo);
+    if (mdo_offset_bias > 0) {
+      __ set(mdo_offset_bias, tmp1);
+      __ add(mdo, tmp1, mdo);
+    }
+    Address data_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias);
+    __ ld_ptr(data_addr, tmp1);
+    __ sub(tmp1, DataLayout::counter_increment, tmp1);
+    __ st_ptr(tmp1, data_addr);
+    __ ba(false, *stub->entry());
+    __ delayed()->nop();
+
+    __ bind(update_done);
+  }
+
+  __ bind(done_null);
+  __ mov(obj, dst);
+}
+
+
 void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
   LIR_Code code = op->code();
   if (code == lir_store_check) {
@@ -2437,8 +2631,7 @@
 
     CodeStub* stub = op->stub();
     Label done;
-    __ cmp(value, 0);
-    __ br(Assembler::equal, false, Assembler::pn, done);
+    __ br_null(value, false, Assembler::pn, done);
     __ delayed()->nop();
     load(array, oopDesc::klass_offset_in_bytes(), k_RInfo, T_OBJECT, op->info_for_exception());
     load(value, oopDesc::klass_offset_in_bytes(), klass_RInfo, T_OBJECT, NULL);
@@ -2456,109 +2649,6 @@
     __ br(Assembler::equal, false, Assembler::pn, *stub->entry());
     __ delayed()->nop();
     __ bind(done);
-  } else if (op->code() == lir_checkcast) {
-    // we always need a stub for the failure case.
-    CodeStub* stub = op->stub();
-    Register obj = op->object()->as_register();
-    Register k_RInfo = op->tmp1()->as_register();
-    Register klass_RInfo = op->tmp2()->as_register();
-    Register dst = op->result_opr()->as_register();
-    Register Rtmp1 = op->tmp3()->as_register();
-    ciKlass* k = op->klass();
-
-    if (obj == k_RInfo) {
-      k_RInfo = klass_RInfo;
-      klass_RInfo = obj;
-    }
-    if (op->profiled_method() != NULL) {
-      ciMethod* method = op->profiled_method();
-      int bci          = op->profiled_bci();
-
-      // We need two temporaries to perform this operation on SPARC,
-      // so to keep things simple we perform a redundant test here
-      Label profile_done;
-      __ cmp(obj, 0);
-      __ br(Assembler::notEqual, false, Assembler::pn, profile_done);
-      __ delayed()->nop();
-      // Object is null; update methodDataOop
-      ciMethodData* md = method->method_data();
-      if (md == NULL) {
-        bailout("out of memory building methodDataOop");
-        return;
-      }
-      ciProfileData* data = md->bci_to_data(bci);
-      assert(data != NULL,       "need data for checkcast");
-      assert(data->is_BitData(), "need BitData for checkcast");
-      Register mdo      = k_RInfo;
-      Register data_val = Rtmp1;
-      jobject2reg(md->constant_encoding(), mdo);
-
-      int mdo_offset_bias = 0;
-      if (!Assembler::is_simm13(md->byte_offset_of_slot(data, DataLayout::header_offset()) + data->size_in_bytes())) {
-        // The offset is large so bias the mdo by the base of the slot so
-        // that the ld can use simm13s to reference the slots of the data
-        mdo_offset_bias = md->byte_offset_of_slot(data, DataLayout::header_offset());
-        __ set(mdo_offset_bias, data_val);
-        __ add(mdo, data_val, mdo);
-      }
-
-
-      Address flags_addr(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset()) - mdo_offset_bias);
-      __ ldub(flags_addr, data_val);
-      __ or3(data_val, BitData::null_seen_byte_constant(), data_val);
-      __ stb(data_val, flags_addr);
-      __ bind(profile_done);
-    }
-
-    Label done;
-    // patching may screw with our temporaries on sparc,
-    // so let's do it before loading the class
-    if (k->is_loaded()) {
-      jobject2reg(k->constant_encoding(), k_RInfo);
-    } else {
-      jobject2reg_with_patching(k_RInfo, op->info_for_patch());
-    }
-    assert(obj != k_RInfo, "must be different");
-    __ cmp(obj, 0);
-    __ br(Assembler::equal, false, Assembler::pn, done);
-    __ delayed()->nop();
-
-    // get object class
-    // not a safepoint as obj null check happens earlier
-    load(obj, oopDesc::klass_offset_in_bytes(), klass_RInfo, T_OBJECT, NULL);
-    if (op->fast_check()) {
-      assert_different_registers(klass_RInfo, k_RInfo);
-      __ cmp(k_RInfo, klass_RInfo);
-      __ br(Assembler::notEqual, false, Assembler::pt, *stub->entry());
-      __ delayed()->nop();
-      __ bind(done);
-    } else {
-      bool need_slow_path = true;
-      if (k->is_loaded()) {
-        if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())
-          need_slow_path = false;
-        // perform the fast part of the checking logic
-        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, noreg,
-                                         (need_slow_path ? &done : NULL),
-                                         stub->entry(), NULL,
-                                         RegisterOrConstant(k->super_check_offset()));
-      } else {
-        // perform the fast part of the checking logic
-        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7,
-                                         &done, stub->entry(), NULL);
-      }
-      if (need_slow_path) {
-        // call out-of-line instance of __ check_klass_subtype_slow_path(...):
-        assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
-        __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
-        __ delayed()->nop();
-        __ cmp(G3, 0);
-        __ br(Assembler::equal, false, Assembler::pn, *stub->entry());
-        __ delayed()->nop();
-      }
-      __ bind(done);
-    }
-    __ mov(obj, dst);
   } else if (code == lir_instanceof) {
     Register obj = op->object()->as_register();
     Register k_RInfo = op->tmp1()->as_register();
@@ -2580,8 +2670,7 @@
       jobject2reg_with_patching(k_RInfo, op->info_for_patch());
     }
     assert(obj != k_RInfo, "must be different");
-    __ cmp(obj, 0);
-    __ br(Assembler::equal, true, Assembler::pn, done);
+    __ br_null(obj, true, Assembler::pn, done);
     __ delayed()->set(0, dst);
 
     // get object class
@@ -2589,7 +2678,7 @@
     load(obj, oopDesc::klass_offset_in_bytes(), klass_RInfo, T_OBJECT, NULL);
     if (op->fast_check()) {
       __ cmp(k_RInfo, klass_RInfo);
-      __ br(Assembler::equal, true, Assembler::pt, done);
+      __ brx(Assembler::equal, true, Assembler::pt, done);
       __ delayed()->set(1, dst);
       __ set(0, dst);
       __ bind(done);
@@ -2776,9 +2865,14 @@
   ciProfileData* data = md->bci_to_data(bci);
   assert(data->is_CounterData(), "need CounterData for calls");
   assert(op->mdo()->is_single_cpu(),  "mdo must be allocated");
+  Register mdo  = op->mdo()->as_register();
+#ifdef _LP64
+  assert(op->tmp1()->is_double_cpu(), "tmp1 must be allocated");
+  Register tmp1 = op->tmp1()->as_register_lo();
+#else
   assert(op->tmp1()->is_single_cpu(), "tmp1 must be allocated");
-  Register mdo  = op->mdo()->as_register();
   Register tmp1 = op->tmp1()->as_register();
+#endif
   jobject2reg(md->constant_encoding(), mdo);
   int mdo_offset_bias = 0;
   if (!Assembler::is_simm13(md->byte_offset_of_slot(data, CounterData::count_offset()) +
@@ -2795,13 +2889,13 @@
   // Perform additional virtual call profiling for invokevirtual and
   // invokeinterface bytecodes
   if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
-      Tier1ProfileVirtualCalls) {
+      C1ProfileVirtualCalls) {
     assert(op->recv()->is_single_cpu(), "recv must be allocated");
     Register recv = op->recv()->as_register();
     assert_different_registers(mdo, tmp1, recv);
     assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls");
     ciKlass* known_klass = op->known_holder();
-    if (Tier1OptimizeVirtualCallProfiling && known_klass != NULL) {
+    if (C1OptimizeVirtualCallProfiling && known_klass != NULL) {
       // We know the type that will be seen at this call site; we can
       // statically update the methodDataOop rather than needing to do
       // dynamic tests on the receiver type
@@ -2816,9 +2910,9 @@
           Address data_addr(mdo, md->byte_offset_of_slot(data,
                                                          VirtualCallData::receiver_count_offset(i)) -
                             mdo_offset_bias);
-          __ lduw(data_addr, tmp1);
+          __ ld_ptr(data_addr, tmp1);
           __ add(tmp1, DataLayout::counter_increment, tmp1);
-          __ stw(tmp1, data_addr);
+          __ st_ptr(tmp1, data_addr);
           return;
         }
       }
@@ -2837,70 +2931,32 @@
           __ st_ptr(tmp1, recv_addr);
           Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) -
                             mdo_offset_bias);
-          __ lduw(data_addr, tmp1);
+          __ ld_ptr(data_addr, tmp1);
           __ add(tmp1, DataLayout::counter_increment, tmp1);
-          __ stw(tmp1, data_addr);
+          __ st_ptr(tmp1, data_addr);
           return;
         }
       }
     } else {
       load(Address(recv, oopDesc::klass_offset_in_bytes()), recv, T_OBJECT);
       Label update_done;
-      uint i;
-      for (i = 0; i < VirtualCallData::row_limit(); i++) {
-        Label next_test;
-        // See if the receiver is receiver[n].
-        Address receiver_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)) -
-                              mdo_offset_bias);
-        __ ld_ptr(receiver_addr, tmp1);
-        __ verify_oop(tmp1);
-        __ cmp(recv, tmp1);
-        __ brx(Assembler::notEqual, false, Assembler::pt, next_test);
-        __ delayed()->nop();
-        Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) -
-                          mdo_offset_bias);
-        __ lduw(data_addr, tmp1);
-        __ add(tmp1, DataLayout::counter_increment, tmp1);
-        __ stw(tmp1, data_addr);
-        __ br(Assembler::always, false, Assembler::pt, update_done);
-        __ delayed()->nop();
-        __ bind(next_test);
-      }
-
-      // Didn't find receiver; find next empty slot and fill it in
-      for (i = 0; i < VirtualCallData::row_limit(); i++) {
-        Label next_test;
-        Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)) -
-                          mdo_offset_bias);
-        load(recv_addr, tmp1, T_OBJECT);
-        __ tst(tmp1);
-        __ brx(Assembler::notEqual, false, Assembler::pt, next_test);
-        __ delayed()->nop();
-        __ st_ptr(recv, recv_addr);
-        __ set(DataLayout::counter_increment, tmp1);
-        __ st_ptr(tmp1, mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) -
-                  mdo_offset_bias);
-        __ br(Assembler::always, false, Assembler::pt, update_done);
-        __ delayed()->nop();
-        __ bind(next_test);
-      }
+      type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, &update_done);
       // Receiver did not match any saved receiver and there is no empty row for it.
       // Increment total counter to indicate polymorphic case.
-      __ lduw(counter_addr, tmp1);
+      __ ld_ptr(counter_addr, tmp1);
       __ add(tmp1, DataLayout::counter_increment, tmp1);
-      __ stw(tmp1, counter_addr);
+      __ st_ptr(tmp1, counter_addr);
 
       __ bind(update_done);
     }
   } else {
     // Static call
-    __ lduw(counter_addr, tmp1);
+    __ ld_ptr(counter_addr, tmp1);
     __ add(tmp1, DataLayout::counter_increment, tmp1);
-    __ stw(tmp1, counter_addr);
+    __ st_ptr(tmp1, counter_addr);
   }
 }
 
-
 void LIR_Assembler::align_backward_branch_target() {
   __ align(OptoLoopAlignment);
 }
@@ -3093,31 +3149,36 @@
   // no-op on TSO
 }
 
-// Macro to Pack two sequential registers containing 32 bit values
+// Pack two sequential registers containing 32 bit values
 // into a single 64 bit register.
-// rs and rs->successor() are packed into rd
-// rd and rs may be the same register.
-// Note: rs and rs->successor() are destroyed.
-void LIR_Assembler::pack64( Register rs, Register rd ) {
+// src and src->successor() are packed into dst
+// src and dst may be the same register.
+// Note: src is destroyed
+void LIR_Assembler::pack64(LIR_Opr src, LIR_Opr dst) {
+  Register rs = src->as_register();
+  Register rd = dst->as_register_lo();
   __ sllx(rs, 32, rs);
   __ srl(rs->successor(), 0, rs->successor());
   __ or3(rs, rs->successor(), rd);
 }
 
-// Macro to unpack a 64 bit value in a register into
+// Unpack a 64 bit value in a register into
 // two sequential registers.
-// rd is unpacked into rd and rd->successor()
-void LIR_Assembler::unpack64( Register rd ) {
-  __ mov(rd, rd->successor());
-  __ srax(rd, 32, rd);
-  __ sra(rd->successor(), 0, rd->successor());
+// src is unpacked into dst and dst->successor()
+void LIR_Assembler::unpack64(LIR_Opr src, LIR_Opr dst) {
+  Register rs = src->as_register_lo();
+  Register rd = dst->as_register_hi();
+  assert_different_registers(rs, rd, rd->successor());
+  __ srlx(rs, 32, rd);
+  __ srl (rs,  0, rd->successor());
 }
 
 
 void LIR_Assembler::leal(LIR_Opr addr_opr, LIR_Opr dest) {
   LIR_Address* addr = addr_opr->as_address_ptr();
   assert(addr->index()->is_illegal() && addr->scale() == LIR_Address::times_1 && Assembler::is_simm13(addr->disp()), "can't handle complex addresses yet");
-  __ add(addr->base()->as_register(), addr->disp(), dest->as_register());
+
+  __ add(addr->base()->as_pointer_register(), addr->disp(), dest->as_pointer_register());
 }
 
 
@@ -3188,11 +3249,36 @@
             tty->cr();
           }
 #endif
-          continue;
+        } else {
+          LIR_Op* delay_op = new LIR_OpDelay(new LIR_Op0(lir_nop), op->as_OpJavaCall()->info());
+          inst->insert_before(i + 1, delay_op);
+          i++;
         }
 
-        LIR_Op* delay_op = new LIR_OpDelay(new LIR_Op0(lir_nop), op->as_OpJavaCall()->info());
-        inst->insert_before(i + 1, delay_op);
+#if defined(TIERED) && !defined(_LP64)
+        // fixup the return value from G1 to O0/O1 for long returns.
+        // It's done here instead of in LIRGenerator because there's
+        // such a mismatch between the single reg and double reg
+        // calling convention.
+        LIR_OpJavaCall* callop = op->as_OpJavaCall();
+        if (callop->result_opr() == FrameMap::out_long_opr) {
+          LIR_OpJavaCall* call;
+          LIR_OprList* arguments = new LIR_OprList(callop->arguments()->length());
+          for (int a = 0; a < callop->arguments()->length(); a++) {
+            arguments->append(callop->arguments()->at(a));
+          }
+          if (op->code() == lir_virtual_call) {
+            call = new LIR_OpJavaCall(op->code(), callop->method(), callop->receiver(), FrameMap::g1_long_single_opr,
+                                      callop->vtable_offset(), arguments, callop->info());
+          } else {
+            call = new LIR_OpJavaCall(op->code(), callop->method(), callop->receiver(), FrameMap::g1_long_single_opr,
+                                      callop->addr(), arguments, callop->info());
+          }
+          inst->at_put(i - 1, call);
+          inst->insert_before(i + 1, new LIR_Op1(lir_unpack64, FrameMap::g1_long_single_opr, callop->result_opr(),
+                                                 T_LONG, lir_patch_none, NULL));
+        }
+#endif
         break;
       }
     }