changeset 17628:add2caa66e7e

8026253: New type profiling points: sparc support Summary: c1 and interpreter support for new type profiling on sparc Reviewed-by: kvn, twisti
author roland
date Tue, 14 Jan 2014 14:51:47 +0100
parents d7773b29c65a
children 412d3b5fe90e
files src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp src/cpu/sparc/vm/interp_masm_sparc.cpp src/cpu/sparc/vm/interp_masm_sparc.hpp src/cpu/sparc/vm/templateInterpreter_sparc.cpp src/cpu/sparc/vm/templateTable_sparc.cpp src/cpu/x86/vm/interp_masm_x86.cpp src/share/vm/c1/c1_LIRGenerator.cpp src/share/vm/runtime/arguments.cpp
diffstat 8 files changed, 386 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Tue Jan 14 12:44:12 2014 +0100
+++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Tue Jan 14 14:51:47 2014 +0100
@@ -1315,7 +1315,7 @@
 }
 
 Address LIR_Assembler::as_Address(LIR_Address* addr) {
-  Register reg = addr->base()->as_register();
+  Register reg = addr->base()->as_pointer_register();
   LIR_Opr index = addr->index();
   if (index->is_illegal()) {
     return Address(reg, addr->disp());
@@ -3101,7 +3101,145 @@
 }
 
 void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) {
-  fatal("Type profiling not implemented on this platform");
+  Register obj = op->obj()->as_register();
+  Register tmp1 = op->tmp()->as_pointer_register();
+  Register tmp2 = G1;
+  Address mdo_addr = as_Address(op->mdp()->as_address_ptr());
+  ciKlass* exact_klass = op->exact_klass();
+  intptr_t current_klass = op->current_klass();
+  bool not_null = op->not_null();
+  bool no_conflict = op->no_conflict();
+
+  Label update, next, none;
+
+  bool do_null = !not_null;
+  bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass;
+  bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set;
+
+  assert(do_null || do_update, "why are we here?");
+  assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?");
+
+  __ verify_oop(obj);
+
+  if (tmp1 != obj) {
+    __ mov(obj, tmp1);
+  }
+  if (do_null) {
+    __ br_notnull_short(tmp1, Assembler::pt, update);
+    if (!TypeEntries::was_null_seen(current_klass)) {
+      __ ld_ptr(mdo_addr, tmp1);
+      __ or3(tmp1, TypeEntries::null_seen, tmp1);
+      __ st_ptr(tmp1, mdo_addr);
+    }
+    if (do_update) {
+      __ ba(next);
+      __ delayed()->nop();
+    }
+#ifdef ASSERT
+  } else {
+    __ br_notnull_short(tmp1, Assembler::pt, update);
+    __ stop("unexpect null obj");
+#endif
+  }
+
+  __ bind(update);
+
+  if (do_update) {
+#ifdef ASSERT
+    if (exact_klass != NULL) {
+      Label ok;
+      __ load_klass(tmp1, tmp1);
+      metadata2reg(exact_klass->constant_encoding(), tmp2);
+      __ cmp_and_br_short(tmp1, tmp2, Assembler::equal, Assembler::pt, ok);
+      __ stop("exact klass and actual klass differ");
+      __ bind(ok);
+    }
+#endif
+
+    Label do_update;
+    __ ld_ptr(mdo_addr, tmp2);
+
+    if (!no_conflict) {
+      if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) {
+        if (exact_klass != NULL) {
+          metadata2reg(exact_klass->constant_encoding(), tmp1);
+        } else {
+          __ load_klass(tmp1, tmp1);
+        }
+
+        __ xor3(tmp1, tmp2, tmp1);
+        __ btst(TypeEntries::type_klass_mask, tmp1);
+        // klass seen before, nothing to do. The unknown bit may have been
+        // set already but no need to check.
+        __ brx(Assembler::zero, false, Assembler::pt, next);
+        __ delayed()->
+
+           btst(TypeEntries::type_unknown, tmp1);
+        // already unknown. Nothing to do anymore.
+        __ brx(Assembler::notZero, false, Assembler::pt, next);
+
+        if (TypeEntries::is_type_none(current_klass)) {
+          __ delayed()->btst(TypeEntries::type_mask, tmp2);
+          __ brx(Assembler::zero, true, Assembler::pt, do_update);
+          // first time here. Set profile type.
+          __ delayed()->or3(tmp2, tmp1, tmp2);
+        } else {
+          __ delayed()->nop();
+        }
+      } else {
+        assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
+               ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only");
+
+        __ btst(TypeEntries::type_unknown, tmp2);
+        // already unknown. Nothing to do anymore.
+        __ brx(Assembler::notZero, false, Assembler::pt, next);
+        __ delayed()->nop();
+      }
+
+      // different than before. Cannot keep accurate profile.
+      __ or3(tmp2, TypeEntries::type_unknown, tmp2);
+    } else {
+      // There's a single possible klass at this profile point
+      assert(exact_klass != NULL, "should be");
+      if (TypeEntries::is_type_none(current_klass)) {
+        metadata2reg(exact_klass->constant_encoding(), tmp1);
+        __ xor3(tmp1, tmp2, tmp1);
+        __ btst(TypeEntries::type_klass_mask, tmp1);
+        __ brx(Assembler::zero, false, Assembler::pt, next);
+#ifdef ASSERT
+
+        {
+          Label ok;
+          __ delayed()->btst(TypeEntries::type_mask, tmp2);
+          __ brx(Assembler::zero, true, Assembler::pt, ok);
+          __ delayed()->nop();
+
+          __ stop("unexpected profiling mismatch");
+          __ bind(ok);
+        }
+        // first time here. Set profile type.
+        __ or3(tmp2, tmp1, tmp2);
+#else
+        // first time here. Set profile type.
+        __ delayed()->or3(tmp2, tmp1, tmp2);
+#endif
+
+      } else {
+        assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
+               ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent");
+
+        // already unknown. Nothing to do anymore.
+        __ btst(TypeEntries::type_unknown, tmp2);
+        __ brx(Assembler::notZero, false, Assembler::pt, next);
+        __ delayed()->or3(tmp2, TypeEntries::type_unknown, tmp2);
+      }
+    }
+
+    __ bind(do_update);
+    __ st_ptr(tmp2, mdo_addr);
+
+    __ bind(next);
+  }
 }
 
 void LIR_Assembler::align_backward_branch_target() {
@@ -3321,9 +3459,14 @@
 
 void LIR_Assembler::leal(LIR_Opr addr_opr, LIR_Opr dest) {
   LIR_Address* addr = addr_opr->as_address_ptr();
-  assert(addr->index()->is_illegal() && addr->scale() == LIR_Address::times_1 && Assembler::is_simm13(addr->disp()), "can't handle complex addresses yet");
-
-  __ add(addr->base()->as_pointer_register(), addr->disp(), dest->as_pointer_register());
+  assert(addr->index()->is_illegal() && addr->scale() == LIR_Address::times_1, "can't handle complex addresses yet");
+
+  if (Assembler::is_simm13(addr->disp())) {
+    __ add(addr->base()->as_pointer_register(), addr->disp(), dest->as_pointer_register());
+  } else {
+    __ set(addr->disp(), G3_scratch);
+    __ add(addr->base()->as_pointer_register(), G3_scratch, dest->as_pointer_register());
+  }
 }
 
 
--- a/src/cpu/sparc/vm/interp_masm_sparc.cpp	Tue Jan 14 12:44:12 2014 +0100
+++ b/src/cpu/sparc/vm/interp_masm_sparc.cpp	Tue Jan 14 14:51:47 2014 +0100
@@ -1892,6 +1892,220 @@
   }
 }
 
+void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr, Register tmp) {
+  Label not_null, do_nothing, do_update;
+
+  assert_different_registers(obj, mdo_addr.base(), tmp);
+
+  verify_oop(obj);
+
+  ld_ptr(mdo_addr, tmp);
+
+  br_notnull_short(obj, pt, not_null);
+  or3(tmp, TypeEntries::null_seen, tmp);
+  ba_short(do_update);
+
+  bind(not_null);
+  load_klass(obj, obj);
+
+  xor3(obj, tmp, obj);
+  btst(TypeEntries::type_klass_mask, obj);
+  // klass seen before, nothing to do. The unknown bit may have been
+  // set already but no need to check.
+  brx(zero, false, pt, do_nothing);
+  delayed()->
+
+  btst(TypeEntries::type_unknown, obj);
+  // already unknown. Nothing to do anymore.
+  brx(notZero, false, pt, do_nothing);
+  delayed()->
+
+  btst(TypeEntries::type_mask, tmp);
+  brx(zero, true, pt, do_update);
+  // first time here. Set profile type.
+  delayed()->or3(tmp, obj, tmp);
+
+  // different than before. Cannot keep accurate profile.
+  or3(tmp, TypeEntries::type_unknown, tmp);
+
+  bind(do_update);
+  // update profile
+  st_ptr(tmp, mdo_addr);
+
+  bind(do_nothing);
+}
+
+void InterpreterMacroAssembler::profile_arguments_type(Register callee, Register tmp1, Register tmp2, bool is_virtual) {
+  if (!ProfileInterpreter) {
+    return;
+  }
+
+  assert_different_registers(callee, tmp1, tmp2, ImethodDataPtr);
+
+  if (MethodData::profile_arguments() || MethodData::profile_return()) {
+    Label profile_continue;
+
+    test_method_data_pointer(profile_continue);
+
+    int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size());
+
+    ldub(ImethodDataPtr, in_bytes(DataLayout::tag_offset()) - off_to_start, tmp1);
+    cmp_and_br_short(tmp1, is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag, notEqual, pn, profile_continue);
+
+    if (MethodData::profile_arguments()) {
+      Label done;
+      int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset());
+      add(ImethodDataPtr, off_to_args, ImethodDataPtr);
+
+      for (int i = 0; i < TypeProfileArgsLimit; i++) {
+        if (i > 0 || MethodData::profile_return()) {
+          // If return value type is profiled we may have no argument to profile
+          ld_ptr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args, tmp1);
+          sub(tmp1, i*TypeStackSlotEntries::per_arg_count(), tmp1);
+          cmp_and_br_short(tmp1, TypeStackSlotEntries::per_arg_count(), less, pn, done);
+        }
+        ld_ptr(Address(callee, Method::const_offset()), tmp1);
+        lduh(Address(tmp1, ConstMethod::size_of_parameters_offset()), tmp1);
+        // stack offset o (zero based) from the start of the argument
+        // list, for n arguments translates into offset n - o - 1 from
+        // the end of the argument list. But there's an extra slot at
+        // the stop of the stack. So the offset is n - o from Lesp.
+        ld_ptr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args, tmp2);
+        sub(tmp1, tmp2, tmp1);
+
+        // Can't use MacroAssembler::argument_address() which needs Gargs to be set up
+        sll(tmp1, Interpreter::logStackElementSize, tmp1);
+        ld_ptr(Lesp, tmp1, tmp1);
+
+        Address mdo_arg_addr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args);
+        profile_obj_type(tmp1, mdo_arg_addr, tmp2);
+
+        int to_add = in_bytes(TypeStackSlotEntries::per_arg_size());
+        add(ImethodDataPtr, to_add, ImethodDataPtr);
+        off_to_args += to_add;
+      }
+
+      if (MethodData::profile_return()) {
+        ld_ptr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args, tmp1);
+        sub(tmp1, TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(), tmp1);
+      }
+
+      bind(done);
+
+      if (MethodData::profile_return()) {
+        // We're right after the type profile for the last
+        // argument. tmp1 is the number of cells left in the
+        // CallTypeData/VirtualCallTypeData to reach its end. Non null
+        // if there's a return to profile.
+        assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type");
+        sll(tmp1, exact_log2(DataLayout::cell_size), tmp1);
+        add(ImethodDataPtr, tmp1, ImethodDataPtr);
+      }
+    } else {
+      assert(MethodData::profile_return(), "either profile call args or call ret");
+      update_mdp_by_constant(in_bytes(ReturnTypeEntry::size()));
+    }
+
+    // mdp points right after the end of the
+    // CallTypeData/VirtualCallTypeData, right after the cells for the
+    // return value type if there's one.
+
+    bind(profile_continue);
+  }
+}
+
+void InterpreterMacroAssembler::profile_return_type(Register ret, Register tmp1, Register tmp2) {
+  assert_different_registers(ret, tmp1, tmp2);
+  if (ProfileInterpreter && MethodData::profile_return()) {
+    Label profile_continue, done;
+
+    test_method_data_pointer(profile_continue);
+
+    if (MethodData::profile_return_jsr292_only()) {
+      // If we don't profile all invoke bytecodes we must make sure
+      // it's a bytecode we indeed profile. We can't go back to the
+      // begining of the ProfileData we intend to update to check its
+      // type because we're right after it and we don't known its
+      // length.
+      Label do_profile;
+      ldub(Lbcp, 0, tmp1);
+      cmp_and_br_short(tmp1, Bytecodes::_invokedynamic, equal, pn, do_profile);
+      cmp(tmp1, Bytecodes::_invokehandle);
+      br(equal, false, pn, do_profile);
+      delayed()->ldub(Lmethod, Method::intrinsic_id_offset_in_bytes(), tmp1);
+      cmp_and_br_short(tmp1, vmIntrinsics::_compiledLambdaForm, notEqual, pt, profile_continue);
+
+      bind(do_profile);
+    }
+
+    Address mdo_ret_addr(ImethodDataPtr, -in_bytes(ReturnTypeEntry::size()));
+    mov(ret, tmp1);
+    profile_obj_type(tmp1, mdo_ret_addr, tmp2);
+
+    bind(profile_continue);
+  }
+}
+
+void InterpreterMacroAssembler::profile_parameters_type(Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
+  if (ProfileInterpreter && MethodData::profile_parameters()) {
+    Label profile_continue, done;
+
+    test_method_data_pointer(profile_continue);
+
+    // Load the offset of the area within the MDO used for
+    // parameters. If it's negative we're not profiling any parameters.
+    lduw(ImethodDataPtr, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()), tmp1);
+    cmp_and_br_short(tmp1, 0, less, pn, profile_continue);
+
+    // Compute a pointer to the area for parameters from the offset
+    // and move the pointer to the slot for the last
+    // parameters. Collect profiling from last parameter down.
+    // mdo start + parameters offset + array length - 1
+
+    // Pointer to the parameter area in the MDO
+    Register mdp = tmp1;
+    add(ImethodDataPtr, tmp1, mdp);
+
+    // offset of the current profile entry to update
+    Register entry_offset = tmp2;
+    // entry_offset = array len in number of cells
+    ld_ptr(mdp, ArrayData::array_len_offset(), entry_offset);
+
+    int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0));
+    assert(off_base % DataLayout::cell_size == 0, "should be a number of cells");
+
+    // entry_offset (number of cells)  = array len - size of 1 entry + offset of the stack slot field
+    sub(entry_offset, TypeStackSlotEntries::per_arg_count() - (off_base / DataLayout::cell_size), entry_offset);
+    // entry_offset in bytes
+    sll(entry_offset, exact_log2(DataLayout::cell_size), entry_offset);
+
+    Label loop;
+    bind(loop);
+
+    // load offset on the stack from the slot for this parameter
+    ld_ptr(mdp, entry_offset, tmp3);
+    sll(tmp3,Interpreter::logStackElementSize, tmp3);
+    neg(tmp3);
+    // read the parameter from the local area
+    ld_ptr(Llocals, tmp3, tmp3);
+
+    // make entry_offset now point to the type field for this parameter
+    int type_base = in_bytes(ParametersTypeData::type_offset(0));
+    assert(type_base > off_base, "unexpected");
+    add(entry_offset, type_base - off_base, entry_offset);
+
+    // profile the parameter
+    Address arg_type(mdp, entry_offset);
+    profile_obj_type(tmp3, arg_type, tmp4);
+
+    // go to next parameter
+    sub(entry_offset, TypeStackSlotEntries::per_arg_count() * DataLayout::cell_size + (type_base - off_base), entry_offset);
+    cmp_and_br_short(entry_offset, off_base, greaterEqual, pt, loop);
+
+    bind(profile_continue);
+  }
+}
+
 // add a InterpMonitorElem to stack (see frame_sparc.hpp)
 
 void InterpreterMacroAssembler::add_monitor_to_stack( bool stack_is_empty,
--- a/src/cpu/sparc/vm/interp_masm_sparc.hpp	Tue Jan 14 12:44:12 2014 +0100
+++ b/src/cpu/sparc/vm/interp_masm_sparc.hpp	Tue Jan 14 14:51:47 2014 +0100
@@ -323,6 +323,11 @@
                            Register scratch2,
                            Register scratch3);
 
+  void profile_obj_type(Register obj, const Address& mdo_addr, Register tmp);
+  void profile_arguments_type(Register callee, Register tmp1, Register tmp2, bool is_virtual);
+  void profile_return_type(Register ret, Register tmp1, Register tmp2);
+  void profile_parameters_type(Register tmp1, Register tmp2, Register tmp3, Register tmp4);
+
   // Debugging
   void interp_verify_oop(Register reg, TosState state, const char * file, int line);    // only if +VerifyOops && state == atos
   void verify_oop_or_return_address(Register reg, Register rtmp); // for astore
--- a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Tue Jan 14 12:44:12 2014 +0100
+++ b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Tue Jan 14 14:51:47 2014 +0100
@@ -156,6 +156,10 @@
 address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) {
   address entry = __ pc();
 
+  if (state == atos) {
+    __ profile_return_type(O0, G3_scratch, G1_scratch);
+  }
+
 #if !defined(_LP64) && defined(COMPILER2)
   // All return values are where we want them, except for Longs.  C2 returns
   // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
@@ -1333,6 +1337,7 @@
   __ movbool(true, G3_scratch);
   __ stbool(G3_scratch, do_not_unlock_if_synchronized);
 
+  __ profile_parameters_type(G1_scratch, G3_scratch, G4_scratch, Lscratch);
   // increment invocation counter and check for overflow
   //
   // Note: checking for negative value instead of overflow
--- a/src/cpu/sparc/vm/templateTable_sparc.cpp	Tue Jan 14 12:44:12 2014 +0100
+++ b/src/cpu/sparc/vm/templateTable_sparc.cpp	Tue Jan 14 14:51:47 2014 +0100
@@ -2942,12 +2942,12 @@
 
 
 void TemplateTable::generate_vtable_call(Register Rrecv, Register Rindex, Register Rret) {
-  Register Rtemp = G4_scratch;
   Register Rcall = Rindex;
   assert_different_registers(Rcall, G5_method, Gargs, Rret);
 
   // get target Method* & entry point
   __ lookup_virtual_method(Rrecv, Rindex, G5_method);
+  __ profile_arguments_type(G5_method, Rcall, Gargs, true);
   __ call_from_interpreter(Rcall, Gargs, Rret);
 }
 
@@ -3022,6 +3022,7 @@
   __ null_check(O0);
 
   __ profile_final_call(O4);
+  __ profile_arguments_type(G5_method, Rscratch, Gargs, true);
 
   // get return address
   AddressLiteral table(Interpreter::invoke_return_entry_table());
@@ -3051,6 +3052,7 @@
 
   // do the call
   __ profile_call(O4);
+  __ profile_arguments_type(G5_method, Rscratch, Gargs, false);
   __ call_from_interpreter(Rscratch, Gargs, Rret);
 }
 
@@ -3066,6 +3068,7 @@
 
   // do the call
   __ profile_call(O4);
+  __ profile_arguments_type(G5_method, Rscratch, Gargs, false);
   __ call_from_interpreter(Rscratch, Gargs, Rret);
 }
 
@@ -3091,6 +3094,7 @@
   // do the call - the index (f2) contains the Method*
   assert_different_registers(G5_method, Gargs, Rcall);
   __ mov(Rindex, G5_method);
+  __ profile_arguments_type(G5_method, Rcall, Gargs, true);
   __ call_from_interpreter(Rcall, Gargs, Rret);
   __ bind(notFinal);
 
@@ -3197,6 +3201,7 @@
   Register Rcall = Rinterface;
   assert_different_registers(Rcall, G5_method, Gargs, Rret);
 
+  __ profile_arguments_type(G5_method, Rcall, Gargs, true);
   __ call_from_interpreter(Rcall, Gargs, Rret);
 }
 
@@ -3226,6 +3231,7 @@
   // do the call
   __ verify_oop(G4_mtype);
   __ profile_final_call(O4);  // FIXME: profile the LambdaForm also
+  __ profile_arguments_type(G5_method, Rscratch, Gargs, true);
   __ call_from_interpreter(Rscratch, Gargs, Rret);
 }
 
@@ -3262,6 +3268,7 @@
 
   // do the call
   __ verify_oop(G4_callsite);
+  __ profile_arguments_type(G5_method, Rscratch, Gargs, false);
   __ call_from_interpreter(Rscratch, Gargs, Rret);
 }
 
--- a/src/cpu/x86/vm/interp_masm_x86.cpp	Tue Jan 14 12:44:12 2014 +0100
+++ b/src/cpu/x86/vm/interp_masm_x86.cpp	Tue Jan 14 14:51:47 2014 +0100
@@ -127,7 +127,7 @@
 
       if (MethodData::profile_return()) {
         // We're right after the type profile for the last
-        // argument. tmp is the number of cell left in the
+        // argument. tmp is the number of cells left in the
         // CallTypeData/VirtualCallTypeData to reach its end. Non null
         // if there's a return to profile.
         assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type");
@@ -198,7 +198,7 @@
     // parameters. Collect profiling from last parameter down.
     // mdo start + parameters offset + array length - 1
     addptr(mdp, tmp1);
-    movptr(tmp1, Address(mdp, in_bytes(ArrayData::array_len_offset())));
+    movptr(tmp1, Address(mdp, ArrayData::array_len_offset()));
     decrement(tmp1, TypeStackSlotEntries::per_arg_count());
 
     Label loop;
--- a/src/share/vm/c1/c1_LIRGenerator.cpp	Tue Jan 14 12:44:12 2014 +0100
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp	Tue Jan 14 14:51:47 2014 +0100
@@ -3288,7 +3288,10 @@
   ciSignature* signature_at_call = NULL;
   x->method()->get_method_at_bci(bci, ignored_will_link, &signature_at_call);
 
-  ciKlass* exact = profile_type(md, 0, md->byte_offset_of_slot(data, ret->type_offset()),
+  // The offset within the MDO of the entry to update may be too large
+  // to be used in load/store instructions on some platforms. So have
+  // profile_type() compute the address of the profile in a register.
+  ciKlass* exact = profile_type(md, md->byte_offset_of_slot(data, ret->type_offset()), 0,
                                 ret->type(), x->ret(), mdp,
                                 !x->needs_null_check(),
                                 signature_at_call->return_type()->as_klass(),
--- a/src/share/vm/runtime/arguments.cpp	Tue Jan 14 12:44:12 2014 +0100
+++ b/src/share/vm/runtime/arguments.cpp	Tue Jan 14 14:51:47 2014 +0100
@@ -3727,10 +3727,6 @@
     // Doing the replace in parent maps helps speculation
     FLAG_SET_DEFAULT(ReplaceInParentMaps, true);
   }
-#ifndef X86
-  // Only on x86 for now
-  FLAG_SET_DEFAULT(TypeProfileLevel, 0);
-#endif
 #endif
 
   if (PrintAssembly && FLAG_IS_DEFAULT(DebugNonSafepoints)) {