/*
 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2012, 2014 SAP AG. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */


#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "interp_masm_ppc_64.hpp"
#include "interpreter/interpreterRuntime.hpp"
#include "prims/jvmtiThreadState.hpp"

#ifdef PRODUCT
#define BLOCK_COMMENT(str) // nothing
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif

void InterpreterMacroAssembler::null_check_throw(Register a, int offset, Register temp_reg) {
#ifdef CC_INTERP
  address exception_entry = StubRoutines::throw_NullPointerException_at_call_entry();
#else
  address exception_entry = Interpreter::throw_NullPointerException_entry();
#endif
  MacroAssembler::null_check_throw(a, offset, temp_reg, exception_entry);
}

void InterpreterMacroAssembler::branch_to_entry(address entry, Register Rscratch) {
  assert(entry, "Entry must have been generated by now");
  if (is_within_range_of_b(entry, pc())) {
    b(entry);
  } else {
    load_const_optimized(Rscratch, entry, R0);
    mtctr(Rscratch);
    bctr();
  }
}
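
// Illustrative sketch of the choice above (not emitted code): a direct branch
// only reaches a limited displacement (the PPC I-form branch encodes a 26-bit
// signed byte offset, i.e. +/-32 MB), so distant entries go through the count
// register:
//
//   if (is_within_range_of_b(entry, pc))  b entry;
//   else                                  { Rscratch = entry; mtctr; bctr; }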

#ifndef CC_INTERP

void InterpreterMacroAssembler::dispatch_next(TosState state, int bcp_incr) {
  Register bytecode = R12_scratch2;
  if (bcp_incr != 0) {
    lbzu(bytecode, bcp_incr, R14_bcp);
  } else {
    lbz(bytecode, 0, R14_bcp);
  }

  dispatch_Lbyte_code(state, bytecode, Interpreter::dispatch_table(state));
}

void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) {
  // Load current bytecode.
  Register bytecode = R12_scratch2;
  lbz(bytecode, 0, R14_bcp);
  dispatch_Lbyte_code(state, bytecode, table);
}

// Dispatch code executed in the prolog of a bytecode which does not do its
// own dispatch. The dispatch address is computed and placed in R24_dispatch_addr.
void InterpreterMacroAssembler::dispatch_prolog(TosState state, int bcp_incr) {
  Register bytecode = R12_scratch2;
  lbz(bytecode, bcp_incr, R14_bcp);

  load_dispatch_table(R24_dispatch_addr, Interpreter::dispatch_table(state));

  sldi(bytecode, bytecode, LogBytesPerWord);
  ldx(R24_dispatch_addr, R24_dispatch_addr, bytecode);
}

// Dispatch code executed in the epilog of a bytecode which does not do its
// own dispatch. The dispatch address in R24_dispatch_addr is used for the
// dispatch.
void InterpreterMacroAssembler::dispatch_epilog(TosState state, int bcp_incr) {
  mtctr(R24_dispatch_addr);
  addi(R14_bcp, R14_bcp, bcp_incr);
  bctr();
}
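
// Taken together, dispatch_prolog and dispatch_epilog do the same work as a
// regular dispatch, split so that the table load can be scheduled early in a
// bytecode template. Sketch (illustrative, in the style of the "template
// code" comments used elsewhere in this file):
//
//   prolog:  R24_dispatch_addr = dispatch_table(state)[*(R14_bcp + bcp_incr)];
//   epilog:  R14_bcp += bcp_incr;  goto *R24_dispatch_addr;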

void InterpreterMacroAssembler::check_and_handle_popframe(Register scratch_reg) {
  assert(scratch_reg != R0, "can't use R0 as scratch_reg here");
  if (JvmtiExport::can_pop_frame()) {
    Label L;

    // Check the "pending popframe condition" flag in the current thread.
    lwz(scratch_reg, in_bytes(JavaThread::popframe_condition_offset()), R16_thread);

    // Initiate popframe handling only if it is not already being
    // processed. If the flag has the popframe_processing bit set, it
    // means that this code is called *during* popframe handling - we
    // don't want to reenter.
    andi_(R0, scratch_reg, JavaThread::popframe_pending_bit);
    beq(CCR0, L);

    andi_(R0, scratch_reg, JavaThread::popframe_processing_bit);
    bne(CCR0, L);

    // Call the Interpreter::remove_activation_preserving_args_entry()
    // func to get the address of the same-named entrypoint in the
    // generated interpreter code.
    call_c(CAST_FROM_FN_PTR(FunctionDescriptor*,
                            Interpreter::remove_activation_preserving_args_entry),
           relocInfo::none);

    // Jump to Interpreter::_remove_activation_preserving_args_entry.
    mtctr(R3_RET);
    bctr();

    align(32, 12);
    bind(L);
  }
}

void InterpreterMacroAssembler::check_and_handle_earlyret(Register scratch_reg) {
  const Register Rthr_state_addr = scratch_reg;
  if (JvmtiExport::can_force_early_return()) {
    Label Lno_early_ret;
    ld(Rthr_state_addr, in_bytes(JavaThread::jvmti_thread_state_offset()), R16_thread);
    cmpdi(CCR0, Rthr_state_addr, 0);
    beq(CCR0, Lno_early_ret);

    lwz(R0, in_bytes(JvmtiThreadState::earlyret_state_offset()), Rthr_state_addr);
    cmpwi(CCR0, R0, JvmtiThreadState::earlyret_pending);
    bne(CCR0, Lno_early_ret);

    // Jump to Interpreter::_earlyret_entry.
    lwz(R3_ARG1, in_bytes(JvmtiThreadState::earlyret_tos_offset()), Rthr_state_addr);
    call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry));
    mtlr(R3_RET);
    blr();

    align(32, 12);
    bind(Lno_early_ret);
  }
}

void InterpreterMacroAssembler::load_earlyret_value(TosState state, Register Rscratch1) {
  const Register RjvmtiState = Rscratch1;
  const Register Rscratch2   = R0;

  ld(RjvmtiState, in_bytes(JavaThread::jvmti_thread_state_offset()), R16_thread);
  li(Rscratch2, 0);

  switch (state) {
    case atos: ld(R17_tos, in_bytes(JvmtiThreadState::earlyret_oop_offset()), RjvmtiState);
               std(Rscratch2, in_bytes(JvmtiThreadState::earlyret_oop_offset()), RjvmtiState);
               break;
    case ltos: ld(R17_tos, in_bytes(JvmtiThreadState::earlyret_value_offset()), RjvmtiState);
               break;
    case btos: // fall through
    case ctos: // fall through
    case stos: // fall through
    case itos: lwz(R17_tos, in_bytes(JvmtiThreadState::earlyret_value_offset()), RjvmtiState);
               break;
    case ftos: lfs(F15_ftos, in_bytes(JvmtiThreadState::earlyret_value_offset()), RjvmtiState);
               break;
    case dtos: lfd(F15_ftos, in_bytes(JvmtiThreadState::earlyret_value_offset()), RjvmtiState);
               break;
    case vtos: break;
    default  : ShouldNotReachHere();
  }

  // Clean up tos value in the jvmti thread state.
  std(Rscratch2, in_bytes(JvmtiThreadState::earlyret_value_offset()), RjvmtiState);
  // Set tos state field to illegal value.
  li(Rscratch2, ilgl);
  stw(Rscratch2, in_bytes(JvmtiThreadState::earlyret_tos_offset()), RjvmtiState);
}

// Common code for dispatch_next and dispatch_via:
// look up the dispatch target for the current bytecode and jump to it.

void InterpreterMacroAssembler::load_dispatch_table(Register dst, address* table) {
  address table_base = (address)Interpreter::dispatch_table((TosState)0);
  intptr_t table_offs = (intptr_t)table - (intptr_t)table_base;
  if (is_simm16(table_offs)) {
    addi(dst, R25_templateTableBase, (int)table_offs);
  } else {
    load_const_optimized(dst, table, R0);
  }
}
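
// Sketch of the optimization above (illustrative only): the per-state
// dispatch tables are laid out near each other, so a table address can
// usually be formed as a 16-bit immediate offset from the table base
// register instead of materializing a full 64-bit constant:
//
//   offs = (address)table - (address)dispatch_table((TosState)0);
//   dst  = is_simm16(offs) ? R25_templateTableBase + offs
//                          : load_const_optimized(table);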

void InterpreterMacroAssembler::dispatch_Lbyte_code(TosState state, Register bytecode, address* table, bool verify) {
  if (verify) {
    unimplemented("dispatch_Lbyte_code: verify"); // See Sparc Implementation to implement this
  }

#ifdef FAST_DISPATCH
  unimplemented("dispatch_Lbyte_code FAST_DISPATCH");
#else
  assert_different_registers(bytecode, R11_scratch1);

  // Calc dispatch table address.
  load_dispatch_table(R11_scratch1, table);

  sldi(R12_scratch2, bytecode, LogBytesPerWord);
  ldx(R11_scratch1, R11_scratch1, R12_scratch2);

  // Jump off!
  mtctr(R11_scratch1);
  bctr();
#endif
}
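
// What dispatch_Lbyte_code emits, as pseudocode (illustrative sketch):
//
//   target = table[bytecode];   // ldx from table + (bytecode << LogBytesPerWord)
//   goto *target;               // mtctr; bctr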

void InterpreterMacroAssembler::load_receiver(Register Rparam_count, Register Rrecv_dst) {
  sldi(Rrecv_dst, Rparam_count, Interpreter::logStackElementSize);
  ldx(Rrecv_dst, Rrecv_dst, R15_esp);
}

// helpers for expression stack

void InterpreterMacroAssembler::pop_i(Register r) {
  lwzu(r, Interpreter::stackElementSize, R15_esp);
}

void InterpreterMacroAssembler::pop_ptr(Register r) {
  ldu(r, Interpreter::stackElementSize, R15_esp);
}

void InterpreterMacroAssembler::pop_l(Register r) {
  ld(r, Interpreter::stackElementSize, R15_esp);
  addi(R15_esp, R15_esp, 2 * Interpreter::stackElementSize);
}

void InterpreterMacroAssembler::pop_f(FloatRegister f) {
  lfsu(f, Interpreter::stackElementSize, R15_esp);
}

void InterpreterMacroAssembler::pop_d(FloatRegister f) {
  lfd(f, Interpreter::stackElementSize, R15_esp);
  addi(R15_esp, R15_esp, 2 * Interpreter::stackElementSize);
}

void InterpreterMacroAssembler::push_i(Register r) {
  stw(r, 0, R15_esp);
  addi(R15_esp, R15_esp, - Interpreter::stackElementSize );
}

void InterpreterMacroAssembler::push_ptr(Register r) {
  std(r, 0, R15_esp);
  addi(R15_esp, R15_esp, - Interpreter::stackElementSize );
}

void InterpreterMacroAssembler::push_l(Register r) {
  std(r, - Interpreter::stackElementSize, R15_esp);
  addi(R15_esp, R15_esp, - 2 * Interpreter::stackElementSize );
}

void InterpreterMacroAssembler::push_f(FloatRegister f) {
  stfs(f, 0, R15_esp);
  addi(R15_esp, R15_esp, - Interpreter::stackElementSize );
}

void InterpreterMacroAssembler::push_d(FloatRegister f)   {
  stfd(f, - Interpreter::stackElementSize, R15_esp);
  addi(R15_esp, R15_esp, - 2 * Interpreter::stackElementSize );
}

void InterpreterMacroAssembler::push_2ptrs(Register first, Register second) {
  std(first, 0, R15_esp);
  std(second, -Interpreter::stackElementSize, R15_esp);
  addi(R15_esp, R15_esp, - 2 * Interpreter::stackElementSize );
}

void InterpreterMacroAssembler::push_l_pop_d(Register l, FloatRegister d) {
  std(l, 0, R15_esp);
  lfd(d, 0, R15_esp);
}

void InterpreterMacroAssembler::push_d_pop_l(FloatRegister d, Register l) {
  stfd(d, 0, R15_esp);
  ld(l, 0, R15_esp);
}
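
// Note on the two helpers above: they move a value between a GPR and an FPR
// through the free top-of-stack slot (store, then reload), since no direct
// GPR<->FPR move is used here. R15_esp is not adjusted; a long and a double
// occupy the same two expression stack slots, so the net depth is unchanged.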

void InterpreterMacroAssembler::push(TosState state) {
  switch (state) {
    case atos: push_ptr();                break;
    case btos:
    case ctos:
    case stos:
    case itos: push_i();                  break;
    case ltos: push_l();                  break;
    case ftos: push_f();                  break;
    case dtos: push_d();                  break;
    case vtos: /* nothing to do */        break;
    default  : ShouldNotReachHere();
  }
}

void InterpreterMacroAssembler::pop(TosState state) {
  switch (state) {
    case atos: pop_ptr();            break;
    case btos:
    case ctos:
    case stos:
    case itos: pop_i();              break;
    case ltos: pop_l();              break;
    case ftos: pop_f();              break;
    case dtos: pop_d();              break;
    case vtos: /* nothing to do */   break;
    default  : ShouldNotReachHere();
  }
  verify_oop(R17_tos, state);
}

void InterpreterMacroAssembler::empty_expression_stack() {
  addi(R15_esp, R26_monitor, - Interpreter::stackElementSize);
}
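
// Sketch: the expression stack grows downward from just below the monitor
// area, and R15_esp points to the first free slot, so an empty stack means
//
//   R15_esp = R26_monitor - Interpreter::stackElementSize;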

void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(int         bcp_offset,
                                                          Register    Rdst,
                                                          signedOrNot is_signed) {
  // Read Java big endian format.
  if (is_signed == Signed) {
    lha(Rdst, bcp_offset, R14_bcp);
  } else {
    lhz(Rdst, bcp_offset, R14_bcp);
  }
#if 0
  assert(Rtmp != Rdst, "need separate temp register");
  Register Rfirst = Rtmp;
  lbz(Rfirst, bcp_offset, R14_bcp); // first byte
  lbz(Rdst, bcp_offset+1, R14_bcp); // second byte

  // Rdst = ((Rfirst<<8) & 0xFF00) | (Rdst &~ 0xFF00)
  rldimi(/*RA=*/Rdst, /*RS=*/Rfirst, /*sh=*/8, /*mb=*/48);
  if (is_signed == Signed) {
    extsh(Rdst, Rdst);
  }
#endif
}

void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(int         bcp_offset,
                                                          Register    Rdst,
                                                          signedOrNot is_signed) {
  // Read Java big endian format.
  // Note: lwa is DS-form; its displacement must be a multiple of 4, so an
  // unaligned offset has to go through an indexed load.
  if (bcp_offset & 3) { // Offset unaligned?
    load_const_optimized(Rdst, bcp_offset);
    if (is_signed == Signed) {
      lwax(Rdst, R14_bcp, Rdst);
    } else {
      lwzx(Rdst, R14_bcp, Rdst);
    }
  } else {
    if (is_signed == Signed) {
      lwa(Rdst, bcp_offset, R14_bcp);
    } else {
      lwz(Rdst, bcp_offset, R14_bcp);
    }
  }
}

// Load the constant pool cache index from the bytecode stream.
//
// Kills / writes:
//   - Rdst, Rscratch
void InterpreterMacroAssembler::get_cache_index_at_bcp(Register Rdst, int bcp_offset, size_t index_size) {
  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
  if (index_size == sizeof(u2)) {
    get_2_byte_integer_at_bcp(bcp_offset, Rdst, Unsigned);
  } else if (index_size == sizeof(u4)) {
    assert(EnableInvokeDynamic, "giant index used only for JSR 292");
    get_4_byte_integer_at_bcp(bcp_offset, Rdst, Signed);
    assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line");
    nand(Rdst, Rdst, Rdst); // convert to plain index
  } else if (index_size == sizeof(u1)) {
    lbz(Rdst, bcp_offset, R14_bcp);
  } else {
    ShouldNotReachHere();
  }
  // Rdst now contains cp cache index.
}
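
// Note on the invokedynamic path above: nand(Rdst, Rdst, Rdst) computes the
// bitwise complement (~x == x NAND x), which matches the encoding the assert
// checks: decode_invokedynamic_index(~index) == index.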

void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, int bcp_offset, size_t index_size) {
  get_cache_index_at_bcp(cache, bcp_offset, index_size);
  sldi(cache, cache, exact_log2(in_words(ConstantPoolCacheEntry::size()) * BytesPerWord));
  add(cache, R27_constPoolCache, cache);
}

// Load object from cpool->resolved_references(index).
void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index) {
  assert_different_registers(result, index);
  get_constant_pool(result);

  // Convert from field index to resolved_references() index and from
  // word index to byte offset. Since this is a java object, it can be compressed.
  Register tmp = index;  // reuse
  sldi(tmp, index, LogBytesPerHeapOop);
  // Load pointer for resolved_references[] objArray.
  ld(result, ConstantPool::resolved_references_offset_in_bytes(), result);
  // JNIHandles::resolve(result)
  ld(result, 0, result);
#ifdef ASSERT
  Label index_ok;
  lwa(R0, arrayOopDesc::length_offset_in_bytes(), result);
  sldi(R0, R0, LogBytesPerHeapOop);
  cmpd(CCR0, tmp, R0);
  blt(CCR0, index_ok);
  stop("resolved reference index out of bounds", 0x09256);
  bind(index_ok);
#endif
  // Add in the index.
  add(result, tmp, result);
  load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result);
}
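
// Pseudocode for the lookup above (illustrative sketch, in the style of the
// "template code" comments used elsewhere in this file):
//
//   jobject h        = cpool->resolved_references();  // handle stored in the cpool
//   objArrayOop refs = *h;                            // JNIHandles::resolve
//   assert((index << LogBytesPerHeapOop) < (refs->length() << LogBytesPerHeapOop));
//   result = refs->obj_at(index);                     // load_heap_oop at base + scaled index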

// Generate a subtype check: branch to ok_is_subtype if sub_klass is
// a subtype of super_klass. Blows registers Rsub_klass, tmp1, tmp2.
void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, Register Rsuper_klass, Register Rtmp1,
                                                  Register Rtmp2, Register Rtmp3, Label &ok_is_subtype) {
  // Profile the not-null value's klass.
  profile_typecheck(Rsub_klass, Rtmp1, Rtmp2);
  check_klass_subtype(Rsub_klass, Rsuper_klass, Rtmp1, Rtmp2, ok_is_subtype);
  profile_typecheck_failed(Rtmp1, Rtmp2);
}

void InterpreterMacroAssembler::generate_stack_overflow_check_with_compare_and_throw(Register Rmem_frame_size, Register Rscratch1) {
  Label done;
  sub(Rmem_frame_size, R1_SP, Rmem_frame_size);
  ld(Rscratch1, thread_(stack_overflow_limit));
  cmpld(CCR0/*is_stack_overflow*/, Rmem_frame_size, Rscratch1);
  bgt(CCR0/*is_stack_overflow*/, done);

  // Load target address of the runtime stub.
  assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "generated in wrong order");
  load_const_optimized(Rscratch1, (StubRoutines::throw_StackOverflowError_entry()), R0);
  mtctr(Rscratch1);
  // Restore caller_sp.
#ifdef ASSERT
  ld(Rscratch1, 0, R1_SP);
  ld(R0, 0, R21_sender_SP);
  cmpd(CCR0, R0, Rscratch1);
  asm_assert_eq("backlink", 0x547);
#endif // ASSERT
  mr(R1_SP, R21_sender_SP);
  bctr();

  align(32, 12);
  bind(done);
}
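
// Sketch of the check above (illustrative): compute the prospective SP and
// compare it unsigned against the thread's limit; on overflow, restore the
// caller's SP before tail-calling the throwing stub:
//
//   if ((uintptr_t)(SP - mem_frame_size) <= thread->stack_overflow_limit()) {
//     SP = sender_SP;
//     goto StubRoutines::throw_StackOverflowError_entry;
//   }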

// These are used to do a test and full jump to exception-throwing code.

void InterpreterMacroAssembler::index_check_without_pop(Register Rarray, Register Rindex, int index_shift, Register Rtmp, Register Rres) {
  // Check that index is in range for array, then shift index by index_shift,
  // and put arrayOop + shifted_index into res.
  // Note: res is still shy of address by array offset into object.
  // Kills:
  //   - Rindex
  // Writes:
  //   - Rres: Address that corresponds to the array index if check was successful.
  verify_oop(Rarray);
  const Register Rlength   = R0;
  const Register RsxtIndex = Rtmp;
  Label LisNull, LnotOOR;

  // Array nullcheck
  if (!ImplicitNullChecks) {
    cmpdi(CCR0, Rarray, 0);
    beq(CCR0, LisNull);
  } else {
    null_check_throw(Rarray, arrayOopDesc::length_offset_in_bytes(), /*temp*/RsxtIndex);
  }

  // Rindex might contain garbage in its upper bits (remember that we don't
  // sign extend during integer arithmetic operations). Clear them, and keep
  // the value in the register in which the ArrayIndexOutOfBounds handler
  // expects the index.
  rldicl(RsxtIndex, Rindex, 0, 32); // zero extend 32 bit -> 64 bit

  // Index check
  lwz(Rlength, arrayOopDesc::length_offset_in_bytes(), Rarray);
  cmplw(CCR0, Rindex, Rlength);
  sldi(RsxtIndex, RsxtIndex, index_shift);
  blt(CCR0, LnotOOR);
  load_dispatch_table(Rtmp, (address*)Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
  mtctr(Rtmp);
  bctr();

  if (!ImplicitNullChecks) {
    bind(LisNull);
    load_dispatch_table(Rtmp, (address*)Interpreter::_throw_NullPointerException_entry);
    mtctr(Rtmp);
    bctr();
  }

  align(32, 16);
  bind(LnotOOR);

  // Calc address
  add(Rres, RsxtIndex, Rarray);
}
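
// Equivalent pseudocode for index_check_without_pop (illustrative sketch):
//
//   if (array == NULL)   goto throw_NullPointerException;  // explicit test, or
//                                                          // implicit via the length load
//   if ((uint32_t)index >= (uint32_t)array->length())
//                        goto throw_ArrayIndexOutOfBoundsException;
//   res = array + ((uint64_t)(uint32_t)index << index_shift); // still lacks the
//                                                             // array base offset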

void InterpreterMacroAssembler::index_check(Register array, Register index, int index_shift, Register tmp, Register res) {
  // pop array
  pop_ptr(array);

  // check array
  index_check_without_pop(array, index, index_shift, tmp, res);
}

void InterpreterMacroAssembler::get_const(Register Rdst) {
  ld(Rdst, in_bytes(Method::const_offset()), R19_method);
}

void InterpreterMacroAssembler::get_constant_pool(Register Rdst) {
  get_const(Rdst);
  ld(Rdst, in_bytes(ConstMethod::constants_offset()), Rdst);
}

void InterpreterMacroAssembler::get_constant_pool_cache(Register Rdst) {
  get_constant_pool(Rdst);
  ld(Rdst, ConstantPool::cache_offset_in_bytes(), Rdst);
}

void InterpreterMacroAssembler::get_cpool_and_tags(Register Rcpool, Register Rtags) {
  get_constant_pool(Rcpool);
  ld(Rtags, ConstantPool::tags_offset_in_bytes(), Rcpool);
}

// Unlock if synchronized method.
//
// Unlock the receiver if this is a synchronized method.
// Unlock any Java monitors from synchronized blocks.
//
// If there are locked Java monitors
//   If throw_monitor_exception
//     throws IllegalMonitorStateException
//   Else if install_monitor_exception
//     installs IllegalMonitorStateException
//   Else
//     no error processing
void InterpreterMacroAssembler::unlock_if_synchronized_method(TosState state,
                                                              bool throw_monitor_exception,
                                                              bool install_monitor_exception) {
  Label Lunlocked, Lno_unlock;
  {
    Register Rdo_not_unlock_flag = R11_scratch1;
    Register Raccess_flags       = R12_scratch2;

    // Check if synchronized method or unlocking prevented by
    // JavaThread::do_not_unlock_if_synchronized flag.
    lbz(Rdo_not_unlock_flag, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()), R16_thread);
    lwz(Raccess_flags, in_bytes(Method::access_flags_offset()), R19_method);
    li(R0, 0);
    stb(R0, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()), R16_thread); // reset flag

    push(state);

    // Skip if we don't have to unlock.
    rldicl_(R0, Raccess_flags, 64-JVM_ACC_SYNCHRONIZED_BIT, 63); // Extract bit and compare to 0.
    beq(CCR0, Lunlocked);

    cmpwi(CCR0, Rdo_not_unlock_flag, 0);
    bne(CCR0, Lno_unlock);
  }

  // Unlock
  {
    Register Rmonitor_base = R11_scratch1;

    Label Lunlock;
    // If it's still locked, everything is ok, unlock it.
    ld(Rmonitor_base, 0, R1_SP);
    addi(Rmonitor_base, Rmonitor_base, - (frame::ijava_state_size + frame::interpreter_frame_monitor_size_in_bytes())); // Monitor base

    ld(R0, BasicObjectLock::obj_offset_in_bytes(), Rmonitor_base);
    cmpdi(CCR0, R0, 0);
    bne(CCR0, Lunlock);

    // If it's already unlocked, throw exception.
    if (throw_monitor_exception) {
      call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception));
      should_not_reach_here();
    } else {
      if (install_monitor_exception) {
        call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception));
        b(Lunlocked);
      }
    }

    bind(Lunlock);
    unlock_object(Rmonitor_base);
  }

  // Check that all other monitors are unlocked. Throw IllegalMonitorStateException if not.
  bind(Lunlocked);
  {
    Label Lexception, Lrestart;
    Register Rcurrent_obj_addr = R11_scratch1;
    const int delta = frame::interpreter_frame_monitor_size_in_bytes();
    assert((delta & LongAlignmentMask) == 0, "sizeof BasicObjectLock must be even number of doublewords");

    bind(Lrestart);
    // Set up search loop: Calc num of iterations.
    {
      Register Riterations = R12_scratch2;
      Register Rmonitor_base = Rcurrent_obj_addr;
      ld(Rmonitor_base, 0, R1_SP);
      addi(Rmonitor_base, Rmonitor_base, - frame::ijava_state_size);  // Monitor base

      subf_(Riterations, R26_monitor, Rmonitor_base);
      ble(CCR0, Lno_unlock);

      addi(Rcurrent_obj_addr, Rmonitor_base, BasicObjectLock::obj_offset_in_bytes() - frame::interpreter_frame_monitor_size_in_bytes());
      // Check if any monitor is on stack, bail out if not
      srdi(Riterations, Riterations, exact_log2(delta));
      mtctr(Riterations);
    }

    // The search loop: Look for locked monitors.
    {
      const Register Rcurrent_obj = R0;
      Label Lloop;

      ld(Rcurrent_obj, 0, Rcurrent_obj_addr);
      addi(Rcurrent_obj_addr, Rcurrent_obj_addr, -delta);
      bind(Lloop);

      // Check if current entry is used.
      cmpdi(CCR0, Rcurrent_obj, 0);
      bne(CCR0, Lexception);
      // Preload next iteration's compare value.
      ld(Rcurrent_obj, 0, Rcurrent_obj_addr);
      addi(Rcurrent_obj_addr, Rcurrent_obj_addr, -delta);
      bdnz(Lloop);
    }
    // Fell through: Everything's unlocked => finish.
    b(Lno_unlock);

    // An object is still locked => need to throw exception.
    bind(Lexception);
    if (throw_monitor_exception) {
      call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception));
      should_not_reach_here();
    } else {
      // Stack unrolling. Unlock object and if requested, install illegal_monitor_exception.
      // Unlock does not block, so don't have to worry about the frame.
      Register Rmonitor_addr = R11_scratch1;
      addi(Rmonitor_addr, Rcurrent_obj_addr, -BasicObjectLock::obj_offset_in_bytes() + delta);
      unlock_object(Rmonitor_addr);
      if (install_monitor_exception) {
        call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception));
      }
      b(Lrestart);
    }
  }

  align(32, 12);
  bind(Lno_unlock);
  pop(state);
}

// Support function for remove_activation & Co.
void InterpreterMacroAssembler::merge_frames(Register Rsender_sp, Register return_pc, Register Rscratch1, Register Rscratch2) {
  // Pop interpreter frame.
  ld(Rscratch1, 0, R1_SP); // *SP
  ld(Rsender_sp, _ijava_state_neg(sender_sp), Rscratch1); // top_frame_sp
  ld(Rscratch2, 0, Rscratch1); // **SP
#ifdef ASSERT
  {
    Label Lok;
    ld(R0, _ijava_state_neg(ijava_reserved), Rscratch1);
    cmpdi(CCR0, R0, 0x5afe);
    beq(CCR0, Lok);
    stop("frame corrupted (remove activation)", 0x5afe);
    bind(Lok);
  }
#endif
  if (return_pc != noreg) {
    ld(return_pc, _abi(lr), Rscratch1); // LR
  }

  // Merge top frames.
  subf(Rscratch1, R1_SP, Rsender_sp); // top_frame_sp - SP
  stdux(Rscratch2, R1_SP, Rscratch1); // atomically set *(SP = top_frame_sp) = **SP
}
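
// Sketch of the frame pop above (illustrative): the stdux both moves SP to
// the sender's SP and stores the new back chain in a single instruction, so
// the stack is never in a state where SP has moved but the back chain is
// stale:
//
//   fp        = *SP;                          // current back chain
//   top_sp    = fp->ijava_state.sender_sp;
//   new_chain = *fp;                          // **SP
//   SP = top_sp; *SP = new_chain;             // one stdux: SP and its back
//                                             // chain change together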

// Remove activation.
//
// Unlock the receiver if this is a synchronized method.
// Unlock any Java monitors from synchronized blocks.
// Remove the activation from the stack.
//
// If there are locked Java monitors
//    If throw_monitor_exception
//       throws IllegalMonitorStateException
//    Else if install_monitor_exception
//       installs IllegalMonitorStateException
//    Else
//       no error processing
void InterpreterMacroAssembler::remove_activation(TosState state,
                                                  bool throw_monitor_exception,
                                                  bool install_monitor_exception) {
  unlock_if_synchronized_method(state, throw_monitor_exception, install_monitor_exception);

  // Save result (push state before jvmti call and pop it afterwards) and notify jvmti.
  notify_method_exit(false, state, NotifyJVMTI, true);

  verify_oop(R17_tos, state);
  verify_thread();

  merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ R0, R11_scratch1, R12_scratch2);
  mtlr(R0);
}

#endif // !CC_INTERP

// Lock object
//
// Registers alive
//   monitor - Address of the BasicObjectLock to be used for locking,
//             which must be initialized with the object to lock.
//   object  - Address of the object to be locked.
//
void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
  if (UseHeavyMonitors) {
    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
            monitor, /*check_for_exceptions=*/true CC_INTERP_ONLY(&& false));
  } else {
    // template code:
    //
    // markOop displaced_header = obj->mark().set_unlocked();
    // monitor->lock()->set_displaced_header(displaced_header);
    // if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
    //   // We stored the monitor address into the object's mark word.
    // } else if (THREAD->is_lock_owned((address)displaced_header))
    //   // Simple recursive case.
    //   monitor->lock()->set_displaced_header(NULL);
    // } else {
    //   // Slow path.
    //   InterpreterRuntime::monitorenter(THREAD, monitor);
    // }

    const Register displaced_header = R7_ARG5;
    const Register object_mark_addr = R8_ARG6;
    const Register current_header   = R9_ARG7;
    const Register tmp              = R10_ARG8;

    Label done;
    Label cas_failed, slow_case;

    assert_different_registers(displaced_header, object_mark_addr, current_header, tmp);

    // markOop displaced_header = obj->mark().set_unlocked();

    // Load markOop from object into displaced_header.
    ld(displaced_header, oopDesc::mark_offset_in_bytes(), object);

    if (UseBiasedLocking) {
      biased_locking_enter(CCR0, object, displaced_header, tmp, current_header, done, &slow_case);
    }

    // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
    ori(displaced_header, displaced_header, markOopDesc::unlocked_value);

    // monitor->lock()->set_displaced_header(displaced_header);

    // Initialize the box (Must happen before we update the object mark!).
    std(displaced_header, BasicObjectLock::lock_offset_in_bytes() +
        BasicLock::displaced_header_offset_in_bytes(), monitor);

    // if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {

    // Store stack address of the BasicObjectLock (this is monitor) into object.
    addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes());

    // Must fence, otherwise, preceding store(s) may float below cmpxchg.
    // CmpxchgX sets CCR0 to cmpX(current, displaced).
    fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
    cmpxchgd(/*flag=*/CCR0,
             /*current_value=*/current_header,
             /*compare_value=*/displaced_header, /*exchange_value=*/monitor,
             /*where=*/object_mark_addr,
             MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
             MacroAssembler::cmpxchgx_hint_acquire_lock(),
             noreg,
             &cas_failed);

    // If the compare-and-exchange succeeded, then we found an unlocked
    // object and we have now locked it.
    b(done);
    bind(cas_failed);

    // } else if (THREAD->is_lock_owned((address)displaced_header))
    //   // Simple recursive case.
    //   monitor->lock()->set_displaced_header(NULL);

    // We did not see an unlocked object so try the fast recursive case.

    // Check if owner is self by comparing the value in the markOop of object
    // (current_header) with the stack pointer.
    sub(current_header, current_header, R1_SP);

    assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
    load_const_optimized(tmp,
                         (address) (~(os::vm_page_size()-1) |
                                    markOopDesc::lock_mask_in_place));

    and_(R0/*==0?*/, current_header, tmp);
    // If condition is true we are done and hence we can store 0 in the displaced
    // header indicating it is a recursive lock.
    bne(CCR0, slow_case);
    release();
    std(R0/*==0!*/, BasicObjectLock::lock_offset_in_bytes() +
        BasicLock::displaced_header_offset_in_bytes(), monitor);
    b(done);

    // } else {
    //   // Slow path.
    //   InterpreterRuntime::monitorenter(THREAD, monitor);

    // None of the above fast optimizations worked so we have to get into the
    // slow case of monitor enter.
    bind(slow_case);
    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
            monitor, /*check_for_exceptions=*/true CC_INTERP_ONLY(&& false));
    // }
    align(32, 12);
    bind(done);
  }
}

// Unlocks an object. Used in monitorexit bytecode and remove_activation.
//
// Registers alive
//   monitor - Address of the BasicObjectLock to be used for locking,
//             which must be initialized with the object to lock.
//
// Throw IllegalMonitorException if object is not locked by current thread.
void InterpreterMacroAssembler::unlock_object(Register monitor, bool check_for_exceptions) {
  if (UseHeavyMonitors) {
    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
            monitor, check_for_exceptions CC_INTERP_ONLY(&& false));
  } else {

    // template code:
    //
    // if ((displaced_header = monitor->displaced_header()) == NULL) {
    //   // Recursive unlock. Mark the monitor unlocked by setting the object field to NULL.
    //   monitor->set_obj(NULL);
    // } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) {
    //   // We swapped the unlocked mark in displaced_header into the object's mark word.
    //   monitor->set_obj(NULL);
    // } else {
    //   // Slow path.
    //   InterpreterRuntime::monitorexit(THREAD, monitor);
    // }

    const Register object           = R7_ARG5;
    const Register displaced_header = R8_ARG6;
    const Register object_mark_addr = R9_ARG7;
    const Register current_header   = R10_ARG8;

    Label free_slot;
    Label slow_case;

    assert_different_registers(object, displaced_header, object_mark_addr, current_header);

    if (UseBiasedLocking) {
      // The object address from the monitor is in object.
      ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor);
      assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
      biased_locking_exit(CCR0, object, displaced_header, free_slot);
    }

    // Test first if we are in the fast recursive case.
    ld(displaced_header, BasicObjectLock::lock_offset_in_bytes() +
           BasicLock::displaced_header_offset_in_bytes(), monitor);

    // If the displaced header is zero, we have a recursive unlock.
    cmpdi(CCR0, displaced_header, 0);
    beq(CCR0, free_slot); // recursive unlock

    // } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) {
    //   // We swapped the unlocked mark in displaced_header into the object's mark word.
    //   monitor->set_obj(NULL);

    // If we still have a lightweight lock, unlock the object and be done.

    // The object address from the monitor is in object.
    if (!UseBiasedLocking) { ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor); }
    addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes());

    // We have the displaced header in displaced_header. If the lock is still
    // lightweight, it will contain the monitor address and we'll store the
    // displaced header back into the object's mark word.
    // CmpxchgX sets CCR0 to cmpX(current, monitor).
    cmpxchgd(/*flag=*/CCR0,
             /*current_value=*/current_header,
             /*compare_value=*/monitor, /*exchange_value=*/displaced_header,
             /*where=*/object_mark_addr,
             MacroAssembler::MemBarRel,
             MacroAssembler::cmpxchgx_hint_release_lock(),
             noreg,
             &slow_case);
    b(free_slot);

    // } else {
    //   // Slow path.
    //   InterpreterRuntime::monitorexit(THREAD, monitor);

    // The lock has been converted into a heavy lock and hence
    // we need to get into the slow case.
    bind(slow_case);
    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
            monitor, check_for_exceptions CC_INTERP_ONLY(&& false));
    // }

    Label done;
    b(done); // Monitor register may be overwritten! Runtime has already freed the slot.

    // Exchange worked, do monitor->set_obj(NULL);
    align(32, 12);
    bind(free_slot);
    li(R0, 0);
    std(R0, BasicObjectLock::obj_offset_in_bytes(), monitor);
    bind(done);
  }
}

#ifndef CC_INTERP

// Load compiled (i2c) or interpreter entry when calling from interpreted and
// do the call. Centralized so that all interpreter calls will do the same actions.
// If jvmti single stepping is on for a thread we must not call compiled code.
//
// Input:
//   - Rtarget_method: method to call
//   - Rret_addr:      return address
//   - 2 scratch regs
//
void InterpreterMacroAssembler::call_from_interpreter(Register Rtarget_method, Register Rret_addr, Register Rscratch1, Register Rscratch2) {
  assert_different_registers(Rscratch1, Rscratch2, Rtarget_method, Rret_addr);
  // Assume we want to go compiled if available.
  const Register Rtarget_addr = Rscratch1;
  const Register Rinterp_only = Rscratch2;

  ld(Rtarget_addr, in_bytes(Method::from_interpreted_offset()), Rtarget_method);

  if (JvmtiExport::can_post_interpreter_events()) {
    lwz(Rinterp_only, in_bytes(JavaThread::interp_only_mode_offset()), R16_thread);

    // JVMTI events, such as single-stepping, are implemented partly by avoiding running
    // compiled code in threads for which the event is enabled. Check here for
    // interp_only_mode if these events CAN be enabled.
    Label done;
    verify_thread();
    cmpwi(CCR0, Rinterp_only, 0);
    beq(CCR0, done);
    ld(Rtarget_addr, in_bytes(Method::interpreter_entry_offset()), Rtarget_method);
    align(32, 12);
    bind(done);
  }

#ifdef ASSERT
  {
    Label Lok;
    cmpdi(CCR0, Rtarget_addr, 0);
    bne(CCR0, Lok);
    stop("null entry point");
    bind(Lok);
  }
#endif // ASSERT

  mr(R21_sender_SP, R1_SP);

  // Calc a precise SP for the call. The SP value we calculated in
  // generate_fixed_frame() is based on the max_stack() value, so we would waste stack space
  // if esp is not max. Also, the i2c adapter extends the stack space without restoring
  // our pre-calced value, so repeating calls via i2c would result in stack overflow.
  // Since esp already points to an empty slot, we just have to sub 1 additional slot
  // to meet the abi scratch requirements.
  // The max_stack value will get restored in the return entry of the interpreter.
  addi(Rscratch2, R15_esp, Interpreter::stackElementSize - frame::abi_reg_args_size);
  clrrdi(Rscratch2, Rscratch2, exact_log2(frame::alignment_in_bytes)); // round towards smaller address
  resize_frame_absolute(Rscratch2, Rscratch2, R0);

  mr_if_needed(R19_method, Rtarget_method);
  mtctr(Rtarget_addr);
  mtlr(Rret_addr);

  save_interpreter_state(Rscratch2);
#ifdef ASSERT
  ld(Rscratch1, _ijava_state_neg(top_frame_sp), Rscratch2); // Rscratch2 contains fp
  cmpd(CCR0, R21_sender_SP, Rscratch1);
  asm_assert_eq("top_frame_sp incorrect", 0x951);
#endif

  bctr();
}
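
// Sketch of the SP computation above (illustrative):
//
//   new_SP = align_down(R15_esp + Interpreter::stackElementSize
//                                - frame::abi_reg_args_size,
//                       frame::alignment_in_bytes);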

// Set the method data pointer for the current bcp.
void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() {
  assert(ProfileInterpreter, "must be profiling interpreter");
  Label get_continue;
  ld(R28_mdx, in_bytes(Method::method_data_offset()), R19_method);
  test_method_data_pointer(get_continue);
  call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), R19_method, R14_bcp);

  addi(R28_mdx, R28_mdx, in_bytes(MethodData::data_offset()));
  add(R28_mdx, R28_mdx, R3_RET);
  bind(get_continue);
}

// Test the method data pointer (R28_mdx). If it is null, continue at the specified label.
void InterpreterMacroAssembler::test_method_data_pointer(Label& zero_continue) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  cmpdi(CCR0, R28_mdx, 0);
  beq(CCR0, zero_continue);
}

void InterpreterMacroAssembler::verify_method_data_pointer() {
  assert(ProfileInterpreter, "must be profiling interpreter");
#ifdef ASSERT
  Label verify_continue;
  test_method_data_pointer(verify_continue);

  // If the mdp is valid, it will point to a DataLayout header which is
  // consistent with the bcp. The converse is highly probable also.
  lhz(R11_scratch1, in_bytes(DataLayout::bci_offset()), R28_mdx);
  ld(R12_scratch2, in_bytes(Method::const_offset()), R19_method);
  addi(R11_scratch1, R11_scratch1, in_bytes(ConstMethod::codes_offset()));
  add(R11_scratch1, R12_scratch2, R11_scratch1);
  cmpd(CCR0, R11_scratch1, R14_bcp);
  beq(CCR0, verify_continue);

  call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp ), R19_method, R14_bcp, R28_mdx);

  bind(verify_continue);
#endif
}

void InterpreterMacroAssembler::test_invocation_counter_for_mdp(Register invocation_count,
                                                                Register Rscratch,
                                                                Label &profile_continue) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  // Control will flow to "profile_continue" if the counter is less than the
  // limit or if we call profile_method().
  Label done;

  // If no method data exists, and the counter is high enough, make one.
  int ipl_offs = load_const_optimized(Rscratch, &InvocationCounter::InterpreterProfileLimit, R0, true);
  lwz(Rscratch, ipl_offs, Rscratch);

  cmpdi(CCR0, R28_mdx, 0);
  // Test to see if we should create a method data oop.
  cmpd(CCR1, Rscratch /* InterpreterProfileLimit */, invocation_count);
  bne(CCR0, done);
  bge(CCR1, profile_continue);

  // Build it now.
  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
  set_method_data_pointer_for_bcp();
  b(profile_continue);

  align(32, 12);
  bind(done);
}

void InterpreterMacroAssembler::test_backedge_count_for_osr(Register backedge_count, Register branch_bcp, Register Rtmp) {
  assert_different_registers(backedge_count, Rtmp, branch_bcp);
  assert(UseOnStackReplacement,"Must UseOnStackReplacement to test_backedge_count_for_osr");

  Label did_not_overflow;
  Label overflow_with_error;

  int ibbl_offs = load_const_optimized(Rtmp, &InvocationCounter::InterpreterBackwardBranchLimit, R0, true);
  lwz(Rtmp, ibbl_offs, Rtmp);
  cmpw(CCR0, backedge_count, Rtmp);

  blt(CCR0, did_not_overflow);

  // When ProfileInterpreter is on, the backedge_count comes from the
  // methodDataOop, whose value does not get reset on the call to
  // frequency_counter_overflow(). To avoid excessive calls to the overflow
  // routine while the method is being compiled, add a second test to make sure
  // the overflow function is called only once every overflow_frequency.
  if (ProfileInterpreter) {
    const int overflow_frequency = 1024;
    li(Rtmp, overflow_frequency-1);
    andr(Rtmp, Rtmp, backedge_count);
    cmpwi(CCR0, Rtmp, 0);
    bne(CCR0, did_not_overflow);
  }

  // Overflow in loop, pass branch bytecode.
  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), branch_bcp, true);

  // Was an OSR adapter generated?
  // R3_RET = osr nmethod
  cmpdi(CCR0, R3_RET, 0);
  beq(CCR0, overflow_with_error);

  // Has the nmethod been invalidated already?
  lwz(Rtmp, nmethod::entry_bci_offset(), R3_RET);
  cmpwi(CCR0, Rtmp, InvalidOSREntryBci);
  beq(CCR0, overflow_with_error);

  // Migrate the interpreter frame off of the stack.
  // We can use all registers because we will not return to interpreter from this point.

  // Save nmethod.
  const Register osr_nmethod = R31;
  mr(osr_nmethod, R3_RET);
  set_top_ijava_frame_at_SP_as_last_Java_frame(R1_SP, R11_scratch1);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin), R16_thread);
  reset_last_Java_frame();
  // OSR buffer is in ARG1

  // Remove the interpreter frame.
  merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ R0, R11_scratch1, R12_scratch2);

  // Jump to the osr code.
  ld(R11_scratch1, nmethod::osr_entry_point_offset(), osr_nmethod);
  mtlr(R0);
  mtctr(R11_scratch1);
  bctr();

  align(32, 12);
  bind(overflow_with_error);
  bind(did_not_overflow);
}

// Store a value at some constant offset from the method data pointer.
void InterpreterMacroAssembler::set_mdp_data_at(int constant, Register value) {
  assert(ProfileInterpreter, "must be profiling interpreter");

  std(value, constant, R28_mdx);
}

// Increment the value at some constant offset from the method data pointer.
void InterpreterMacroAssembler::increment_mdp_data_at(int constant,
                                                      Register counter_addr,
                                                      Register Rbumped_count,
                                                      bool decrement) {
  // Locate the counter at a fixed offset from the mdp:
  addi(counter_addr, R28_mdx, constant);
  increment_mdp_data_at(counter_addr, Rbumped_count, decrement);
}

// Increment the value at some non-fixed (reg + constant) offset from
// the method data pointer.
void InterpreterMacroAssembler::increment_mdp_data_at(Register reg,
                                                      int constant,
                                                      Register scratch,
                                                      Register Rbumped_count,
                                                      bool decrement) {
  // Add the constant to reg to get the offset.
  add(scratch, R28_mdx, reg);
  // Then calculate the counter address.
  addi(scratch, scratch, constant);
  increment_mdp_data_at(scratch, Rbumped_count, decrement);
}

void InterpreterMacroAssembler::increment_mdp_data_at(Register counter_addr,
                                                      Register Rbumped_count,
                                                      bool decrement) {
  assert(ProfileInterpreter, "must be profiling interpreter");

  // Load the counter.
  ld(Rbumped_count, 0, counter_addr);

  if (decrement) {
    // Decrement the counter.
    addi(Rbumped_count, Rbumped_count, - DataLayout::counter_increment);
    // Store the decremented counter.
    std(Rbumped_count, 0, counter_addr);
    // Note: the add/sub overflow checks of other ports are not needed here,
    // since a 64-bit counter calculation should never overflow.
  } else {
    // Increment the counter.
    addi(Rbumped_count, Rbumped_count, DataLayout::counter_increment);
    // Store the incremented counter.
    std(Rbumped_count, 0, counter_addr);
  }
}
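
// Sketch (illustrative): both paths above are a plain 64-bit
// load/modify/store of the counter cell:
//
//   *counter_addr += decrement ? -DataLayout::counter_increment
//                              :  DataLayout::counter_increment;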

// Set a flag value at the current method data pointer position.
void InterpreterMacroAssembler::set_mdp_flag_at(int flag_constant,
                                                Register scratch) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  // Load the data header.
  lbz(scratch, in_bytes(DataLayout::flags_offset()), R28_mdx);
  // Set the flag.
  ori(scratch, scratch, flag_constant);
  // Store the modified header.
  stb(scratch, in_bytes(DataLayout::flags_offset()), R28_mdx);
}

// Test the location at some offset from the method data pointer.
// If it is not equal to value, branch to the not_equal_continue Label.
void InterpreterMacroAssembler::test_mdp_data_at(int offset,
                                                 Register value,
                                                 Label& not_equal_continue,
                                                 Register test_out) {
  assert(ProfileInterpreter, "must be profiling interpreter");

  ld(test_out, offset, R28_mdx);
  cmpd(CCR0,  value, test_out);
  bne(CCR0, not_equal_continue);
}

// Update the method data pointer by the displacement located at some fixed
// offset from the method data pointer.
void InterpreterMacroAssembler::update_mdp_by_offset(int offset_of_disp,
                                                     Register scratch) {
  assert(ProfileInterpreter, "must be profiling interpreter");

  ld(scratch, offset_of_disp, R28_mdx);
  add(R28_mdx, scratch, R28_mdx);
}

// Update the method data pointer by the displacement located at the
// offset (reg + offset_of_disp).
void InterpreterMacroAssembler::update_mdp_by_offset(Register reg,
                                                     int offset_of_disp,
                                                     Register scratch) {
  assert(ProfileInterpreter, "must be profiling interpreter");

  add(scratch, reg, R28_mdx);
  ld(scratch, offset_of_disp, scratch);
  add(R28_mdx, scratch, R28_mdx);
}

// Update the method data pointer by a simple constant displacement.
void InterpreterMacroAssembler::update_mdp_by_constant(int constant) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  addi(R28_mdx, R28_mdx, constant);
}

// Update the method data pointer for a _ret bytecode whose target
// was not among our cached targets.
void InterpreterMacroAssembler::update_mdp_for_ret(TosState state,
                                                   Register return_bci) {
  assert(ProfileInterpreter, "must be profiling interpreter");

  push(state);
  assert(return_bci->is_nonvolatile(), "need to protect return_bci");
  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), return_bci);
  pop(state);
}

// Increments the backedge counter.
// Returns backedge counter + invocation counter in Rdst.
void InterpreterMacroAssembler::increment_backedge_counter(const Register Rcounters, const Register Rdst,
                                                           const Register Rtmp1, Register Rscratch) {
  assert(UseCompiler, "incrementing must be useful");
  assert_different_registers(Rdst, Rtmp1);
  const Register invocation_counter = Rtmp1;
  const Register counter = Rdst;
  // TODO ppc port assert(4 == InvocationCounter::sz_counter(), "unexpected field size.");

  // Load backedge counter.
  lwz(counter, in_bytes(MethodCounters::backedge_counter_offset()) +
               in_bytes(InvocationCounter::counter_offset()), Rcounters);
  // Load invocation counter.
  lwz(invocation_counter, in_bytes(MethodCounters::invocation_counter_offset()) +
                          in_bytes(InvocationCounter::counter_offset()), Rcounters);

  // Add the delta to the backedge counter.
  addi(counter, counter, InvocationCounter::count_increment);

  // Mask the invocation counter.
  li(Rscratch, InvocationCounter::count_mask_value);
  andr(invocation_counter, invocation_counter, Rscratch);

  // Store new counter value.
  stw(counter, in_bytes(MethodCounters::backedge_counter_offset()) +
               in_bytes(InvocationCounter::counter_offset()), Rcounters);
  // Return invocation counter + backedge counter.
  add(counter, counter, invocation_counter);
}
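
// Sketch of the counter math above (illustrative):
//
//   backedge += InvocationCounter::count_increment;  store backedge;
//   Rdst      = backedge + (invocation & InvocationCounter::count_mask_value);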

// Count a taken branch in the bytecodes.
void InterpreterMacroAssembler::profile_taken_branch(Register scratch, Register bumped_count) {
  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(profile_continue);

    // We are taking a branch. Increment the taken count.
    increment_mdp_data_at(in_bytes(JumpData::taken_offset()), scratch, bumped_count);

    // The method data pointer needs to be updated to reflect the new target.
    update_mdp_by_offset(in_bytes(JumpData::displacement_offset()), scratch);
    bind (profile_continue);
  }
}

// Count a not-taken branch in the bytecodes.
void InterpreterMacroAssembler::profile_not_taken_branch(Register scratch1, Register scratch2) {
  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(profile_continue);

    // We are not taking the branch. Increment the not-taken count.
    increment_mdp_data_at(in_bytes(BranchData::not_taken_offset()), scratch1, scratch2);

    // The method data pointer needs to be updated to correspond to the
    // next bytecode.
    update_mdp_by_constant(in_bytes(BranchData::branch_data_size()));
    bind (profile_continue);
  }
}

// Count a non-virtual call in the bytecodes.
void InterpreterMacroAssembler::profile_call(Register scratch1, Register scratch2) {
  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(profile_continue);

    // We are making a call. Increment the count.
    increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch1, scratch2);

    // The method data pointer needs to be updated to reflect the new target.
    update_mdp_by_constant(in_bytes(CounterData::counter_data_size()));
    bind (profile_continue);
  }
}

// Count a final call in the bytecodes.
void InterpreterMacroAssembler::profile_final_call(Register scratch1, Register scratch2) {
  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(profile_continue);

    // We are making a call. Increment the count.
    increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch1, scratch2);

    // The method data pointer needs to be updated to reflect the new target.
    update_mdp_by_constant(in_bytes(VirtualCallData::virtual_call_data_size()));
    bind (profile_continue);
  }
}

// Count a virtual call in the bytecodes.
void InterpreterMacroAssembler::profile_virtual_call(Register Rreceiver,
                                                     Register Rscratch1,
                                                     Register Rscratch2,
                                                     bool receiver_can_be_null) {
  if (!ProfileInterpreter) { return; }
  Label profile_continue;

  // If no method data exists, go to profile_continue.
  test_method_data_pointer(profile_continue);

  Label skip_receiver_profile;
  if (receiver_can_be_null) {
    Label not_null;
    cmpdi(CCR0, Rreceiver, 0);
    bne(CCR0, not_null);
    // We are making a call. Increment the count for null receiver.
    increment_mdp_data_at(in_bytes(CounterData::count_offset()), Rscratch1, Rscratch2);
    b(skip_receiver_profile);
    bind(not_null);
  }

  // Record the receiver type.
  record_klass_in_profile(Rreceiver, Rscratch1, Rscratch2, true);
  bind(skip_receiver_profile);

  // The method data pointer needs to be updated to reflect the new target.
  update_mdp_by_constant(in_bytes(VirtualCallData::virtual_call_data_size()));
  bind (profile_continue);
}

void InterpreterMacroAssembler::profile_typecheck(Register Rklass, Register Rscratch1, Register Rscratch2) {
  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(profile_continue);

    int mdp_delta = in_bytes(BitData::bit_data_size());
    if (TypeProfileCasts) {
      mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());

      // Record the object type.
      record_klass_in_profile(Rklass, Rscratch1, Rscratch2, false);
    }

    // The method data pointer needs to be updated.
    update_mdp_by_constant(mdp_delta);

    bind (profile_continue);
  }
}

void InterpreterMacroAssembler::profile_typecheck_failed(Register Rscratch1, Register Rscratch2) {
  if (ProfileInterpreter && TypeProfileCasts) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(profile_continue);

    int count_offset = in_bytes(CounterData::count_offset());
    // Back up the address, since we have already bumped the mdp.
    count_offset -= in_bytes(VirtualCallData::virtual_call_data_size());

    // *Decrement* the counter. We expect to see zero or small negatives.
    increment_mdp_data_at(count_offset, Rscratch1, Rscratch2, true);

    bind (profile_continue);
  }
}

// Count a ret in the bytecodes.
void InterpreterMacroAssembler::profile_ret(TosState state, Register return_bci, Register scratch1, Register scratch2) {
  if (ProfileInterpreter) {
    Label profile_continue;
    uint row;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(profile_continue);

    // Update the total ret count.
    increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch1, scratch2 );

    for (row = 0; row < RetData::row_limit(); row++) {
      Label next_test;

      // See if return_bci is equal to bci[n]:
      test_mdp_data_at(in_bytes(RetData::bci_offset(row)), return_bci, next_test, scratch1);

      // return_bci is equal to bci[n]. Increment the count.
      increment_mdp_data_at(in_bytes(RetData::bci_count_offset(row)), scratch1, scratch2);

      // The method data pointer needs to be updated to reflect the new target.
      update_mdp_by_offset(in_bytes(RetData::bci_displacement_offset(row)), scratch1);
      b(profile_continue);
      bind(next_test);
    }

    update_mdp_for_ret(state, return_bci);

    bind (profile_continue);
  }
}

// Count the default case of a switch construct.
void InterpreterMacroAssembler::profile_switch_default(Register scratch1,  Register scratch2) {
  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(profile_continue);

    // Update the default case count
    increment_mdp_data_at(in_bytes(MultiBranchData::default_count_offset()),
                          scratch1, scratch2);

    // The method data pointer needs to be updated.
    update_mdp_by_offset(in_bytes(MultiBranchData::default_displacement_offset()),
                         scratch1);

    bind (profile_continue);
  }
}

// Count the index'th case of a switch construct.
void InterpreterMacroAssembler::profile_switch_case(Register index,
                                                    Register scratch1,
                                                    Register scratch2,
                                                    Register scratch3) {
  if (ProfileInterpreter) {
    assert_different_registers(index, scratch1, scratch2, scratch3);
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(profile_continue);

    // Build the base (index * per_case_size_in_bytes()) + case_array_offset_in_bytes().
    li(scratch3, in_bytes(MultiBranchData::case_array_offset()));

    assert(in_bytes(MultiBranchData::per_case_size()) == 16, "per-case size must match the constant shift below");
    sldi(scratch1, index, exact_log2(in_bytes(MultiBranchData::per_case_size())));
    add(scratch1, scratch1, scratch3);

    // Update the case count.
    increment_mdp_data_at(scratch1, in_bytes(MultiBranchData::relative_count_offset()), scratch2, scratch3);

    // The method data pointer needs to be updated.
    update_mdp_by_offset(scratch1, in_bytes(MultiBranchData::relative_displacement_offset()), scratch2);

    bind (profile_continue);
  }
}

void InterpreterMacroAssembler::profile_null_seen(Register Rscratch1, Register Rscratch2) {
  if (ProfileInterpreter) {
    assert_different_registers(Rscratch1, Rscratch2);
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(profile_continue);

    set_mdp_flag_at(BitData::null_seen_byte_constant(), Rscratch1);

    // The method data pointer needs to be updated.
    int mdp_delta = in_bytes(BitData::bit_data_size());
    if (TypeProfileCasts) {
      mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
    }
    update_mdp_by_constant(mdp_delta);

    bind (profile_continue);
  }
}

void InterpreterMacroAssembler::record_klass_in_profile(Register Rreceiver,
                                                        Register Rscratch1, Register Rscratch2,
                                                        bool is_virtual_call) {
  assert(ProfileInterpreter, "must be profiling");
  assert_different_registers(Rreceiver, Rscratch1, Rscratch2);

  Label done;
  record_klass_in_profile_helper(Rreceiver, Rscratch1, Rscratch2, 0, done, is_virtual_call);
  bind (done);
}

void InterpreterMacroAssembler::record_klass_in_profile_helper(
                                        Register receiver, Register scratch1, Register scratch2,
                                        int start_row, Label& done, bool is_virtual_call) {
  if (TypeProfileWidth == 0) {
    if (is_virtual_call) {
      increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch1, scratch2);
    }
    return;
  }

  int last_row = VirtualCallData::row_limit() - 1;
  assert(start_row <= last_row, "there must be work left to do");
  // Test this row for both the receiver and for null.
  // Take any of three different outcomes:
  //   1. found receiver => increment count and goto done
  //   2. found null => keep looking for case 1, maybe allocate this cell
  //   3. found something else => keep looking for cases 1 and 2
  // Case 3 is handled by a recursive call.
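  // A rough C-like sketch of one unrolled row test emitted below:
  //   if (receiver_cell[row] == receiver) { count_cell[row]++; goto done; }
  //   else if (row == start_row && receiver_cell[row] == NULL) {
  //     // search the remaining rows for the receiver (recursive call),
  //     // then claim this empty row (fall-through code at the end)
  //   }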
  for (int row = start_row; row <= last_row; row++) {
    Label next_test;
    bool test_for_null_also = (row == start_row);

    // See if the receiver is receiver[n].
    int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row));
    test_mdp_data_at(recvr_offset, receiver, next_test, scratch1);

    // The receiver is receiver[n]. Increment count[n].
    int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row));
    increment_mdp_data_at(count_offset, scratch1, scratch2);
    b(done);
    bind(next_test);

    if (test_for_null_also) {
      Label found_null;
      // Failed the equality check on receiver[n]... Test for null.
      if (start_row == last_row) {
        // The only thing left to do is handle the null case.
        if (is_virtual_call) {
          // Scratch1 contains test_out from test_mdp_data_at.
          cmpdi(CCR0, scratch1, 0);
          beq(CCR0, found_null);
          // Receiver did not match any saved receiver and there is no empty row for it.
          // Increment total counter to indicate polymorphic case.
          increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch1, scratch2);
          b(done);
          bind(found_null);
        } else {
          cmpdi(CCR0, scratch1, 0);
          bne(CCR0, done);
        }
        break;
      }
      // Since null is rare, make it the branch-taken case.
      cmpdi(CCR0, scratch1, 0);
      beq(CCR0, found_null);

      // Put all the "Case 3" tests here.
      record_klass_in_profile_helper(receiver, scratch1, scratch2, start_row + 1, done, is_virtual_call);

      // Found a null. Keep searching for a matching receiver,
      // but remember that this is an empty (unused) slot.
      bind(found_null);
    }
  }

  // In the fall-through case, we found no matching receiver, but we
  // observed that receiver[start_row] is NULL.

  // Fill in the receiver field and increment the count.
  int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row));
  set_mdp_data_at(recvr_offset, receiver);
  int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row));
  li(scratch1, DataLayout::counter_increment);
  set_mdp_data_at(count_offset, scratch1);
  if (start_row > 0) {
    b(done);
  }
}

// Add an InterpMonitorElem to the stack (see frame_ppc.hpp).
void InterpreterMacroAssembler::add_monitor_to_stack(bool stack_is_empty, Register Rtemp1, Register Rtemp2) {

  // Very-local scratch registers.
  const Register esp  = Rtemp1;
  const Register slot = Rtemp2;

  // Extracted monitor_size.
  int monitor_size = frame::interpreter_frame_monitor_size_in_bytes();
  assert(Assembler::is_aligned((unsigned int)monitor_size,
                               (unsigned int)frame::alignment_in_bytes),
         "size of a monitor must respect alignment of SP");

  resize_frame(-monitor_size, /*temp*/esp); // Allocate space for new monitor
  std(R1_SP, _ijava_state_neg(top_frame_sp), esp); // esp contains fp

  // Shuffle expression stack down. Recall that stack_base points
  // just above the new expression stack bottom. Old_tos and new_tos
  // are used to scan through the old and new expression stacks.
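  // In effect (a sketch of the copy loop below):
  //   for (each of the n_slots words, starting at the current tos)
  //     *(word_addr - monitor_size) = *word_addr;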
  if (!stack_is_empty) {
    Label copy_slot, copy_slot_finished;
    const Register n_slots = slot;

    addi(esp, R15_esp, Interpreter::stackElementSize); // Point to first element (pre-pushed stack).
    subf(n_slots, esp, R26_monitor);
    srdi_(n_slots, n_slots, LogBytesPerWord);          // Compute number of slots to copy.
    assert(LogBytesPerWord == 3, "the copy loop below assumes 8-byte words");
    beq(CCR0, copy_slot_finished);                     // Nothing to copy.

    mtctr(n_slots);

    // loop
    bind(copy_slot);
    ld(slot, 0, esp);              // Move expression stack down.
    std(slot, -monitor_size, esp); // distance = monitor_size
    addi(esp, esp, BytesPerWord);
    bdnz(copy_slot);

    bind(copy_slot_finished);
  }

  addi(R15_esp, R15_esp, -monitor_size);
  addi(R26_monitor, R26_monitor, -monitor_size);

  // Restart interpreter
}

// ============================================================================
// Java locals access

// Load a local variable at index in Rindex into register Rdst_value.
// Also puts address of local into Rdst_address as a service.
// Kills:
//   - Rdst_value
//   - Rdst_address
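// In effect (a sketch):
//   Rdst_address = R18_locals - Rindex * Interpreter::stackElementSize;
//   Rdst_value   = *(jint*)Rdst_address;
// Locals live below R18_locals, so higher indices sit at lower addresses.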
void InterpreterMacroAssembler::load_local_int(Register Rdst_value, Register Rdst_address, Register Rindex) {
  sldi(Rdst_address, Rindex, Interpreter::logStackElementSize);
  subf(Rdst_address, Rdst_address, R18_locals);
  lwz(Rdst_value, 0, Rdst_address);
}

// Load a local variable at index in Rindex into register Rdst_value.
// Also puts address of local into Rdst_address as a service.
// Kills:
//   - Rdst_value
//   - Rdst_address
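// Note: a long occupies two stack slots; the 64-bit value is accessed in the
// second slot (index + 1), hence the -8 displacement below.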
void InterpreterMacroAssembler::load_local_long(Register Rdst_value, Register Rdst_address, Register Rindex) {
  sldi(Rdst_address, Rindex, Interpreter::logStackElementSize);
  subf(Rdst_address, Rdst_address, R18_locals);
  ld(Rdst_value, -8, Rdst_address);
}

// Load a local variable at index in Rindex into register Rdst_value.
// Also puts address of local into Rdst_address as a service.
// Input:
//   - Rindex:      slot nr of local variable
// Kills:
//   - Rdst_value
//   - Rdst_address
void InterpreterMacroAssembler::load_local_ptr(Register Rdst_value, Register Rdst_address, Register Rindex) {
  sldi(Rdst_address, Rindex, Interpreter::logStackElementSize);
  subf(Rdst_address, Rdst_address, R18_locals);
  ld(Rdst_value, 0, Rdst_address);
}

// Load a local variable at index in Rindex into register Rdst_value.
// Also puts address of local into Rdst_address as a service.
// Kills:
//   - Rdst_value
//   - Rdst_address
void InterpreterMacroAssembler::load_local_float(FloatRegister Rdst_value, Register Rdst_address, Register Rindex) {
  sldi(Rdst_address, Rindex, Interpreter::logStackElementSize);
  subf(Rdst_address, Rdst_address, R18_locals);
  lfs(Rdst_value, 0, Rdst_address);
}

// Load a local variable at index in Rindex into register Rdst_value.
// Also puts address of local into Rdst_address as a service.
// Kills:
//   - Rdst_value
//   - Rdst_address
void InterpreterMacroAssembler::load_local_double(FloatRegister Rdst_value, Register Rdst_address, Register Rindex) {
  sldi(Rdst_address, Rindex, Interpreter::logStackElementSize);
  subf(Rdst_address, Rdst_address, R18_locals);
  lfd(Rdst_value, -8, Rdst_address);
}

// Store an int value at local variable slot Rindex.
// Kills:
//   - Rindex
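// In effect (a sketch, mirroring load_local_int above):
//   *(jint*)(R18_locals - Rindex * Interpreter::stackElementSize) = Rvalue;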
void InterpreterMacroAssembler::store_local_int(Register Rvalue, Register Rindex) {
  sldi(Rindex, Rindex, Interpreter::logStackElementSize);
  subf(Rindex, Rindex, R18_locals);
  stw(Rvalue, 0, Rindex);
}

// Store a long value at local variable slot Rindex.
// Kills:
//   - Rindex
void InterpreterMacroAssembler::store_local_long(Register Rvalue, Register Rindex) {
  sldi(Rindex, Rindex, Interpreter::logStackElementSize);
  subf(Rindex, Rindex, R18_locals);
  std(Rvalue, -8, Rindex);
}

// Store an oop value at local variable slot Rindex.
// Kills:
//   - Rindex
void InterpreterMacroAssembler::store_local_ptr(Register Rvalue, Register Rindex) {
  sldi(Rindex, Rindex, Interpreter::logStackElementSize);
  subf(Rindex, Rindex, R18_locals);
  std(Rvalue, 0, Rindex);
}

// Store a float value at local variable slot Rindex.
// Kills:
//   - Rindex
void InterpreterMacroAssembler::store_local_float(FloatRegister Rvalue, Register Rindex) {
  sldi(Rindex, Rindex, Interpreter::logStackElementSize);
  subf(Rindex, Rindex, R18_locals);
  stfs(Rvalue, 0, Rindex);
}

// Store a double value at local variable slot Rindex.
// Kills:
//   - Rindex
void InterpreterMacroAssembler::store_local_double(FloatRegister Rvalue, Register Rindex) {
  sldi(Rindex, Rindex, Interpreter::logStackElementSize);
  subf(Rindex, Rindex, R18_locals);
  stfd(Rvalue, -8, Rindex);
}

// Read the pending exception from the thread and, if one is pending, jump
// to the interpreter's rethrow-exception entry. Fall through otherwise.
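// Roughly (a sketch):
//   if ((exc = thread->pending_exception) != NULL) {
//     thread->pending_exception = NULL;
//     R3 = exc;
//     goto rethrow_exception_entry;  // via ctr, after saving interpreter state
//   }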
void InterpreterMacroAssembler::check_and_forward_exception(Register Rscratch1, Register Rscratch2) {
  assert_different_registers(Rscratch1, Rscratch2, R3);
  Register Rexception = Rscratch1;
  Register Rtmp       = Rscratch2;
  Label Ldone;
  // Get pending exception oop.
  ld(Rexception, thread_(pending_exception));
  cmpdi(CCR0, Rexception, 0);
  beq(CCR0, Ldone);
  li(Rtmp, 0);
  mr_if_needed(R3, Rexception);
  std(Rtmp, thread_(pending_exception)); // Clear exception in thread
  if (Interpreter::rethrow_exception_entry() != NULL) {
    // Already got entry address.
    load_dispatch_table(Rtmp, (address*)Interpreter::rethrow_exception_entry());
  } else {
    // Dynamically load entry address.
    int simm16_rest = load_const_optimized(Rtmp, &Interpreter::_rethrow_exception_entry, R0, true);
    ld(Rtmp, simm16_rest, Rtmp);
  }
  mtctr(Rtmp);
  save_interpreter_state(Rtmp);
  bctr();

  align(32, 12);
  bind(Ldone);
}

void InterpreterMacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
  save_interpreter_state(R11_scratch1);

  MacroAssembler::call_VM(oop_result, entry_point, false);

  restore_interpreter_state(R11_scratch1, /*bcp_and_mdx_only*/ true);

  check_and_handle_popframe(R11_scratch1);
  check_and_handle_earlyret(R11_scratch1);
  // Now check exceptions manually.
  if (check_exceptions) {
    check_and_forward_exception(R11_scratch1, R12_scratch2);
  }
}

void InterpreterMacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
  // ARG1 is reserved for the thread.
  mr_if_needed(R4_ARG2, arg_1);
  call_VM(oop_result, entry_point, check_exceptions);
}

void InterpreterMacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  // ARG1 is reserved for the thread.
  mr_if_needed(R4_ARG2, arg_1);
  assert(arg_2 != R4_ARG2, "smashed argument");
  mr_if_needed(R5_ARG3, arg_2);
  call_VM(oop_result, entry_point, check_exceptions);
}

void InterpreterMacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
  // ARG1 is reserved for the thread.
  mr_if_needed(R4_ARG2, arg_1);
  assert(arg_2 != R4_ARG2, "smashed argument");
  mr_if_needed(R5_ARG3, arg_2);
  assert(arg_3 != R4_ARG2 && arg_3 != R5_ARG3, "smashed argument");
  mr_if_needed(R6_ARG4, arg_3);
  call_VM(oop_result, entry_point, check_exceptions);
}

void InterpreterMacroAssembler::save_interpreter_state(Register scratch) {
  ld(scratch, 0, R1_SP);
  std(R15_esp, _ijava_state_neg(esp), scratch);
  std(R14_bcp, _ijava_state_neg(bcp), scratch);
  std(R26_monitor, _ijava_state_neg(monitors), scratch);
  if (ProfileInterpreter) { std(R28_mdx, _ijava_state_neg(mdx), scratch); }
  // Other entries should be unchanged.
}

void InterpreterMacroAssembler::restore_interpreter_state(Register scratch, bool bcp_and_mdx_only) {
  ld(scratch, 0, R1_SP);
  ld(R14_bcp, _ijava_state_neg(bcp), scratch); // Changed by VM code (exception).
  if (ProfileInterpreter) { ld(R28_mdx, _ijava_state_neg(mdx), scratch); } // Changed by VM code.
  if (!bcp_and_mdx_only) {
    // Following ones are Metadata.
    ld(R19_method, _ijava_state_neg(method), scratch);
    ld(R27_constPoolCache, _ijava_state_neg(cpoolCache), scratch);
    // Following ones are stack addresses and don't require reload.
    ld(R15_esp, _ijava_state_neg(esp), scratch);
    ld(R18_locals, _ijava_state_neg(locals), scratch);
    ld(R26_monitor, _ijava_state_neg(monitors), scratch);
  }
#ifdef ASSERT
  {
    Label Lok;
    subf(R0, R1_SP, scratch);
    cmpdi(CCR0, R0, frame::abi_reg_args_size + frame::ijava_state_size);
    bge(CCR0, Lok);
    stop("frame too small (restore istate)", 0x5432);
    bind(Lok);
  }
  {
    Label Lok;
    ld(R0, _ijava_state_neg(ijava_reserved), scratch);
    cmpdi(CCR0, R0, 0x5afe);
    beq(CCR0, Lok);
    stop("frame corrupted (restore istate)", 0x5afe);
    bind(Lok);
  }
#endif
}

#endif // !CC_INTERP

void InterpreterMacroAssembler::get_method_counters(Register method,
                                                    Register Rcounters,
                                                    Label& skip) {
  BLOCK_COMMENT("Load and ev. allocate counter object {");
  Label has_counters;
  ld(Rcounters, in_bytes(Method::method_counters_offset()), method);
  cmpdi(CCR0, Rcounters, 0);
  bne(CCR0, has_counters);
  call_VM(noreg, CAST_FROM_FN_PTR(address,
                                  InterpreterRuntime::build_method_counters), method, false);
  ld(Rcounters, in_bytes(Method::method_counters_offset()), method);
  cmpdi(CCR0, Rcounters, 0);
  beq(CCR0, skip); // No MethodCounters, OutOfMemory.
  BLOCK_COMMENT("} Load and ev. allocate counter object");

  bind(has_counters);
}

void InterpreterMacroAssembler::increment_invocation_counter(Register Rcounters, Register iv_be_count, Register Rtmp_r0) {
  assert(UseCompiler, "incrementing must be useful");
  Register invocation_count = iv_be_count;
  Register backedge_count   = Rtmp_r0;
  int delta = InvocationCounter::count_increment;

  // Load each counter in a register.
  //  ld(inv_counter, Rtmp);
  //  ld(be_counter, Rtmp2);
  int inv_counter_offset = in_bytes(MethodCounters::invocation_counter_offset() +
                                    InvocationCounter::counter_offset());
  int be_counter_offset  = in_bytes(MethodCounters::backedge_counter_offset() +
                                    InvocationCounter::counter_offset());
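  // In effect (a sketch; count_mask_value presumably strips the status bits):
  //   backedge_count   = *be_counter & count_mask_value;
  //   invocation_count = *inv_counter + count_increment;
  //   *inv_counter     = invocation_count;
  //   iv_be_count      = backedge_count + invocation_count;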

  BLOCK_COMMENT("Increment profiling counters {");

  // Load the backedge counter.
  lwz(backedge_count, be_counter_offset, Rcounters); // is unsigned int
  // Mask the backedge counter.
  Register tmp = invocation_count;
  li(tmp, InvocationCounter::count_mask_value);
  andr(backedge_count, tmp, backedge_count); // Cannot use andi, need sign extension of count_mask_value.

  // Load the invocation counter.
  lwz(invocation_count, inv_counter_offset, Rcounters); // is unsigned int
  // Add the delta to the invocation counter and store the result.
  addi(invocation_count, invocation_count, delta);
  // Store value.
  stw(invocation_count, inv_counter_offset, Rcounters);

  // Add invocation counter + backedge counter.
  add(iv_be_count, backedge_count, invocation_count);

  // Note that this macro must leave the backedge_count + invocation_count in
  // register iv_be_count!
  BLOCK_COMMENT("} Increment profiling counters");
}

void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) {
  if (state == atos) { MacroAssembler::verify_oop(reg); }
}

#ifndef CC_INTERP
// Local helper function for the verify_oop_or_return_address macro.
static bool verify_return_address(Method* m, int bci) {
#ifndef PRODUCT
  address pc = (address)(m->constMethod()) + in_bytes(ConstMethod::codes_offset()) + bci;
  // Assume it is a valid return address if it is inside m and is preceded by a jsr.
  if (!m->contains(pc))                                            return false;
  address jsr_pc;
  jsr_pc = pc - Bytecodes::length_for(Bytecodes::_jsr);
  if (*jsr_pc == Bytecodes::_jsr   && jsr_pc >= m->code_base())    return true;
  jsr_pc = pc - Bytecodes::length_for(Bytecodes::_jsr_w);
  if (*jsr_pc == Bytecodes::_jsr_w && jsr_pc >= m->code_base())    return true;
#endif // PRODUCT
  return false;
}

void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) {
  if (VerifyFPU) {
    unimplemented("verfiyFPU");
  }
}

void InterpreterMacroAssembler::verify_oop_or_return_address(Register reg, Register Rtmp) {
  if (!VerifyOops) return;

  // The VM documentation for the astore[_wide] bytecode allows
  // the TOS to be not only an oop but also a return address.
  Label test;
  Label skip;
  // See if it is an address (in the current method):

  const int log2_bytecode_size_limit = 16;
  srdi_(Rtmp, reg, log2_bytecode_size_limit);
  bne(CCR0, test);
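  // Plausible return addresses (bcis) are small: a method's bytecode size is
  // limited, so any value with bits set at or above 2^16 cannot be a bci and
  // is verified as an ordinary oop at 'test' instead.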

  address fd = CAST_FROM_FN_PTR(address, verify_return_address);
  unsigned int nbytes_save = 10*8; // 10 volatile gprs

  save_LR_CR(Rtmp);
  push_frame_reg_args(nbytes_save, Rtmp);
  save_volatile_gprs(R1_SP, 112); // except R0

  load_const_optimized(Rtmp, fd, R0);
  mr_if_needed(R4_ARG2, reg);
  mr(R3_ARG1, R19_method);
  call_c(Rtmp); // call C

  restore_volatile_gprs(R1_SP, 112); // except R0
  pop_frame();
  restore_LR_CR(Rtmp);
  b(skip);

  // Not an address; verify it as an ordinary oop:
  bind(test);
  verify_oop(reg);
  bind(skip);
}
#endif // !CC_INTERP

// Inline assembly for:
//
// if (thread is in interp_only_mode) {
//   InterpreterRuntime::post_method_entry();
// }
// if (*jvmpi::event_flags_array_at_addr(JVMPI_EVENT_METHOD_ENTRY ) ||
//     *jvmpi::event_flags_array_at_addr(JVMPI_EVENT_METHOD_ENTRY2)   ) {
//   SharedRuntime::jvmpi_method_entry(method, receiver);
// }
void InterpreterMacroAssembler::notify_method_entry() {
  // JVMTI
  // Whenever JVMTI puts a thread in interp_only_mode, method
  // entry/exit events are sent for that thread to track stack
  // depth. If it is possible to enter interp_only_mode we add
  // the code to check if the event should be sent.
  if (JvmtiExport::can_post_interpreter_events()) {
    Label jvmti_post_done;

    lwz(R0, in_bytes(JavaThread::interp_only_mode_offset()), R16_thread);
    cmpwi(CCR0, R0, 0);
    beq(CCR0, jvmti_post_done);
    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_entry),
            /*check_exceptions=*/true CC_INTERP_ONLY(&& false));

    bind(jvmti_post_done);
  }
}

// Inline assembly for:
//
// if (thread is in interp_only_mode) {
//   // save result
//   InterpreterRuntime::post_method_exit();
//   // restore result
// }
// if (*jvmpi::event_flags_array_at_addr(JVMPI_EVENT_METHOD_EXIT)) {
//   // save result
//   SharedRuntime::jvmpi_method_exit();
//   // restore result
// }
//
// Native methods have their result stored in d_tmp and l_tmp.
// Java methods have their result stored in the expression stack.
void InterpreterMacroAssembler::notify_method_exit(bool is_native_method, TosState state,
                                                   NotifyMethodExitMode mode, bool check_exceptions) {
  // JVMTI
  // Whenever JVMTI puts a thread in interp_only_mode, method
  // entry/exit events are sent for that thread to track stack
  // depth. If it is possible to enter interp_only_mode we add
  // the code to check if the event should be sent.
  if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) {
    Label jvmti_post_done;

    lwz(R0, in_bytes(JavaThread::interp_only_mode_offset()), R16_thread);
    cmpwi(CCR0, R0, 0);
    beq(CCR0, jvmti_post_done);
    CC_INTERP_ONLY(assert(is_native_method && !check_exceptions, "must not push state"));
    if (!is_native_method) push(state); // Expose tos to GC.
    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit),
            /*check_exceptions=*/check_exceptions);
    if (!is_native_method) pop(state);

    align(32, 12);
    bind(jvmti_post_done);
  }

  // Dtrace support not implemented.
}

#ifdef CC_INTERP
// Convert the current TOP_IJAVA_FRAME into a PARENT_IJAVA_FRAME
// (resizing it by parent_frame_resize) and push a new interpreter
// TOP_IJAVA_FRAME (of size top_frame_size).
void InterpreterMacroAssembler::push_interpreter_frame(Register top_frame_size, Register parent_frame_resize,
                                                       Register tmp1, Register tmp2, Register tmp3,
                                                       Register tmp4, Register pc) {
  assert_different_registers(top_frame_size, parent_frame_resize, tmp1, tmp2, tmp3, tmp4);
  ld(tmp1, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
  mr(tmp2/*top_frame_sp*/, R1_SP);
  // Move initial_caller_sp.
  ld(tmp4, _top_ijava_frame_abi(initial_caller_sp), R1_SP);
  neg(parent_frame_resize, parent_frame_resize);
  resize_frame(parent_frame_resize/*-parent_frame_resize*/, tmp3);

  // Set LR in new parent frame.
  std(tmp1, _abi(lr), R1_SP);
  // Set top_frame_sp info for new parent frame.
  std(tmp2, _parent_ijava_frame_abi(top_frame_sp), R1_SP);
  std(tmp4, _parent_ijava_frame_abi(initial_caller_sp), R1_SP);

  // Push new TOP_IJAVA_FRAME.
  push_frame(top_frame_size, tmp2);

  get_PC_trash_LR(tmp3);
  std(tmp3, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
  // Used for non-initial callers by unextended_sp().
  std(R1_SP, _top_ijava_frame_abi(initial_caller_sp), R1_SP);
}

// Pop the topmost TOP_IJAVA_FRAME and convert the previous
// PARENT_IJAVA_FRAME back into a TOP_IJAVA_FRAME.
void InterpreterMacroAssembler::pop_interpreter_frame(Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
  assert_different_registers(tmp1, tmp2, tmp3, tmp4);

  ld(tmp1/*caller's sp*/, _abi(callers_sp), R1_SP);
  ld(tmp3, _abi(lr), tmp1);

  ld(tmp4, _parent_ijava_frame_abi(initial_caller_sp), tmp1);

  ld(tmp2/*caller's caller's sp*/, _abi(callers_sp), tmp1);
  // Merge top frame.
  std(tmp2, _abi(callers_sp), R1_SP);

  ld(tmp2, _parent_ijava_frame_abi(top_frame_sp), tmp1);

  // Update C stack pointer to caller's top_abi.
  resize_frame_absolute(tmp2/*addr*/, tmp1/*tmp*/, tmp2/*tmp*/);

  // Update LR in top_frame.
  std(tmp3, _top_ijava_frame_abi(frame_manager_lr), R1_SP);

  std(tmp4, _top_ijava_frame_abi(initial_caller_sp), R1_SP);

  // Store the top-frame stack-pointer for c2i adapters.
  std(R1_SP, _top_ijava_frame_abi(top_frame_sp), R1_SP);
}

// Turn state's interpreter frame into the current TOP_IJAVA_FRAME.
void InterpreterMacroAssembler::pop_interpreter_frame_to_state(Register state, Register tmp1, Register tmp2, Register tmp3) {
  assert_different_registers(R14_state, R15_prev_state, tmp1, tmp2, tmp3);

  if (state == R14_state) {
    ld(tmp1/*state's fp*/, state_(_last_Java_fp));
    ld(tmp2/*state's sp*/, state_(_last_Java_sp));
  } else if (state == R15_prev_state) {
    ld(tmp1/*state's fp*/, prev_state_(_last_Java_fp));
    ld(tmp2/*state's sp*/, prev_state_(_last_Java_sp));
  } else {
    ShouldNotReachHere();
  }

  // Merge top frames.
  std(tmp1, _abi(callers_sp), R1_SP);

  // Tmp2 is new SP.
  // Tmp1 is parent's SP.
  resize_frame_absolute(tmp2/*addr*/, tmp1/*tmp*/, tmp2/*tmp*/);

  // Update LR in top_frame.
  // Must be interpreter frame.
  get_PC_trash_LR(tmp3);
  std(tmp3, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
  // Used for non-initial callers by unextended_sp().
  std(R1_SP, _top_ijava_frame_abi(initial_caller_sp), R1_SP);
}

// Set SP to initial caller's sp, but fix the back chain first.
void InterpreterMacroAssembler::resize_frame_to_initial_caller(Register tmp1, Register tmp2) {
  ld(tmp1, _parent_ijava_frame_abi(initial_caller_sp), R1_SP);
  ld(tmp2, _parent_ijava_frame_abi(callers_sp), R1_SP);
  std(tmp2, _parent_ijava_frame_abi(callers_sp), tmp1); // Fix back chain ...
  mr(R1_SP, tmp1); // ... and resize to initial caller.
}

// Pop the current interpreter state (without popping the corresponding
// frame) and restore R14_state and R15_prev_state accordingly.
// Use prev_state_may_be_0 to indicate whether prev_state may be 0
// in order to generate an extra check before retrieving prev_state_(_prev_link).
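// Roughly (a sketch):
//   R14_state = R15_prev_state;
//   if (R14_state != NULL)  // check only emitted if prev_state_may_be_0
//     R15_prev_state = R14_state->_prev_link;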
void InterpreterMacroAssembler::pop_interpreter_state(bool prev_state_may_be_0)
{
  // Move prev_state to state and restore prev_state from state_(_prev_link).
  Label prev_state_is_0;
  mr(R14_state, R15_prev_state);

  // Don't retrieve /*state==*/prev_state_(_prev_link)
  // if /*state==*/prev_state is 0.
  if (prev_state_may_be_0) {
    cmpdi(CCR0, R15_prev_state, 0);
    beq(CCR0, prev_state_is_0);
  }

  ld(R15_prev_state, /*state==*/prev_state_(_prev_link));
  bind(prev_state_is_0);
}

void InterpreterMacroAssembler::restore_prev_state() {
  // _prev_link is private, but cInterpreter is a friend.
  ld(R15_prev_state, state_(_prev_link));
}
#endif // CC_INTERP