view src/cpu/ppc/vm/ppc.ad @ 20304:a22acf6d7598

8048112: G1 Full GC needs to support the case when the very first region is not available Summary: Refactor preparation for compaction during Full GC so that it lazily initializes the first compaction point. This also avoids problems later when the first region may not be committed. Also reviewed by K. Barrett. Reviewed-by: brutisso
author tschatzl
date Mon, 21 Jul 2014 10:00:31 +0200
parents 0bf37f737702
children f6bde7889409
line wrap: on
line source

//
// Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
// Copyright 2012, 2014 SAP AG. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

//
// PPC64 Architecture Description File
//

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.
register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name (register save type, C convention save type,
//                  ideal register type, encoding);
//
// Register Save Types:
//
//   NS  = No-Save:     The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
//   SOC = Save-On-Call: The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//                      These are called "volatiles" on ppc.
//
//   SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//                      These are called "nonvolatiles" on ppc.
//
//   AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
//
// PPC64 register definitions, based on the 64-bit PowerPC ELF ABI
// Supplement Version 1.7 as of 2003-10-29.
//
// For each 64-bit register we must define two registers: the register
// itself, e.g. R3, and a corresponding virtual other (32-bit-)'half',
// e.g. R3_H, which is needed by the allocator, but is not used
// for stores, loads, etc.

// ----------------------------
// Integer/Long Registers
// ----------------------------

  // PPC64 has 32 64-bit integer registers.

  // types: v = volatile, nv = non-volatile, s = system
  reg_def R0   ( SOC, SOC, Op_RegI,  0, R0->as_VMReg()         );  // v   used in prologs
  reg_def R0_H ( SOC, SOC, Op_RegI, 99, R0->as_VMReg()->next() );
  reg_def R1   ( NS,  NS,  Op_RegI,  1, R1->as_VMReg()         );  // s   SP
  reg_def R1_H ( NS,  NS,  Op_RegI, 99, R1->as_VMReg()->next() );
  reg_def R2   ( SOC, SOC, Op_RegI,  2, R2->as_VMReg()         );  // v   TOC
  reg_def R2_H ( SOC, SOC, Op_RegI, 99, R2->as_VMReg()->next() );
  reg_def R3   ( SOC, SOC, Op_RegI,  3, R3->as_VMReg()         );  // v   iarg1 & iret
  reg_def R3_H ( SOC, SOC, Op_RegI, 99, R3->as_VMReg()->next() );
  reg_def R4   ( SOC, SOC, Op_RegI,  4, R4->as_VMReg()         );  //     iarg2
  reg_def R4_H ( SOC, SOC, Op_RegI, 99, R4->as_VMReg()->next() );
  reg_def R5   ( SOC, SOC, Op_RegI,  5, R5->as_VMReg()         );  // v   iarg3
  reg_def R5_H ( SOC, SOC, Op_RegI, 99, R5->as_VMReg()->next() );
  reg_def R6   ( SOC, SOC, Op_RegI,  6, R6->as_VMReg()         );  // v   iarg4
  reg_def R6_H ( SOC, SOC, Op_RegI, 99, R6->as_VMReg()->next() );
  reg_def R7   ( SOC, SOC, Op_RegI,  7, R7->as_VMReg()         );  // v   iarg5
  reg_def R7_H ( SOC, SOC, Op_RegI, 99, R7->as_VMReg()->next() );
  reg_def R8   ( SOC, SOC, Op_RegI,  8, R8->as_VMReg()         );  // v   iarg6
  reg_def R8_H ( SOC, SOC, Op_RegI, 99, R8->as_VMReg()->next() );
  reg_def R9   ( SOC, SOC, Op_RegI,  9, R9->as_VMReg()         );  // v   iarg7
  reg_def R9_H ( SOC, SOC, Op_RegI, 99, R9->as_VMReg()->next() );
  reg_def R10  ( SOC, SOC, Op_RegI, 10, R10->as_VMReg()        );  // v   iarg8
  reg_def R10_H( SOC, SOC, Op_RegI, 99, R10->as_VMReg()->next());
  reg_def R11  ( SOC, SOC, Op_RegI, 11, R11->as_VMReg()        );  // v   ENV / scratch
  reg_def R11_H( SOC, SOC, Op_RegI, 99, R11->as_VMReg()->next());
  reg_def R12  ( SOC, SOC, Op_RegI, 12, R12->as_VMReg()        );  // v   scratch
  reg_def R12_H( SOC, SOC, Op_RegI, 99, R12->as_VMReg()->next());
  reg_def R13  ( NS,  NS,  Op_RegI, 13, R13->as_VMReg()        );  // s   system thread id
  reg_def R13_H( NS,  NS,  Op_RegI, 99, R13->as_VMReg()->next());
  reg_def R14  ( SOC, SOE, Op_RegI, 14, R14->as_VMReg()        );  // nv
  reg_def R14_H( SOC, SOE, Op_RegI, 99, R14->as_VMReg()->next());
  reg_def R15  ( SOC, SOE, Op_RegI, 15, R15->as_VMReg()        );  // nv
  reg_def R15_H( SOC, SOE, Op_RegI, 99, R15->as_VMReg()->next());
  reg_def R16  ( SOC, SOE, Op_RegI, 16, R16->as_VMReg()        );  // nv
  reg_def R16_H( SOC, SOE, Op_RegI, 99, R16->as_VMReg()->next());
  reg_def R17  ( SOC, SOE, Op_RegI, 17, R17->as_VMReg()        );  // nv
  reg_def R17_H( SOC, SOE, Op_RegI, 99, R17->as_VMReg()->next());
  reg_def R18  ( SOC, SOE, Op_RegI, 18, R18->as_VMReg()        );  // nv
  reg_def R18_H( SOC, SOE, Op_RegI, 99, R18->as_VMReg()->next());
  reg_def R19  ( SOC, SOE, Op_RegI, 19, R19->as_VMReg()        );  // nv
  reg_def R19_H( SOC, SOE, Op_RegI, 99, R19->as_VMReg()->next());
  reg_def R20  ( SOC, SOE, Op_RegI, 20, R20->as_VMReg()        );  // nv
  reg_def R20_H( SOC, SOE, Op_RegI, 99, R20->as_VMReg()->next());
  reg_def R21  ( SOC, SOE, Op_RegI, 21, R21->as_VMReg()        );  // nv
  reg_def R21_H( SOC, SOE, Op_RegI, 99, R21->as_VMReg()->next());
  reg_def R22  ( SOC, SOE, Op_RegI, 22, R22->as_VMReg()        );  // nv
  reg_def R22_H( SOC, SOE, Op_RegI, 99, R22->as_VMReg()->next());
  reg_def R23  ( SOC, SOE, Op_RegI, 23, R23->as_VMReg()        );  // nv
  reg_def R23_H( SOC, SOE, Op_RegI, 99, R23->as_VMReg()->next());
  reg_def R24  ( SOC, SOE, Op_RegI, 24, R24->as_VMReg()        );  // nv
  reg_def R24_H( SOC, SOE, Op_RegI, 99, R24->as_VMReg()->next());
  reg_def R25  ( SOC, SOE, Op_RegI, 25, R25->as_VMReg()        );  // nv
  reg_def R25_H( SOC, SOE, Op_RegI, 99, R25->as_VMReg()->next());
  reg_def R26  ( SOC, SOE, Op_RegI, 26, R26->as_VMReg()        );  // nv
  reg_def R26_H( SOC, SOE, Op_RegI, 99, R26->as_VMReg()->next());
  reg_def R27  ( SOC, SOE, Op_RegI, 27, R27->as_VMReg()        );  // nv
  reg_def R27_H( SOC, SOE, Op_RegI, 99, R27->as_VMReg()->next());
  reg_def R28  ( SOC, SOE, Op_RegI, 28, R28->as_VMReg()        );  // nv
  reg_def R28_H( SOC, SOE, Op_RegI, 99, R28->as_VMReg()->next());
  reg_def R29  ( SOC, SOE, Op_RegI, 29, R29->as_VMReg()        );  // nv
  reg_def R29_H( SOC, SOE, Op_RegI, 99, R29->as_VMReg()->next());
  reg_def R30  ( SOC, SOE, Op_RegI, 30, R30->as_VMReg()        );  // nv
  reg_def R30_H( SOC, SOE, Op_RegI, 99, R30->as_VMReg()->next());
  reg_def R31  ( SOC, SOE, Op_RegI, 31, R31->as_VMReg()        );  // nv
  reg_def R31_H( SOC, SOE, Op_RegI, 99, R31->as_VMReg()->next());


// ----------------------------
// Float/Double Registers
// ----------------------------

  // Double Registers
  // The rules of ADL require that double registers be defined in pairs.
  // Each pair must be two 32-bit values, but not necessarily a pair of
  // single float registers. In each pair, ADLC-assigned register numbers
  // must be adjacent, with the lower number even. Finally, when the
  // CPU stores such a register pair to memory, the word associated with
  // the lower ADLC-assigned number must be stored to the lower address.

  // PPC64 has 32 64-bit floating-point registers. Each can store a single
  // or double precision floating-point value.

  // types: v = volatile, nv = non-volatile, s = system
  reg_def F0   ( SOC, SOC, Op_RegF,  0, F0->as_VMReg()         );  // v   scratch
  reg_def F0_H ( SOC, SOC, Op_RegF, 99, F0->as_VMReg()->next() );
  reg_def F1   ( SOC, SOC, Op_RegF,  1, F1->as_VMReg()         );  // v   farg1 & fret
  reg_def F1_H ( SOC, SOC, Op_RegF, 99, F1->as_VMReg()->next() );
  reg_def F2   ( SOC, SOC, Op_RegF,  2, F2->as_VMReg()         );  // v   farg2
  reg_def F2_H ( SOC, SOC, Op_RegF, 99, F2->as_VMReg()->next() );
  reg_def F3   ( SOC, SOC, Op_RegF,  3, F3->as_VMReg()         );  // v   farg3
  reg_def F3_H ( SOC, SOC, Op_RegF, 99, F3->as_VMReg()->next() );
  reg_def F4   ( SOC, SOC, Op_RegF,  4, F4->as_VMReg()         );  // v   farg4
  reg_def F4_H ( SOC, SOC, Op_RegF, 99, F4->as_VMReg()->next() );
  reg_def F5   ( SOC, SOC, Op_RegF,  5, F5->as_VMReg()         );  // v   farg5
  reg_def F5_H ( SOC, SOC, Op_RegF, 99, F5->as_VMReg()->next() );
  reg_def F6   ( SOC, SOC, Op_RegF,  6, F6->as_VMReg()         );  // v   farg6
  reg_def F6_H ( SOC, SOC, Op_RegF, 99, F6->as_VMReg()->next() );
  reg_def F7   ( SOC, SOC, Op_RegF,  7, F7->as_VMReg()         );  // v   farg7
  reg_def F7_H ( SOC, SOC, Op_RegF, 99, F7->as_VMReg()->next() );
  reg_def F8   ( SOC, SOC, Op_RegF,  8, F8->as_VMReg()         );  // v   farg8
  reg_def F8_H ( SOC, SOC, Op_RegF, 99, F8->as_VMReg()->next() );
  reg_def F9   ( SOC, SOC, Op_RegF,  9, F9->as_VMReg()         );  // v   farg9
  reg_def F9_H ( SOC, SOC, Op_RegF, 99, F9->as_VMReg()->next() );
  reg_def F10  ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()        );  // v   farg10
  reg_def F10_H( SOC, SOC, Op_RegF, 99, F10->as_VMReg()->next());
  reg_def F11  ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()        );  // v   farg11
  reg_def F11_H( SOC, SOC, Op_RegF, 99, F11->as_VMReg()->next());
  reg_def F12  ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()        );  // v   farg12
  reg_def F12_H( SOC, SOC, Op_RegF, 99, F12->as_VMReg()->next());
  reg_def F13  ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()        );  // v   farg13
  reg_def F13_H( SOC, SOC, Op_RegF, 99, F13->as_VMReg()->next());
  reg_def F14  ( SOC, SOE, Op_RegF, 14, F14->as_VMReg()        );  // nv
  reg_def F14_H( SOC, SOE, Op_RegF, 99, F14->as_VMReg()->next());
  reg_def F15  ( SOC, SOE, Op_RegF, 15, F15->as_VMReg()        );  // nv
  reg_def F15_H( SOC, SOE, Op_RegF, 99, F15->as_VMReg()->next());
  reg_def F16  ( SOC, SOE, Op_RegF, 16, F16->as_VMReg()        );  // nv
  reg_def F16_H( SOC, SOE, Op_RegF, 99, F16->as_VMReg()->next());
  reg_def F17  ( SOC, SOE, Op_RegF, 17, F17->as_VMReg()        );  // nv
  reg_def F17_H( SOC, SOE, Op_RegF, 99, F17->as_VMReg()->next());
  reg_def F18  ( SOC, SOE, Op_RegF, 18, F18->as_VMReg()        );  // nv
  reg_def F18_H( SOC, SOE, Op_RegF, 99, F18->as_VMReg()->next());
  reg_def F19  ( SOC, SOE, Op_RegF, 19, F19->as_VMReg()        );  // nv
  reg_def F19_H( SOC, SOE, Op_RegF, 99, F19->as_VMReg()->next());
  reg_def F20  ( SOC, SOE, Op_RegF, 20, F20->as_VMReg()        );  // nv
  reg_def F20_H( SOC, SOE, Op_RegF, 99, F20->as_VMReg()->next());
  reg_def F21  ( SOC, SOE, Op_RegF, 21, F21->as_VMReg()        );  // nv
  reg_def F21_H( SOC, SOE, Op_RegF, 99, F21->as_VMReg()->next());
  reg_def F22  ( SOC, SOE, Op_RegF, 22, F22->as_VMReg()        );  // nv
  reg_def F22_H( SOC, SOE, Op_RegF, 99, F22->as_VMReg()->next());
  reg_def F23  ( SOC, SOE, Op_RegF, 23, F23->as_VMReg()        );  // nv
  reg_def F23_H( SOC, SOE, Op_RegF, 99, F23->as_VMReg()->next());
  reg_def F24  ( SOC, SOE, Op_RegF, 24, F24->as_VMReg()        );  // nv
  reg_def F24_H( SOC, SOE, Op_RegF, 99, F24->as_VMReg()->next());
  reg_def F25  ( SOC, SOE, Op_RegF, 25, F25->as_VMReg()        );  // nv
  reg_def F25_H( SOC, SOE, Op_RegF, 99, F25->as_VMReg()->next());
  reg_def F26  ( SOC, SOE, Op_RegF, 26, F26->as_VMReg()        );  // nv
  reg_def F26_H( SOC, SOE, Op_RegF, 99, F26->as_VMReg()->next());
  reg_def F27  ( SOC, SOE, Op_RegF, 27, F27->as_VMReg()        );  // nv
  reg_def F27_H( SOC, SOE, Op_RegF, 99, F27->as_VMReg()->next());
  reg_def F28  ( SOC, SOE, Op_RegF, 28, F28->as_VMReg()        );  // nv
  reg_def F28_H( SOC, SOE, Op_RegF, 99, F28->as_VMReg()->next());
  reg_def F29  ( SOC, SOE, Op_RegF, 29, F29->as_VMReg()        );  // nv
  reg_def F29_H( SOC, SOE, Op_RegF, 99, F29->as_VMReg()->next());
  reg_def F30  ( SOC, SOE, Op_RegF, 30, F30->as_VMReg()        );  // nv
  reg_def F30_H( SOC, SOE, Op_RegF, 99, F30->as_VMReg()->next());
  reg_def F31  ( SOC, SOE, Op_RegF, 31, F31->as_VMReg()        );  // nv
  reg_def F31_H( SOC, SOE, Op_RegF, 99, F31->as_VMReg()->next());

// ----------------------------
// Special Registers
// ----------------------------

// Condition Codes Flag Registers

  // PPC64 has 8 condition code "registers" which are all contained
  // in the CR register.

  // types: v = volatile, nv = non-volatile, s = system
  reg_def CCR0(SOC, SOC, Op_RegFlags, 0, CCR0->as_VMReg());  // v
  reg_def CCR1(SOC, SOC, Op_RegFlags, 1, CCR1->as_VMReg());  // v
  reg_def CCR2(SOC, SOC, Op_RegFlags, 2, CCR2->as_VMReg());  // nv
  reg_def CCR3(SOC, SOC, Op_RegFlags, 3, CCR3->as_VMReg());  // nv
  reg_def CCR4(SOC, SOC, Op_RegFlags, 4, CCR4->as_VMReg());  // nv
  reg_def CCR5(SOC, SOC, Op_RegFlags, 5, CCR5->as_VMReg());  // v
  reg_def CCR6(SOC, SOC, Op_RegFlags, 6, CCR6->as_VMReg());  // v
  reg_def CCR7(SOC, SOC, Op_RegFlags, 7, CCR7->as_VMReg());  // v

  // Special registers of PPC64

  reg_def SR_XER(    SOC, SOC, Op_RegP, 0, SR_XER->as_VMReg());     // v
  reg_def SR_LR(     SOC, SOC, Op_RegP, 1, SR_LR->as_VMReg());      // v
  reg_def SR_CTR(    SOC, SOC, Op_RegP, 2, SR_CTR->as_VMReg());     // v
  reg_def SR_VRSAVE( SOC, SOC, Op_RegP, 3, SR_VRSAVE->as_VMReg());  // v
  reg_def SR_SPEFSCR(SOC, SOC, Op_RegP, 4, SR_SPEFSCR->as_VMReg()); // v
  reg_def SR_PPR(    SOC, SOC, Op_RegP, 5, SR_PPR->as_VMReg());     // v


// ----------------------------
// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry. Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

// It's worth about 1% on SPEC geomean to get this right.

// Chunk0, chunk1, and chunk2 form the MachRegisterNumbers enumeration
// in adGlobals_ppc64.hpp which defines the <register>_num values, e.g.
// R3_num. Therefore, R3_num may not be (and in reality is not)
// the same as R3->encoding()! Furthermore, we cannot make any
// assumptions on ordering, e.g. R3_num may be less than R2_num.
// Additionally, the function
//   static enum RC rc_class(OptoReg::Name reg )
// maps a given <register>_num value to its chunk type (except for flags)
// and its current implementation relies on chunk0 and chunk1 having a
// size of 64 each.

// If you change this allocation class, please have a look at the
// default values for the parameters RoundRobinIntegerRegIntervalStart
// and RoundRobinFloatRegIntervalStart

alloc_class chunk0 (
  // Chunk0 contains *all* 64 integer registers halves.

  // "non-volatile" registers
  R14, R14_H,
  R15, R15_H,
  R17, R17_H,
  R18, R18_H,
  R19, R19_H,
  R20, R20_H,
  R21, R21_H,
  R22, R22_H,
  R23, R23_H,
  R24, R24_H,
  R25, R25_H,
  R26, R26_H,
  R27, R27_H,
  R28, R28_H,
  R29, R29_H,
  R30, R30_H,
  R31, R31_H,

  // scratch/special registers
  R11, R11_H,
  R12, R12_H,

  // argument registers
  R10, R10_H,
  R9,  R9_H,
  R8,  R8_H,
  R7,  R7_H,
  R6,  R6_H,
  R5,  R5_H,
  R4,  R4_H,
  R3,  R3_H,

  // special registers, not available for allocation
  R16, R16_H,     // R16_thread
  R13, R13_H,     // system thread id
  R2,  R2_H,      // may be used for TOC
  R1,  R1_H,      // SP
  R0,  R0_H       // R0 (scratch)
);

// If you change this allocation class, please have a look at the
// default values for the parameters RoundRobinIntegerRegIntervalStart
// and RoundRobinFloatRegIntervalStart

alloc_class chunk1 (
  // Chunk1 contains *all* 64 floating-point registers halves.

  // scratch register
  F0,  F0_H,

  // argument registers
  F13, F13_H,
  F12, F12_H,
  F11, F11_H,
  F10, F10_H,
  F9,  F9_H,
  F8,  F8_H,
  F7,  F7_H,
  F6,  F6_H,
  F5,  F5_H,
  F4,  F4_H,
  F3,  F3_H,
  F2,  F2_H,
  F1,  F1_H,

  // non-volatile registers
  F14, F14_H,
  F15, F15_H,
  F16, F16_H,
  F17, F17_H,
  F18, F18_H,
  F19, F19_H,
  F20, F20_H,
  F21, F21_H,
  F22, F22_H,
  F23, F23_H,
  F24, F24_H,
  F25, F25_H,
  F26, F26_H,
  F27, F27_H,
  F28, F28_H,
  F29, F29_H,
  F30, F30_H,
  F31, F31_H
);

alloc_class chunk2 (
  // Chunk2 contains *all* 8 condition code registers.

  CCR0,
  CCR1,
  CCR2,
  CCR3,
  CCR4,
  CCR5,
  CCR6,
  CCR7
);

alloc_class chunk3 (
  // special registers
  // These registers are not allocated, but used for nodes generated by postalloc expand.
  SR_XER,
  SR_LR,
  SR_CTR,
  SR_VRSAVE,
  SR_SPEFSCR,
  SR_PPR
);

//-------Architecture Description Register Classes-----------------------

// Several register classes are automatically defined based upon
// information in this architecture description.

// 1) reg_class inline_cache_reg           ( as defined in frame section )
// 2) reg_class compiler_method_oop_reg    ( as defined in frame section )
// 2) reg_class interpreter_method_oop_reg ( as defined in frame section )
// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// ----------------------------
// 32 Bit Register Classes
// ----------------------------

// We specify registers twice, once as read/write, and once read-only.
// We use the read-only registers for source operands. With this, we
// can include preset read only registers in this class, as a hard-coded
// '0'-register. (We used to simulate this on ppc.)

// 32 bit registers that can be read and written i.e. these registers
// can be dest (or src) of normal instructions.
reg_class bits32_reg_rw(
/*R0*/              // R0
/*R1*/              // SP
  R2,               // TOC
  R3,
  R4,
  R5,
  R6,
  R7,
  R8,
  R9,
  R10,
  R11,
  R12,
/*R13*/             // system thread id
  R14,
  R15,
/*R16*/             // R16_thread
  R17,
  R18,
  R19,
  R20,
  R21,
  R22,
  R23,
  R24,
  R25,
  R26,
  R27,
  R28,
/*R29*/             // global TOC
/*R30*/             // Narrow Oop Base
  R31
);

// 32 bit registers that can only be read i.e. these registers can
// only be src of all instructions.
reg_class bits32_reg_ro(
/*R0*/              // R0
/*R1*/              // SP
  R2                // TOC
  R3,
  R4,
  R5,
  R6,
  R7,
  R8,
  R9,
  R10,
  R11,
  R12,
/*R13*/             // system thread id
  R14,
  R15,
/*R16*/             // R16_thread
  R17,
  R18,
  R19,
  R20,
  R21,
  R22,
  R23,
  R24,
  R25,
  R26,
  R27,
  R28,
/*R29*/
/*R30*/             // Narrow Oop Base
  R31
);

// Complement-required-in-pipeline operands for narrow oops.
reg_class bits32_reg_ro_not_complement (
/*R0*/     // R0
  R1,      // SP
  R2,      // TOC
  R3,
  R4,
  R5,
  R6,
  R7,
  R8,
  R9,
  R10,
  R11,
  R12,
/*R13,*/   // system thread id
  R14,
  R15,
  R16,    // R16_thread
  R17,
  R18,
  R19,
  R20,
  R21,
  R22,
/*R23,
  R24,
  R25,
  R26,
  R27,
  R28,*/
/*R29,*/ // TODO: let allocator handle TOC!!
/*R30,*/
  R31
);

// Complement-required-in-pipeline operands for narrow oops.
// See 64-bit declaration.
reg_class bits32_reg_ro_complement (
  R23,
  R24,
  R25,
  R26,
  R27,
  R28
);

reg_class rscratch1_bits32_reg(R11);
reg_class rscratch2_bits32_reg(R12);
reg_class rarg1_bits32_reg(R3);
reg_class rarg2_bits32_reg(R4);
reg_class rarg3_bits32_reg(R5);
reg_class rarg4_bits32_reg(R6);

// ----------------------------
// 64 Bit Register Classes
// ----------------------------
// 64-bit build means 64-bit pointers means hi/lo pairs

reg_class rscratch1_bits64_reg(R11_H, R11);
reg_class rscratch2_bits64_reg(R12_H, R12);
reg_class rarg1_bits64_reg(R3_H, R3);
reg_class rarg2_bits64_reg(R4_H, R4);
reg_class rarg3_bits64_reg(R5_H, R5);
reg_class rarg4_bits64_reg(R6_H, R6);
// Thread register, 'written' by tlsLoadP, see there.
reg_class thread_bits64_reg(R16_H, R16);

reg_class r19_bits64_reg(R19_H, R19);

// 64 bit registers that can be read and written i.e. these registers
// can be dest (or src) of normal instructions.
reg_class bits64_reg_rw(
/*R0_H,  R0*/     // R0
/*R1_H,  R1*/     // SP
  R2_H,  R2,      // TOC
  R3_H,  R3,
  R4_H,  R4,
  R5_H,  R5,
  R6_H,  R6,
  R7_H,  R7,
  R8_H,  R8,
  R9_H,  R9,
  R10_H, R10,
  R11_H, R11,
  R12_H, R12,
/*R13_H, R13*/   // system thread id
  R14_H, R14,
  R15_H, R15,
/*R16_H, R16*/   // R16_thread
  R17_H, R17,
  R18_H, R18,
  R19_H, R19,
  R20_H, R20,
  R21_H, R21,
  R22_H, R22,
  R23_H, R23,
  R24_H, R24,
  R25_H, R25,
  R26_H, R26,
  R27_H, R27,
  R28_H, R28,
/*R29_H, R29*/
/*R30_H, R30*/
  R31_H, R31
);

// 64 bit registers used excluding r2, r11 and r12
// Used to hold the TOC to avoid collisions with expanded LeafCall which uses
// r2, r11 and r12 internally.
reg_class bits64_reg_leaf_call(
/*R0_H,  R0*/     // R0
/*R1_H,  R1*/     // SP
/*R2_H,  R2*/     // TOC
  R3_H,  R3,
  R4_H,  R4,
  R5_H,  R5,
  R6_H,  R6,
  R7_H,  R7,
  R8_H,  R8,
  R9_H,  R9,
  R10_H, R10,
/*R11_H, R11*/
/*R12_H, R12*/
/*R13_H, R13*/   // system thread id
  R14_H, R14,
  R15_H, R15,
/*R16_H, R16*/   // R16_thread
  R17_H, R17,
  R18_H, R18,
  R19_H, R19,
  R20_H, R20,
  R21_H, R21,
  R22_H, R22,
  R23_H, R23,
  R24_H, R24,
  R25_H, R25,
  R26_H, R26,
  R27_H, R27,
  R28_H, R28,
/*R29_H, R29*/
/*R30_H, R30*/
  R31_H, R31
);

// Used to hold the TOC to avoid collisions with expanded DynamicCall
// which uses r19 as inline cache internally and expanded LeafCall which uses
// r2, r11 and r12 internally.
reg_class bits64_constant_table_base(
/*R0_H,  R0*/     // R0
/*R1_H,  R1*/     // SP
/*R2_H,  R2*/     // TOC
  R3_H,  R3,
  R4_H,  R4,
  R5_H,  R5,
  R6_H,  R6,
  R7_H,  R7,
  R8_H,  R8,
  R9_H,  R9,
  R10_H, R10,
/*R11_H, R11*/
/*R12_H, R12*/
/*R13_H, R13*/   // system thread id
  R14_H, R14,
  R15_H, R15,
/*R16_H, R16*/   // R16_thread
  R17_H, R17,
  R18_H, R18,
/*R19_H, R19*/
  R20_H, R20,
  R21_H, R21,
  R22_H, R22,
  R23_H, R23,
  R24_H, R24,
  R25_H, R25,
  R26_H, R26,
  R27_H, R27,
  R28_H, R28,
/*R29_H, R29*/
/*R30_H, R30*/
  R31_H, R31
);

// 64 bit registers that can only be read i.e. these registers can
// only be src of all instructions.
reg_class bits64_reg_ro(
/*R0_H,  R0*/     // R0
  R1_H,  R1,
  R2_H,  R2,       // TOC
  R3_H,  R3,
  R4_H,  R4,
  R5_H,  R5,
  R6_H,  R6,
  R7_H,  R7,
  R8_H,  R8,
  R9_H,  R9,
  R10_H, R10,
  R11_H, R11,
  R12_H, R12,
/*R13_H, R13*/   // system thread id
  R14_H, R14,
  R15_H, R15,
  R16_H, R16,    // R16_thread
  R17_H, R17,
  R18_H, R18,
  R19_H, R19,
  R20_H, R20,
  R21_H, R21,
  R22_H, R22,
  R23_H, R23,
  R24_H, R24,
  R25_H, R25,
  R26_H, R26,
  R27_H, R27,
  R28_H, R28,
/*R29_H, R29*/ // TODO: let allocator handle TOC!!
/*R30_H, R30,*/
  R31_H, R31
);

// Complement-required-in-pipeline operands.
reg_class bits64_reg_ro_not_complement (
/*R0_H,  R0*/     // R0
  R1_H,  R1,      // SP
  R2_H,  R2,      // TOC
  R3_H,  R3,
  R4_H,  R4,
  R5_H,  R5,
  R6_H,  R6,
  R7_H,  R7,
  R8_H,  R8,
  R9_H,  R9,
  R10_H, R10,
  R11_H, R11,
  R12_H, R12,
/*R13_H, R13*/   // system thread id
  R14_H, R14,
  R15_H, R15,
  R16_H, R16,    // R16_thread
  R17_H, R17,
  R18_H, R18,
  R19_H, R19,
  R20_H, R20,
  R21_H, R21,
  R22_H, R22,
/*R23_H, R23,
  R24_H, R24,
  R25_H, R25,
  R26_H, R26,
  R27_H, R27,
  R28_H, R28,*/
/*R29_H, R29*/ // TODO: let allocator handle TOC!!
/*R30_H, R30,*/
  R31_H, R31
);

// Complement-required-in-pipeline operands.
// This register mask is used for the trap instructions that implement
// the null checks on AIX. The trap instruction first computes the
// complement of the value it shall trap on. Because of this, the
// instruction can not be scheduled in the same cycle as an other
// instruction reading the normal value of the same register. So we
// force the value to check into 'bits64_reg_ro_not_complement'
// and then copy it to 'bits64_reg_ro_complement' for the trap.
reg_class bits64_reg_ro_complement (
  R23_H, R23,
  R24_H, R24,
  R25_H, R25,
  R26_H, R26,
  R27_H, R27,
  R28_H, R28
);


// ----------------------------
// Special Class for Condition Code Flags Register

reg_class int_flags(
/*CCR0*/             // scratch
/*CCR1*/             // scratch
/*CCR2*/             // nv!
/*CCR3*/             // nv!
/*CCR4*/             // nv!
  CCR5,
  CCR6,
  CCR7
);

reg_class int_flags_CR0(CCR0);
reg_class int_flags_CR1(CCR1);
reg_class int_flags_CR6(CCR6);
reg_class ctr_reg(SR_CTR);

// ----------------------------
// Float Register Classes
// ----------------------------

reg_class flt_reg(
/*F0*/              // scratch
  F1,
  F2,
  F3,
  F4,
  F5,
  F6,
  F7,
  F8,
  F9,
  F10,
  F11,
  F12,
  F13,
  F14,              // nv!
  F15,              // nv!
  F16,              // nv!
  F17,              // nv!
  F18,              // nv!
  F19,              // nv!
  F20,              // nv!
  F21,              // nv!
  F22,              // nv!
  F23,              // nv!
  F24,              // nv!
  F25,              // nv!
  F26,              // nv!
  F27,              // nv!
  F28,              // nv!
  F29,              // nv!
  F30,              // nv!
  F31               // nv!
);

// Double precision float registers have virtual `high halves' that
// are needed by the allocator.
reg_class dbl_reg(
/*F0,  F0_H*/     // scratch
  F1,  F1_H,
  F2,  F2_H,
  F3,  F3_H,
  F4,  F4_H,
  F5,  F5_H,
  F6,  F6_H,
  F7,  F7_H,
  F8,  F8_H,
  F9,  F9_H,
  F10, F10_H,
  F11, F11_H,
  F12, F12_H,
  F13, F13_H,
  F14, F14_H,    // nv!
  F15, F15_H,    // nv!
  F16, F16_H,    // nv!
  F17, F17_H,    // nv!
  F18, F18_H,    // nv!
  F19, F19_H,    // nv!
  F20, F20_H,    // nv!
  F21, F21_H,    // nv!
  F22, F22_H,    // nv!
  F23, F23_H,    // nv!
  F24, F24_H,    // nv!
  F25, F25_H,    // nv!
  F26, F26_H,    // nv!
  F27, F27_H,    // nv!
  F28, F28_H,    // nv!
  F29, F29_H,    // nv!
  F30, F30_H,    // nv!
  F31, F31_H     // nv!
);

 %}

//----------DEFINITION BLOCK---------------------------------------------------
// Define name --> value mappings to inform the ADLC of an integer valued name
// Current support includes integer values in the range [0, 0x7FFFFFFF]
// Format:
//        int_def  <name>         ( <int_value>, <expression>);
// Generated Code in ad_<arch>.hpp
//        #define  <name>   (<expression>)
//        // value == <int_value>
// Generated code in ad_<arch>.cpp adlc_verification()
//        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
//
definitions %{
  // The default cost (of an ALU instruction).
  int_def DEFAULT_COST_LOW        (     30,      30);
  int_def DEFAULT_COST            (    100,     100);
  int_def HUGE_COST               (1000000, 1000000);

  // Memory refs
  int_def MEMORY_REF_COST_LOW     (    200, DEFAULT_COST * 2);
  int_def MEMORY_REF_COST         (    300, DEFAULT_COST * 3);

  // Branches are even more expensive.
  int_def BRANCH_COST             (    900, DEFAULT_COST * 9);
  int_def CALL_COST               (   1300, DEFAULT_COST * 13);
%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description.
source_hpp %{
  // Header information of the source block.
  // Method declarations/definitions which are used outside
  // the ad-scope can conveniently be defined here.
  //
  // To keep related declarations/definitions/uses close together,
  // we switch between source %{ }% and source_hpp %{ }% freely as needed.

  // Returns true if Node n is followed by a MemBar node that
  // will do an acquire. If so, this node must not do the acquire
  // operation.
  bool followed_by_acquire(const Node *n);
%}

source %{

// Optimize load-acquire.
//
// Check if acquire is unnecessary due to following operation that does
// acquire anyways.
// Walk the pattern:
//
//      n: Load.acq
//           |
//      MemBarAcquire
//       |         |
//  Proj(ctrl)  Proj(mem)
//       |         |
//   MemBarRelease/Volatile
//
bool followed_by_acquire(const Node *load) {
  assert(load->is_Load(), "So far implemented only for loads.");

  // Find MemBarAcquire.
  const Node *mba = NULL;
  for (DUIterator_Fast imax, i = load->fast_outs(imax); i < imax; i++) {
    const Node *out = load->fast_out(i);
    if (out->Opcode() == Op_MemBarAcquire) {
      if (out->in(0) == load) continue; // Skip control edge, membar should be found via precedence edge.
      mba = out;
      break;
    }
  }
  if (!mba) return false;

  // Find following MemBar node.
  //
  // The following node must be reachable by control AND memory
  // edge to assure no other operations are in between the two nodes.
  //
  // So first get the Proj node, mem_proj, to use it to iterate forward.
  Node *mem_proj = NULL;
  for (DUIterator_Fast imax, i = mba->fast_outs(imax); i < imax; i++) {
    mem_proj = mba->fast_out(i);      // Throw out-of-bounds if proj not found
    assert(mem_proj->is_Proj(), "only projections here");
    ProjNode *proj = mem_proj->as_Proj();
    if (proj->_con == TypeFunc::Memory &&
        !Compile::current()->node_arena()->contains(mem_proj)) // Unmatched old-space only
      break;
  }
  assert(mem_proj->as_Proj()->_con == TypeFunc::Memory, "Graph broken");

  // Search MemBar behind Proj. If there are other memory operations
  // behind the Proj we lost.
  for (DUIterator_Fast jmax, j = mem_proj->fast_outs(jmax); j < jmax; j++) {
    Node *x = mem_proj->fast_out(j);
    // Proj might have an edge to a store or load node which precedes the membar.
    if (x->is_Mem()) return false;

    // On PPC64 release and volatile are implemented by an instruction
    // that also has acquire semantics. I.e. there is no need for an
    // acquire before these.
    int xop = x->Opcode();
    if (xop == Op_MemBarRelease || xop == Op_MemBarVolatile) {
      // Make sure we're not missing Call/Phi/MergeMem by checking
      // control edges. The control edge must directly lead back
      // to the MemBarAcquire
      Node *ctrl_proj = x->in(0);
      if (ctrl_proj->is_Proj() && ctrl_proj->in(0) == mba) {
        return true;
      }
    }
  }

  return false;
}

#define __ _masm.

// Tertiary op of a LoadP or StoreP encoding.
#define REGP_OP true

// ****************************************************************************

// REQUIRED FUNCTIONALITY

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.

// PPC port: Removed use of lazy constant construct.

int MachCallStaticJavaNode::ret_addr_offset() {
  // It's only a single branch-and-link instruction.
  return 4;
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  // Offset is 4 with postalloc expanded calls (bl is one instruction). We use
  // postalloc expanded calls if we use inline caches and do not update method data.
  if (UseInlineCaches)
    return 4;

  int vtable_index = this->_vtable_index;
  if (vtable_index < 0) {
    // Must be invalid_vtable_index, not nonvirtual_vtable_index.
    assert(vtable_index == Method::invalid_vtable_index, "correct sentinel value");
    return 12;
  } else {
    assert(!UseInlineCaches, "expect vtable calls only if not using ICs");
    return 24;
  }
}

int MachCallRuntimeNode::ret_addr_offset() {
#if defined(ABI_ELFv2)
  return 28;
#else
  return 40;
#endif
}

//=============================================================================

// condition code conversions

static int cc_to_boint(int cc) {
  return Assembler::bcondCRbiIs0 | (cc & 8);
}

static int cc_to_inverse_boint(int cc) {
  return Assembler::bcondCRbiIs0 | (8-(cc & 8));
}

static int cc_to_biint(int cc, int flags_reg) {
  return (flags_reg << 2) | (cc & 3);
}

//=============================================================================

// Compute padding required for nodes which need alignment. The padding
// is the number of bytes (not instructions) which will be inserted before
// the instruction. The padding must match the size of a NOP instruction.

int string_indexOf_imm1_charNode::compute_padding(int current_offset) const {
  return (3*4-current_offset)&31;
}

int string_indexOf_imm1Node::compute_padding(int current_offset) const {
  return (2*4-current_offset)&31;
}

int string_indexOf_immNode::compute_padding(int current_offset) const {
  return (3*4-current_offset)&31;
}

int string_indexOfNode::compute_padding(int current_offset) const {
  return (1*4-current_offset)&31;
}

int string_compareNode::compute_padding(int current_offset) const {
  return (4*4-current_offset)&31;
}

int string_equals_immNode::compute_padding(int current_offset) const {
  if (opnd_array(3)->constant() < 16) return 0; // Don't insert nops for short version (loop completely unrolled).
  return (2*4-current_offset)&31;
}

int string_equalsNode::compute_padding(int current_offset) const {
  return (7*4-current_offset)&31;
}

int inlineCallClearArrayNode::compute_padding(int current_offset) const {
  return (2*4-current_offset)&31;
}

//=============================================================================

// Indicate if the safepoint node needs the polling page as an input.
bool SafePointNode::needs_polling_address_input() {
  // The address is loaded from thread by a seperate node.
  return true;
}

//=============================================================================

// Emit an interrupt that is caught by the debugger (for debugging compiler).
void emit_break(CodeBuffer &cbuf) {
  MacroAssembler _masm(&cbuf);
  __ illtrap();
}

#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
#endif

void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  emit_break(cbuf);
}

uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}

//=============================================================================

void emit_nop(CodeBuffer &cbuf) {
  MacroAssembler _masm(&cbuf);
  __ nop();
}

static inline void emit_long(CodeBuffer &cbuf, int value) {
  *((int*)(cbuf.insts_end())) = value;
  cbuf.set_insts_end(cbuf.insts_end() + BytesPerInstWord);
}

//=============================================================================

%} // interrupt source

source_hpp %{ // Header information of the source block.

//--------------------------------------------------------------
//---<  Used for optimization in Compile::Shorten_branches  >---
//--------------------------------------------------------------

const uint trampoline_stub_size     =  6 * BytesPerInstWord;

class CallStubImpl {

 public:

  // Emit call stub, compiled java to interpreter.
  static void emit_trampoline_stub(MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset);

  // Size of call trampoline stub.
  // This doesn't need to be accurate to the byte, but it
  // must be larger than or equal to the real size of the stub.
  static uint size_call_trampoline() {
    return trampoline_stub_size;
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 5;
  }

};

%} // end source_hpp

source %{

// Emit a trampoline stub for a call to a target which is too far away.
//
// code sequences:
//
// call-site:
//   branch-and-link to <destination> or <trampoline stub>
//
// Related trampoline stub for this call-site in the stub section:
//   load the call target from the constant pool
//   branch via CTR (LR/link still points to the call-site above)

void CallStubImpl::emit_trampoline_stub(MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset) {
  // Start the stub.
  address stub = __ start_a_stub(Compile::MAX_stubs_size/2);
  if (stub == NULL) {
    Compile::current()->env()->record_out_of_memory_failure();
    return;
  }

  // For java_to_interp stubs we use R11_scratch1 as scratch register
  // and in call trampoline stubs we use R12_scratch2. This way we
  // can distinguish them (see is_NativeCallTrampolineStub_at()).
  Register reg_scratch = R12_scratch2;

  // Create a trampoline stub relocation which relates this trampoline stub
  // with the call instruction at insts_call_instruction_offset in the
  // instructions code-section.
  __ relocate(trampoline_stub_Relocation::spec(__ code()->insts()->start() + insts_call_instruction_offset));
  const int stub_start_offset = __ offset();

  // Now, create the trampoline stub's code:
  // - load the TOC
  // - load the call target from the constant pool
  // - call
  __ calculate_address_from_global_toc(reg_scratch, __ method_toc());
  __ ld_largeoffset_unchecked(reg_scratch, destination_toc_offset, reg_scratch, false);
  __ mtctr(reg_scratch);
  __ bctr();

  const address stub_start_addr = __ addr_at(stub_start_offset);

  // FIXME: Assert that the trampoline stub can be identified and patched.

  // Assert that the encoded destination_toc_offset can be identified and that it is correct.
  assert(destination_toc_offset == NativeCallTrampolineStub_at(stub_start_addr)->destination_toc_offset(),
         "encoded offset into the constant pool must match");
  // Trampoline_stub_size should be good.
  assert((uint)(__ offset() - stub_start_offset) <= trampoline_stub_size, "should be good size");
  assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");

  // End the stub.
  __ end_a_stub();
}

//=============================================================================

// Emit an inline branch-and-link call and a related trampoline stub.
//
// code sequences:
//
// call-site:
//   branch-and-link to <destination> or <trampoline stub>
//
// Related trampoline stub for this call-site in the stub section:
//   load the call target from the constant pool
//   branch via CTR (LR/link still points to the call-site above)
//

typedef struct {
  int insts_call_instruction_offset;
  int ret_addr_offset;
} EmitCallOffsets;

// Emit a branch-and-link instruction that branches to a trampoline.
// - Remember the offset of the branch-and-link instruction.
// - Add a relocation at the branch-and-link instruction.
// - Emit a branch-and-link.
// - Remember the return pc offset.
EmitCallOffsets emit_call_with_trampoline_stub(MacroAssembler &_masm, address entry_point, relocInfo::relocType rtype) {
  EmitCallOffsets offsets = { -1, -1 };
  const int start_offset = __ offset();
  offsets.insts_call_instruction_offset = __ offset();

  // No entry point given, use the current pc.
  if (entry_point == NULL) entry_point = __ pc();

  if (!Compile::current()->in_scratch_emit_size()) {
    // Put the entry point as a constant into the constant pool.
    const address entry_point_toc_addr   = __ address_constant(entry_point, RelocationHolder::none);
    const int     entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);

    // Emit the trampoline stub which will be related to the branch-and-link below.
    CallStubImpl::emit_trampoline_stub(_masm, entry_point_toc_offset, offsets.insts_call_instruction_offset);
    __ relocate(rtype);
  }

  // Note: At this point we do not have the address of the trampoline
  // stub, and the entry point might be too far away for bl, so __ pc()
  // serves as dummy and the bl will be patched later.
  __ bl((address) __ pc());

  offsets.ret_addr_offset = __ offset() - start_offset;

  return offsets;
}

//=============================================================================

// Factory for creating loadConL* nodes for large/small constant pool.

static inline jlong replicate_immF(float con) {
  // Replicate float con 2 times and pack into vector.
  int val = *((int*)&con);
  jlong lval = val;
  lval = (lval << 32) | (lval & 0xFFFFFFFFl);
  return lval;
}

//=============================================================================

const RegMask& MachConstantBaseNode::_out_RegMask = BITS64_CONSTANT_TABLE_BASE_mask();
int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return true; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  Compile *C = ra_->C;

  iRegPdstOper *op_dst = new (C) iRegPdstOper();
  MachNode *m1 = new (C) loadToc_hiNode();
  MachNode *m2 = new (C) loadToc_loNode();

  m1->add_req(NULL);
  m2->add_req(NULL, m1);
  m1->_opnds[0] = op_dst;
  m2->_opnds[0] = op_dst;
  m2->_opnds[1] = op_dst;
  ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
  ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
  nodes->push(m1);
  nodes->push(m2);
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Is postalloc expanded.
  ShouldNotReachHere();
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
#endif

//=============================================================================

#ifndef PRODUCT
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  const long framesize = C->frame_slots() << LogBytesPerInt;

  st->print("PROLOG\n\t");
  if (C->need_stack_bang(framesize)) {
    st->print("stack_overflow_check\n\t");
  }

  if (!false /* TODO: PPC port C->is_frameless_method()*/) {
    st->print("save return pc\n\t");
    st->print("push frame %d\n\t", -framesize);
  }
}
#endif

// Macro used instead of the common __ to emulate the pipes of PPC.
// Instead of e.g. __ ld(...) one hase to write ___(ld) ld(...) This enables the
// micro scheduler to cope with "hand written" assembler like in the prolog. Though
// still no scheduling of this code is possible, the micro scheduler is aware of the
// code and can update its internal data. The following mechanism is used to achieve this:
// The micro scheduler calls size() of each compound node during scheduling. size() does a
// dummy emit and only during this dummy emit C->hb_scheduling() is not NULL.
#if 0 // TODO: PPC port
#define ___(op) if (UsePower6SchedulerPPC64 && C->hb_scheduling())                    \
                  C->hb_scheduling()->_pdScheduling->PdEmulatePipe(ppc64Opcode_##op); \
                _masm.
#define ___stop if (UsePower6SchedulerPPC64 && C->hb_scheduling())                    \
                  C->hb_scheduling()->_pdScheduling->PdEmulatePipe(archOpcode_none)
#define ___advance if (UsePower6SchedulerPPC64 && C->hb_scheduling())                 \
                  C->hb_scheduling()->_pdScheduling->advance_offset
#else
#define ___(op) if (UsePower6SchedulerPPC64)                                          \
                  Unimplemented();                                                    \
                _masm.
#define ___stop if (UsePower6SchedulerPPC64)                                          \
                  Unimplemented()
#define ___advance if (UsePower6SchedulerPPC64)                                       \
                  Unimplemented()
#endif

void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  const long framesize = C->frame_size_in_bytes();
  assert(framesize % (2 * wordSize) == 0, "must preserve 2*wordSize alignment");

  const bool method_is_frameless      = false /* TODO: PPC port C->is_frameless_method()*/;

  const Register return_pc            = R20; // Must match return_addr() in frame section.
  const Register callers_sp           = R21;
  const Register push_frame_temp      = R22;
  const Register toc_temp             = R23;
  assert_different_registers(R11, return_pc, callers_sp, push_frame_temp, toc_temp);

  if (method_is_frameless) {
    // Add nop at beginning of all frameless methods to prevent any
    // oop instructions from getting overwritten by make_not_entrant
    // (patching attempt would fail).
    ___(nop) nop();
  } else {
    // Get return pc.
    ___(mflr) mflr(return_pc);
  }

  // Calls to C2R adapters often do not accept exceptional returns.
  // We require that their callers must bang for them. But be
  // careful, because some VM calls (such as call site linkage) can
  // use several kilobytes of stack. But the stack safety zone should
  // account for that. See bugs 4446381, 4468289, 4497237.

  int bangsize = C->bang_size_in_bytes();
  assert(bangsize >= framesize || bangsize <= 0, "stack bang size incorrect");
  if (C->need_stack_bang(bangsize) && UseStackBanging) {
    // Unfortunately we cannot use the function provided in
    // assembler.cpp as we have to emulate the pipes. So I had to
    // insert the code of generate_stack_overflow_check(), see
    // assembler.cpp for some illuminative comments.
    const int page_size = os::vm_page_size();
    int bang_end = StackShadowPages * page_size;

    // This is how far the previous frame's stack banging extended.
    const int bang_end_safe = bang_end;

    if (bangsize > page_size) {
      bang_end += bangsize;
    }

    int bang_offset = bang_end_safe;

    while (bang_offset <= bang_end) {
      // Need at least one stack bang at end of shadow zone.

      // Again I had to copy code, this time from assembler_ppc64.cpp,
      // bang_stack_with_offset - see there for comments.

      // Stack grows down, caller passes positive offset.
      assert(bang_offset > 0, "must bang with positive offset");

      long stdoffset = -bang_offset;

      if (Assembler::is_simm(stdoffset, 16)) {
        // Signed 16 bit offset, a simple std is ok.
        if (UseLoadInstructionsForStackBangingPPC64) {
          ___(ld) ld(R0,  (int)(signed short)stdoffset, R1_SP);
        } else {
          ___(std) std(R0, (int)(signed short)stdoffset, R1_SP);
        }
      } else if (Assembler::is_simm(stdoffset, 31)) {
        // Use largeoffset calculations for addis & ld/std.
        const int hi = MacroAssembler::largeoffset_si16_si16_hi(stdoffset);
        const int lo = MacroAssembler::largeoffset_si16_si16_lo(stdoffset);

        Register tmp = R11;
        ___(addis) addis(tmp, R1_SP, hi);
        if (UseLoadInstructionsForStackBangingPPC64) {
          ___(ld) ld(R0, lo, tmp);
        } else {
          ___(std) std(R0, lo, tmp);
        }
      } else {
        ShouldNotReachHere();
      }

      bang_offset += page_size;
    }
    // R11 trashed
  } // C->need_stack_bang(framesize) && UseStackBanging

  unsigned int bytes = (unsigned int)framesize;
  long offset = Assembler::align_addr(bytes, frame::alignment_in_bytes);
  ciMethod *currMethod = C->method();

  // Optimized version for most common case.
  if (UsePower6SchedulerPPC64 &&
      !method_is_frameless && Assembler::is_simm((int)(-offset), 16) &&
      !(false /* ConstantsALot TODO: PPC port*/)) {
    ___(or) mr(callers_sp, R1_SP);
    ___(std) std(return_pc, _abi(lr), R1_SP);
    ___(stdu) stdu(R1_SP, -offset, R1_SP);
    return;
  }

  if (!method_is_frameless) {
    // Get callers sp.
    ___(or) mr(callers_sp, R1_SP);

    // Push method's frame, modifies SP.
    assert(Assembler::is_uimm(framesize, 32U), "wrong type");
    // The ABI is already accounted for in 'framesize' via the
    // 'out_preserve' area.
    Register tmp = push_frame_temp;
    // Had to insert code of push_frame((unsigned int)framesize, push_frame_temp).
    if (Assembler::is_simm(-offset, 16)) {
      ___(stdu) stdu(R1_SP, -offset, R1_SP);
    } else {
      long x = -offset;
      // Had to insert load_const(tmp, -offset).
      ___(addis)  lis( tmp, (int)((signed short)(((x >> 32) & 0xffff0000) >> 16)));
      ___(ori)    ori( tmp, tmp, ((x >> 32) & 0x0000ffff));
      ___(rldicr) sldi(tmp, tmp, 32);
      ___(oris)   oris(tmp, tmp, (x & 0xffff0000) >> 16);
      ___(ori)    ori( tmp, tmp, (x & 0x0000ffff));

      ___(stdux) stdux(R1_SP, R1_SP, tmp);
    }
  }
#if 0 // TODO: PPC port
  // For testing large constant pools, emit a lot of constants to constant pool.
  // "Randomize" const_size.
  if (ConstantsALot) {
    const int num_consts = const_size();
    for (int i = 0; i < num_consts; i++) {
      __ long_constant(0xB0B5B00BBABE);
    }
  }
#endif
  if (!method_is_frameless) {
    // Save return pc.
    ___(std) std(return_pc, _abi(lr), callers_sp);
  }
}
#undef ___
#undef ___stop
#undef ___advance

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. determine dynamically.
  return MachNode::size(ra_);
}

int MachPrologNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 reloc entry for load_const(toc).
}

//=============================================================================

#ifndef PRODUCT
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  st->print("EPILOG\n\t");
  st->print("restore return pc\n\t");
  st->print("pop frame\n\t");

  if (do_polling() && C->is_method_compilation()) {
    st->print("touch polling page\n\t");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  const long framesize = ((long)C->frame_slots()) << LogBytesPerInt;
  assert(framesize >= 0, "negative frame-size?");

  const bool method_needs_polling = do_polling() && C->is_method_compilation();
  const bool method_is_frameless  = false /* TODO: PPC port C->is_frameless_method()*/;
  const Register return_pc        = R11;
  const Register polling_page     = R12;

  if (!method_is_frameless) {
    // Restore return pc relative to callers' sp.
    __ ld(return_pc, ((int)framesize) + _abi(lr), R1_SP);
  }

  if (method_needs_polling) {
    if (LoadPollAddressFromThread) {
      // TODO: PPC port __ ld(polling_page, in_bytes(JavaThread::poll_address_offset()), R16_thread);
      Unimplemented();
    } else {
      __ load_const_optimized(polling_page, (long)(address) os::get_polling_page()); // TODO: PPC port: get_standard_polling_page()
    }
  }

  if (!method_is_frameless) {
    // Move return pc to LR.
    __ mtlr(return_pc);
    // Pop frame (fixed frame-size).
    __ addi(R1_SP, R1_SP, (int)framesize);
  }

  if (method_needs_polling) {
    // We need to mark the code position where the load from the safepoint
    // polling page was emitted as relocInfo::poll_return_type here.
    __ relocate(relocInfo::poll_return_type);
    __ load_from_polling_page(polling_page);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}

int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for load_from_polling_page.
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

// This method seems to be obsolete. It is declared in machnode.hpp
// and defined in all *.ad files, but it is never called. Should we
// get rid of it?
int MachEpilogNode::safepoint_offset() const {
  assert(do_polling(), "no return for this epilog node");
  return 0;
}

#if 0 // TODO: PPC port
void MachLoadPollAddrLateNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  MacroAssembler _masm(&cbuf);
  if (LoadPollAddressFromThread) {
    _masm.ld(R11, in_bytes(JavaThread::poll_address_offset()), R16_thread);
  } else {
    _masm.nop();
  }
}

uint MachLoadPollAddrLateNode::size(PhaseRegAlloc* ra_) const {
  if (LoadPollAddressFromThread) {
    return 4;
  } else {
    return 4;
  }
}

#ifndef PRODUCT
void MachLoadPollAddrLateNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print_cr(" LD R11, PollAddressOffset, R16_thread \t// LoadPollAddressFromThread");
}
#endif

const RegMask &MachLoadPollAddrLateNode::out_RegMask() const {
  return RSCRATCH1_BITS64_REG_mask();
}
#endif // PPC port

// =============================================================================

// Figure out which register class each belongs in: rc_int, rc_float or
// rc_stack.
enum RC { rc_bad, rc_int, rc_float, rc_stack };

static enum RC rc_class(OptoReg::Name reg) {
  // Return the register class for the given register. The given register
  // reg is a <register>_num value, which is an index into the MachRegisterNumbers
  // enumeration in adGlobals_ppc64.hpp.

  if (reg == OptoReg::Bad) return rc_bad;

  // We have 64 integer register halves, starting at index 0.
  if (reg < 64) return rc_int;

  // We have 64 floating-point register halves, starting at index 64.
  if (reg < 64+64) return rc_float;

  // Between float regs & stack are the flags regs.
  assert(OptoReg::is_stack(reg), "blow up if spilling flags");

  return rc_stack;
}

static int ld_st_helper(CodeBuffer *cbuf, const char *op_str, uint opcode, int reg, int offset,
                        bool do_print, Compile* C, outputStream *st) {

  assert(opcode == Assembler::LD_OPCODE   ||
         opcode == Assembler::STD_OPCODE  ||
         opcode == Assembler::LWZ_OPCODE  ||
         opcode == Assembler::STW_OPCODE  ||
         opcode == Assembler::LFD_OPCODE  ||
         opcode == Assembler::STFD_OPCODE ||
         opcode == Assembler::LFS_OPCODE  ||
         opcode == Assembler::STFS_OPCODE,
         "opcode not supported");

  if (cbuf) {
    int d =
      (Assembler::LD_OPCODE == opcode || Assembler::STD_OPCODE == opcode) ?
        Assembler::ds(offset+0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/)
      : Assembler::d1(offset+0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/); // Makes no difference in opt build.
    emit_long(*cbuf, opcode | Assembler::rt(Matcher::_regEncode[reg]) | d | Assembler::ra(R1_SP));
  }
#ifndef PRODUCT
  else if (do_print) {
    st->print("%-7s %s, [R1_SP + #%d+%d] \t// spill copy",
              op_str,
              Matcher::regName[reg],
              offset, 0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/);
  }
#endif
  return 4; // size
}

uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
  Compile* C = ra_->C;

  // Get registers to move.
  OptoReg::Name src_hi = ra_->get_reg_second(in(1));
  OptoReg::Name src_lo = ra_->get_reg_first(in(1));
  OptoReg::Name dst_hi = ra_->get_reg_second(this);
  OptoReg::Name dst_lo = ra_->get_reg_first(this);

  enum RC src_hi_rc = rc_class(src_hi);
  enum RC src_lo_rc = rc_class(src_lo);
  enum RC dst_hi_rc = rc_class(dst_hi);
  enum RC dst_lo_rc = rc_class(dst_lo);

  assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
  if (src_hi != OptoReg::Bad)
    assert((src_lo&1)==0 && src_lo+1==src_hi &&
           (dst_lo&1)==0 && dst_lo+1==dst_hi,
           "expected aligned-adjacent pairs");
  // Generate spill code!
  int size = 0;

  if (src_lo == dst_lo && src_hi == dst_hi)
    return size;            // Self copy, no move.

  // --------------------------------------
  // Memory->Memory Spill. Use R0 to hold the value.
  if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
    int src_offset = ra_->reg2offset(src_lo);
    int dst_offset = ra_->reg2offset(dst_lo);
    if (src_hi != OptoReg::Bad) {
      assert(src_hi_rc==rc_stack && dst_hi_rc==rc_stack,
             "expected same type of move for high parts");
      size += ld_st_helper(cbuf, "LD  ", Assembler::LD_OPCODE,  R0_num, src_offset, !do_size, C, st);
      if (!cbuf && !do_size) st->print("\n\t");
      size += ld_st_helper(cbuf, "STD ", Assembler::STD_OPCODE, R0_num, dst_offset, !do_size, C, st);
    } else {
      size += ld_st_helper(cbuf, "LWZ ", Assembler::LWZ_OPCODE, R0_num, src_offset, !do_size, C, st);
      if (!cbuf && !do_size) st->print("\n\t");
      size += ld_st_helper(cbuf, "STW ", Assembler::STW_OPCODE, R0_num, dst_offset, !do_size, C, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for float->int copy; requires a trip through memory.
  if (src_lo_rc == rc_float && dst_lo_rc == rc_int) {
    Unimplemented();
  }

  // --------------------------------------
  // Check for integer reg-reg copy.
  if (src_lo_rc == rc_int && dst_lo_rc == rc_int) {
      Register Rsrc = as_Register(Matcher::_regEncode[src_lo]);
      Register Rdst = as_Register(Matcher::_regEncode[dst_lo]);
      size = (Rsrc != Rdst) ? 4 : 0;

      if (cbuf) {
        MacroAssembler _masm(cbuf);
        if (size) {
          __ mr(Rdst, Rsrc);
        }
      }
#ifndef PRODUCT
      else if (!do_size) {
        if (size) {
          st->print("%-7s %s, %s \t// spill copy", "MR", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
        } else {
          st->print("%-7s %s, %s \t// spill copy", "MR-NOP", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
        }
      }
#endif
      return size;
  }

  // Check for integer store.
  if (src_lo_rc == rc_int && dst_lo_rc == rc_stack) {
    int dst_offset = ra_->reg2offset(dst_lo);
    if (src_hi != OptoReg::Bad) {
      assert(src_hi_rc==rc_int && dst_hi_rc==rc_stack,
             "expected same type of move for high parts");
      size += ld_st_helper(cbuf, "STD ", Assembler::STD_OPCODE, src_lo, dst_offset, !do_size, C, st);
    } else {
      size += ld_st_helper(cbuf, "STW ", Assembler::STW_OPCODE, src_lo, dst_offset, !do_size, C, st);
    }
    return size;
  }

  // Check for integer load.
  if (dst_lo_rc == rc_int && src_lo_rc == rc_stack) {
    int src_offset = ra_->reg2offset(src_lo);
    if (src_hi != OptoReg::Bad) {
      assert(dst_hi_rc==rc_int && src_hi_rc==rc_stack,
             "expected same type of move for high parts");
      size += ld_st_helper(cbuf, "LD  ", Assembler::LD_OPCODE, dst_lo, src_offset, !do_size, C, st);
    } else {
      size += ld_st_helper(cbuf, "LWZ ", Assembler::LWZ_OPCODE, dst_lo, src_offset, !do_size, C, st);
    }
    return size;
  }

  // Check for float reg-reg copy.
  if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
    if (cbuf) {
      MacroAssembler _masm(cbuf);
      FloatRegister Rsrc = as_FloatRegister(Matcher::_regEncode[src_lo]);
      FloatRegister Rdst = as_FloatRegister(Matcher::_regEncode[dst_lo]);
      __ fmr(Rdst, Rsrc);
    }
#ifndef PRODUCT
    else if (!do_size) {
      st->print("%-7s %s, %s \t// spill copy", "FMR", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
    }
#endif
    return 4;
  }

  // Check for float store.
  if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
    int dst_offset = ra_->reg2offset(dst_lo);
    if (src_hi != OptoReg::Bad) {
      assert(src_hi_rc==rc_float && dst_hi_rc==rc_stack,
             "expected same type of move for high parts");
      size += ld_st_helper(cbuf, "STFD", Assembler::STFD_OPCODE, src_lo, dst_offset, !do_size, C, st);
    } else {
      size += ld_st_helper(cbuf, "STFS", Assembler::STFS_OPCODE, src_lo, dst_offset, !do_size, C, st);
    }
    return size;
  }

  // Check for float load.
  if (dst_lo_rc == rc_float && src_lo_rc == rc_stack) {
    int src_offset = ra_->reg2offset(src_lo);
    if (src_hi != OptoReg::Bad) {
      assert(dst_hi_rc==rc_float && src_hi_rc==rc_stack,
             "expected same type of move for high parts");
      size += ld_st_helper(cbuf, "LFD ", Assembler::LFD_OPCODE, dst_lo, src_offset, !do_size, C, st);
    } else {
      size += ld_st_helper(cbuf, "LFS ", Assembler::LFS_OPCODE, dst_lo, src_offset, !do_size, C, st);
    }
    return size;
  }

  // --------------------------------------------------------------------
  // Check for hi bits still needing moving. Only happens for misaligned
  // arguments to native calls.
  if (src_hi == dst_hi)
    return size;               // Self copy; no move.

  assert(src_hi_rc != rc_bad && dst_hi_rc != rc_bad, "src_hi & dst_hi cannot be Bad");
  ShouldNotReachHere(); // Unimplemented
  return 0;
}

#ifndef PRODUCT
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  if (!ra_)
    st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
  else
    implementation(NULL, ra_, false, st);
}
#endif

void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation(NULL, ra_, true, NULL);
}

#if 0 // TODO: PPC port
ArchOpcode MachSpillCopyNode_archOpcode(MachSpillCopyNode *n, PhaseRegAlloc *ra_) {
#ifndef PRODUCT
  if (ra_->node_regs_max_index() == 0) return archOpcode_undefined;
#endif
  assert(ra_->node_regs_max_index() != 0, "");

  // Get registers to move.
  OptoReg::Name src_hi = ra_->get_reg_second(n->in(1));
  OptoReg::Name src_lo = ra_->get_reg_first(n->in(1));
  OptoReg::Name dst_hi = ra_->get_reg_second(n);
  OptoReg::Name dst_lo = ra_->get_reg_first(n);

  enum RC src_lo_rc = rc_class(src_lo);
  enum RC dst_lo_rc = rc_class(dst_lo);

  if (src_lo == dst_lo && src_hi == dst_hi)
    return ppc64Opcode_none;            // Self copy, no move.

  // --------------------------------------
  // Memory->Memory Spill. Use R0 to hold the value.
  if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
    return ppc64Opcode_compound;
  }

  // --------------------------------------
  // Check for float->int copy; requires a trip through memory.
  if (src_lo_rc == rc_float && dst_lo_rc == rc_int) {
    Unimplemented();
  }

  // --------------------------------------
  // Check for integer reg-reg copy.
  if (src_lo_rc == rc_int && dst_lo_rc == rc_int) {
    Register Rsrc = as_Register(Matcher::_regEncode[src_lo]);
    Register Rdst = as_Register(Matcher::_regEncode[dst_lo]);
    if (Rsrc == Rdst) {
      return ppc64Opcode_none;
    } else {
      return ppc64Opcode_or;
    }
  }

  // Check for integer store.
  if (src_lo_rc == rc_int && dst_lo_rc == rc_stack) {
    if (src_hi != OptoReg::Bad) {
      return ppc64Opcode_std;
    } else {
      return ppc64Opcode_stw;
    }
  }

  // Check for integer load.
  if (dst_lo_rc == rc_int && src_lo_rc == rc_stack) {
    if (src_hi != OptoReg::Bad) {
      return ppc64Opcode_ld;
    } else {
      return ppc64Opcode_lwz;
    }
  }

  // Check for float reg-reg copy.
  if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
    return ppc64Opcode_fmr;
  }

  // Check for float store.
  if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
    if (src_hi != OptoReg::Bad) {
      return ppc64Opcode_stfd;
    } else {
      return ppc64Opcode_stfs;
    }
  }

  // Check for float load.
  if (dst_lo_rc == rc_float && src_lo_rc == rc_stack) {
    if (src_hi != OptoReg::Bad) {
      return ppc64Opcode_lfd;
    } else {
      return ppc64Opcode_lfs;
    }
  }

  // --------------------------------------------------------------------
  // Check for hi bits still needing moving. Only happens for misaligned
  // arguments to native calls.
  if (src_hi == dst_hi)
    return ppc64Opcode_none;               // Self copy; no move.

  ShouldNotReachHere();
  return ppc64Opcode_undefined;
}
#endif // PPC port

#ifndef PRODUCT
void MachNopNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("NOP \t// %d nops to pad for loops.", _count);
}
#endif

void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *) const {
  MacroAssembler _masm(&cbuf);
  // _count contains the number of nops needed for padding.
  for (int i = 0; i < _count; i++) {
    __ nop();
  }
}

uint MachNopNode::size(PhaseRegAlloc *ra_) const {
   return _count * 4;
}

#ifndef PRODUCT
void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("ADDI %s, SP, %d \t// box node", Matcher::regName[reg], offset);
}
#endif

void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);

  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg    = ra_->get_encode(this);

  if (Assembler::is_simm(offset, 16)) {
    __ addi(as_Register(reg), R1, offset);
  } else {
    ShouldNotReachHere();
  }
}

uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
  return 4;
}

#ifndef PRODUCT
void MachUEPNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print_cr("---- MachUEPNode ----");
  st->print_cr("...");
}
#endif

void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  // Inline_cache contains a klass.
  Register ic_klass       = as_Register(Matcher::inline_cache_reg_encode());
  Register receiver_klass = R0;  // tmp

  assert_different_registers(ic_klass, receiver_klass, R11_scratch1, R3_ARG1);
  assert(R11_scratch1 == R11, "need prologue scratch register");

  // Check for NULL argument if we don't have implicit null checks.
  if (!ImplicitNullChecks || !os::zero_page_read_protected()) {
    if (TrapBasedNullChecks) {
      __ trap_null_check(R3_ARG1);
    } else {
      Label valid;
      __ cmpdi(CCR0, R3_ARG1, 0);
      __ bne_predict_taken(CCR0, valid);
      // We have a null argument, branch to ic_miss_stub.
      __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
                           relocInfo::runtime_call_type);
      __ bind(valid);
    }
  }
  // Assume argument is not NULL, load klass from receiver.
  __ load_klass(receiver_klass, R3_ARG1);

  if (TrapBasedICMissChecks) {
    __ trap_ic_miss_check(receiver_klass, ic_klass);
  } else {
    Label valid;
    __ cmpd(CCR0, receiver_klass, ic_klass);
    __ beq_predict_taken(CCR0, valid);
    // We have an unexpected klass, branch to ic_miss_stub.
    __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
                         relocInfo::runtime_call_type);
    __ bind(valid);
  }

  // Argument is valid and klass is as expected, continue.
}

#if 0 // TODO: PPC port
// Optimize UEP code on z (save a load_const() call in main path).
int MachUEPNode::ep_offset() {
  return 0;
}
#endif

uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}

//=============================================================================

%} // interrupt source

source_hpp %{ // Header information of the source block.

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // The exception_handler is a b64_patchable.
    return MacroAssembler::b64_patchable_size;
  }

  static uint size_deopt_handler() {
    // The deopt_handler is a bl64_patchable.
    return MacroAssembler::bl64_patchable_size;
  }

};

%} // end source_hpp

source %{

int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
  MacroAssembler _masm(&cbuf);

  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) return 0; // CodeBuffer::expand failed

  int offset = __ offset();
  __ b64_patchable((address)OptoRuntime::exception_blob()->content_begin(),
                       relocInfo::runtime_call_type);
  assert(__ offset() - offset == (int)size_exception_handler(), "must be fixed size");
  __ end_a_stub();

  return offset;
}

// The deopt_handler is like the exception handler, but it calls to
// the deoptimization blob instead of jumping to the exception blob.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
  MacroAssembler _masm(&cbuf);

  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) return 0; // CodeBuffer::expand failed

  int offset = __ offset();
  __ bl64_patchable((address)SharedRuntime::deopt_blob()->unpack(),
                        relocInfo::runtime_call_type);
  assert(__ offset() - offset == (int) size_deopt_handler(), "must be fixed size");
  __ end_a_stub();

  return offset;
}

//=============================================================================

// Use a frame slots bias for frameless methods if accessing the stack.
static int frame_slots_bias(int reg_enc, PhaseRegAlloc* ra_) {
  if (as_Register(reg_enc) == R1_SP) {
    return 0; // TODO: PPC port ra_->C->frame_slots_sp_bias_in_bytes();
  }
  return 0;
}

const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  switch (opcode) {
  case Op_SqrtD:
    return VM_Version::has_fsqrt();
  case Op_CountLeadingZerosI:
  case Op_CountLeadingZerosL:
  case Op_CountTrailingZerosI:
  case Op_CountTrailingZerosL:
    if (!UseCountLeadingZerosInstructionsPPC64)
      return false;
    break;

  case Op_PopCountI:
  case Op_PopCountL:
    return (UsePopCountInstruction && VM_Version::has_popcntw());

  case Op_StrComp:
    return SpecialStringCompareTo;
  case Op_StrEquals:
    return SpecialStringEquals;
  case Op_StrIndexOf:
    return SpecialStringIndexOf;
  }

  return true;  // Per default match rules are supported.
}

int Matcher::regnum_to_fpu_offset(int regnum) {
  // No user for this method?
  Unimplemented();
  return 999;
}

const bool Matcher::convL2FSupported(void) {
  // fcfids can do the conversion (>= Power7).
  // fcfid + frsp showed rounding problem when result should be 0x3f800001.
  return VM_Version::has_fcfids(); // False means that conversion is done by runtime call.
}

// Vector width in bytes.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(MaxVectorSize == 8, "");
  return 8;
}

// Vector ideal reg.
const int Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize == 8 && size == 8, "");
  return Op_RegL;
}

const int Matcher::vector_shift_count_ideal_reg(int size) {
  fatal("vector shift is not supported");
  return Node::NotAMachineReg;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}

const int Matcher::min_vector_size(const BasicType bt) {
  return max_vector_size(bt); // Same as max.
}

// PPC doesn't support misaligned vectors store/load.
const bool Matcher::misaligned_vectors_ok() {
  return false;
}

// PPC AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// RETURNS: whether this branch offset is short enough that a short
// branch can be used.
//
// If the platform does not provide any short branch variants, then
// this method should return `false' for offset 0.
//
// `Compile::Fill_buffer' will decide on basis of this information
// whether to do the pass `Compile::Shorten_branches' at all.
//
// And `Compile::Shorten_branches' will decide on basis of this
// information whether to replace particular branch sites by short
// ones.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // Is the offset within the range of a ppc64 pc relative branch?
  bool b;

  const int safety_zone = 3 * BytesPerInstWord;
  b = Assembler::is_simm((offset<0 ? offset-safety_zone : offset+safety_zone),
                         29 - 16 + 1 + 2);
  return b;
}

const bool Matcher::isSimpleConstant64(jlong value) {
  // Probably always true, even if a temp register is required.
  return true;
}
/* TODO: PPC port
// Make a new machine dependent decode node (with its operands).
MachTypeNode *Matcher::make_decode_node(Compile *C) {
  assert(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0,
         "This method is only implemented for unscaled cOops mode so far");
  MachTypeNode *decode = new (C) decodeN_unscaledNode();
  decode->set_opnd_array(0, new (C) iRegPdstOper());
  decode->set_opnd_array(1, new (C) iRegNsrcOper());
  return decode;
}
*/
// Threshold size for cleararray.
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Use conditional move (CMOVL) on Power7.
const int Matcher::long_cmove_cost() { return 0; } // this only makes long cmoves more expensive than int cmoves

// Suppress CMOVF. Conditional move available (sort of) on PPC64 only from P7 onwards. Not exploited yet.
// fsel doesn't accept a condition register as input, so this would be slightly different.
const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }

// Power6 requires postalloc expand (see block.cpp for description of postalloc expand).
const bool Matcher::require_postalloc_expand = true;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers? True for Intel but false for most RISCs.
const bool Matcher::clone_shift_expressions = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
// Off, as masks are generated in expand rules where required.
// Constant shift counts are handled in Ideal phase.
const bool Matcher::need_masked_shift_count = false;

// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // TODO: PPC port if (MatchDecodeNodes) return true;
  return false;
}

bool Matcher::narrow_klass_use_complex_address() {
  NOT_LP64(ShouldNotCallThis());
  assert(UseCompressedClassPointers, "only for compressed klass code");
  // TODO: PPC port if (MatchDecodeNodes) return true;
  return false;
}

// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers. Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed. Else we split the double into 2 integer pieces and move it
// piece-by-piece. Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
 Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Do floats take an entire double register or just half?
//
// A float occupies a ppc64 double register. For the allocator, a
// ppc64 double register appears as a pair of float registers.
bool Matcher::float_in_double() { return true; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;

// Constants for c2c and c calling conventions.

const MachRegisterNumbers iarg_reg[8] = {
  R3_num, R4_num, R5_num, R6_num,
  R7_num, R8_num, R9_num, R10_num
};

const MachRegisterNumbers farg_reg[13] = {
  F1_num, F2_num, F3_num, F4_num,
  F5_num, F6_num, F7_num, F8_num,
  F9_num, F10_num, F11_num, F12_num,
  F13_num
};

const int num_iarg_registers = sizeof(iarg_reg) / sizeof(iarg_reg[0]);

const int num_farg_registers = sizeof(farg_reg) / sizeof(farg_reg[0]);

// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg(int reg) {
  // We return true for all registers contained in iarg_reg[] and
  // farg_reg[] and their virtual halves.
  // We must include the virtual halves in order to get STDs and LDs
  // instead of STWs and LWs in the trampoline stubs.

  if (   reg == R3_num  || reg == R3_H_num
      || reg == R4_num  || reg == R4_H_num
      || reg == R5_num  || reg == R5_H_num
      || reg == R6_num  || reg == R6_H_num
      || reg == R7_num  || reg == R7_H_num
      || reg == R8_num  || reg == R8_H_num
      || reg == R9_num  || reg == R9_H_num
      || reg == R10_num || reg == R10_H_num)
    return true;

  if (   reg == F1_num  || reg == F1_H_num
      || reg == F2_num  || reg == F2_H_num
      || reg == F3_num  || reg == F3_H_num
      || reg == F4_num  || reg == F4_H_num
      || reg == F5_num  || reg == F5_H_num
      || reg == F6_num  || reg == F6_H_num
      || reg == F7_num  || reg == F7_H_num
      || reg == F8_num  || reg == F8_H_num
      || reg == F9_num  || reg == F9_H_num
      || reg == F10_num || reg == F10_H_num
      || reg == F11_num || reg == F11_H_num
      || reg == F12_num || reg == F12_H_num
      || reg == F13_num || reg == F13_H_num)
    return true;

  return false;
}

bool Matcher::is_spillable_arg(int reg) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}

// Register for DIVI projection of divmodI.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return RegMask();
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams. Encoding classes are parameterized macros used by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction. Operands specify their base encoding interface with the
// interface keyword. There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
// operand to generate a function which returns its register number when
// queried. CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried. MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried. COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
//
// Instructions specify two basic values for encoding. Again, a function
// is available to check if the constant displacement is an oop. They use the
// ins_encode keyword to specify their encoding classes (which must be
// a sequence of enc_class names, and their parameters, specified in
// the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode. Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  enc_class enc_unimplemented %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    MacroAssembler _masm(&cbuf);
    __ unimplemented("Unimplemented mach node encoding in AD file.", 13);
  %}

  enc_class enc_untested %{
#ifdef ASSERT
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    MacroAssembler _masm(&cbuf);
    __ untested("Untested mach node encoding in AD file.");
#else
    // TODO: PPC port $archOpcode(ppc64Opcode_none);
#endif
  %}

  enc_class enc_lbz(iRegIdst dst, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lbz);
    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lbz($dst$$Register, Idisp, $mem$$base$$Register);
  %}

  // Load acquire.
  enc_class enc_lbz_ac(iRegIdst dst, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lbz($dst$$Register, Idisp, $mem$$base$$Register);
    __ twi_0($dst$$Register);
    __ isync();
  %}

  enc_class enc_lhz(iRegIdst dst, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lhz);

    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lhz($dst$$Register, Idisp, $mem$$base$$Register);
  %}

  // Load acquire.
  enc_class enc_lhz_ac(iRegIdst dst, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);

    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lhz($dst$$Register, Idisp, $mem$$base$$Register);
    __ twi_0($dst$$Register);
    __ isync();
  %}

  enc_class enc_lwz(iRegIdst dst, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lwz);

    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lwz($dst$$Register, Idisp, $mem$$base$$Register);
  %}

  // Load acquire.
  enc_class enc_lwz_ac(iRegIdst dst, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);

    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lwz($dst$$Register, Idisp, $mem$$base$$Register);
    __ twi_0($dst$$Register);
    __ isync();
  %}

  enc_class enc_ld(iRegLdst dst, memoryAlg4 mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ld);
    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    // Operand 'ds' requires 4-alignment.
    assert((Idisp & 0x3) == 0, "unaligned offset");
    __ ld($dst$$Register, Idisp, $mem$$base$$Register);
  %}

  // Load acquire.
  enc_class enc_ld_ac(iRegLdst dst, memoryAlg4 mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    // Operand 'ds' requires 4-alignment.
    assert((Idisp & 0x3) == 0, "unaligned offset");
    __ ld($dst$$Register, Idisp, $mem$$base$$Register);
    __ twi_0($dst$$Register);
    __ isync();
  %}

  enc_class enc_lfd(RegF dst, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lfd);
    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lfd($dst$$FloatRegister, Idisp, $mem$$base$$Register);
  %}

  enc_class enc_load_long_constL(iRegLdst dst, immL src, iRegLdst toc) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ld);

    MacroAssembler _masm(&cbuf);
    int toc_offset = 0;

    if (!ra_->C->in_scratch_emit_size()) {
      address const_toc_addr;
      // Create a non-oop constant, no relocation needed.
      // If it is an IC, it has a virtual_call_Relocation.
      const_toc_addr = __ long_constant((jlong)$src$$constant);

      // Get the constant's TOC offset.
      toc_offset = __ offset_to_method_toc(const_toc_addr);

      // Keep the current instruction offset in mind.
      ((loadConLNode*)this)->_cbuf_insts_offset = __ offset();
    }

    __ ld($dst$$Register, toc_offset, $toc$$Register);
  %}

  enc_class enc_load_long_constL_hi(iRegLdst dst, iRegLdst toc, immL src) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addis);

    MacroAssembler _masm(&cbuf);

    if (!ra_->C->in_scratch_emit_size()) {
      address const_toc_addr;
      // Create a non-oop constant, no relocation needed.
      // If it is an IC, it has a virtual_call_Relocation.
      const_toc_addr = __ long_constant((jlong)$src$$constant);

      // Get the constant's TOC offset.
      const int toc_offset = __ offset_to_method_toc(const_toc_addr);
      // Store the toc offset of the constant.
      ((loadConL_hiNode*)this)->_const_toc_offset = toc_offset;

      // Also keep the current instruction offset in mind.
      ((loadConL_hiNode*)this)->_cbuf_insts_offset = __ offset();
    }

    __ addis($dst$$Register, $toc$$Register, MacroAssembler::largeoffset_si16_si16_hi(_const_toc_offset));
  %}

%} // encode

source %{

typedef struct {
  loadConL_hiNode *_large_hi;
  loadConL_loNode *_large_lo;
  loadConLNode    *_small;
  MachNode        *_last;
} loadConLNodesTuple;

loadConLNodesTuple loadConLNodesTuple_create(Compile *C, PhaseRegAlloc *ra_, Node *toc, immLOper *immSrc,
                                             OptoReg::Name reg_second, OptoReg::Name reg_first) {
  loadConLNodesTuple nodes;

  const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
  if (large_constant_pool) {
    // Create new nodes.
    loadConL_hiNode *m1 = new (C) loadConL_hiNode();
    loadConL_loNode *m2 = new (C) loadConL_loNode();

    // inputs for new nodes
    m1->add_req(NULL, toc);
    m2->add_req(NULL, m1);

    // operands for new nodes
    m1->_opnds[0] = new (C) iRegLdstOper(); // dst
    m1->_opnds[1] = immSrc;                 // src
    m1->_opnds[2] = new (C) iRegPdstOper(); // toc
    m2->_opnds[0] = new (C) iRegLdstOper(); // dst
    m2->_opnds[1] = immSrc;                 // src
    m2->_opnds[2] = new (C) iRegLdstOper(); // base

    // Initialize ins_attrib TOC fields.
    m1->_const_toc_offset = -1;
    m2->_const_toc_offset_hi_node = m1;

    // Initialize ins_attrib instruction offset.
    m1->_cbuf_insts_offset = -1;

    // register allocation for new nodes
    ra_->set_pair(m1->_idx, reg_second, reg_first);
    ra_->set_pair(m2->_idx, reg_second, reg_first);

    // Create result.
    nodes._large_hi = m1;
    nodes._large_lo = m2;
    nodes._small = NULL;
    nodes._last = nodes._large_lo;
    assert(m2->bottom_type()->isa_long(), "must be long");
  } else {
    loadConLNode *m2 = new (C) loadConLNode();

    // inputs for new nodes
    m2->add_req(NULL, toc);

    // operands for new nodes
    m2->_opnds[0] = new (C) iRegLdstOper(); // dst
    m2->_opnds[1] = immSrc;                 // src
    m2->_opnds[2] = new (C) iRegPdstOper(); // toc

    // Initialize ins_attrib instruction offset.
    m2->_cbuf_insts_offset = -1;

    // register allocation for new nodes
    ra_->set_pair(m2->_idx, reg_second, reg_first);

    // Create result.
    nodes._large_hi = NULL;
    nodes._large_lo = NULL;
    nodes._small = m2;
    nodes._last = nodes._small;
    assert(m2->bottom_type()->isa_long(), "must be long");
  }

  return nodes;
}

%} // source

encode %{
  // Postalloc expand emitter for loading a long constant from the method's TOC.
  // Enc_class needed as consttanttablebase is not supported by postalloc
  // expand.
  enc_class postalloc_expand_load_long_constant(iRegLdst dst, immL src, iRegLdst toc) %{
    // Create new nodes.
    loadConLNodesTuple loadConLNodes =
      loadConLNodesTuple_create(C, ra_, n_toc, op_src,
                                ra_->get_reg_second(this), ra_->get_reg_first(this));

    // Push new nodes.
    if (loadConLNodes._large_hi) nodes->push(loadConLNodes._large_hi);
    if (loadConLNodes._last)     nodes->push(loadConLNodes._last);

    // some asserts
    assert(nodes->length() >= 1, "must have created at least 1 node");
    assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long");
  %}

  enc_class enc_load_long_constP(iRegLdst dst, immP src, iRegLdst toc) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ld);

    MacroAssembler _masm(&cbuf);
    int toc_offset = 0;

    if (!ra_->C->in_scratch_emit_size()) {
      intptr_t val = $src$$constant;
      relocInfo::relocType constant_reloc = $src->constant_reloc();  // src
      address const_toc_addr;
      if (constant_reloc == relocInfo::oop_type) {
        // Create an oop constant and a corresponding relocation.
        AddressLiteral a = __ allocate_oop_address((jobject)val);
        const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
        __ relocate(a.rspec());
      } else if (constant_reloc == relocInfo::metadata_type) {
        AddressLiteral a = __ allocate_metadata_address((Metadata *)val);
        const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
        __ relocate(a.rspec());
      } else {
        // Create a non-oop constant, no relocation needed.
        const_toc_addr = __ long_constant((jlong)$src$$constant);
      }

      // Get the constant's TOC offset.
      toc_offset = __ offset_to_method_toc(const_toc_addr);
    }

    __ ld($dst$$Register, toc_offset, $toc$$Register);
  %}

  enc_class enc_load_long_constP_hi(iRegLdst dst, immP src, iRegLdst toc) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addis);

    MacroAssembler _masm(&cbuf);
    if (!ra_->C->in_scratch_emit_size()) {
      intptr_t val = $src$$constant;
      relocInfo::relocType constant_reloc = $src->constant_reloc();  // src
      address const_toc_addr;
      if (constant_reloc == relocInfo::oop_type) {
        // Create an oop constant and a corresponding relocation.
        AddressLiteral a = __ allocate_oop_address((jobject)val);
        const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
        __ relocate(a.rspec());
      } else if (constant_reloc == relocInfo::metadata_type) {
        AddressLiteral a = __ allocate_metadata_address((Metadata *)val);
        const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
        __ relocate(a.rspec());
      } else {  // non-oop pointers, e.g. card mark base, heap top
        // Create a non-oop constant, no relocation needed.
        const_toc_addr = __ long_constant((jlong)$src$$constant);
      }

      // Get the constant's TOC offset.
      const int toc_offset = __ offset_to_method_toc(const_toc_addr);
      // Store the toc offset of the constant.
      ((loadConP_hiNode*)this)->_const_toc_offset = toc_offset;
    }

    __ addis($dst$$Register, $toc$$Register, MacroAssembler::largeoffset_si16_si16_hi(_const_toc_offset));
  %}

  // Postalloc expand emitter for loading a ptr constant from the method's TOC.
  // Enc_class needed as consttanttablebase is not supported by postalloc
  // expand.
  enc_class postalloc_expand_load_ptr_constant(iRegPdst dst, immP src, iRegLdst toc) %{
    const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
    if (large_constant_pool) {
      // Create new nodes.
      loadConP_hiNode *m1 = new (C) loadConP_hiNode();
      loadConP_loNode *m2 = new (C) loadConP_loNode();

      // inputs for new nodes
      m1->add_req(NULL, n_toc);
      m2->add_req(NULL, m1);

      // operands for new nodes
      m1->_opnds[0] = new (C) iRegPdstOper(); // dst
      m1->_opnds[1] = op_src;                 // src
      m1->_opnds[2] = new (C) iRegPdstOper(); // toc
      m2->_opnds[0] = new (C) iRegPdstOper(); // dst
      m2->_opnds[1] = op_src;                 // src
      m2->_opnds[2] = new (C) iRegLdstOper(); // base

      // Initialize ins_attrib TOC fields.
      m1->_const_toc_offset = -1;
      m2->_const_toc_offset_hi_node = m1;

      // Register allocation for new nodes.
      ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
      ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));

      nodes->push(m1);
      nodes->push(m2);
      assert(m2->bottom_type()->isa_ptr(), "must be ptr");
    } else {
      loadConPNode *m2 = new (C) loadConPNode();

      // inputs for new nodes
      m2->add_req(NULL, n_toc);

      // operands for new nodes
      m2->_opnds[0] = new (C) iRegPdstOper(); // dst
      m2->_opnds[1] = op_src;                 // src
      m2->_opnds[2] = new (C) iRegPdstOper(); // toc

      // Register allocation for new nodes.
      ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));

      nodes->push(m2);
      assert(m2->bottom_type()->isa_ptr(), "must be ptr");
    }
  %}

  // Enc_class needed as consttanttablebase is not supported by postalloc
  // expand.
  enc_class postalloc_expand_load_float_constant(regF dst, immF src, iRegLdst toc) %{
    bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;

    MachNode *m2;
    if (large_constant_pool) {
      m2 = new (C) loadConFCompNode();
    } else {
      m2 = new (C) loadConFNode();
    }
    // inputs for new nodes
    m2->add_req(NULL, n_toc);

    // operands for new nodes
    m2->_opnds[0] = op_dst;
    m2->_opnds[1] = op_src;
    m2->_opnds[2] = new (C) iRegPdstOper(); // constanttablebase

    // register allocation for new nodes
    ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    nodes->push(m2);
  %}

  // Enc_class needed as consttanttablebase is not supported by postalloc
  // expand.
  enc_class postalloc_expand_load_double_constant(regD dst, immD src, iRegLdst toc) %{
    bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;

    MachNode *m2;
    if (large_constant_pool) {
      m2 = new (C) loadConDCompNode();
    } else {
      m2 = new (C) loadConDNode();
    }
    // inputs for new nodes
    m2->add_req(NULL, n_toc);

    // operands for new nodes
    m2->_opnds[0] = op_dst;
    m2->_opnds[1] = op_src;
    m2->_opnds[2] = new (C) iRegPdstOper(); // constanttablebase

    // register allocation for new nodes
    ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    nodes->push(m2);
  %}

  enc_class enc_stw(iRegIsrc src, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_stw);
    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ stw($src$$Register, Idisp, $mem$$base$$Register);
  %}

  enc_class enc_std(iRegIsrc src, memoryAlg4 mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_std);
    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    // Operand 'ds' requires 4-alignment.
    assert((Idisp & 0x3) == 0, "unaligned offset");
    __ std($src$$Register, Idisp, $mem$$base$$Register);
  %}

  enc_class enc_stfs(RegF src, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_stfs);
    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ stfs($src$$FloatRegister, Idisp, $mem$$base$$Register);
  %}

  enc_class enc_stfd(RegF src, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_stfd);
    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ stfd($src$$FloatRegister, Idisp, $mem$$base$$Register);
  %}

  // Use release_store for card-marking to ensure that previous
  // oop-stores are visible before the card-mark change.
  enc_class enc_cms_card_mark(memory mem, iRegLdst releaseFieldAddr) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    // FIXME: Implement this as a cmove and use a fixed condition code
    // register which is written on every transition to compiled code,
    // e.g. in call-stub and when returning from runtime stubs.
    //
    // Proposed code sequence for the cmove implementation:
    //
    // Label skip_release;
    // __ beq(CCRfixed, skip_release);
    // __ release();
    // __ bind(skip_release);
    // __ stb(card mark);

    MacroAssembler _masm(&cbuf);
    Label skip_storestore;

#if 0 // TODO: PPC port
    // Check CMSCollectorCardTableModRefBSExt::_requires_release and do the
    // StoreStore barrier conditionally.
    __ lwz(R0, 0, $releaseFieldAddr$$Register);
    __ cmpwi(CCR0, R0, 0);
    __ beq_predict_taken(CCR0, skip_storestore);
#endif
    __ li(R0, 0);
    __ membar(Assembler::StoreStore);
#if 0 // TODO: PPC port
    __ bind(skip_storestore);
#endif

    // Do the store.
    if ($mem$$index == 0) {
      __ stb(R0, $mem$$disp, $mem$$base$$Register);
    } else {
      assert(0 == $mem$$disp, "no displacement possible with indexed load/stores on ppc");
      __ stbx(R0, $mem$$base$$Register, $mem$$index$$Register);
    }
  %}

  enc_class postalloc_expand_encode_oop(iRegNdst dst, iRegPdst src, flagsReg crx) %{

    if (VM_Version::has_isel()) {
      // use isel instruction with Power 7
      cmpP_reg_imm16Node *n_compare  = new (C) cmpP_reg_imm16Node();
      encodeP_subNode    *n_sub_base = new (C) encodeP_subNode();
      encodeP_shiftNode  *n_shift    = new (C) encodeP_shiftNode();
      cond_set_0_oopNode *n_cond_set = new (C) cond_set_0_oopNode();

      n_compare->add_req(n_region, n_src);
      n_compare->_opnds[0] = op_crx;
      n_compare->_opnds[1] = op_src;
      n_compare->_opnds[2] = new (C) immL16Oper(0);

      n_sub_base->add_req(n_region, n_src);
      n_sub_base->_opnds[0] = op_dst;
      n_sub_base->_opnds[1] = op_src;
      n_sub_base->_bottom_type = _bottom_type;

      n_shift->add_req(n_region, n_sub_base);
      n_shift->_opnds[0] = op_dst;
      n_shift->_opnds[1] = op_dst;
      n_shift->_bottom_type = _bottom_type;

      n_cond_set->add_req(n_region, n_compare, n_shift);
      n_cond_set->_opnds[0] = op_dst;
      n_cond_set->_opnds[1] = op_crx;
      n_cond_set->_opnds[2] = op_dst;
      n_cond_set->_bottom_type = _bottom_type;

      ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
      ra_->set_pair(n_sub_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
      ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
      ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));

      nodes->push(n_compare);
      nodes->push(n_sub_base);
      nodes->push(n_shift);
      nodes->push(n_cond_set);

    } else {
      // before Power 7
      moveRegNode        *n_move     = new (C) moveRegNode();
      cmpP_reg_imm16Node *n_compare  = new (C) cmpP_reg_imm16Node();
      encodeP_shiftNode  *n_shift    = new (C) encodeP_shiftNode();
      cond_sub_baseNode  *n_sub_base = new (C) cond_sub_baseNode();

      n_move->add_req(n_region, n_src);
      n_move->_opnds[0] = op_dst;
      n_move->_opnds[1] = op_src;
      ra_->set_oop(n_move, true); // Until here, 'n_move' still produces an oop.

      n_compare->add_req(n_region, n_src);
      n_compare->add_prec(n_move);

      n_compare->_opnds[0] = op_crx;
      n_compare->_opnds[1] = op_src;
      n_compare->_opnds[2] = new (C) immL16Oper(0);

      n_sub_base->add_req(n_region, n_compare, n_src);
      n_sub_base->_opnds[0] = op_dst;
      n_sub_base->_opnds[1] = op_crx;
      n_sub_base->_opnds[2] = op_src;
      n_sub_base->_bottom_type = _bottom_type;

      n_shift->add_req(n_region, n_sub_base);
      n_shift->_opnds[0] = op_dst;
      n_shift->_opnds[1] = op_dst;
      n_shift->_bottom_type = _bottom_type;

      ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
      ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
      ra_->set_pair(n_sub_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
      ra_->set_pair(n_move->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));

      nodes->push(n_move);
      nodes->push(n_compare);
      nodes->push(n_sub_base);
      nodes->push(n_shift);
    }

    assert(!(ra_->is_oop(this)), "sanity"); // This is not supposed to be GC'ed.
  %}

  enc_class postalloc_expand_encode_oop_not_null(iRegNdst dst, iRegPdst src) %{

    encodeP_subNode *n1 = new (C) encodeP_subNode();
    n1->add_req(n_region, n_src);
    n1->_opnds[0] = op_dst;
    n1->_opnds[1] = op_src;
    n1->_bottom_type = _bottom_type;

    encodeP_shiftNode *n2 = new (C) encodeP_shiftNode();
    n2->add_req(n_region, n1);
    n2->_opnds[0] = op_dst;
    n2->_opnds[1] = op_dst;
    n2->_bottom_type = _bottom_type;
    ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));

    nodes->push(n1);
    nodes->push(n2);
    assert(!(ra_->is_oop(this)), "sanity"); // This is not supposed to be GC'ed.
  %}

  enc_class postalloc_expand_decode_oop(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
    decodeN_shiftNode *n_shift    = new (C) decodeN_shiftNode();
    cmpN_reg_imm0Node *n_compare  = new (C) cmpN_reg_imm0Node();

    n_compare->add_req(n_region, n_src);
    n_compare->_opnds[0] = op_crx;
    n_compare->_opnds[1] = op_src;
    n_compare->_opnds[2] = new (C) immN_0Oper(TypeNarrowOop::NULL_PTR);

    n_shift->add_req(n_region, n_src);
    n_shift->_opnds[0] = op_dst;
    n_shift->_opnds[1] = op_src;
    n_shift->_bottom_type = _bottom_type;

    if (VM_Version::has_isel()) {
      // use isel instruction with Power 7

      decodeN_addNode *n_add_base = new (C) decodeN_addNode();
      n_add_base->add_req(n_region, n_shift);
      n_add_base->_opnds[0] = op_dst;
      n_add_base->_opnds[1] = op_dst;
      n_add_base->_bottom_type = _bottom_type;

      cond_set_0_ptrNode *n_cond_set = new (C) cond_set_0_ptrNode();
      n_cond_set->add_req(n_region, n_compare, n_add_base);
      n_cond_set->_opnds[0] = op_dst;
      n_cond_set->_opnds[1] = op_crx;
      n_cond_set->_opnds[2] = op_dst;
      n_cond_set->_bottom_type = _bottom_type;

      assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
      ra_->set_oop(n_cond_set, true);

      ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
      ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
      ra_->set_pair(n_add_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
      ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));

      nodes->push(n_compare);
      nodes->push(n_shift);
      nodes->push(n_add_base);
      nodes->push(n_cond_set);

    } else {
      // before Power 7
      cond_add_baseNode *n_add_base = new (C) cond_add_baseNode();

      n_add_base->add_req(n_region, n_compare, n_shift);
      n_add_base->_opnds[0] = op_dst;
      n_add_base->_opnds[1] = op_crx;
      n_add_base->_opnds[2] = op_dst;
      n_add_base->_bottom_type = _bottom_type;

      assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
      ra_->set_oop(n_add_base, true);

      ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
      ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
      ra_->set_pair(n_add_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));

      nodes->push(n_compare);
      nodes->push(n_shift);
      nodes->push(n_add_base);
    }
  %}

  enc_class postalloc_expand_decode_oop_not_null(iRegPdst dst, iRegNsrc src) %{
    decodeN_shiftNode *n1 = new (C) decodeN_shiftNode();
    n1->add_req(n_region, n_src);
    n1->_opnds[0] = op_dst;
    n1->_opnds[1] = op_src;
    n1->_bottom_type = _bottom_type;

    decodeN_addNode *n2 = new (C) decodeN_addNode();
    n2->add_req(n_region, n1);
    n2->_opnds[0] = op_dst;
    n2->_opnds[1] = op_dst;
    n2->_bottom_type = _bottom_type;
    ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));

    assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
    ra_->set_oop(n2, true);

    nodes->push(n1);
    nodes->push(n2);
  %}

  enc_class enc_cmove_reg(iRegIdst dst, flagsReg crx, iRegIsrc src, cmpOp cmp) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);

    MacroAssembler _masm(&cbuf);
    int cc        = $cmp$$cmpcode;
    int flags_reg = $crx$$reg;
    Label done;
    assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
    // Branch if not (cmp crx).
    __ bc(cc_to_inverse_boint(cc), cc_to_biint(cc, flags_reg), done);
    __ mr($dst$$Register, $src$$Register);
    // TODO PPC port __ endgroup_if_needed(_size == 12);
    __ bind(done);
  %}

  enc_class enc_cmove_imm(iRegIdst dst, flagsReg crx, immI16 src, cmpOp cmp) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);

    MacroAssembler _masm(&cbuf);
    Label done;
    assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
    // Branch if not (cmp crx).
    __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done);
    __ li($dst$$Register, $src$$constant);
    // TODO PPC port __ endgroup_if_needed(_size == 12);
    __ bind(done);
  %}

  // New atomics.
  enc_class enc_GetAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);

    MacroAssembler _masm(&cbuf);
    Register Rtmp   = R0;
    Register Rres   = $res$$Register;
    Register Rsrc   = $src$$Register;
    Register Rptr   = $mem_ptr$$Register;
    bool RegCollision = (Rres == Rsrc) || (Rres == Rptr);
    Register Rold   = RegCollision ? Rtmp : Rres;

    Label Lretry;
    __ bind(Lretry);
    __ lwarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update());
    __ add(Rtmp, Rsrc, Rold);
    __ stwcx_(Rtmp, Rptr);
    if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
      __ bne_predict_not_taken(CCR0, Lretry);
    } else {
      __ bne(                  CCR0, Lretry);
    }
    if (RegCollision) __ subf(Rres, Rsrc, Rtmp);
    __ fence();
  %}

  enc_class enc_GetAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);

    MacroAssembler _masm(&cbuf);
    Register Rtmp   = R0;
    Register Rres   = $res$$Register;
    Register Rsrc   = $src$$Register;
    Register Rptr   = $mem_ptr$$Register;
    bool RegCollision = (Rres == Rsrc) || (Rres == Rptr);
    Register Rold   = RegCollision ? Rtmp : Rres;

    Label Lretry;
    __ bind(Lretry);
    __ ldarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update());
    __ add(Rtmp, Rsrc, Rold);
    __ stdcx_(Rtmp, Rptr);
    if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
      __ bne_predict_not_taken(CCR0, Lretry);
    } else {
      __ bne(                  CCR0, Lretry);
    }
    if (RegCollision) __ subf(Rres, Rsrc, Rtmp);
    __ fence();
  %}

  enc_class enc_GetAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);

    MacroAssembler _masm(&cbuf);
    Register Rtmp   = R0;
    Register Rres   = $res$$Register;
    Register Rsrc   = $src$$Register;
    Register Rptr   = $mem_ptr$$Register;
    bool RegCollision = (Rres == Rsrc) || (Rres == Rptr);
    Register Rold   = RegCollision ? Rtmp : Rres;

    Label Lretry;
    __ bind(Lretry);
    __ lwarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update());
    __ stwcx_(Rsrc, Rptr);
    if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
      __ bne_predict_not_taken(CCR0, Lretry);
    } else {
      __ bne(                  CCR0, Lretry);
    }
    if (RegCollision) __ mr(Rres, Rtmp);
    __ fence();
  %}

  enc_class enc_GetAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);

    MacroAssembler _masm(&cbuf);
    Register Rtmp   = R0;
    Register Rres   = $res$$Register;
    Register Rsrc   = $src$$Register;
    Register Rptr   = $mem_ptr$$Register;
    bool RegCollision = (Rres == Rsrc) || (Rres == Rptr);
    Register Rold   = RegCollision ? Rtmp : Rres;

    Label Lretry;
    __ bind(Lretry);
    __ ldarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update());
    __ stdcx_(Rsrc, Rptr);
    if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
      __ bne_predict_not_taken(CCR0, Lretry);
    } else {
      __ bne(                  CCR0, Lretry);
    }
    if (RegCollision) __ mr(Rres, Rtmp);
    __ fence();
  %}

  // This enc_class is needed so that scheduler gets proper
  // input mapping for latency computation.
  enc_class enc_andc(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_andc);
    MacroAssembler _masm(&cbuf);
    __ andc($dst$$Register, $src1$$Register, $src2$$Register);
  %}

  enc_class enc_convI2B_regI__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);

    MacroAssembler _masm(&cbuf);

    Label done;
    __ cmpwi($crx$$CondRegister, $src$$Register, 0);
    __ li($dst$$Register, $zero$$constant);
    __ beq($crx$$CondRegister, done);
    __ li($dst$$Register, $notzero$$constant);
    __ bind(done);
  %}

  enc_class enc_convP2B_regP__cmove(iRegIdst dst, iRegPsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);

    MacroAssembler _masm(&cbuf);

    Label done;
    __ cmpdi($crx$$CondRegister, $src$$Register, 0);
    __ li($dst$$Register, $zero$$constant);
    __ beq($crx$$CondRegister, done);
    __ li($dst$$Register, $notzero$$constant);
    __ bind(done);
  %}

  enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsReg crx, stackSlotL mem ) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);

    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    Label done;
    __ bso($crx$$CondRegister, done);
    __ ld($dst$$Register, Idisp, $mem$$base$$Register);
    // TODO PPC port __ endgroup_if_needed(_size == 12);
    __ bind(done);
  %}

  enc_class enc_bc(flagsReg crx, cmpOp cmp, Label lbl) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_bc);

    MacroAssembler _masm(&cbuf);
    Label d;   // dummy
    __ bind(d);
    Label* p = ($lbl$$label);
    // `p' is `NULL' when this encoding class is used only to
    // determine the size of the encoded instruction.
    Label& l = (NULL == p)? d : *(p);
    int cc = $cmp$$cmpcode;
    int flags_reg = $crx$$reg;
    assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
    int bhint = Assembler::bhintNoHint;

    if (UseStaticBranchPredictionForUncommonPathsPPC64) {
      if (_prob <= PROB_NEVER) {
        bhint = Assembler::bhintIsNotTaken;
      } else if (_prob >= PROB_ALWAYS) {
        bhint = Assembler::bhintIsTaken;
      }
    }

    __ bc(Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)),
          cc_to_biint(cc, flags_reg),
          l);
  %}

  enc_class enc_bc_far(flagsReg crx, cmpOp cmp, Label lbl) %{
    // The scheduler doesn't know about branch shortening, so we set the opcode
    // to ppc64Opcode_bc in order to hide this detail from the scheduler.
    // TODO: PPC port $archOpcode(ppc64Opcode_bc);

    MacroAssembler _masm(&cbuf);
    Label d;    // dummy
    __ bind(d);
    Label* p = ($lbl$$label);
    // `p' is `NULL' when this encoding class is used only to
    // determine the size of the encoded instruction.
    Label& l = (NULL == p)? d : *(p);
    int cc = $cmp$$cmpcode;
    int flags_reg = $crx$$reg;
    int bhint = Assembler::bhintNoHint;

    if (UseStaticBranchPredictionForUncommonPathsPPC64) {
      if (_prob <= PROB_NEVER) {
        bhint = Assembler::bhintIsNotTaken;
      } else if (_prob >= PROB_ALWAYS) {
        bhint = Assembler::bhintIsTaken;
      }
    }

    // Tell the conditional far branch to optimize itself when being relocated.
    __ bc_far(Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)),
                  cc_to_biint(cc, flags_reg),
                  l,
                  MacroAssembler::bc_far_optimize_on_relocate);
  %}

  // Branch used with Power6 scheduling (can be shortened without changing the node).
  enc_class enc_bc_short_far(flagsReg crx, cmpOp cmp, Label lbl) %{
    // The scheduler doesn't know about branch shortening, so we set the opcode
    // to ppc64Opcode_bc in order to hide this detail from the scheduler.
    // TODO: PPC port $archOpcode(ppc64Opcode_bc);

    MacroAssembler _masm(&cbuf);
    Label d;   // dummy
    __ bind(d);
    Label* p = ($lbl$$label);
    // `p' is `NULL' when this encoding class is used only to
    // determine the size of the encoded instruction.
    Label& l = (NULL == p)? d : *(p);
    int cc = $cmp$$cmpcode;
    int flags_reg = $crx$$reg;
    int bhint = Assembler::bhintNoHint;

    if (UseStaticBranchPredictionForUncommonPathsPPC64) {
      if (_prob <= PROB_NEVER) {
        bhint = Assembler::bhintIsNotTaken;
      } else if (_prob >= PROB_ALWAYS) {
        bhint = Assembler::bhintIsTaken;
      }
    }

#if 0 // TODO: PPC port
    if (_size == 8) {
      // Tell the conditional far branch to optimize itself when being relocated.
      __ bc_far(Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)),
                    cc_to_biint(cc, flags_reg),
                    l,
                    MacroAssembler::bc_far_optimize_on_relocate);
    } else {
      __ bc    (Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)),
                    cc_to_biint(cc, flags_reg),
                    l);
    }
#endif
    Unimplemented();
  %}

  // Postalloc expand emitter for loading a replicatef float constant from
  // the method's TOC.
  // Enc_class needed as consttanttablebase is not supported by postalloc
  // expand.
  enc_class postalloc_expand_load_replF_constant(iRegLdst dst, immF src, iRegLdst toc) %{
    // Create new nodes.

    // Make an operand with the bit pattern to load as float.
    immLOper *op_repl = new (C) immLOper((jlong)replicate_immF(op_src->constantF()));

    loadConLNodesTuple loadConLNodes =
      loadConLNodesTuple_create(C, ra_, n_toc, op_repl,
                                ra_->get_reg_second(this), ra_->get_reg_first(this));

    // Push new nodes.
    if (loadConLNodes._large_hi) nodes->push(loadConLNodes._large_hi);
    if (loadConLNodes._last)     nodes->push(loadConLNodes._last);

    assert(nodes->length() >= 1, "must have created at least 1 node");
    assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long");
  %}

  // This enc_class is needed so that scheduler gets proper
  // input mapping for latency computation.
  enc_class enc_poll(immI dst, iRegLdst poll) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ld);
    // Fake operand dst needed for PPC scheduler.
    assert($dst$$constant == 0x0, "dst must be 0x0");

    MacroAssembler _masm(&cbuf);
    // Mark the code position where the load from the safepoint
    // polling page was emitted as relocInfo::poll_type.
    __ relocate(relocInfo::poll_type);
    __ load_from_polling_page($poll$$Register);
  %}

  // A Java static call or a runtime call.
  //
  // Branch-and-link relative to a trampoline.
  // The trampoline loads the target address and does a long branch to there.
  // In case we call java, the trampoline branches to a interpreter_stub
  // which loads the inline cache and the real call target from the constant pool.
  //
  // This basically looks like this:
  //
  // >>>> consts      -+  -+
  //                   |   |- offset1
  // [call target1]    | <-+
  // [IC cache]        |- offset2
  // [call target2] <--+
  //
  // <<<< consts
  // >>>> insts
  //
  // bl offset16               -+  -+             ??? // How many bits available?
  //                            |   |
  // <<<< insts                 |   |
  // >>>> stubs                 |   |
  //                            |   |- trampoline_stub_Reloc
  // trampoline stub:           | <-+
  //   r2 = toc                 |
  //   r2 = [r2 + offset1]      |       // Load call target1 from const section
  //   mtctr r2                 |
  //   bctr                     |- static_stub_Reloc
  // comp_to_interp_stub:   <---+
  //   r1 = toc
  //   ICreg = [r1 + IC_offset]         // Load IC from const section
  //   r1    = [r1 + offset2]           // Load call target2 from const section
  //   mtctr r1
  //   bctr
  //
  // <<<< stubs
  //
  // The call instruction in the code either
  // - Branches directly to a compiled method if the offset is encodable in instruction.
  // - Branches to the trampoline stub if the offset to the compiled method is not encodable.
  // - Branches to the compiled_to_interp stub if the target is interpreted.
  //
  // Further there are three relocations from the loads to the constants in
  // the constant section.
  //
  // Usage of r1 and r2 in the stubs allows to distinguish them.
  enc_class enc_java_static_call(method meth) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_bl);

    MacroAssembler _masm(&cbuf);
    address entry_point = (address)$meth$$method;

    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      emit_call_with_trampoline_stub(_masm, entry_point, relocInfo::runtime_call_type);
    } else {
      // Remember the offset not the address.
      const int start_offset = __ offset();
      // The trampoline stub.
      if (!Compile::current()->in_scratch_emit_size()) {
        // No entry point given, use the current pc.
        // Make sure branch fits into
        if (entry_point == 0) entry_point = __ pc();

        // Put the entry point as a constant into the constant pool.
        const address entry_point_toc_addr   = __ address_constant(entry_point, RelocationHolder::none);
        const int     entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);

        // Emit the trampoline stub which will be related to the branch-and-link below.
        CallStubImpl::emit_trampoline_stub(_masm, entry_point_toc_offset, start_offset);
        __ relocate(_optimized_virtual ?
                    relocInfo::opt_virtual_call_type : relocInfo::static_call_type);
      }

      // The real call.
      // Note: At this point we do not have the address of the trampoline
      // stub, and the entry point might be too far away for bl, so __ pc()
      // serves as dummy and the bl will be patched later.
      cbuf.set_insts_mark();
      __ bl(__ pc());  // Emits a relocation.

      // The stub for call to interpreter.
      CompiledStaticCall::emit_to_interp_stub(cbuf);
    }
  %}

  // Emit a method handle call.
  //
  // Method handle calls from compiled to compiled are going thru a
  // c2i -> i2c adapter, extending the frame for their arguments. The
  // caller however, returns directly to the compiled callee, that has
  // to cope with the extended frame. We restore the original frame by
  // loading the callers sp and adding the calculated framesize.
  enc_class enc_java_handle_call(method meth) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);

    MacroAssembler _masm(&cbuf);
    address entry_point = (address)$meth$$method;

    // Remember the offset not the address.
    const int start_offset = __ offset();
    // The trampoline stub.
    if (!ra_->C->in_scratch_emit_size()) {
      // No entry point given, use the current pc.
      // Make sure branch fits into
      if (entry_point == 0) entry_point = __ pc();

      // Put the entry point as a constant into the constant pool.
      const address entry_point_toc_addr   = __ address_constant(entry_point, RelocationHolder::none);
      const int     entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);

      // Emit the trampoline stub which will be related to the branch-and-link below.
      CallStubImpl::emit_trampoline_stub(_masm, entry_point_toc_offset, start_offset);
      assert(_optimized_virtual, "methodHandle call should be a virtual call");
      __ relocate(relocInfo::opt_virtual_call_type);
    }

    // The real call.
    // Note: At this point we do not have the address of the trampoline
    // stub, and the entry point might be too far away for bl, so __ pc()
    // serves as dummy and the bl will be patched later.
    cbuf.set_insts_mark();
    __ bl(__ pc());  // Emits a relocation.

    assert(_method, "execute next statement conditionally");
    // The stub for call to interpreter.
    CompiledStaticCall::emit_to_interp_stub(cbuf);

    // Restore original sp.
    __ ld(R11_scratch1, 0, R1_SP); // Load caller sp.
    const long framesize = ra_->C->frame_slots() << LogBytesPerInt;
    unsigned int bytes = (unsigned int)framesize;
    long offset = Assembler::align_addr(bytes, frame::alignment_in_bytes);
    if (Assembler::is_simm(-offset, 16)) {
      __ addi(R1_SP, R11_scratch1, -offset);
    } else {
      __ load_const_optimized(R12_scratch2, -offset);
      __ add(R1_SP, R11_scratch1, R12_scratch2);
    }
#ifdef ASSERT
  __ ld(R12_scratch2, 0, R1_SP); // Load from unextended_sp.
  __ cmpd(CCR0, R11_scratch1, R12_scratch2);
  __ asm_assert_eq("backlink changed", 0x8000);
#endif
    // If fails should store backlink before unextending.

    if (ra_->C->env()->failing()) {
      return;
    }
  %}

  // Second node of expanded dynamic call - the call.
  enc_class enc_java_dynamic_call_sched(method meth) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_bl);

    MacroAssembler _masm(&cbuf);

    if (!ra_->C->in_scratch_emit_size()) {
      // Create a call trampoline stub for the given method.
      const address entry_point = !($meth$$method) ? 0 : (address)$meth$$method;
      const address entry_point_const = __ address_constant(entry_point, RelocationHolder::none);
      const int entry_point_const_toc_offset = __ offset_to_method_toc(entry_point_const);
      CallStubImpl::emit_trampoline_stub(_masm, entry_point_const_toc_offset, __ offset());

      if (ra_->C->env()->failing())
        return;

      // Build relocation at call site with ic position as data.
      assert((_load_ic_hi_node != NULL && _load_ic_node == NULL) ||
             (_load_ic_hi_node == NULL && _load_ic_node != NULL),
             "must have one, but can't have both");
      assert((_load_ic_hi_node != NULL && _load_ic_hi_node->_cbuf_insts_offset != -1) ||
             (_load_ic_node != NULL    && _load_ic_node->_cbuf_insts_offset != -1),
             "must contain instruction offset");
      const int virtual_call_oop_addr_offset = _load_ic_hi_node != NULL
        ? _load_ic_hi_node->_cbuf_insts_offset
        : _load_ic_node->_cbuf_insts_offset;
      const address virtual_call_oop_addr = __ addr_at(virtual_call_oop_addr_offset);
      assert(MacroAssembler::is_load_const_from_method_toc_at(virtual_call_oop_addr),
             "should be load from TOC");

      __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr));
    }

    // At this point I do not have the address of the trampoline stub,
    // and the entry point might be too far away for bl. Pc() serves
    // as dummy and bl will be patched later.
    __ bl((address) __ pc());
  %}

  // postalloc expand emitter for virtual calls.
  enc_class postalloc_expand_java_dynamic_call_sched(method meth, iRegLdst toc) %{

    // Create the nodes for loading the IC from the TOC.
    loadConLNodesTuple loadConLNodes_IC =
      loadConLNodesTuple_create(C, ra_, n_toc, new (C) immLOper((jlong)Universe::non_oop_word()),
                                OptoReg::Name(R19_H_num), OptoReg::Name(R19_num));

    // Create the call node.
    CallDynamicJavaDirectSchedNode *call = new (C) CallDynamicJavaDirectSchedNode();
    call->_method_handle_invoke = _method_handle_invoke;
    call->_vtable_index      = _vtable_index;
    call->_method            = _method;
    call->_bci               = _bci;
    call->_optimized_virtual = _optimized_virtual;
    call->_tf                = _tf;
    call->_entry_point       = _entry_point;
    call->_cnt               = _cnt;
    call->_argsize           = _argsize;
    call->_oop_map           = _oop_map;
    call->_jvms              = _jvms;
    call->_jvmadj            = _jvmadj;
    call->_in_rms            = _in_rms;
    call->_nesting           = _nesting;

    // New call needs all inputs of old call.
    // Req...
    for (uint i = 0; i < req(); ++i) {
      // The expanded node does not need toc any more.
      // Add the inline cache constant here instead. This expresses the
      // register of the inline cache must be live at the call.
      // Else we would have to adapt JVMState by -1.
      if (i == mach_constant_base_node_input()) {
        call->add_req(loadConLNodes_IC._last);
      } else {
        call->add_req(in(i));
      }
    }
    // ...as well as prec
    for (uint i = req(); i < len(); ++i) {
      call->add_prec(in(i));
    }

    // Remember nodes loading the inline cache into r19.
    call->_load_ic_hi_node = loadConLNodes_IC._large_hi;
    call->_load_ic_node    = loadConLNodes_IC._small;

    // Operands for new nodes.
    call->_opnds[0] = _opnds[0];
    call->_opnds[1] = _opnds[1];

    // Only the inline cache is associated with a register.
    assert(Matcher::inline_cache_reg() == OptoReg::Name(R19_num), "ic reg should be R19");

    // Push new nodes.
    if (loadConLNodes_IC._large_hi) nodes->push(loadConLNodes_IC._large_hi);
    if (loadConLNodes_IC._last)     nodes->push(loadConLNodes_IC._last);
    nodes->push(call);
  %}

  // Compound version of call dynamic
  // Toc is only passed so that it can be used in ins_encode statement.
  // In the code we have to use $constanttablebase.
  enc_class enc_java_dynamic_call(method meth, iRegLdst toc) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    MacroAssembler _masm(&cbuf);
    int start_offset = __ offset();

    Register Rtoc = (ra_) ? $constanttablebase : R2_TOC;
#if 0
    int vtable_index = this->_vtable_index;
    if (_vtable_index < 0) {
      // Must be invalid_vtable_index, not nonvirtual_vtable_index.
      assert(_vtable_index == Method::invalid_vtable_index, "correct sentinel value");
      Register ic_reg = as_Register(Matcher::inline_cache_reg_encode());

      // Virtual call relocation will point to ic load.
      address virtual_call_meta_addr = __ pc();
      // Load a clear inline cache.
      AddressLiteral empty_ic((address) Universe::non_oop_word());
      __ load_const_from_method_toc(ic_reg, empty_ic, Rtoc);
      // CALL to fixup routine.  Fixup routine uses ScopeDesc info
      // to determine who we intended to call.
      __ relocate(virtual_call_Relocation::spec(virtual_call_meta_addr));
      emit_call_with_trampoline_stub(_masm, (address)$meth$$method, relocInfo::none);
      assert(((MachCallDynamicJavaNode*)this)->ret_addr_offset() == __ offset() - start_offset,
             "Fix constant in ret_addr_offset()");
    } else {
      assert(!UseInlineCaches, "expect vtable calls only if not using ICs");
      // Go thru the vtable. Get receiver klass. Receiver already
      // checked for non-null. If we'll go thru a C2I adapter, the
      // interpreter expects method in R19_method.

      __ load_klass(R11_scratch1, R3);

      int entry_offset = InstanceKlass::vtable_start_offset() + _vtable_index * vtableEntry::size();
      int v_off = entry_offset * wordSize + vtableEntry::method_offset_in_bytes();
      __ li(R19_method, v_off);
      __ ldx(R19_method/*method oop*/, R19_method/*method offset*/, R11_scratch1/*class*/);
      // NOTE: for vtable dispatches, the vtable entry will never be
      // null. However it may very well end up in handle_wrong_method
      // if the method is abstract for the particular class.
      __ ld(R11_scratch1, in_bytes(Method::from_compiled_offset()), R19_method);
      // Call target. Either compiled code or C2I adapter.
      __ mtctr(R11_scratch1);
      __ bctrl();
      if (((MachCallDynamicJavaNode*)this)->ret_addr_offset() != __ offset() - start_offset) {
        tty->print(" %d, %d\n", ((MachCallDynamicJavaNode*)this)->ret_addr_offset(),__ offset() - start_offset);
      }
      assert(((MachCallDynamicJavaNode*)this)->ret_addr_offset() == __ offset() - start_offset,
             "Fix constant in ret_addr_offset()");
    }
#endif
    Unimplemented();  // ret_addr_offset not yet fixed. Depends on compressed oops (load klass!).
  %}

  // a runtime call
  enc_class enc_java_to_runtime_call (method meth) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);

    MacroAssembler _masm(&cbuf);
    const address start_pc = __ pc();

#if defined(ABI_ELFv2)
    address entry= !($meth$$method) ? NULL : (address)$meth$$method;
    __ call_c(entry, relocInfo::runtime_call_type);
#else
    // The function we're going to call.
    FunctionDescriptor fdtemp;
    const FunctionDescriptor* fd = !($meth$$method) ? &fdtemp : (FunctionDescriptor*)$meth$$method;

    Register Rtoc = R12_scratch2;
    // Calculate the method's TOC.
    __ calculate_address_from_global_toc(Rtoc, __ method_toc());
    // Put entry, env, toc into the constant pool, this needs up to 3 constant
    // pool entries; call_c_using_toc will optimize the call.
    __ call_c_using_toc(fd, relocInfo::runtime_call_type, Rtoc);
#endif

    // Check the ret_addr_offset.
    assert(((MachCallRuntimeNode*)this)->ret_addr_offset() ==  __ last_calls_return_pc() - start_pc,
           "Fix constant in ret_addr_offset()");
  %}

  // Move to ctr for leaf call.
  // This enc_class is needed so that scheduler gets proper
  // input mapping for latency computation.
  enc_class enc_leaf_call_mtctr(iRegLsrc src) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_mtctr);
    MacroAssembler _masm(&cbuf);
    __ mtctr($src$$Register);
  %}

  // Postalloc expand emitter for runtime leaf calls.
  enc_class postalloc_expand_java_to_runtime_call(method meth, iRegLdst toc) %{
    loadConLNodesTuple loadConLNodes_Entry;
#if defined(ABI_ELFv2)
    jlong entry_address = (jlong) this->entry_point();
    assert(entry_address, "need address here");
    loadConLNodes_Entry = loadConLNodesTuple_create(C, ra_, n_toc, new (C) immLOper(entry_address),
                                                    OptoReg::Name(R12_H_num), OptoReg::Name(R12_num));
#else
    // Get the struct that describes the function we are about to call.
    FunctionDescriptor* fd = (FunctionDescriptor*) this->entry_point();
    assert(fd, "need fd here");
    jlong entry_address = (jlong) fd->entry();
    // new nodes
    loadConLNodesTuple loadConLNodes_Env;
    loadConLNodesTuple loadConLNodes_Toc;

    // Create nodes and operands for loading the entry point.
    loadConLNodes_Entry = loadConLNodesTuple_create(C, ra_, n_toc, new (C) immLOper(entry_address),
                                                    OptoReg::Name(R12_H_num), OptoReg::Name(R12_num));


    // Create nodes and operands for loading the env pointer.
    if (fd->env() != NULL) {
      loadConLNodes_Env = loadConLNodesTuple_create(C, ra_, n_toc, new (C) immLOper((jlong) fd->env()),
                                                    OptoReg::Name(R11_H_num), OptoReg::Name(R11_num));
    } else {
      loadConLNodes_Env._large_hi = NULL;
      loadConLNodes_Env._large_lo = NULL;
      loadConLNodes_Env._small    = NULL;
      loadConLNodes_Env._last = new (C) loadConL16Node();
      loadConLNodes_Env._last->_opnds[0] = new (C) iRegLdstOper();
      loadConLNodes_Env._last->_opnds[1] = new (C) immL16Oper(0);
      ra_->set_pair(loadConLNodes_Env._last->_idx, OptoReg::Name(R11_H_num), OptoReg::Name(R11_num));
    }

    // Create nodes and operands for loading the Toc point.
    loadConLNodes_Toc = loadConLNodesTuple_create(C, ra_, n_toc, new (C) immLOper((jlong) fd->toc()),
                                                  OptoReg::Name(R2_H_num), OptoReg::Name(R2_num));
#endif // ABI_ELFv2
    // mtctr node
    MachNode *mtctr = new (C) CallLeafDirect_mtctrNode();

    assert(loadConLNodes_Entry._last != NULL, "entry must exist");
    mtctr->add_req(0, loadConLNodes_Entry._last);

    mtctr->_opnds[0] = new (C) iRegLdstOper();
    mtctr->_opnds[1] = new (C) iRegLdstOper();

    // call node
    MachCallLeafNode *call = new (C) CallLeafDirectNode();

    call->_opnds[0] = _opnds[0];
    call->_opnds[1] = new (C) methodOper((intptr_t) entry_address); // May get set later.

    // Make the new call node look like the old one.
    call->_name        = _name;
    call->_tf          = _tf;
    call->_entry_point = _entry_point;
    call->_cnt         = _cnt;
    call->_argsize     = _argsize;
    call->_oop_map     = _oop_map;
    guarantee(!_jvms, "You must clone the jvms and adapt the offsets by fix_jvms().");
    call->_jvms        = NULL;
    call->_jvmadj      = _jvmadj;
    call->_in_rms      = _in_rms;
    call->_nesting     = _nesting;


    // New call needs all inputs of old call.
    // Req...
    for (uint i = 0; i < req(); ++i) {
      if (i != mach_constant_base_node_input()) {
        call->add_req(in(i));
      }
    }

    // These must be reqired edges, as the registers are live up to
    // the call. Else the constants are handled as kills.
    call->add_req(mtctr);
#if !defined(ABI_ELFv2)
    call->add_req(loadConLNodes_Env._last);
    call->add_req(loadConLNodes_Toc._last);
#endif

    // ...as well as prec
    for (uint i = req(); i < len(); ++i) {
      call->add_prec(in(i));
    }

    // registers
    ra_->set1(mtctr->_idx, OptoReg::Name(SR_CTR_num));

    // Insert the new nodes.
    if (loadConLNodes_Entry._large_hi) nodes->push(loadConLNodes_Entry._large_hi);
    if (loadConLNodes_Entry._last)     nodes->push(loadConLNodes_Entry._last);
#if !defined(ABI_ELFv2)
    if (loadConLNodes_Env._large_hi)   nodes->push(loadConLNodes_Env._large_hi);
    if (loadConLNodes_Env._last)       nodes->push(loadConLNodes_Env._last);
    if (loadConLNodes_Toc._large_hi)   nodes->push(loadConLNodes_Toc._large_hi);
    if (loadConLNodes_Toc._last)       nodes->push(loadConLNodes_Toc._last);
#endif
    nodes->push(mtctr);
    nodes->push(call);
  %}
%}

//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.

frame %{
  // What direction does stack grow in (assumed to be same for native & Java).
  stack_direction(TOWARDS_LOW);

  // These two registers define part of the calling convention between
  // compiled code and the interpreter.

  // Inline Cache Register or method for I2C.
  inline_cache_reg(R19); // R19_method

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R19); // R19_method

  // Optional: name the operand used by cisc-spilling to access
  // [stack_pointer + offset].
  cisc_spilling_operand_name(indOffset);

  // Number of stack slots consumed by a Monitor enter.
  sync_stack_slots((frame::jit_monitor_size / VMRegImpl::stack_slot_size));

  // Compiled code's Frame Pointer.
  frame_pointer(R1); // R1_SP

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors. I2CAdaptors convert from
  // interpreted java to compiled java.
  //
  // R14_state holds pointer to caller's cInterpreter.
  interpreter_frame_pointer(R14); // R14_state

  stack_alignment(frame::alignment_in_bytes);

  in_preserve_stack_slots((frame::jit_in_preserve_size / VMRegImpl::stack_slot_size));

  // Number of outgoing stack slots killed above the
  // out_preserve_stack_slots for calls to C. Supports the var-args
  // backing area for register parms.
  //
  varargs_C_out_slots_killed(((frame::abi_reg_args_size - frame::jit_out_preserve_size) / VMRegImpl::stack_slot_size));

  // The after-PROLOG location of the return address. Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  //
  // A: Link register is stored in stack slot ...
  // M:  ... but it's in the caller's frame according to PPC-64 ABI.
  // J: Therefore, we make sure that the link register is also in R11_scratch1
  //    at the end of the prolog.
  // B: We use R20, now.
  //return_addr(REG R20);

  // G: After reading the comments made by all the luminaries on their
  //    failure to tell the compiler where the return address really is,
  //    I hardly dare to try myself.  However, I'm convinced it's in slot
  //    4 what apparently works and saves us some spills.
  return_addr(STACK 4);

  // This is the body of the function
  //
  // void Matcher::calling_convention(OptoRegPair* sig, // array of ideal regs
  //                                  uint length,      // length of array
  //                                  bool is_outgoing)
  //
  // The `sig' array is to be updated. sig[j] represents the location
  // of the j-th argument, either a register or a stack slot.

  // Comment taken from i486.ad:
  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots. Passed an array
  // of ideal registers called "sig" and a "length" count. Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE. Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing. Just pass false.
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  // Comment taken from i486.ad:
  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots. Passed an array
  // of ideal registers called "sig" and a "length" count. Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE. Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing.
    // C argument in register AND stack slot.
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of native (C/C++) and interpreter return values. This
  // is specified to be the same as Java. In the 32-bit VM, long
  // values are actually returned from native calls in O0:O1 and
  // returned to the interpreter in I0:I1. The copying to and from
  // the register pairs is done by the appropriate call and epilog
  // opcodes. This simplifies the register allocator.
  c_return_value %{
    assert((ideal_reg >= Op_RegI && ideal_reg <= Op_RegL) ||
            (ideal_reg == Op_RegN && Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0),
            "only return normal values");
    // enum names from opcodes.hpp:    Op_Node Op_Set Op_RegN       Op_RegI       Op_RegP       Op_RegF       Op_RegD       Op_RegL
    static int typeToRegLo[Op_RegL+1] = { 0,   0,     R3_num,   R3_num,   R3_num,   F1_num,   F1_num,   R3_num };
    static int typeToRegHi[Op_RegL+1] = { 0,   0,     OptoReg::Bad, R3_H_num, R3_H_num, OptoReg::Bad, F1_H_num, R3_H_num };
    return OptoRegPair(typeToRegHi[ideal_reg], typeToRegLo[ideal_reg]);
  %}

  // Location of compiled Java return values.  Same as C
  return_value %{
    assert((ideal_reg >= Op_RegI && ideal_reg <= Op_RegL) ||
            (ideal_reg == Op_RegN && Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0),
            "only return normal values");
    // enum names from opcodes.hpp:    Op_Node Op_Set Op_RegN       Op_RegI       Op_RegP       Op_RegF       Op_RegD       Op_RegL
    static int typeToRegLo[Op_RegL+1] = { 0,   0,     R3_num,   R3_num,   R3_num,   F1_num,   F1_num,   R3_num };
    static int typeToRegHi[Op_RegL+1] = { 0,   0,     OptoReg::Bad, R3_H_num, R3_H_num, OptoReg::Bad, F1_H_num, R3_H_num };
    return OptoRegPair(typeToRegHi[ideal_reg], typeToRegLo[ideal_reg]);
  %}
%}


//----------ATTRIBUTES---------------------------------------------------------

//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(1);          // Required cost attribute.

//----------Instruction Attributes---------------------------------------------

// Cost attribute. required.
ins_attrib ins_cost(DEFAULT_COST);

// Is this instruction a non-matching short branch variant of some
// long branch? Not required.
ins_attrib ins_short_branch(0);

ins_attrib ins_is_TrapBasedCheckNode(true);

// Number of constants.
// This instruction uses the given number of constants
// (optional attribute).
// This is needed to determine in time whether the constant pool will
// exceed 4000 entries. Before postalloc_expand the overall number of constants
// is determined. It's also used to compute the constant pool size
// in Output().
ins_attrib ins_num_consts(0);

// Required alignment attribute (must be a power of 2) specifies the
// alignment that some part of the instruction (not necessarily the
// start) requires. If > 1, a compute_padding() function must be
// provided for the instruction.
ins_attrib ins_alignment(1);

// Enforce/prohibit rematerializations.
// - If an instruction is attributed with 'ins_cannot_rematerialize(true)'
//   then rematerialization of that instruction is prohibited and the
//   instruction's value will be spilled if necessary.
//   Causes that MachNode::rematerialize() returns false.
// - If an instruction is attributed with 'ins_should_rematerialize(true)'
//   then rematerialization should be enforced and a copy of the instruction
//   should be inserted if possible; rematerialization is not guaranteed.
//   Note: this may result in rematerializations in front of every use.
//   Causes that MachNode::rematerialize() can return true.
// (optional attribute)
ins_attrib ins_cannot_rematerialize(false);
ins_attrib ins_should_rematerialize(false);

// Instruction has variable size depending on alignment.
ins_attrib ins_variable_size_depending_on_alignment(false);

// Instruction is a nop.
ins_attrib ins_is_nop(false);

// Instruction is mapped to a MachIfFastLock node (instead of MachFastLock).
ins_attrib ins_use_mach_if_fast_lock_node(false);

// Field for the toc offset of a constant.
//
// This is needed if the toc offset is not encodable as an immediate in
// the PPC load instruction. If so, the upper (hi) bits of the offset are
// added to the toc, and from this a load with immediate is performed.
// With postalloc expand, we get two nodes that require the same offset
// but which don't know about each other. The offset is only known
// when the constant is added to the constant pool during emitting.
// It is generated in the 'hi'-node adding the upper bits, and saved
// in this node.  The 'lo'-node has a link to the 'hi'-node and reads
// the offset from there when it gets encoded.
ins_attrib ins_field_const_toc_offset(0);
ins_attrib ins_field_const_toc_offset_hi_node(0);

// A field that can hold the instructions offset in the code buffer.
// Set in the nodes emitter.
ins_attrib ins_field_cbuf_insts_offset(-1);

// Fields for referencing a call's load-IC-node.
// If the toc offset can not be encoded as an immediate in a load, we
// use two nodes.
ins_attrib ins_field_load_ic_hi_node(0);
ins_attrib ins_field_load_ic_node(0);

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct
// parsing in the ADLC because operands constitute user defined types
// which are used in instruction definitions.
//
// Formats are generated automatically for constants and base registers.

//----------Simple Operands----------------------------------------------------
// Immediate Operands

// Integer Immediate: 32-bit
operand immI() %{
  match(ConI);
  op_cost(40);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI8() %{
  predicate(Assembler::is_simm(n->get_int(), 8));
  op_cost(0);
  match(ConI);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 16-bit
operand immI16() %{
  predicate(Assembler::is_simm(n->get_int(), 16));
  op_cost(0);
  match(ConI);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 32-bit, where lowest 16 bits are 0x0000.
operand immIhi16() %{
  predicate(((n->get_int() & 0xffff0000) != 0) && ((n->get_int() & 0xffff) == 0));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immInegpow2() %{
  predicate(is_power_of_2_long((jlong) (julong) (juint) (-(n->get_int()))));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immIpow2minus1() %{
  predicate(is_power_of_2_long((((jlong) (n->get_int()))+1)));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immIpowerOf2() %{
  predicate(is_power_of_2_long((((jlong) (julong) (juint) (n->get_int())))));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned Integer Immediate: the values 0-31
operand uimmI5() %{
  predicate(Assembler::is_uimm(n->get_int(), 5));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned Integer Immediate: 6-bit
operand uimmI6() %{
  predicate(Assembler::is_uimm(n->get_int(), 6));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned Integer Immediate:  6-bit int, greater than 32
operand uimmI6_ge32() %{
  predicate(Assembler::is_uimm(n->get_int(), 6) && n->get_int() >= 32);
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned Integer Immediate: 15-bit
operand uimmI15() %{
  predicate(Assembler::is_uimm(n->get_int(), 15));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned Integer Immediate: 16-bit
operand uimmI16() %{
  predicate(Assembler::is_uimm(n->get_int(), 16));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'int 0'.
operand immI_0() %{
  predicate(n->get_int() == 0);
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'int 1'.
operand immI_1() %{
  predicate(n->get_int() == 1);
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'int -1'.
operand immI_minus1() %{
  predicate(n->get_int() == -1);
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// int value 16.
operand immI_16() %{
  predicate(n->get_int() == 16);
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// int value 24.
operand immI_24() %{
  predicate(n->get_int() == 24);
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Compressed oops constants
// Pointer Immediate
operand immN() %{
  match(ConN);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immN_0() %{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Compressed klass constants
operand immNKlass() %{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// This operand can be used to avoid matching of an instruct
// with chain rule.
operand immNKlass_NM() %{
  match(ConNKlass);
  predicate(false);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate: 64-bit
operand immP() %{
  match(ConP);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Operand to avoid match of loadConP.
// This operand can be used to avoid matching of an instruct
// with chain rule.
operand immP_NM() %{
  match(ConP);
  predicate(false);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// costant 'pointer 0'.
operand immP_0() %{
  predicate(n->get_ptr() == 0);
  match(ConP);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// pointer 0x0 or 0x1
operand immP_0or1() %{
  predicate((n->get_ptr() == 0) || (n->get_ptr() == 1));
  match(ConP);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immL() %{
  match(ConL);
  op_cost(40);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: 16-bit
operand immL16() %{
  predicate(Assembler::is_simm(n->get_long(), 16));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: 16-bit, 4-aligned
operand immL16Alg4() %{
  predicate(Assembler::is_simm(n->get_long(), 16) && ((n->get_long() & 0x3) == 0));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: 32-bit, where lowest 16 bits are 0x0000.
operand immL32hi16() %{
  predicate(Assembler::is_simm(n->get_long(), 32) && ((n->get_long() & 0xffffL) == 0L));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: 32-bit
operand immL32() %{
  predicate(Assembler::is_simm(n->get_long(), 32));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: 64-bit, where highest 16 bits are not 0x0000.
operand immLhighest16() %{
  predicate((n->get_long() & 0xffff000000000000L) != 0L && (n->get_long() & 0x0000ffffffffffffL) == 0L);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immLnegpow2() %{
  predicate(is_power_of_2_long((jlong)-(n->get_long())));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immLpow2minus1() %{
  predicate(is_power_of_2_long((((jlong) (n->get_long()))+1)) &&
            (n->get_long() != (jlong)0xffffffffffffffffL));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'long 0'.
operand immL_0() %{
  predicate(n->get_long() == 0L);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constat ' long -1'.
operand immL_minus1() %{
  predicate(n->get_long() == -1L);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned Long Immediate: 16-bit
operand uimmL16() %{
  predicate(Assembler::is_uimm(n->get_long(), 16));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF() %{
  match(ConF);
  op_cost(40);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'float +0.0'.
operand immF_0() %{
  predicate((n->getf() == 0) &&
            (fpclassify(n->getf()) == FP_ZERO) && (signbit(n->getf()) == 0));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immD() %{
  match(ConD);
  op_cost(40);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Register Operands
// Integer Destination Register
// See definition of reg_class bits32_reg_rw.
operand iRegIdst() %{
  constraint(ALLOC_IN_RC(bits32_reg_rw));
  match(RegI);
  match(rscratch1RegI);
  match(rscratch2RegI);
  match(rarg1RegI);
  match(rarg2RegI);
  match(rarg3RegI);
  match(rarg4RegI);
  format %{ %}
  interface(REG_INTER);
%}

// Integer Source Register
// See definition of reg_class bits32_reg_ro.
operand iRegIsrc() %{
  constraint(ALLOC_IN_RC(bits32_reg_ro));
  match(RegI);
  match(rscratch1RegI);
  match(rscratch2RegI);
  match(rarg1RegI);
  match(rarg2RegI);
  match(rarg3RegI);
  match(rarg4RegI);
  format %{ %}
  interface(REG_INTER);
%}

operand rscratch1RegI() %{
  constraint(ALLOC_IN_RC(rscratch1_bits32_reg));
  match(iRegIdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rscratch2RegI() %{
  constraint(ALLOC_IN_RC(rscratch2_bits32_reg));
  match(iRegIdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg1RegI() %{
  constraint(ALLOC_IN_RC(rarg1_bits32_reg));
  match(iRegIdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg2RegI() %{
  constraint(ALLOC_IN_RC(rarg2_bits32_reg));
  match(iRegIdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg3RegI() %{
  constraint(ALLOC_IN_RC(rarg3_bits32_reg));
  match(iRegIdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg4RegI() %{
  constraint(ALLOC_IN_RC(rarg4_bits32_reg));
  match(iRegIdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg1RegL() %{
  constraint(ALLOC_IN_RC(rarg1_bits64_reg));
  match(iRegLdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg2RegL() %{
  constraint(ALLOC_IN_RC(rarg2_bits64_reg));
  match(iRegLdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg3RegL() %{
  constraint(ALLOC_IN_RC(rarg3_bits64_reg));
  match(iRegLdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg4RegL() %{
  constraint(ALLOC_IN_RC(rarg4_bits64_reg));
  match(iRegLdst);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer Destination Register
// See definition of reg_class bits64_reg_rw.
operand iRegPdst() %{
  constraint(ALLOC_IN_RC(bits64_reg_rw));
  match(RegP);
  match(rscratch1RegP);
  match(rscratch2RegP);
  match(rarg1RegP);
  match(rarg2RegP);
  match(rarg3RegP);
  match(rarg4RegP);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer Destination Register
// Operand not using r11 and r12 (killed in epilog).
operand iRegPdstNoScratch() %{
  constraint(ALLOC_IN_RC(bits64_reg_leaf_call));
  match(RegP);
  match(rarg1RegP);
  match(rarg2RegP);
  match(rarg3RegP);
  match(rarg4RegP);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer Source Register
// See definition of reg_class bits64_reg_ro.
operand iRegPsrc() %{
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(RegP);
  match(iRegPdst);
  match(rscratch1RegP);
  match(rscratch2RegP);
  match(rarg1RegP);
  match(rarg2RegP);
  match(rarg3RegP);
  match(rarg4RegP);
  match(threadRegP);
  format %{ %}
  interface(REG_INTER);
%}

// Thread operand.
operand threadRegP() %{
  constraint(ALLOC_IN_RC(thread_bits64_reg));
  match(iRegPdst);
  format %{ "R16" %}
  interface(REG_INTER);
%}

operand rscratch1RegP() %{
  constraint(ALLOC_IN_RC(rscratch1_bits64_reg));
  match(iRegPdst);
  format %{ "R11" %}
  interface(REG_INTER);
%}

operand rscratch2RegP() %{
  constraint(ALLOC_IN_RC(rscratch2_bits64_reg));
  match(iRegPdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg1RegP() %{
  constraint(ALLOC_IN_RC(rarg1_bits64_reg));
  match(iRegPdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg2RegP() %{
  constraint(ALLOC_IN_RC(rarg2_bits64_reg));
  match(iRegPdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg3RegP() %{
  constraint(ALLOC_IN_RC(rarg3_bits64_reg));
  match(iRegPdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg4RegP() %{
  constraint(ALLOC_IN_RC(rarg4_bits64_reg));
  match(iRegPdst);
  format %{ %}
  interface(REG_INTER);
%}

operand iRegNsrc() %{
  constraint(ALLOC_IN_RC(bits32_reg_ro));
  match(RegN);
  match(iRegNdst);

  format %{ %}
  interface(REG_INTER);
%}

operand iRegNdst() %{
  constraint(ALLOC_IN_RC(bits32_reg_rw));
  match(RegN);

  format %{ %}
  interface(REG_INTER);
%}

// Long Destination Register
// See definition of reg_class bits64_reg_rw.
operand iRegLdst() %{
  constraint(ALLOC_IN_RC(bits64_reg_rw));
  match(RegL);
  match(rscratch1RegL);
  match(rscratch2RegL);
  format %{ %}
  interface(REG_INTER);
%}

// Long Source Register
// See definition of reg_class bits64_reg_ro.
operand iRegLsrc() %{
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(RegL);
  match(iRegLdst);
  match(rscratch1RegL);
  match(rscratch2RegL);
  format %{ %}
  interface(REG_INTER);
%}

// Special operand for ConvL2I.
operand iRegL2Isrc(iRegLsrc reg) %{
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(ConvL2I reg);
  format %{ "ConvL2I($reg)" %}
  interface(REG_INTER)
%}

operand rscratch1RegL() %{
  constraint(ALLOC_IN_RC(rscratch1_bits64_reg));
  match(RegL);
  format %{ %}
  interface(REG_INTER);
%}

operand rscratch2RegL() %{
  constraint(ALLOC_IN_RC(rscratch2_bits64_reg));
  match(RegL);
  format %{ %}
  interface(REG_INTER);
%}

// Condition Code Flag Registers
operand flagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ %}
  interface(REG_INTER);
%}

// Condition Code Flag Register CR0
operand flagsRegCR0() %{
  constraint(ALLOC_IN_RC(int_flags_CR0));
  match(RegFlags);
  format %{ "CR0" %}
  interface(REG_INTER);
%}

operand flagsRegCR1() %{
  constraint(ALLOC_IN_RC(int_flags_CR1));
  match(RegFlags);
  format %{ "CR1" %}
  interface(REG_INTER);
%}

operand flagsRegCR6() %{
  constraint(ALLOC_IN_RC(int_flags_CR6));
  match(RegFlags);
  format %{ "CR6" %}
  interface(REG_INTER);
%}

operand regCTR() %{
  constraint(ALLOC_IN_RC(ctr_reg));
  // RegFlags should work. Introducing a RegSpecial type would cause a
  // lot of changes.
  match(RegFlags);
  format %{"SR_CTR" %}
  interface(REG_INTER);
%}

operand regD() %{
  constraint(ALLOC_IN_RC(dbl_reg));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

operand regF() %{
  constraint(ALLOC_IN_RC(flt_reg));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Special Registers

// Method Register
operand inline_cache_regP(iRegPdst reg) %{
  constraint(ALLOC_IN_RC(r19_bits64_reg)); // inline_cache_reg
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

operand compiler_method_oop_regP(iRegPdst reg) %{
  constraint(ALLOC_IN_RC(rscratch1_bits64_reg)); // compiler_method_oop_reg
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

operand interpreter_method_oop_regP(iRegPdst reg) %{
  constraint(ALLOC_IN_RC(r19_bits64_reg)); // interpreter_method_oop_reg
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Operands to remove register moves in unscaled mode.
// Match read/write registers with an EncodeP node if neither shift nor add are required.
operand iRegP2N(iRegPsrc reg) %{
  predicate(false /* TODO: PPC port MatchDecodeNodes*/&& Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(EncodeP reg);
  format %{ "$reg" %}
  interface(REG_INTER)
%}

operand iRegN2P(iRegNsrc reg) %{
  predicate(false /* TODO: PPC port MatchDecodeNodes*/);
  constraint(ALLOC_IN_RC(bits32_reg_ro));
  match(DecodeN reg);
  match(DecodeNKlass reg);
  format %{ "$reg" %}
  interface(REG_INTER)
%}

//----------Complex Operands---------------------------------------------------
// Indirect Memory Reference
operand indirect(iRegPsrc reg) %{
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(reg);
  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x0);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect with Offset
operand indOffset16(iRegPsrc reg, immL16 offset) %{
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(AddP reg offset);
  op_cost(100);
  format %{ "[$reg + $offset]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x0);
    scale(0x0);
    disp($offset);
  %}
%}

// Indirect with 4-aligned Offset
operand indOffset16Alg4(iRegPsrc reg, immL16Alg4 offset) %{
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(AddP reg offset);
  op_cost(100);
  format %{ "[$reg + $offset]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x0);
    scale(0x0);
    disp($offset);
  %}
%}

//----------Complex Operands for Compressed OOPs-------------------------------
// Compressed OOPs with narrow_oop_shift == 0.

// Indirect Memory Reference, compressed OOP
operand indirectNarrow(iRegNsrc reg) %{
  predicate(false /* TODO: PPC port MatchDecodeNodes*/);
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(DecodeN reg);
  match(DecodeNKlass reg);
  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x0);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect with Offset, compressed OOP
operand indOffset16Narrow(iRegNsrc reg, immL16 offset) %{
  predicate(false /* TODO: PPC port MatchDecodeNodes*/);
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(AddP (DecodeN reg) offset);
  match(AddP (DecodeNKlass reg) offset);
  op_cost(100);
  format %{ "[$reg + $offset]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x0);
    scale(0x0);
    disp($offset);
  %}
%}

// Indirect with 4-aligned Offset, compressed OOP
operand indOffset16NarrowAlg4(iRegNsrc reg, immL16Alg4 offset) %{
  predicate(false /* TODO: PPC port MatchDecodeNodes*/);
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(AddP (DecodeN reg) offset);
  match(AddP (DecodeNKlass reg) offset);
  op_cost(100);
  format %{ "[$reg + $offset]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x0);
    scale(0x0);
    disp($offset);
  %}
%}

//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand
//
// This operand is used for loading and storing temporary values on
// the stack where a match requires a value to flow through memory.
operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  //match(RegI);
  format %{ "[sp+$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1);   // R1_SP
    index(0x0);
    scale(0x0);
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  //match(RegL);
  format %{ "[sp+$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1);   // R1_SP
    index(0x0);
    scale(0x0);
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  //match(RegP);
  format %{ "[sp+$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1);   // R1_SP
    index(0x0);
    scale(0x0);
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  //match(RegF);
  format %{ "[sp+$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1);   // R1_SP
    index(0x0);
    scale(0x0);
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  //match(RegD);
  format %{ "[sp+$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1);   // R1_SP
    index(0x0);
    scale(0x0);
    disp($reg);  // Stack Offset
  %}
%}

// Operands for expressing Control Flow
// NOTE: Label is a predefined operand which should not be redefined in
//       the AD file. It is generically handled within the ADLC.

//----------Conditional Branch Operands----------------------------------------
// Comparison Op
//
// This is the operation of the comparison, and is limited to the
// following set of codes: L (<), LE (<=), G (>), GE (>=), E (==), NE
// (!=).
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below.

// When used for floating point comparisons: unordered same as less.
operand cmpOp() %{
  match(Bool);
  format %{ "" %}
  interface(COND_INTER) %{
                           // BO only encodes bit 4 of bcondCRbiIsX, as bits 1-3 are always '100'.
                           //           BO          &  BI
    equal(0xA);            // 10 10:   bcondCRbiIs1 & Condition::equal
    not_equal(0x2);        // 00 10:   bcondCRbiIs0 & Condition::equal
    less(0x8);             // 10 00:   bcondCRbiIs1 & Condition::less
    greater_equal(0x0);    // 00 00:   bcondCRbiIs0 & Condition::less
    less_equal(0x1);       // 00 01:   bcondCRbiIs0 & Condition::greater
    greater(0x9);          // 10 01:   bcondCRbiIs1 & Condition::greater
    overflow(0xB);         // 10 11:   bcondCRbiIs1 & Condition::summary_overflow
    no_overflow(0x3);      // 00 11:   bcondCRbiIs0 & Condition::summary_overflow
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify
// seperate instructions for every form of operand when the
// instruction accepts multiple operand types with the same basic
// encoding and format. The classic case of this is memory operands.
// Indirect is not included since its use is limited to Compare & Swap.

opclass memory(indirect, indOffset16 /*, indIndex, tlsReference*/, indirectNarrow, indOffset16Narrow);
// Memory operand where offsets are 4-aligned. Required for ld, std.
opclass memoryAlg4(indirect, indOffset16Alg4, indirectNarrow, indOffset16NarrowAlg4);
opclass indirectMemory(indirect, indirectNarrow);

// Special opclass for I and ConvL2I.
opclass iRegIsrc_iRegL2Isrc(iRegIsrc, iRegL2Isrc);

// Operand classes to match encode and decode. iRegN_P2N is only used
// for storeN. I have never seen an encode node elsewhere.
opclass iRegN_P2N(iRegNsrc, iRegP2N);
opclass iRegP_N2P(iRegPsrc, iRegN2P);

//----------PIPELINE-----------------------------------------------------------

pipeline %{

// See J.M.Tendler et al. "Power4 system microarchitecture", IBM
// J. Res. & Dev., No. 1, Jan. 2002.

//----------ATTRIBUTES---------------------------------------------------------
attributes %{

  // Power4 instructions are of fixed length.
  fixed_size_instructions;

  // TODO: if `bundle' means number of instructions fetched
  // per cycle, this is 8. If `bundle' means Power4 `group', that is
  // max instructions issued per cycle, this is 5.
  max_instructions_per_bundle = 8;

  // A Power4 instruction is 4 bytes long.
  instruction_unit_size = 4;

  // The Power4 processor fetches 64 bytes...
  instruction_fetch_unit_size = 64;

  // ...in one line
  instruction_fetch_units = 1

  // Unused, list one so that array generated by adlc is not empty.
  // Aix compiler chokes if _nop_count = 0.
  nops(fxNop);
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine
resources(
   PPC_BR,         // branch unit
   PPC_CR,         // condition unit
   PPC_FX1,        // integer arithmetic unit 1
   PPC_FX2,        // integer arithmetic unit 2
   PPC_LDST1,      // load/store unit 1
   PPC_LDST2,      // load/store unit 2
   PPC_FP1,        // float arithmetic unit 1
   PPC_FP2,        // float arithmetic unit 2
   PPC_LDST = PPC_LDST1 | PPC_LDST2,
   PPC_FX = PPC_FX1 | PPC_FX2,
   PPC_FP = PPC_FP1 | PPC_FP2
 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline
pipe_desc(
   // Power4 longest pipeline path
   PPC_IF,   // instruction fetch
   PPC_IC,
   //PPC_BP, // branch prediction
   PPC_D0,   // decode
   PPC_D1,   // decode
   PPC_D2,   // decode
   PPC_D3,   // decode
   PPC_Xfer1,
   PPC_GD,   // group definition
   PPC_MP,   // map
   PPC_ISS,  // issue
   PPC_RF,   // resource fetch
   PPC_EX1,  // execute (all units)
   PPC_EX2,  // execute (FP, LDST)
   PPC_EX3,  // execute (FP, LDST)
   PPC_EX4,  // execute (FP)
   PPC_EX5,  // execute (FP)
   PPC_EX6,  // execute (FP)
   PPC_WB,   // write back
   PPC_Xfer2,
   PPC_CP
 );

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Simple pipeline classes.

// Default pipeline class.
pipe_class pipe_class_default() %{
  single_instruction;
  fixed_latency(2);
%}

// Pipeline class for empty instructions.
pipe_class pipe_class_empty() %{
  single_instruction;
  fixed_latency(0);
%}

// Pipeline class for compares.
pipe_class pipe_class_compare() %{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for traps.
pipe_class pipe_class_trap() %{
  single_instruction;
  fixed_latency(100);
%}

// Pipeline class for memory operations.
pipe_class pipe_class_memory() %{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call.
pipe_class pipe_class_call() %{
  single_instruction;
  fixed_latency(100);
%}

// Define the class for the Nop node.
define %{
   MachNop = pipe_class_default;
%}

%}

//----------INSTRUCTIONS-------------------------------------------------------

// Naming of instructions:
//   opA_operB / opA_operB_operC:
//     Operation 'op' with one or two source operands 'oper'. Result
//     type is A, source operand types are B and C.
//     Iff A == B == C, B and C are left out.
//
// The instructions are ordered according to the following scheme:
//  - loads
//  - load constants
//  - prefetch
//  - store
//  - encode/decode
//  - membar
//  - conditional moves
//  - compare & swap
//  - arithmetic and logic operations
//    * int: Add, Sub, Mul, Div, Mod
//    * int: lShift, arShift, urShift, rot
//    * float: Add, Sub, Mul, Div
//    * and, or, xor ...
//  - register moves: float <-> int, reg <-> stack, repl
//  - cast (high level type cast, XtoP, castPP, castII, not_null etc.
//  - conv (low level type cast requiring bit changes (sign extend etc)
//  - compares, range & zero checks.
//  - branches
//  - complex operations, intrinsics, min, max, replicate
//  - lock
//  - Calls
//
// If there are similar instructions with different types they are sorted:
// int before float
// small before big
// signed before unsigned
// e.g., loadS before loadUS before loadI before loadF.


//----------Load/Store Instructions--------------------------------------------

//----------Load Instructions--------------------------------------------------

// Converts byte to int.
// As convB2I_reg, but without match rule.  The match rule of convB2I_reg
// reuses the 'amount' operand, but adlc expects that operand specification
// and operands in match rule are equivalent.
instruct convB2I_reg_2(iRegIdst dst, iRegIsrc src) %{
  effect(DEF dst, USE src);
  format %{ "EXTSB   $dst, $src \t// byte->int" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_extsb);
    __ extsb($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

instruct loadUB_indirect(iRegIdst dst, indirectMemory mem) %{
  // match-rule, false predicate
  match(Set dst (LoadB mem));
  predicate(false);

  format %{ "LBZ     $dst, $mem" %}
  size(4);
  ins_encode( enc_lbz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

instruct loadUB_indirect_ac(iRegIdst dst, indirectMemory mem) %{
  // match-rule, false predicate
  match(Set dst (LoadB mem));
  predicate(false);

  format %{ "LBZ     $dst, $mem\n\t"
            "TWI     $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_lbz_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Byte (8bit signed). LoadB = LoadUB + ConvUB2B.
instruct loadB_indirect_Ex(iRegIdst dst, indirectMemory mem) %{
  match(Set dst (LoadB mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST + DEFAULT_COST);
  expand %{
    iRegIdst tmp;
    loadUB_indirect(tmp, mem);
    convB2I_reg_2(dst, tmp);
  %}
%}

instruct loadB_indirect_ac_Ex(iRegIdst dst, indirectMemory mem) %{
  match(Set dst (LoadB mem));
  ins_cost(3*MEMORY_REF_COST + DEFAULT_COST);
  expand %{
    iRegIdst tmp;
    loadUB_indirect_ac(tmp, mem);
    convB2I_reg_2(dst, tmp);
  %}
%}

instruct loadUB_indOffset16(iRegIdst dst, indOffset16 mem) %{
  // match-rule, false predicate
  match(Set dst (LoadB mem));
  predicate(false);

  format %{ "LBZ     $dst, $mem" %}
  size(4);
  ins_encode( enc_lbz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

instruct loadUB_indOffset16_ac(iRegIdst dst, indOffset16 mem) %{
  // match-rule, false predicate
  match(Set dst (LoadB mem));
  predicate(false);

  format %{ "LBZ     $dst, $mem\n\t"
            "TWI     $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_lbz_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Byte (8bit signed). LoadB = LoadUB + ConvUB2B.
instruct loadB_indOffset16_Ex(iRegIdst dst, indOffset16 mem) %{
  match(Set dst (LoadB mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST + DEFAULT_COST);

  expand %{
    iRegIdst tmp;
    loadUB_indOffset16(tmp, mem);
    convB2I_reg_2(dst, tmp);
  %}
%}

instruct loadB_indOffset16_ac_Ex(iRegIdst dst, indOffset16 mem) %{
  match(Set dst (LoadB mem));
  ins_cost(3*MEMORY_REF_COST + DEFAULT_COST);

  expand %{
    iRegIdst tmp;
    loadUB_indOffset16_ac(tmp, mem);
    convB2I_reg_2(dst, tmp);
  %}
%}

// Load Unsigned Byte (8bit UNsigned) into an int reg.
instruct loadUB(iRegIdst dst, memory mem) %{
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  match(Set dst (LoadUB mem));
  ins_cost(MEMORY_REF_COST);

  format %{ "LBZ     $dst, $mem \t// byte, zero-extend to int" %}
  size(4);
  ins_encode( enc_lbz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load  Unsigned Byte (8bit UNsigned) acquire.
instruct loadUB_ac(iRegIdst dst, memory mem) %{
  match(Set dst (LoadUB mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LBZ     $dst, $mem \t// byte, zero-extend to int, acquire\n\t"
            "TWI     $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_lbz_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Unsigned Byte (8bit UNsigned) into a Long Register.
instruct loadUB2L(iRegLdst dst, memory mem) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  predicate(_kids[0]->_leaf->as_Load()->is_unordered() || followed_by_acquire(_kids[0]->_leaf));
  ins_cost(MEMORY_REF_COST);

  format %{ "LBZ     $dst, $mem \t// byte, zero-extend to long" %}
  size(4);
  ins_encode( enc_lbz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

instruct loadUB2L_ac(iRegLdst dst, memory mem) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LBZ     $dst, $mem \t// byte, zero-extend to long, acquire\n\t"
            "TWI     $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_lbz_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Short (16bit signed)
instruct loadS(iRegIdst dst, memory mem) %{
  match(Set dst (LoadS mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LHA     $dst, $mem" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lha);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lha($dst$$Register, Idisp, $mem$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Load Short (16bit signed) acquire.
instruct loadS_ac(iRegIdst dst, memory mem) %{
  match(Set dst (LoadS mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LHA     $dst, $mem\t acquire\n\t"
            "TWI     $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lha($dst$$Register, Idisp, $mem$$base$$Register);
    __ twi_0($dst$$Register);
    __ isync();
  %}
  ins_pipe(pipe_class_memory);
%}

// Load Char (16bit unsigned)
instruct loadUS(iRegIdst dst, memory mem) %{
  match(Set dst (LoadUS mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LHZ     $dst, $mem" %}
  size(4);
  ins_encode( enc_lhz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Char (16bit unsigned) acquire.
instruct loadUS_ac(iRegIdst dst, memory mem) %{
  match(Set dst (LoadUS mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LHZ     $dst, $mem \t// acquire\n\t"
            "TWI     $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_lhz_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Unsigned Short/Char (16bit UNsigned) into a Long Register.
instruct loadUS2L(iRegLdst dst, memory mem) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  predicate(_kids[0]->_leaf->as_Load()->is_unordered() || followed_by_acquire(_kids[0]->_leaf));
  ins_cost(MEMORY_REF_COST);

  format %{ "LHZ     $dst, $mem \t// short, zero-extend to long" %}
  size(4);
  ins_encode( enc_lhz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Unsigned Short/Char (16bit UNsigned) into a Long Register acquire.
instruct loadUS2L_ac(iRegLdst dst, memory mem) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LHZ     $dst, $mem \t// short, zero-extend to long, acquire\n\t"
            "TWI     $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_lhz_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Integer.
instruct loadI(iRegIdst dst, memory mem) %{
  match(Set dst (LoadI mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LWZ     $dst, $mem" %}
  size(4);
  ins_encode( enc_lwz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Integer acquire.
instruct loadI_ac(iRegIdst dst, memory mem) %{
  match(Set dst (LoadI mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LWZ     $dst, $mem \t// load acquire\n\t"
            "TWI     $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_lwz_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Match loading integer and casting it to unsigned int in
// long register.
// LoadI + ConvI2L + AndL 0xffffffff.
instruct loadUI2L(iRegLdst dst, memory mem, immL_32bits mask) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  predicate(_kids[0]->_kids[0]->_leaf->as_Load()->is_unordered());
  ins_cost(MEMORY_REF_COST);

  format %{ "LWZ     $dst, $mem \t// zero-extend to long" %}
  size(4);
  ins_encode( enc_lwz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Match loading integer and casting it to long.
instruct loadI2L(iRegLdst dst, memory mem) %{
  match(Set dst (ConvI2L (LoadI mem)));
  predicate(_kids[0]->_leaf->as_Load()->is_unordered());
  ins_cost(MEMORY_REF_COST);

  format %{ "LWA     $dst, $mem \t// loadI2L" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lwa);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lwa($dst$$Register, Idisp, $mem$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Match loading integer and casting it to long - acquire.
instruct loadI2L_ac(iRegLdst dst, memory mem) %{
  match(Set dst (ConvI2L (LoadI mem)));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LWA     $dst, $mem \t// loadI2L acquire"
            "TWI     $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lwa);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lwa($dst$$Register, Idisp, $mem$$base$$Register);
    __ twi_0($dst$$Register);
    __ isync();
  %}
  ins_pipe(pipe_class_memory);
%}

// Load Long - aligned
instruct loadL(iRegLdst dst, memoryAlg4 mem) %{
  match(Set dst (LoadL mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LD      $dst, $mem \t// long" %}
  size(4);
  ins_encode( enc_ld(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Long - aligned acquire.
instruct loadL_ac(iRegLdst dst, memoryAlg4 mem) %{
  match(Set dst (LoadL mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LD      $dst, $mem \t// long acquire\n\t"
            "TWI     $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_ld_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Long - UNaligned
instruct loadL_unaligned(iRegLdst dst, memoryAlg4 mem) %{
  match(Set dst (LoadL_unaligned mem));
  // predicate(...) // Unaligned_ac is not needed (and wouldn't make sense).
  ins_cost(MEMORY_REF_COST);

  format %{ "LD      $dst, $mem \t// unaligned long" %}
  size(4);
  ins_encode( enc_ld(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load nodes for superwords

// Load Aligned Packed Byte
instruct loadV8(iRegLdst dst, memoryAlg4 mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(MEMORY_REF_COST);

  format %{ "LD      $dst, $mem \t// load 8-byte Vector" %}
  size(4);
  ins_encode( enc_ld(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Range, range = array length (=jint)
instruct loadRange(iRegIdst dst, memory mem) %{
  match(Set dst (LoadRange mem));
  ins_cost(MEMORY_REF_COST);

  format %{ "LWZ     $dst, $mem \t// range" %}
  size(4);
  ins_encode( enc_lwz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Compressed Pointer
instruct loadN(iRegNdst dst, memory mem) %{
  match(Set dst (LoadN mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LWZ     $dst, $mem \t// load compressed ptr" %}
  size(4);
  ins_encode( enc_lwz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Compressed Pointer acquire.
instruct loadN_ac(iRegNdst dst, memory mem) %{
  match(Set dst (LoadN mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LWZ     $dst, $mem \t// load acquire compressed ptr\n\t"
            "TWI     $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_lwz_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Compressed Pointer and decode it if narrow_oop_shift == 0.
instruct loadN2P_unscaled(iRegPdst dst, memory mem) %{
  match(Set dst (DecodeN (LoadN mem)));
  predicate(_kids[0]->_leaf->as_Load()->is_unordered() && Universe::narrow_oop_shift() == 0);
  ins_cost(MEMORY_REF_COST);

  format %{ "LWZ     $dst, $mem \t// DecodeN (unscaled)" %}
  size(4);
  ins_encode( enc_lwz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Pointer
instruct loadP(iRegPdst dst, memoryAlg4 mem) %{
  match(Set dst (LoadP mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LD      $dst, $mem \t// ptr" %}
  size(4);
  ins_encode( enc_ld(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Pointer acquire.
instruct loadP_ac(iRegPdst dst, memoryAlg4 mem) %{
  match(Set dst (LoadP mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LD      $dst, $mem \t// ptr acquire\n\t"
            "TWI     $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_ld_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// LoadP + CastP2L
instruct loadP2X(iRegLdst dst, memoryAlg4 mem) %{
  match(Set dst (CastP2X (LoadP mem)));
  predicate(_kids[0]->_leaf->as_Load()->is_unordered());
  ins_cost(MEMORY_REF_COST);

  format %{ "LD      $dst, $mem \t// ptr + p2x" %}
  size(4);
  ins_encode( enc_ld(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load compressed klass pointer.
instruct loadNKlass(iRegNdst dst, memory mem) %{
  match(Set dst (LoadNKlass mem));
  ins_cost(MEMORY_REF_COST);

  format %{ "LWZ     $dst, $mem \t// compressed klass ptr" %}
  size(4);
  ins_encode( enc_lwz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

//// Load compressed klass and decode it if narrow_klass_shift == 0.
//// TODO: will narrow_klass_shift ever be 0?
//instruct decodeNKlass2Klass(iRegPdst dst, memory mem) %{
//  match(Set dst (DecodeNKlass (LoadNKlass mem)));
//  predicate(false /* TODO: PPC port Universe::narrow_klass_shift() == 0*);
//  ins_cost(MEMORY_REF_COST);
//
//  format %{ "LWZ     $dst, $mem \t// DecodeNKlass (unscaled)" %}
//  size(4);
//  ins_encode( enc_lwz(dst, mem) );
//  ins_pipe(pipe_class_memory);
//%}

// Load Klass Pointer
instruct loadKlass(iRegPdst dst, memoryAlg4 mem) %{
  match(Set dst (LoadKlass mem));
  ins_cost(MEMORY_REF_COST);

  format %{ "LD      $dst, $mem \t// klass ptr" %}
  size(4);
  ins_encode( enc_ld(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Float
instruct loadF(regF dst, memory mem) %{
  match(Set dst (LoadF mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LFS     $dst, $mem" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lfs);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lfs($dst$$FloatRegister, Idisp, $mem$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Load Float acquire.
instruct loadF_ac(regF dst, memory mem) %{
  match(Set dst (LoadF mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LFS     $dst, $mem \t// acquire\n\t"
            "FCMPU   cr0, $dst, $dst\n\t"
            "BNE     cr0, next\n"
            "next:\n\t"
            "ISYNC" %}
  size(16);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    Label next;
    __ lfs($dst$$FloatRegister, Idisp, $mem$$base$$Register);
    __ fcmpu(CCR0, $dst$$FloatRegister, $dst$$FloatRegister);
    __ bne(CCR0, next);
    __ bind(next);
    __ isync();
  %}
  ins_pipe(pipe_class_memory);
%}

// Load Double - aligned
instruct loadD(regD dst, memory mem) %{
  match(Set dst (LoadD mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LFD     $dst, $mem" %}
  size(4);
  ins_encode( enc_lfd(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Double - aligned acquire.
instruct loadD_ac(regD dst, memory mem) %{
  match(Set dst (LoadD mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LFD     $dst, $mem \t// acquire\n\t"
            "FCMPU   cr0, $dst, $dst\n\t"
            "BNE     cr0, next\n"
            "next:\n\t"
            "ISYNC" %}
  size(16);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    Label next;
    __ lfd($dst$$FloatRegister, Idisp, $mem$$base$$Register);
    __ fcmpu(CCR0, $dst$$FloatRegister, $dst$$FloatRegister);
    __ bne(CCR0, next);
    __ bind(next);
    __ isync();
  %}
  ins_pipe(pipe_class_memory);
%}

// Load Double - UNaligned
instruct loadD_unaligned(regD dst, memory mem) %{
  match(Set dst (LoadD_unaligned mem));
  // predicate(...) // Unaligned_ac is not needed (and wouldn't make sense).
  ins_cost(MEMORY_REF_COST);

  format %{ "LFD     $dst, $mem" %}
  size(4);
  ins_encode( enc_lfd(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

//----------Constants--------------------------------------------------------

// Load MachConstantTableBase: add hi offset to global toc.
// TODO: Handle hidden register r29 in bundler!
instruct loadToc_hi(iRegLdst dst) %{
  effect(DEF dst);
  ins_cost(DEFAULT_COST);

  format %{ "ADDIS   $dst, R29, DISP.hi \t// load TOC hi" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addis);
    __ calculate_address_from_global_toc_hi16only($dst$$Register, __ method_toc());
  %}
  ins_pipe(pipe_class_default);
%}

// Load MachConstantTableBase: add lo offset to global toc.
instruct loadToc_lo(iRegLdst dst, iRegLdst src) %{
  effect(DEF dst, USE src);
  ins_cost(DEFAULT_COST);

  format %{ "ADDI    $dst, $src, DISP.lo \t// load TOC lo" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ori);
    __ calculate_address_from_global_toc_lo16only($dst$$Register, __ method_toc());
  %}
  ins_pipe(pipe_class_default);
%}

// Load 16-bit integer constant 0xssss????
instruct loadConI16(iRegIdst dst, immI16 src) %{
  match(Set dst src);

  format %{ "LI      $dst, $src" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
  %}
  ins_pipe(pipe_class_default);
%}

// Load integer constant 0x????0000
instruct loadConIhi16(iRegIdst dst, immIhi16 src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST);

  format %{ "LIS     $dst, $src.hi" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addis);
    // Lis sign extends 16-bit src then shifts it 16 bit to the left.
    __ lis($dst$$Register, (int)((short)(($src$$constant & 0xFFFF0000) >> 16)));
  %}
  ins_pipe(pipe_class_default);
%}

// Part 2 of loading 32 bit constant: hi16 is is src1 (properly shifted
// and sign extended), this adds the low 16 bits.
instruct loadConI32_lo16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src1, USE src2);
  predicate(false);

  format %{ "ORI     $dst, $src1.hi, $src2.lo" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ori);
    __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF);
  %}
  ins_pipe(pipe_class_default);
%}

instruct loadConI_Ex(iRegIdst dst, immI src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST*2);

  expand %{
    // Would like to use $src$$constant.
    immI16 srcLo %{ _opnds[1]->constant() %}
    // srcHi can be 0000 if srcLo sign-extends to a negative number.
    immIhi16 srcHi %{ _opnds[1]->constant() %}
    iRegIdst tmpI;
    loadConIhi16(tmpI, srcHi);
    loadConI32_lo16(dst, tmpI, srcLo);
  %}
%}

// No constant pool entries required.
instruct loadConL16(iRegLdst dst, immL16 src) %{
  match(Set dst src);

  format %{ "LI      $dst, $src \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ li($dst$$Register, (int)((short) ($src$$constant & 0xFFFF)));
  %}
  ins_pipe(pipe_class_default);
%}

// Load long constant 0xssssssss????0000
instruct loadConL32hi16(iRegLdst dst, immL32hi16 src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST);

  format %{ "LIS     $dst, $src.hi \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addis);
    __ lis($dst$$Register, (int)((short)(($src$$constant & 0xFFFF0000) >> 16)));
  %}
  ins_pipe(pipe_class_default);
%}

// To load a 32 bit constant: merge lower 16 bits into already loaded
// high 16 bits.
instruct loadConL32_lo16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src1, USE src2);
  predicate(false);

  format %{ "ORI     $dst, $src1, $src2.lo" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ori);
    __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF);
  %}
  ins_pipe(pipe_class_default);
%}

// Load 32-bit long constant
instruct loadConL32_Ex(iRegLdst dst, immL32 src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST*2);

  expand %{
    // Would like to use $src$$constant.
    immL16     srcLo %{ _opnds[1]->constant() /*& 0x0000FFFFL */%}
    // srcHi can be 0000 if srcLo sign-extends to a negative number.
    immL32hi16 srcHi %{ _opnds[1]->constant() /*& 0xFFFF0000L */%}
    iRegLdst tmpL;
    loadConL32hi16(tmpL, srcHi);
    loadConL32_lo16(dst, tmpL, srcLo);
  %}
%}

// Load long constant 0x????000000000000.
instruct loadConLhighest16_Ex(iRegLdst dst, immLhighest16 src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST);

  expand %{
    immL32hi16 srcHi %{ _opnds[1]->constant() >> 32 /*& 0xFFFF0000L */%}
    immI shift32 %{ 32 %}
    iRegLdst tmpL;
    loadConL32hi16(tmpL, srcHi);
    lshiftL_regL_immI(dst, tmpL, shift32);
  %}
%}

// Expand node for constant pool load: small offset.
instruct loadConL(iRegLdst dst, immL src, iRegLdst toc) %{
  effect(DEF dst, USE src, USE toc);
  ins_cost(MEMORY_REF_COST);

  ins_num_consts(1);
  // Needed so that CallDynamicJavaDirect can compute the address of this
  // instruction for relocation.
  ins_field_cbuf_insts_offset(int);

  format %{ "LD      $dst, offset, $toc \t// load long $src from TOC" %}
  size(4);
  ins_encode( enc_load_long_constL(dst, src, toc) );
  ins_pipe(pipe_class_memory);
%}

// Expand node for constant pool load: large offset.
instruct loadConL_hi(iRegLdst dst, immL src, iRegLdst toc) %{
  effect(DEF dst, USE src, USE toc);
  predicate(false);

  ins_num_consts(1);
  ins_field_const_toc_offset(int);
  // Needed so that CallDynamicJavaDirect can compute the address of this
  // instruction for relocation.
  ins_field_cbuf_insts_offset(int);

  format %{ "ADDIS   $dst, $toc, offset \t// load long $src from TOC (hi)" %}
  size(4);
  ins_encode( enc_load_long_constL_hi(dst, toc, src) );
  ins_pipe(pipe_class_default);
%}

// Expand node for constant pool load: large offset.
// No constant pool entries required.
instruct loadConL_lo(iRegLdst dst, immL src, iRegLdst base) %{
  effect(DEF dst, USE src, USE base);
  predicate(false);

  ins_field_const_toc_offset_hi_node(loadConL_hiNode*);

  format %{ "LD      $dst, offset, $base \t// load long $src from TOC (lo)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ld);
    int offset = ra_->C->in_scratch_emit_size() ? 0 : _const_toc_offset_hi_node->_const_toc_offset;
    __ ld($dst$$Register, MacroAssembler::largeoffset_si16_si16_lo(offset), $base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Load long constant from constant table. Expand in case of
// offset > 16 bit is needed.
// Adlc adds toc node MachConstantTableBase.
instruct loadConL_Ex(iRegLdst dst, immL src) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);

  format %{ "LD      $dst, offset, $constanttablebase\t// load long $src from table, postalloc expanded" %}
  // We can not inline the enc_class for the expand as that does not support constanttablebase.
  postalloc_expand( postalloc_expand_load_long_constant(dst, src, constanttablebase) );
%}

// Load NULL as compressed oop.
instruct loadConN0(iRegNdst dst, immN_0 src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST);

  format %{ "LI      $dst, $src \t// compressed ptr" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ li($dst$$Register, 0);
  %}
  ins_pipe(pipe_class_default);
%}

// Load hi part of compressed oop constant.
instruct loadConN_hi(iRegNdst dst, immN src) %{
  effect(DEF dst, USE src);
  ins_cost(DEFAULT_COST);

  format %{ "LIS     $dst, $src \t// narrow oop hi" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addis);
    __ lis($dst$$Register, (int)(short)(($src$$constant >> 16) & 0xffff));
  %}
  ins_pipe(pipe_class_default);
%}

// Add lo part of compressed oop constant to already loaded hi part.
instruct loadConN_lo(iRegNdst dst, iRegNsrc src1, immN src2) %{
  effect(DEF dst, USE src1, USE src2);
  ins_cost(DEFAULT_COST);

  format %{ "ORI     $dst, $src1, $src2 \t// narrow oop lo" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    assert(__ oop_recorder() != NULL, "this assembler needs an OopRecorder");
    int oop_index = __ oop_recorder()->find_index((jobject)$src2$$constant);
    RelocationHolder rspec = oop_Relocation::spec(oop_index);
    __ relocate(rspec, 1);
    __ ori($dst$$Register, $src1$$Register, $src2$$constant & 0xffff);
  %}
  ins_pipe(pipe_class_default);
%}

// Needed to postalloc expand loadConN: ConN is loaded as ConI
// leaving the upper 32 bits with sign-extension bits.
// This clears these bits: dst = src & 0xFFFFFFFF.
// TODO: Eventually call this maskN_regN_FFFFFFFF.
instruct clearMs32b(iRegNdst dst, iRegNsrc src) %{
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "MASK    $dst, $src, 0xFFFFFFFF" %} // mask
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ clrldi($dst$$Register, $src$$Register, 0x20);
  %}
  ins_pipe(pipe_class_default);
%}

// Loading ConN must be postalloc expanded so that edges between
// the nodes are safe. They may not interfere with a safepoint.
// GL TODO: This needs three instructions: better put this into the constant pool.
instruct loadConN_Ex(iRegNdst dst, immN src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST*2);

  format %{ "LoadN   $dst, $src \t// postalloc expanded" %} // mask
  postalloc_expand %{
    MachNode *m1 = new (C) loadConN_hiNode();
    MachNode *m2 = new (C) loadConN_loNode();
    MachNode *m3 = new (C) clearMs32bNode();
    m1->add_req(NULL);
    m2->add_req(NULL, m1);
    m3->add_req(NULL, m2);
    m1->_opnds[0] = op_dst;
    m1->_opnds[1] = op_src;
    m2->_opnds[0] = op_dst;
    m2->_opnds[1] = op_dst;
    m2->_opnds[2] = op_src;
    m3->_opnds[0] = op_dst;
    m3->_opnds[1] = op_dst;
    ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    ra_->set_pair(m3->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    nodes->push(m1);
    nodes->push(m2);
    nodes->push(m3);
  %}
%}

instruct loadConNKlass_hi(iRegNdst dst, immNKlass src) %{
  effect(DEF dst, USE src);
  ins_cost(DEFAULT_COST);

  format %{ "LIS     $dst, $src \t// narrow oop hi" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addis);
    intptr_t Csrc = Klass::encode_klass((Klass *)$src$$constant);
    __ lis($dst$$Register, (int)(short)((Csrc >> 16) & 0xffff));
  %}
  ins_pipe(pipe_class_default);
%}

// This needs a match rule so that build_oop_map knows this is
// not a narrow oop.
instruct loadConNKlass_lo(iRegNdst dst, immNKlass_NM src1, iRegNsrc src2) %{
  match(Set dst src1);
  effect(TEMP src2);
  ins_cost(DEFAULT_COST);

  format %{ "ADDI    $dst, $src1, $src2 \t// narrow oop lo" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    intptr_t Csrc = Klass::encode_klass((Klass *)$src1$$constant);
    assert(__ oop_recorder() != NULL, "this assembler needs an OopRecorder");
    int klass_index = __ oop_recorder()->find_index((Klass *)$src1$$constant);
    RelocationHolder rspec = metadata_Relocation::spec(klass_index);

    __ relocate(rspec, 1);
    __ ori($dst$$Register, $src2$$Register, Csrc & 0xffff);
  %}
  ins_pipe(pipe_class_default);
%}

// Loading ConNKlass must be postalloc expanded so that edges between
// the nodes are safe. They may not interfere with a safepoint.
instruct loadConNKlass_Ex(iRegNdst dst, immNKlass src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST*2);

  format %{ "LoadN   $dst, $src \t// postalloc expanded" %} // mask
  postalloc_expand %{
    // Load high bits into register. Sign extended.
    MachNode *m1 = new (C) loadConNKlass_hiNode();
    m1->add_req(NULL);
    m1->_opnds[0] = op_dst;
    m1->_opnds[1] = op_src;
    ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    nodes->push(m1);

    MachNode *m2 = m1;
    if (!Assembler::is_uimm((jlong)Klass::encode_klass((Klass *)op_src->constant()), 31)) {
      // Value might be 1-extended. Mask out these bits.
      m2 = new (C) clearMs32bNode();
      m2->add_req(NULL, m1);
      m2->_opnds[0] = op_dst;
      m2->_opnds[1] = op_dst;
      ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
      nodes->push(m2);
    }

    MachNode *m3 = new (C) loadConNKlass_loNode();
    m3->add_req(NULL, m2);
    m3->_opnds[0] = op_dst;
    m3->_opnds[1] = op_src;
    m3->_opnds[2] = op_dst;
    ra_->set_pair(m3->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    nodes->push(m3);
  %}
%}

// 0x1 is used in object initialization (initial object header).
// No constant pool entries required.
instruct loadConP0or1(iRegPdst dst, immP_0or1 src) %{
  match(Set dst src);

  format %{ "LI      $dst, $src \t// ptr" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
  %}
  ins_pipe(pipe_class_default);
%}

// Expand node for constant pool load: small offset.
// The match rule is needed to generate the correct bottom_type(),
// however this node should never match. The use of predicate is not
// possible since ADLC forbids predicates for chain rules. The higher
// costs do not prevent matching in this case. For that reason the
// operand immP_NM with predicate(false) is used.
instruct loadConP(iRegPdst dst, immP_NM src, iRegLdst toc) %{
  match(Set dst src);
  effect(TEMP toc);

  ins_num_consts(1);

  format %{ "LD      $dst, offset, $toc \t// load ptr $src from TOC" %}
  size(4);
  ins_encode( enc_load_long_constP(dst, src, toc) );
  ins_pipe(pipe_class_memory);
%}

// Expand node for constant pool load: large offset.
instruct loadConP_hi(iRegPdst dst, immP_NM src, iRegLdst toc) %{
  effect(DEF dst, USE src, USE toc);
  predicate(false);

  ins_num_consts(1);
  ins_field_const_toc_offset(int);

  format %{ "ADDIS   $dst, $toc, offset \t// load ptr $src from TOC (hi)" %}
  size(4);
  ins_encode( enc_load_long_constP_hi(dst, src, toc) );
  ins_pipe(pipe_class_default);
%}

// Expand node for constant pool load: large offset.
instruct loadConP_lo(iRegPdst dst, immP_NM src, iRegLdst base) %{
  match(Set dst src);
  effect(TEMP base);

  ins_field_const_toc_offset_hi_node(loadConP_hiNode*);

  format %{ "LD      $dst, offset, $base \t// load ptr $src from TOC (lo)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ld);
    int offset = ra_->C->in_scratch_emit_size() ? 0 : _const_toc_offset_hi_node->_const_toc_offset;
    __ ld($dst$$Register, MacroAssembler::largeoffset_si16_si16_lo(offset), $base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Load pointer constant from constant table. Expand in case an
// offset > 16 bit is needed.
// Adlc adds toc node MachConstantTableBase.
instruct loadConP_Ex(iRegPdst dst, immP src) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);

  // This rule does not use "expand" because then
  // the result type is not known to be an Oop.  An ADLC
  // enhancement will be needed to make that work - not worth it!

  // If this instruction rematerializes, it prolongs the live range
  // of the toc node, causing illegal graphs.
  // assert(edge_from_to(_reg_node[reg_lo],def)) fails in verify_good_schedule().
  ins_cannot_rematerialize(true);

  format %{ "LD    $dst, offset, $constanttablebase \t//  load ptr $src from table, postalloc expanded" %}
  postalloc_expand( postalloc_expand_load_ptr_constant(dst, src, constanttablebase) );
%}

// Expand node for constant pool load: small offset.
instruct loadConF(regF dst, immF src, iRegLdst toc) %{
  effect(DEF dst, USE src, USE toc);
  ins_cost(MEMORY_REF_COST);

  ins_num_consts(1);

  format %{ "LFS     $dst, offset, $toc \t// load float $src from TOC" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lfs);
    address float_address = __ float_constant($src$$constant);
    __ lfs($dst$$FloatRegister, __ offset_to_method_toc(float_address), $toc$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Expand node for constant pool load: large offset.
instruct loadConFComp(regF dst, immF src, iRegLdst toc) %{
  effect(DEF dst, USE src, USE toc);
  ins_cost(MEMORY_REF_COST);

  ins_num_consts(1);

  format %{ "ADDIS   $toc, $toc, offset_hi\n\t"
            "LFS     $dst, offset_lo, $toc \t// load float $src from TOC (hi/lo)\n\t"
            "ADDIS   $toc, $toc, -offset_hi"%}
  size(12);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    FloatRegister Rdst    = $dst$$FloatRegister;
    Register Rtoc         = $toc$$Register;
    address float_address = __ float_constant($src$$constant);
    int offset            = __ offset_to_method_toc(float_address);
    int hi = (offset + (1<<15))>>16;
    int lo = offset - hi * (1<<16);

    __ addis(Rtoc, Rtoc, hi);
    __ lfs(Rdst, lo, Rtoc);
    __ addis(Rtoc, Rtoc, -hi);
  %}
  ins_pipe(pipe_class_memory);
%}

// Adlc adds toc node MachConstantTableBase.
instruct loadConF_Ex(regF dst, immF src) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);

  // See loadConP.
  ins_cannot_rematerialize(true);

  format %{ "LFS     $dst, offset, $constanttablebase \t// load $src from table, postalloc expanded" %}
  postalloc_expand( postalloc_expand_load_float_constant(dst, src, constanttablebase) );
%}

// Expand node for constant pool load: small offset.
instruct loadConD(regD dst, immD src, iRegLdst toc) %{
  effect(DEF dst, USE src, USE toc);
  ins_cost(MEMORY_REF_COST);

  ins_num_consts(1);

  format %{ "LFD     $dst, offset, $toc \t// load double $src from TOC" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lfd);
    int offset =  __ offset_to_method_toc(__ double_constant($src$$constant));
    __ lfd($dst$$FloatRegister, offset, $toc$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Expand node for constant pool load: large offset.
instruct loadConDComp(regD dst, immD src, iRegLdst toc) %{
  effect(DEF dst, USE src, USE toc);
  ins_cost(MEMORY_REF_COST);

  ins_num_consts(1);

  format %{ "ADDIS   $toc, $toc, offset_hi\n\t"
            "LFD     $dst, offset_lo, $toc \t// load double $src from TOC (hi/lo)\n\t"
            "ADDIS   $toc, $toc, -offset_hi" %}
  size(12);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    FloatRegister Rdst    = $dst$$FloatRegister;
    Register      Rtoc    = $toc$$Register;
    address float_address = __ double_constant($src$$constant);
    int offset            = __ offset_to_method_toc(float_address);
    int hi = (offset + (1<<15))>>16;
    int lo = offset - hi * (1<<16);

    __ addis(Rtoc, Rtoc, hi);
    __ lfd(Rdst, lo, Rtoc);
    __ addis(Rtoc, Rtoc, -hi);
  %}
  ins_pipe(pipe_class_memory);
%}

// Adlc adds toc node MachConstantTableBase.
instruct loadConD_Ex(regD dst, immD src) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);

  // See loadConP.
  ins_cannot_rematerialize(true);

  format %{ "ConD    $dst, offset, $constanttablebase \t// load $src from table, postalloc expanded" %}
  postalloc_expand( postalloc_expand_load_double_constant(dst, src, constanttablebase) );
%}

// Prefetch instructions.
// Must be safe to execute with invalid address (cannot fault).

instruct prefetchr(indirectMemory mem, iRegLsrc src) %{
  match(PrefetchRead (AddP mem src));
  ins_cost(MEMORY_REF_COST);

  format %{ "PREFETCH $mem, 0, $src \t// Prefetch read-many" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_dcbt);
    __ dcbt($src$$Register, $mem$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct prefetchr_no_offset(indirectMemory mem) %{
  match(PrefetchRead mem);
  ins_cost(MEMORY_REF_COST);

  format %{ "PREFETCH $mem" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_dcbt);
    __ dcbt($mem$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct prefetchw(indirectMemory mem, iRegLsrc src) %{
  match(PrefetchWrite (AddP mem src));
  ins_cost(MEMORY_REF_COST);

  format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many (and read)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_dcbtst);
    __ dcbtst($src$$Register, $mem$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct prefetchw_no_offset(indirectMemory mem) %{
  match(PrefetchWrite mem);
  ins_cost(MEMORY_REF_COST);

  format %{ "PREFETCH $mem" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_dcbtst);
    __ dcbtst($mem$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Special prefetch versions which use the dcbz instruction.
instruct prefetch_alloc_zero(indirectMemory mem, iRegLsrc src) %{
  match(PrefetchAllocation (AddP mem src));
  predicate(AllocatePrefetchStyle == 3);
  ins_cost(MEMORY_REF_COST);

  format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many with zero" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_dcbtst);
    __ dcbz($src$$Register, $mem$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct prefetch_alloc_zero_no_offset(indirectMemory mem) %{
  match(PrefetchAllocation mem);
  predicate(AllocatePrefetchStyle == 3);
  ins_cost(MEMORY_REF_COST);

  format %{ "PREFETCH $mem, 2 \t// Prefetch write-many with zero" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_dcbtst);
    __ dcbz($mem$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct prefetch_alloc(indirectMemory mem, iRegLsrc src) %{
  match(PrefetchAllocation (AddP mem src));
  predicate(AllocatePrefetchStyle != 3);
  ins_cost(MEMORY_REF_COST);

  format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_dcbtst);
    __ dcbtst($src$$Register, $mem$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct prefetch_alloc_no_offset(indirectMemory mem) %{
  match(PrefetchAllocation mem);
  predicate(AllocatePrefetchStyle != 3);
  ins_cost(MEMORY_REF_COST);

  format %{ "PREFETCH $mem, 2 \t// Prefetch write-many" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_dcbtst);
    __ dcbtst($mem$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, iRegIsrc src) %{
  match(Set mem (StoreB mem src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STB     $src, $mem \t// byte" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_stb);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ stb($src$$Register, Idisp, $mem$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Store Char/Short
instruct storeC(memory mem, iRegIsrc src) %{
  match(Set mem (StoreC mem src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STH     $src, $mem \t// short" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_sth);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ sth($src$$Register, Idisp, $mem$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Store Integer
instruct storeI(memory mem, iRegIsrc src) %{
  match(Set mem (StoreI mem src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STW     $src, $mem" %}
  size(4);
  ins_encode( enc_stw(src, mem) );
  ins_pipe(pipe_class_memory);
%}

// ConvL2I + StoreI.
instruct storeI_convL2I(memory mem, iRegLsrc src) %{
  match(Set mem (StoreI mem (ConvL2I src)));
  ins_cost(MEMORY_REF_COST);

  format %{ "STW     l2i($src), $mem" %}
  size(4);
  ins_encode( enc_stw(src, mem) );
  ins_pipe(pipe_class_memory);
%}

// Store Long
instruct storeL(memoryAlg4 mem, iRegLsrc src) %{
  match(Set mem (StoreL mem src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STD     $src, $mem \t// long" %}
  size(4);
  ins_encode( enc_std(src, mem) );
  ins_pipe(pipe_class_memory);
%}

// Store super word nodes.

// Store Aligned Packed Byte long register to memory
instruct storeA8B(memoryAlg4 mem, iRegLsrc src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STD     $mem, $src \t// packed8B" %}
  size(4);
  ins_encode( enc_std(src, mem) );
  ins_pipe(pipe_class_memory);
%}

// Store Compressed Oop
instruct storeN(memory dst, iRegN_P2N src) %{
  match(Set dst (StoreN dst src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STW     $src, $dst \t// compressed oop" %}
  size(4);
  ins_encode( enc_stw(src, dst) );
  ins_pipe(pipe_class_memory);
%}

// Store Compressed KLass
instruct storeNKlass(memory dst, iRegN_P2N src) %{
  match(Set dst (StoreNKlass dst src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STW     $src, $dst \t// compressed klass" %}
  size(4);
  ins_encode( enc_stw(src, dst) );
  ins_pipe(pipe_class_memory);
%}

// Store Pointer
instruct storeP(memoryAlg4 dst, iRegPsrc src) %{
  match(Set dst (StoreP dst src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STD     $src, $dst \t// ptr" %}
  size(4);
  ins_encode( enc_std(src, dst) );
  ins_pipe(pipe_class_memory);
%}

// Store Float
instruct storeF(memory mem, regF src) %{
  match(Set mem (StoreF mem src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STFS    $src, $mem" %}
  size(4);
  ins_encode( enc_stfs(src, mem) );
  ins_pipe(pipe_class_memory);
%}

// Store Double
instruct storeD(memory mem, regD src) %{
  match(Set mem (StoreD mem src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STFD    $src, $mem" %}
  size(4);
  ins_encode( enc_stfd(src, mem) );
  ins_pipe(pipe_class_memory);
%}

//----------Store Instructions With Zeros--------------------------------------

// Card-mark for CMS garbage collection.
// This cardmark does an optimization so that it must not always
// do a releasing store. For this, it gets the address of
// CMSCollectorCardTableModRefBSExt::_requires_release as input.
// (Using releaseFieldAddr in the match rule is a hack.)
instruct storeCM_CMS(memory mem, iRegLdst releaseFieldAddr) %{
  match(Set mem (StoreCM mem releaseFieldAddr));
  predicate(false);
  ins_cost(MEMORY_REF_COST);

  // See loadConP.
  ins_cannot_rematerialize(true);

  format %{ "STB     #0, $mem \t// CMS card-mark byte (must be 0!), checking requires_release in [$releaseFieldAddr]" %}
  ins_encode( enc_cms_card_mark(mem, releaseFieldAddr) );
  ins_pipe(pipe_class_memory);
%}

// Card-mark for CMS garbage collection.
// This cardmark does an optimization so that it must not always
// do a releasing store. For this, it needs the constant address of
// CMSCollectorCardTableModRefBSExt::_requires_release.
// This constant address is split off here by expand so we can use
// adlc / matcher functionality to load it from the constant section.
instruct storeCM_CMS_ExEx(memory mem, immI_0 zero) %{
  match(Set mem (StoreCM mem zero));
  predicate(UseConcMarkSweepGC);

  expand %{
    immL baseImm %{ 0 /* TODO: PPC port (jlong)CMSCollectorCardTableModRefBSExt::requires_release_address() */ %}
    iRegLdst releaseFieldAddress;
    loadConL_Ex(releaseFieldAddress, baseImm);
    storeCM_CMS(mem, releaseFieldAddress);
  %}
%}

instruct storeCM_G1(memory mem, immI_0 zero) %{
  match(Set mem (StoreCM mem zero));
  predicate(UseG1GC);
  ins_cost(MEMORY_REF_COST);

  ins_cannot_rematerialize(true);

  format %{ "STB     #0, $mem \t// CMS card-mark byte store (G1)" %}
  size(8);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    __ li(R0, 0);
    //__ release(); // G1: oops are allowed to get visible after dirty marking
    guarantee($mem$$base$$Register != R1_SP, "use frame_slots_bias");
    __ stb(R0, $mem$$disp, $mem$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Convert oop pointer into compressed form.

// Nodes for postalloc expand.

// Shift node for expand.
instruct encodeP_shift(iRegNdst dst, iRegNsrc src) %{
  // The match rule is needed to make it a 'MachTypeNode'!
  match(Set dst (EncodeP src));
  predicate(false);

  format %{ "SRDI    $dst, $src, 3 \t// encode" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ srdi($dst$$Register, $src$$Register, Universe::narrow_oop_shift() & 0x3f);
  %}
  ins_pipe(pipe_class_default);
%}

// Add node for expand.
instruct encodeP_sub(iRegPdst dst, iRegPdst src) %{
  // The match rule is needed to make it a 'MachTypeNode'!
  match(Set dst (EncodeP src));
  predicate(false);

  format %{ "SUB     $dst, $src, oop_base \t// encode" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_subf);
    __ subf($dst$$Register, R30, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Conditional sub base.
instruct cond_sub_base(iRegNdst dst, flagsReg crx, iRegPsrc src1) %{
  // The match rule is needed to make it a 'MachTypeNode'!
  match(Set dst (EncodeP (Binary crx src1)));
  predicate(false);

  ins_variable_size_depending_on_alignment(true);

  format %{ "BEQ     $crx, done\n\t"
            "SUB     $dst, $src1, R30 \t// encode: subtract base if != NULL\n"
            "done:" %}
  size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
    Label done;
    __ beq($crx$$CondRegister, done);
    __ subf($dst$$Register, R30, $src1$$Register);
    // TODO PPC port __ endgroup_if_needed(_size == 12);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);
%}

// Power 7 can use isel instruction
instruct cond_set_0_oop(iRegNdst dst, flagsReg crx, iRegPsrc src1) %{
  // The match rule is needed to make it a 'MachTypeNode'!
  match(Set dst (EncodeP (Binary crx src1)));
  predicate(false);

  format %{ "CMOVE   $dst, $crx eq, 0, $src1 \t// encode: preserve 0" %}
  size(4);
  ins_encode %{
    // This is a Power7 instruction for which no machine description exists.
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    __ isel_0($dst$$Register, $crx$$CondRegister, Assembler::equal, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// base != 0
// 32G aligned narrow oop base.
instruct encodeP_32GAligned(iRegNdst dst, iRegPsrc src) %{
  match(Set dst (EncodeP src));
  predicate(false /* TODO: PPC port Universe::narrow_oop_base_disjoint()*/);

  format %{ "EXTRDI  $dst, $src, #32, #3 \t// encode with 32G aligned base" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ rldicl($dst$$Register, $src$$Register, 64-Universe::narrow_oop_shift(), 32);
  %}
  ins_pipe(pipe_class_default);
%}

// shift != 0, base != 0
instruct encodeP_Ex(iRegNdst dst, flagsReg crx, iRegPsrc src) %{
  match(Set dst (EncodeP src));
  effect(TEMP crx);
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull &&
            Universe::narrow_oop_shift() != 0 &&
            true /* TODO: PPC port Universe::narrow_oop_base_overlaps()*/);

  format %{ "EncodeP $dst, $crx, $src \t// postalloc expanded" %}
  postalloc_expand( postalloc_expand_encode_oop(dst, src, crx));
%}

// shift != 0, base != 0
instruct encodeP_not_null_Ex(iRegNdst dst, iRegPsrc src) %{
  match(Set dst (EncodeP src));
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull &&
            Universe::narrow_oop_shift() != 0 &&
            true /* TODO: PPC port Universe::narrow_oop_base_overlaps()*/);

  format %{ "EncodeP $dst, $src\t// $src != Null, postalloc expanded" %}
  postalloc_expand( postalloc_expand_encode_oop_not_null(dst, src) );
%}

// shift != 0, base == 0
// TODO: This is the same as encodeP_shift. Merge!
instruct encodeP_not_null_base_null(iRegNdst dst, iRegPsrc src) %{
  match(Set dst (EncodeP src));
  predicate(Universe::narrow_oop_shift() != 0 &&
            Universe::narrow_oop_base() ==0);

  format %{ "SRDI    $dst, $src, #3 \t// encodeP, $src != NULL" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ srdi($dst$$Register, $src$$Register, Universe::narrow_oop_shift() & 0x3f);
  %}
  ins_pipe(pipe_class_default);
%}

// Compressed OOPs with narrow_oop_shift == 0.
// shift == 0, base == 0
instruct encodeP_narrow_oop_shift_0(iRegNdst dst, iRegPsrc src) %{
  match(Set dst (EncodeP src));
  predicate(Universe::narrow_oop_shift() == 0);

  format %{ "MR      $dst, $src \t// Ptr->Narrow" %}
  // variable size, 0 or 4.
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ mr_if_needed($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Decode nodes.

// Shift node for expand.
instruct decodeN_shift(iRegPdst dst, iRegPsrc src) %{
  // The match rule is needed to make it a 'MachTypeNode'!
  match(Set dst (DecodeN src));
  predicate(false);

  format %{ "SLDI    $dst, $src, #3 \t// DecodeN" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
    __ sldi($dst$$Register, $src$$Register, Universe::narrow_oop_shift());
  %}
  ins_pipe(pipe_class_default);
%}

// Add node for expand.
instruct decodeN_add(iRegPdst dst, iRegPdst src) %{
  // The match rule is needed to make it a 'MachTypeNode'!
  match(Set dst (DecodeN src));
  predicate(false);

  format %{ "ADD     $dst, $src, R30 \t// DecodeN, add oop base" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_add);
    __ add($dst$$Register, $src$$Register, R30);
  %}
  ins_pipe(pipe_class_default);
%}

// conditianal add base for expand
instruct cond_add_base(iRegPdst dst, flagsReg crx, iRegPsrc src1) %{
  // The match rule is needed to make it a 'MachTypeNode'!
  // NOTICE that the rule is nonsense - we just have to make sure that:
  //  - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
  //  - we have to match 'crx' to avoid an "illegal USE of non-input: flagsReg crx" error in ADLC.
  match(Set dst (DecodeN (Binary crx src1)));
  predicate(false);

  ins_variable_size_depending_on_alignment(true);

  format %{ "BEQ     $crx, done\n\t"
            "ADD     $dst, $src1, R30 \t// DecodeN: add oop base if $src1 != NULL\n"
            "done:" %}
  size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling()) */? 12 : 8);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
    Label done;
    __ beq($crx$$CondRegister, done);
    __ add($dst$$Register, $src1$$Register, R30);
    // TODO PPC port  __ endgroup_if_needed(_size == 12);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);
%}

instruct cond_set_0_ptr(iRegPdst dst, flagsReg crx, iRegPsrc src1) %{
  // The match rule is needed to make it a 'MachTypeNode'!
  // NOTICE that the rule is nonsense - we just have to make sure that:
  //  - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
  //  - we have to match 'crx' to avoid an "illegal USE of non-input: flagsReg crx" error in ADLC.
  match(Set dst (DecodeN (Binary crx src1)));
  predicate(false);

  format %{ "CMOVE   $dst, $crx eq, 0, $src1 \t// decode: preserve 0" %}
  size(4);
  ins_encode %{
    // This is a Power7 instruction for which no machine description exists.
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    __ isel_0($dst$$Register, $crx$$CondRegister, Assembler::equal, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

//  shift != 0, base != 0
instruct decodeN_Ex(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
  match(Set dst (DecodeN src));
  predicate((n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
             n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant) &&
            Universe::narrow_oop_shift() != 0 &&
            Universe::narrow_oop_base() != 0);
  effect(TEMP crx);

  format %{ "DecodeN $dst, $src \t// Kills $crx, postalloc expanded" %}
  postalloc_expand( postalloc_expand_decode_oop(dst, src, crx) );
%}

// shift != 0, base == 0
instruct decodeN_nullBase(iRegPdst dst, iRegNsrc src) %{
  match(Set dst (DecodeN src));
  predicate(Universe::narrow_oop_shift() != 0 &&
            Universe::narrow_oop_base() == 0);

  format %{ "SLDI    $dst, $src, #3 \t// DecodeN (zerobased)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
    __ sldi($dst$$Register, $src$$Register, Universe::narrow_oop_shift());
  %}
  ins_pipe(pipe_class_default);
%}

// src != 0, shift != 0, base != 0
instruct decodeN_notNull_addBase_Ex(iRegPdst dst, iRegNsrc src) %{
  match(Set dst (DecodeN src));
  predicate((n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
            Universe::narrow_oop_shift() != 0 &&
            Universe::narrow_oop_base() != 0);

  format %{ "DecodeN $dst, $src \t// $src != NULL, postalloc expanded" %}
  postalloc_expand( postalloc_expand_decode_oop_not_null(dst, src));
%}

// Compressed OOPs with narrow_oop_shift == 0.
instruct decodeN_unscaled(iRegPdst dst, iRegNsrc src) %{
  match(Set dst (DecodeN src));
  predicate(Universe::narrow_oop_shift() == 0);
  ins_cost(DEFAULT_COST);

  format %{ "MR      $dst, $src \t// DecodeN (unscaled)" %}
  // variable size, 0 or 4.
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ mr_if_needed($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Convert compressed oop into int for vectors alignment masking.
instruct decodeN2I_unscaled(iRegIdst dst, iRegNsrc src) %{
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));
  predicate(Universe::narrow_oop_shift() == 0);
  ins_cost(DEFAULT_COST);

  format %{ "MR      $dst, $src \t// (int)DecodeN (unscaled)" %}
  // variable size, 0 or 4.
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ mr_if_needed($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Convert klass pointer into compressed form.

// Nodes for postalloc expand.

// Shift node for expand.
instruct encodePKlass_shift(iRegNdst dst, iRegNsrc src) %{
  // The match rule is needed to make it a 'MachTypeNode'!
  match(Set dst (EncodePKlass src));
  predicate(false);

  format %{ "SRDI    $dst, $src, 3 \t// encode" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ srdi($dst$$Register, $src$$Register, Universe::narrow_klass_shift());
  %}
  ins_pipe(pipe_class_default);
%}

// Add node for expand.
instruct encodePKlass_sub_base(iRegPdst dst, iRegLsrc base, iRegPdst src) %{
  // The match rule is needed to make it a 'MachTypeNode'!
  match(Set dst (EncodePKlass (Binary base src)));
  predicate(false);

  format %{ "SUB     $dst, $base, $src \t// encode" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_subf);
    __ subf($dst$$Register, $base$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// base != 0
// 32G aligned narrow oop base.
instruct encodePKlass_32GAligned(iRegNdst dst, iRegPsrc src) %{
  match(Set dst (EncodePKlass src));
  predicate(false /* TODO: PPC port Universe::narrow_klass_base_disjoint()*/);

  format %{ "EXTRDI  $dst, $src, #32, #3 \t// encode with 32G aligned base" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ rldicl($dst$$Register, $src$$Register, 64-Universe::narrow_oop_shift(), 32);
  %}
  ins_pipe(pipe_class_default);
%}

// shift != 0, base != 0
instruct encodePKlass_not_null_Ex(iRegNdst dst, iRegLsrc base, iRegPsrc src) %{
  match(Set dst (EncodePKlass (Binary base src)));
  predicate(false);

  format %{ "EncodePKlass $dst, $src\t// $src != Null, postalloc expanded" %}
  postalloc_expand %{
    encodePKlass_sub_baseNode *n1 = new (C) encodePKlass_sub_baseNode();
    n1->add_req(n_region, n_base, n_src);
    n1->_opnds[0] = op_dst;
    n1->_opnds[1] = op_base;
    n1->_opnds[2] = op_src;
    n1->_bottom_type = _bottom_type;

    encodePKlass_shiftNode *n2 = new (C) encodePKlass_shiftNode();
    n2->add_req(n_region, n1);
    n2->_opnds[0] = op_dst;
    n2->_opnds[1] = op_dst;
    n2->_bottom_type = _bottom_type;
    ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));

    nodes->push(n1);
    nodes->push(n2);
  %}
%}

// shift != 0, base != 0
instruct encodePKlass_not_null_ExEx(iRegNdst dst, iRegPsrc src) %{
  match(Set dst (EncodePKlass src));
  //predicate(Universe::narrow_klass_shift() != 0 &&
  //          true /* TODO: PPC port Universe::narrow_klass_base_overlaps()*/);

  //format %{ "EncodePKlass $dst, $src\t// $src != Null, postalloc expanded" %}
  ins_cost(DEFAULT_COST*2);  // Don't count constant.
  expand %{
    immL baseImm %{ (jlong)(intptr_t)Universe::narrow_klass_base() %}
    iRegLdst base;
    loadConL_Ex(base, baseImm);
    encodePKlass_not_null_Ex(dst, base, src);
  %}
%}

// Decode nodes.

// Shift node for expand.
instruct decodeNKlass_shift(iRegPdst dst, iRegPsrc src) %{
  // The match rule is needed to make it a 'MachTypeNode'!
  match(Set dst (DecodeNKlass src));
  predicate(false);

  format %{ "SLDI    $dst, $src, #3 \t// DecodeNKlass" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
    __ sldi($dst$$Register, $src$$Register, Universe::narrow_klass_shift());
  %}
  ins_pipe(pipe_class_default);
%}

// Add node for expand.

instruct decodeNKlass_add_base(iRegPdst dst, iRegLsrc base, iRegPdst src) %{
  // The match rule is needed to make it a 'MachTypeNode'!
  match(Set dst (DecodeNKlass (Binary base src)));
  predicate(false);

  format %{ "ADD     $dst, $base, $src \t// DecodeNKlass, add klass base" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_add);
    __ add($dst$$Register, $base$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// src != 0, shift != 0, base != 0
instruct decodeNKlass_notNull_addBase_Ex(iRegPdst dst, iRegLsrc base, iRegNsrc src) %{
  match(Set dst (DecodeNKlass (Binary base src)));
  //effect(kill src); // We need a register for the immediate result after shifting.
  predicate(false);

  format %{ "DecodeNKlass $dst =  $base + ($src << 3) \t// $src != NULL, postalloc expanded" %}
  postalloc_expand %{
    decodeNKlass_add_baseNode *n1 = new (C) decodeNKlass_add_baseNode();
    n1->add_req(n_region, n_base, n_src);
    n1->_opnds[0] = op_dst;
    n1->_opnds[1] = op_base;
    n1->_opnds[2] = op_src;
    n1->_bottom_type = _bottom_type;

    decodeNKlass_shiftNode *n2 = new (C) decodeNKlass_shiftNode();
    n2->add_req(n_region, n1);
    n2->_opnds[0] = op_dst;
    n2->_opnds[1] = op_dst;
    n2->_bottom_type = _bottom_type;

    ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));

    nodes->push(n1);
    nodes->push(n2);
  %}
%}

// src != 0, shift != 0, base != 0
instruct decodeNKlass_notNull_addBase_ExEx(iRegPdst dst, iRegNsrc src) %{
  match(Set dst (DecodeNKlass src));
  // predicate(Universe::narrow_klass_shift() != 0 &&
  //           Universe::narrow_klass_base() != 0);

  //format %{ "DecodeNKlass $dst, $src \t// $src != NULL, expanded" %}

  ins_cost(DEFAULT_COST*2);  // Don't count constant.
  expand %{
    // We add first, then we shift. Like this, we can get along with one register less.
    // But we have to load the base pre-shifted.
    immL baseImm %{ (jlong)((intptr_t)Universe::narrow_klass_base() >> Universe::narrow_klass_shift()) %}
    iRegLdst base;
    loadConL_Ex(base, baseImm);
    decodeNKlass_notNull_addBase_Ex(dst, base, src);
  %}
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

instruct membar_acquire() %{
  match(LoadFence);
  ins_cost(4*MEMORY_REF_COST);

  format %{ "MEMBAR-acquire" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lwsync);
    __ acquire();
  %}
  ins_pipe(pipe_class_default);
%}

instruct unnecessary_membar_acquire() %{
  match(MemBarAcquire);
  ins_cost(0);

  format %{ " -- \t// redundant MEMBAR-acquire - empty" %}
  size(0);
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  format %{ " -- \t// redundant MEMBAR-acquire - empty (acquire as part of CAS in prior FastLock)" %}
  size(0);
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(4*MEMORY_REF_COST);

  format %{ "MEMBAR-release" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lwsync);
    __ release();
  %}
  ins_pipe(pipe_class_default);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(4*MEMORY_REF_COST);

  format %{ "MEMBAR-store-store" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lwsync);
    __ membar(Assembler::StoreStore);
  %}
  ins_pipe(pipe_class_default);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  format %{ " -- \t// redundant MEMBAR-release - empty (release in FastUnlock)" %}
  size(0);
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

instruct membar_volatile() %{
  match(MemBarVolatile);
  ins_cost(4*MEMORY_REF_COST);

  format %{ "MEMBAR-volatile" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_sync);
    __ fence();
  %}
  ins_pipe(pipe_class_default);
%}

// This optimization is wrong on PPC. The following pattern is not supported:
//  MemBarVolatile
//   ^        ^
//   |        |
//  CtrlProj MemProj
//   ^        ^
//   |        |
//   |       Load
//   |
//  MemBarVolatile
//
//  The first MemBarVolatile could get optimized out! According to
//  Vladimir, this pattern can not occur on Oracle platforms.
//  However, it does occur on PPC64 (because of membars in
//  inline_unsafe_load_store).
//
// Add this node again if we found a good solution for inline_unsafe_load_store().
// Don't forget to look at the implementation of post_store_load_barrier again,
// we did other fixes in that method.
//instruct unnecessary_membar_volatile() %{
//  match(MemBarVolatile);
//  predicate(Matcher::post_store_load_barrier(n));
//  ins_cost(0);
//
//  format %{ " -- \t// redundant MEMBAR-volatile - empty" %}
//  size(0);
//  ins_encode( /*empty*/ );
//  ins_pipe(pipe_class_default);
//%}

instruct membar_CPUOrder() %{
  match(MemBarCPUOrder);
  ins_cost(0);

  format %{ " -- \t// MEMBAR-CPUOrder - empty: PPC64 processors are self-consistent." %}
  size(0);
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

//----------Conditional Move---------------------------------------------------

// Cmove using isel.
instruct cmovI_reg_isel(cmpOp cmp, flagsReg crx, iRegIdst dst, iRegIsrc src) %{
  match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
  predicate(VM_Version::has_isel());
  ins_cost(DEFAULT_COST);

  format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
  size(4);
  ins_encode %{
    // This is a Power7 instruction for which no machine description
    // exists. Anyways, the scheduler should be off on Power7.
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    int cc        = $cmp$$cmpcode;
    __ isel($dst$$Register, $crx$$CondRegister,
            (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

instruct cmovI_reg(cmpOp cmp, flagsReg crx, iRegIdst dst, iRegIsrc src) %{
  match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
  predicate(!VM_Version::has_isel());
  ins_cost(DEFAULT_COST+BRANCH_COST);

  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
  // Worst case is branch + move + stop, no stop without scheduler
  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
  ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
  ins_pipe(pipe_class_default);
%}

instruct cmovI_imm(cmpOp cmp, flagsReg crx, iRegIdst dst, immI16 src) %{
  match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
  ins_cost(DEFAULT_COST+BRANCH_COST);

  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
  // Worst case is branch + move + stop, no stop without scheduler
  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
  ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
  ins_pipe(pipe_class_default);
%}

// Cmove using isel.
instruct cmovL_reg_isel(cmpOp cmp, flagsReg crx, iRegLdst dst, iRegLsrc src) %{
  match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
  predicate(VM_Version::has_isel());
  ins_cost(DEFAULT_COST);

  format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
  size(4);
  ins_encode %{
    // This is a Power7 instruction for which no machine description
    // exists. Anyways, the scheduler should be off on Power7.
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    int cc        = $cmp$$cmpcode;
    __ isel($dst$$Register, $crx$$CondRegister,
            (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

instruct cmovL_reg(cmpOp cmp, flagsReg crx, iRegLdst dst, iRegLsrc src) %{
  match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
  predicate(!VM_Version::has_isel());
  ins_cost(DEFAULT_COST+BRANCH_COST);

  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
  ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
  ins_pipe(pipe_class_default);
%}

instruct cmovL_imm(cmpOp cmp, flagsReg crx, iRegLdst dst, immL16 src) %{
  match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
  ins_cost(DEFAULT_COST+BRANCH_COST);

  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
  ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
  ins_pipe(pipe_class_default);
%}

// Cmove using isel.
instruct cmovN_reg_isel(cmpOp cmp, flagsReg crx, iRegNdst dst, iRegNsrc src) %{
  match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
  predicate(VM_Version::has_isel());
  ins_cost(DEFAULT_COST);

  format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
  size(4);
  ins_encode %{
    // This is a Power7 instruction for which no machine description
    // exists. Anyways, the scheduler should be off on Power7.
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    int cc        = $cmp$$cmpcode;
    __ isel($dst$$Register, $crx$$CondRegister,
            (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Conditional move for RegN. Only cmov(reg, reg).
instruct cmovN_reg(cmpOp cmp, flagsReg crx, iRegNdst dst, iRegNsrc src) %{
  match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
  predicate(!VM_Version::has_isel());
  ins_cost(DEFAULT_COST+BRANCH_COST);

  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
  ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
  ins_pipe(pipe_class_default);
%}

instruct cmovN_imm(cmpOp cmp, flagsReg crx, iRegNdst dst, immN_0 src) %{
  match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
  ins_cost(DEFAULT_COST+BRANCH_COST);

  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
  ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
  ins_pipe(pipe_class_default);
%}

// Cmove using isel.
instruct cmovP_reg_isel(cmpOp cmp, flagsReg crx, iRegPdst dst, iRegPsrc src) %{
  match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
  predicate(VM_Version::has_isel());
  ins_cost(DEFAULT_COST);

  format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
  size(4);
  ins_encode %{
    // This is a Power7 instruction for which no machine description
    // exists. Anyways, the scheduler should be off on Power7.
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    int cc        = $cmp$$cmpcode;
    __ isel($dst$$Register, $crx$$CondRegister,
            (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

instruct cmovP_reg(cmpOp cmp, flagsReg crx, iRegPdst dst, iRegP_N2P src) %{
  match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
  predicate(!VM_Version::has_isel());
  ins_cost(DEFAULT_COST+BRANCH_COST);

  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
  ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
  ins_pipe(pipe_class_default);
%}

instruct cmovP_imm(cmpOp cmp, flagsReg crx, iRegPdst dst, immP_0 src) %{
  match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
  ins_cost(DEFAULT_COST+BRANCH_COST);

  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
  ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
  ins_pipe(pipe_class_default);
%}

instruct cmovF_reg(cmpOp cmp, flagsReg crx, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cmp crx) (Binary dst src)));
  ins_cost(DEFAULT_COST+BRANCH_COST);

  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOVEF  $cmp, $crx, $dst, $src\n\t" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmovef);
    Label done;
    assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
    // Branch if not (cmp crx).
    __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done);
    __ fmr($dst$$FloatRegister, $src$$FloatRegister);
    // TODO PPC port __ endgroup_if_needed(_size == 12);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);
%}

instruct cmovD_reg(cmpOp cmp, flagsReg crx, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cmp crx) (Binary dst src)));
  ins_cost(DEFAULT_COST+BRANCH_COST);

  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOVEF  $cmp, $crx, $dst, $src\n\t" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmovef);
    Label done;
    assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
    // Branch if not (cmp crx).
    __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done);
    __ fmr($dst$$FloatRegister, $src$$FloatRegister);
    // TODO PPC port __ endgroup_if_needed(_size == 12);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);
%}

//----------Conditional_store--------------------------------------------------
// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success. Implemented with a CASA on Sparc.

// As compareAndSwapL, but return flag register instead of boolean value in
// int register.
// Used by sun/misc/AtomicLongCSImpl.java.
// Mem_ptr must be a memory operand, else this node does not get
// Flag_needs_anti_dependence_check set by adlc. If this is not set this node
// can be rematerialized which leads to errors.
instruct storeLConditional_regP_regL_regL(flagsReg crx, indirect mem_ptr, iRegLsrc oldVal, iRegLsrc newVal) %{
  match(Set crx (StoreLConditional mem_ptr (Binary oldVal newVal)));
  format %{ "CMPXCHGD if ($crx = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    __ cmpxchgd($crx$$CondRegister, R0, $oldVal$$Register, $newVal$$Register, $mem_ptr$$Register,
                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
                noreg, NULL, true);
  %}
  ins_pipe(pipe_class_default);
%}

// As compareAndSwapP, but return flag register instead of boolean value in
// int register.
// This instruction is matched if UseTLAB is off.
// Mem_ptr must be a memory operand, else this node does not get
// Flag_needs_anti_dependence_check set by adlc. If this is not set this node
// can be rematerialized which leads to errors.
instruct storePConditional_regP_regP_regP(flagsReg crx, indirect mem_ptr, iRegPsrc oldVal, iRegPsrc newVal) %{
  match(Set crx (StorePConditional mem_ptr (Binary oldVal newVal)));
  format %{ "CMPXCHGD if ($crx = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    __ cmpxchgd($crx$$CondRegister, R0, $oldVal$$Register, $newVal$$Register, $mem_ptr$$Register,
                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
                noreg, NULL, true);
  %}
  ins_pipe(pipe_class_default);
%}

// Implement LoadPLocked. Must be ordered against changes of the memory location
// by storePConditional.
// Don't know whether this is ever used.
instruct loadPLocked(iRegPdst dst, memory mem) %{
  match(Set dst (LoadPLocked mem));
  ins_cost(MEMORY_REF_COST);

  format %{ "LD      $dst, $mem \t// loadPLocked\n\t"
            "TWI     $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_ld_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

//----------Compare-And-Swap---------------------------------------------------

// CompareAndSwap{P,I,L} have more than one output, therefore "CmpI
// (CompareAndSwap ...)" or "If (CmpI (CompareAndSwap ..))"  cannot be
// matched.

instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary src1 src2)));
  format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
  // Variable size: instruction count smaller if regs are disjoint.
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
    __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
                MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(),
                $res$$Register, true);
  %}
  ins_pipe(pipe_class_default);
%}

instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2) %{
  match(Set res (CompareAndSwapN mem_ptr (Binary src1 src2)));
  format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
  // Variable size: instruction count smaller if regs are disjoint.
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
    __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
                MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(),
                $res$$Register, true);
  %}
  ins_pipe(pipe_class_default);
%}

instruct compareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2) %{
  match(Set res (CompareAndSwapL mem_ptr (Binary src1 src2)));
  format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool" %}
  // Variable size: instruction count smaller if regs are disjoint.
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
    __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
                MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(),
                $res$$Register, NULL, true);
  %}
  ins_pipe(pipe_class_default);
%}

instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary src1 src2)));
  format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
  // Variable size: instruction count smaller if regs are disjoint.
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
    __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
                MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(),
                $res$$Register, NULL, true);
  %}
  ins_pipe(pipe_class_default);
%}

instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{
  match(Set res (GetAndAddI mem_ptr src));
  format %{ "GetAndAddI $res, $mem_ptr, $src" %}
  // Variable size: instruction count smaller if regs are disjoint.
  ins_encode( enc_GetAndAddI(res, mem_ptr, src) );
  ins_pipe(pipe_class_default);
%}

instruct getAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{
  match(Set res (GetAndAddL mem_ptr src));
  format %{ "GetAndAddL $res, $mem_ptr, $src" %}
  // Variable size: instruction count smaller if regs are disjoint.
  ins_encode( enc_GetAndAddL(res, mem_ptr, src) );
  ins_pipe(pipe_class_default);
%}

instruct getAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{
  match(Set res (GetAndSetI mem_ptr src));
  format %{ "GetAndSetI $res, $mem_ptr, $src" %}
  // Variable size: instruction count smaller if regs are disjoint.
  ins_encode( enc_GetAndSetI(res, mem_ptr, src) );
  ins_pipe(pipe_class_default);
%}

instruct getAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{
  match(Set res (GetAndSetL mem_ptr src));
  format %{ "GetAndSetL $res, $mem_ptr, $src" %}
  // Variable size: instruction count smaller if regs are disjoint.
  ins_encode( enc_GetAndSetL(res, mem_ptr, src) );
  ins_pipe(pipe_class_default);
%}

instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src) %{
  match(Set res (GetAndSetP mem_ptr src));
  format %{ "GetAndSetP $res, $mem_ptr, $src" %}
  // Variable size: instruction count smaller if regs are disjoint.
  ins_encode( enc_GetAndSetL(res, mem_ptr, src) );
  ins_pipe(pipe_class_default);
%}

instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src) %{
  match(Set res (GetAndSetN mem_ptr src));
  format %{ "GetAndSetN $res, $mem_ptr, $src" %}
  // Variable size: instruction count smaller if regs are disjoint.
  ins_encode( enc_GetAndSetI(res, mem_ptr, src) );
  ins_pipe(pipe_class_default);
%}

//----------Arithmetic Instructions--------------------------------------------
// Addition Instructions

// Register Addition
instruct addI_reg_reg(iRegIdst dst, iRegIsrc_iRegL2Isrc src1, iRegIsrc_iRegL2Isrc src2) %{
  match(Set dst (AddI src1 src2));
  format %{ "ADD     $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_add);
    __ add($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Expand does not work with above instruct. (??)
instruct addI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  // no match-rule
  effect(DEF dst, USE src1, USE src2);
  format %{ "ADD     $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_add);
    __ add($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

instruct tree_addI_addI_addI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
  match(Set dst (AddI (AddI (AddI src1 src2) src3) src4));
  ins_cost(DEFAULT_COST*3);

  expand %{
    // FIXME: we should do this in the ideal world.
    iRegIdst tmp1;
    iRegIdst tmp2;
    addI_reg_reg(tmp1, src1, src2);
    addI_reg_reg_2(tmp2, src3, src4); // Adlc complains about addI_reg_reg.
    addI_reg_reg(dst, tmp1, tmp2);
  %}
%}

// Immediate Addition
instruct addI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
  match(Set dst (AddI src1 src2));
  format %{ "ADDI    $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ addi($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate Addition with 16-bit shifted operand
instruct addI_reg_immhi16(iRegIdst dst, iRegIsrc src1, immIhi16 src2) %{
  match(Set dst (AddI src1 src2));
  format %{ "ADDIS   $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addis);
    __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16);
  %}
  ins_pipe(pipe_class_default);
%}

// Long Addition
instruct addL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (AddL src1 src2));
  format %{ "ADD     $dst, $src1, $src2 \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_add);
    __ add($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Expand does not work with above instruct. (??)
instruct addL_reg_reg_2(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
  // no match-rule
  effect(DEF dst, USE src1, USE src2);
  format %{ "ADD     $dst, $src1, $src2 \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_add);
    __ add($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

instruct tree_addL_addL_addL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2, iRegLsrc src3, iRegLsrc src4) %{
  match(Set dst (AddL (AddL (AddL src1 src2) src3) src4));
  ins_cost(DEFAULT_COST*3);

  expand %{
    // FIXME: we should do this in the ideal world.
    iRegLdst tmp1;
    iRegLdst tmp2;
    addL_reg_reg(tmp1, src1, src2);
    addL_reg_reg_2(tmp2, src3, src4); // Adlc complains about orI_reg_reg.
    addL_reg_reg(dst, tmp1, tmp2);
  %}
%}

// AddL + ConvL2I.
instruct addI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (ConvL2I (AddL src1 src2)));

  format %{ "ADD     $dst, $src1, $src2 \t// long + l2i" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_add);
    __ add($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// No constant pool entries required.
instruct addL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
  match(Set dst (AddL src1 src2));

  format %{ "ADDI    $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ addi($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// Long Immediate Addition with 16-bit shifted operand.
// No constant pool entries required.
instruct addL_reg_immhi16(iRegLdst dst, iRegLsrc src1, immL32hi16 src2) %{
  match(Set dst (AddL src1 src2));

  format %{ "ADDIS   $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addis);
    __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16);
  %}
  ins_pipe(pipe_class_default);
%}

// Pointer Register Addition
instruct addP_reg_reg(iRegPdst dst, iRegP_N2P src1, iRegLsrc src2) %{
  match(Set dst (AddP src1 src2));
  format %{ "ADD     $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_add);
    __ add($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Pointer Immediate Addition
// No constant pool entries required.
instruct addP_reg_imm16(iRegPdst dst, iRegP_N2P src1, immL16 src2) %{
  match(Set dst (AddP src1 src2));

  format %{ "ADDI    $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ addi($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// Pointer Immediate Addition with 16-bit shifted operand.
// No constant pool entries required.
instruct addP_reg_immhi16(iRegPdst dst, iRegP_N2P src1, immL32hi16 src2) %{
  match(Set dst (AddP src1 src2));

  format %{ "ADDIS   $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addis);
    __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16);
  %}
  ins_pipe(pipe_class_default);
%}

//---------------------
// Subtraction Instructions

// Register Subtraction
instruct subI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (SubI src1 src2));
  format %{ "SUBF    $dst, $src2, $src1" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_subf);
    __ subf($dst$$Register, $src2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate Subtraction
// The compiler converts "x-c0" into "x+ -c0" (see SubINode::Ideal),
// so this rule seems to be unused.
instruct subI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
  match(Set dst (SubI src1 src2));
  format %{ "SUBI    $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ addi($dst$$Register, $src1$$Register, ($src2$$constant) * (-1));
  %}
  ins_pipe(pipe_class_default);
%}

// SubI from constant (using subfic).
instruct subI_imm16_reg(iRegIdst dst, immI16 src1, iRegIsrc src2) %{
  match(Set dst (SubI src1 src2));
  format %{ "SUBI    $dst, $src1, $src2" %}

  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_subfic);
    __ subfic($dst$$Register, $src2$$Register, $src1$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// Turn the sign-bit of an integer into a 32-bit mask, 0x0...0 for
// positive integers and 0xF...F for negative ones.
instruct signmask32I_regI(iRegIdst dst, iRegIsrc src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "SRAWI   $dst, $src, #31" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_srawi);
    __ srawi($dst$$Register, $src$$Register, 0x1f);
  %}
  ins_pipe(pipe_class_default);
%}

instruct absI_reg_Ex(iRegIdst dst, iRegIsrc src) %{
  match(Set dst (AbsI src));
  ins_cost(DEFAULT_COST*3);

  expand %{
    iRegIdst tmp1;
    iRegIdst tmp2;
    signmask32I_regI(tmp1, src);
    xorI_reg_reg(tmp2, tmp1, src);
    subI_reg_reg(dst, tmp2, tmp1);
  %}
%}

instruct negI_regI(iRegIdst dst, immI_0 zero, iRegIsrc src2) %{
  match(Set dst (SubI zero src2));
  format %{ "NEG     $dst, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_neg);
    __ neg($dst$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Long subtraction
instruct subL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (SubL src1 src2));
  format %{ "SUBF    $dst, $src2, $src1 \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_subf);
    __ subf($dst$$Register, $src2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// SubL + convL2I.
instruct subI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (ConvL2I (SubL src1 src2)));

  format %{ "SUBF    $dst, $src2, $src1 \t// long + l2i" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_subf);
    __ subf($dst$$Register, $src2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate Subtraction
// The compiler converts "x-c0" into "x+ -c0" (see SubLNode::Ideal),
// so this rule seems to be unused.
// No constant pool entries required.
instruct subL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
  match(Set dst (SubL src1 src2));

  format %{ "SUBI    $dst, $src1, $src2 \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ addi($dst$$Register, $src1$$Register, ($src2$$constant) * (-1));
  %}
  ins_pipe(pipe_class_default);
%}

// Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for
// positive longs and 0xF...F for negative ones.
instruct signmask64I_regL(iRegIdst dst, iRegLsrc src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "SRADI   $dst, $src, #63" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_sradi);
    __ sradi($dst$$Register, $src$$Register, 0x3f);
  %}
  ins_pipe(pipe_class_default);
%}

// Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for
// positive longs and 0xF...F for negative ones.
instruct signmask64L_regL(iRegLdst dst, iRegLsrc src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "SRADI   $dst, $src, #63" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_sradi);
    __ sradi($dst$$Register, $src$$Register, 0x3f);
  %}
  ins_pipe(pipe_class_default);
%}

// Long negation
instruct negL_reg_reg(iRegLdst dst, immL_0 zero, iRegLsrc src2) %{
  match(Set dst (SubL zero src2));
  format %{ "NEG     $dst, $src2 \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_neg);
    __ neg($dst$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// NegL + ConvL2I.
instruct negI_con0_regL(iRegIdst dst, immL_0 zero, iRegLsrc src2) %{
  match(Set dst (ConvL2I (SubL zero src2)));

  format %{ "NEG     $dst, $src2 \t// long + l2i" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_neg);
    __ neg($dst$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Multiplication Instructions
// Integer Multiplication

// Register Multiplication
instruct mulI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (MulI src1 src2));
  ins_cost(DEFAULT_COST);

  format %{ "MULLW   $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_mullw);
    __ mullw($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate Multiplication
instruct mulI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
  match(Set dst (MulI src1 src2));
  ins_cost(DEFAULT_COST);

  format %{ "MULLI   $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_mulli);
    __ mulli($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

instruct mulL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (MulL src1 src2));
  ins_cost(DEFAULT_COST);

  format %{ "MULLD   $dst $src1, $src2 \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_mulld);
    __ mulld($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Multiply high for optimized long division by constant.
instruct mulHighL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (MulHiL src1 src2));
  ins_cost(DEFAULT_COST);

  format %{ "MULHD   $dst $src1, $src2 \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_mulhd);
    __ mulhd($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate Multiplication
instruct mulL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
  match(Set dst (MulL src1 src2));
  ins_cost(DEFAULT_COST);

  format %{ "MULLI   $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_mulli);
    __ mulli($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// Integer Division with Immediate -1: Negate.
instruct divI_reg_immIvalueMinus1(iRegIdst dst, iRegIsrc src1, immI_minus1 src2) %{
  match(Set dst (DivI src1 src2));
  ins_cost(DEFAULT_COST);

  format %{ "NEG     $dst, $src1 \t// /-1" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_neg);
    __ neg($dst$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Integer Division with constant, but not -1.
// We should be able to improve this by checking the type of src2.
// It might well be that src2 is known to be positive.
instruct divI_reg_regnotMinus1(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (DivI src1 src2));
  predicate(n->in(2)->find_int_con(-1) != -1); // src2 is a constant, but not -1
  ins_cost(2*DEFAULT_COST);

  format %{ "DIVW    $dst, $src1, $src2 \t// /not-1" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_divw);
    __ divw($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

instruct cmovI_bne_negI_reg(iRegIdst dst, flagsReg crx, iRegIsrc src1) %{
  effect(USE_DEF dst, USE src1, USE crx);
  predicate(false);

  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOVE   $dst, neg($src1), $crx" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
    Label done;
    __ bne($crx$$CondRegister, done);
    __ neg($dst$$Register, $src1$$Register);
    // TODO PPC port __ endgroup_if_needed(_size == 12);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);
%}

// Integer Division with Registers not containing constants.
instruct divI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (DivI src1 src2));
  ins_cost(10*DEFAULT_COST);

  expand %{
    immI16 imm %{ (int)-1 %}
    flagsReg tmp1;
    cmpI_reg_imm16(tmp1, src2, imm);          // check src2 == -1
    divI_reg_regnotMinus1(dst, src1, src2);   // dst = src1 / src2
    cmovI_bne_negI_reg(dst, tmp1, src1);      // cmove dst = neg(src1) if src2 == -1
  %}
%}

// Long Division with Immediate -1: Negate.
instruct divL_reg_immLvalueMinus1(iRegLdst dst, iRegLsrc src1, immL_minus1 src2) %{
  match(Set dst (DivL src1 src2));
  ins_cost(DEFAULT_COST);

  format %{ "NEG     $dst, $src1 \t// /-1, long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_neg);
    __ neg($dst$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Long Division with constant, but not -1.
instruct divL_reg_regnotMinus1(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (DivL src1 src2));
  predicate(n->in(2)->find_long_con(-1L) != -1L); // Src2 is a constant, but not -1.
  ins_cost(2*DEFAULT_COST);

  format %{ "DIVD    $dst, $src1, $src2 \t// /not-1, long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_divd);
    __ divd($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

instruct cmovL_bne_negL_reg(iRegLdst dst, flagsReg crx, iRegLsrc src1) %{
  effect(USE_DEF dst, USE src1, USE crx);
  predicate(false);

  ins_variable_size_depending_on_alignment(true);

  format %{ "CMOVE   $dst, neg($src1), $crx" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
    Label done;
    __ bne($crx$$CondRegister, done);
    __ neg($dst$$Register, $src1$$Register);
    // TODO PPC port __ endgroup_if_needed(_size == 12);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);
%}

// Long Division with Registers not containing constants.
instruct divL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (DivL src1 src2));
  ins_cost(10*DEFAULT_COST);

  expand %{
    immL16 imm %{ (int)-1 %}
    flagsReg tmp1;
    cmpL_reg_imm16(tmp1, src2, imm);          // check src2 == -1
    divL_reg_regnotMinus1(dst, src1, src2);   // dst = src1 / src2
    cmovL_bne_negL_reg(dst, tmp1, src1);      // cmove dst = neg(src1) if src2 == -1
  %}
%}

// Integer Remainder with registers.
instruct modI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (ModI src1 src2));
  ins_cost(10*DEFAULT_COST);

  expand %{
    immI16 imm %{ (int)-1 %}
    flagsReg tmp1;
    iRegIdst tmp2;
    iRegIdst tmp3;
    cmpI_reg_imm16(tmp1, src2, imm);           // check src2 == -1
    divI_reg_regnotMinus1(tmp2, src1, src2);   // tmp2 = src1 / src2
    cmovI_bne_negI_reg(tmp2, tmp1, src1);      // cmove tmp2 = neg(src1) if src2 == -1
    mulI_reg_reg(tmp3, src2, tmp2);            // tmp3 = src2 * tmp2
    subI_reg_reg(dst, src1, tmp3);             // dst = src1 - tmp3
  %}
%}

// Long Remainder with registers
instruct modL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
  match(Set dst (ModL src1 src2));
  ins_cost(10*DEFAULT_COST);

  expand %{
    immL16 imm %{ (int)-1 %}
    flagsReg tmp1;
    iRegLdst tmp2;
    iRegLdst tmp3;
    cmpL_reg_imm16(tmp1, src2, imm);             // check src2 == -1
    divL_reg_regnotMinus1(tmp2, src1, src2);     // tmp2 = src1 / src2
    cmovL_bne_negL_reg(tmp2, tmp1, src1);        // cmove tmp2 = neg(src1) if src2 == -1
    mulL_reg_reg(tmp3, src2, tmp2);              // tmp3 = src2 * tmp2
    subL_reg_reg(dst, src1, tmp3);               // dst = src1 - tmp3
  %}
%}

// Integer Shift Instructions

// Register Shift Left

// Clear all but the lowest #mask bits.
// Used to normalize shift amounts in registers.
instruct maskI_reg_imm(iRegIdst dst, iRegIsrc src, uimmI6 mask) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src, USE mask);
  predicate(false);

  format %{ "MASK    $dst, $src, $mask \t// clear $mask upper bits" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ clrldi($dst$$Register, $src$$Register, $mask$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

instruct lShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src1, USE src2);
  predicate(false);

  format %{ "SLW     $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_slw);
    __ slw($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

instruct lShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (LShiftI src1 src2));
  ins_cost(DEFAULT_COST*2);
  expand %{
    uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %}
    iRegIdst tmpI;
    maskI_reg_imm(tmpI, src2, mask);
    lShiftI_reg_reg(dst, src1, tmpI);
  %}
%}

// Register Shift Left Immediate
instruct lShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  format %{ "SLWI    $dst, $src1, ($src2 & 0x1f)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm);
    __ slwi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f);
  %}
  ins_pipe(pipe_class_default);
%}

// AndI with negpow2-constant + LShiftI
instruct lShiftI_andI_immInegpow2_imm5(iRegIdst dst, iRegIsrc src1, immInegpow2 src2, uimmI5 src3) %{
  match(Set dst (LShiftI (AndI src1 src2) src3));
  predicate(UseRotateAndMaskInstructionsPPC64);

  format %{ "RLWINM  $dst, lShiftI(AndI($src1, $src2), $src3)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm); // FIXME: assert that rlwinm is equal to addi
    long src2      = $src2$$constant;
    long src3      = $src3$$constant;
    long maskbits  = src3 + log2_long((jlong) (julong) (juint) -src2);
    if (maskbits >= 32) {
      __ li($dst$$Register, 0); // addi
    } else {
      __ rlwinm($dst$$Register, $src1$$Register, src3 & 0x1f, 0, (31-maskbits) & 0x1f);
    }
  %}
  ins_pipe(pipe_class_default);
%}

// RShiftI + AndI with negpow2-constant + LShiftI
instruct lShiftI_andI_immInegpow2_rShiftI_imm5(iRegIdst dst, iRegIsrc src1, immInegpow2 src2, uimmI5 src3) %{
  match(Set dst (LShiftI (AndI (RShiftI src1 src3) src2) src3));
  predicate(UseRotateAndMaskInstructionsPPC64);

  format %{ "RLWINM  $dst, lShiftI(AndI(RShiftI($src1, $src3), $src2), $src3)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm); // FIXME: assert that rlwinm is equal to addi
    long src2      = $src2$$constant;
    long src3      = $src3$$constant;
    long maskbits  = src3 + log2_long((jlong) (julong) (juint) -src2);
    if (maskbits >= 32) {
      __ li($dst$$Register, 0); // addi
    } else {
      __ rlwinm($dst$$Register, $src1$$Register, 0, 0, (31-maskbits) & 0x1f);
    }
  %}
  ins_pipe(pipe_class_default);
%}

instruct lShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src1, USE src2);
  predicate(false);

  format %{ "SLD     $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_sld);
    __ sld($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Register Shift Left
instruct lShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
  match(Set dst (LShiftL src1 src2));
  ins_cost(DEFAULT_COST*2);
  expand %{
    uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %}
    iRegIdst tmpI;
    maskI_reg_imm(tmpI, src2, mask);
    lShiftL_regL_regI(dst, src1, tmpI);
  %}
%}

// Register Shift Left Immediate
instruct lshiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));
  format %{ "SLDI    $dst, $src1, ($src2 & 0x3f)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
    __ sldi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
  %}
  ins_pipe(pipe_class_default);
%}

// If we shift more than 32 bits, we need not convert I2L.
instruct lShiftL_regI_immGE32(iRegLdst dst, iRegIsrc src1, uimmI6_ge32 src2) %{
  match(Set dst (LShiftL (ConvI2L src1) src2));
  ins_cost(DEFAULT_COST);

  size(4);
  format %{ "SLDI    $dst, i2l($src1), $src2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
    __ sldi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
  %}
  ins_pipe(pipe_class_default);
%}

// Shift a postivie int to the left.
// Clrlsldi clears the upper 32 bits and shifts.
instruct scaledPositiveI2L_lShiftL_convI2L_reg_imm6(iRegLdst dst, iRegIsrc src1, uimmI6 src2) %{
  match(Set dst (LShiftL (ConvI2L src1) src2));
  predicate(((ConvI2LNode*)(_kids[0]->_leaf))->type()->is_long()->is_positive_int());

  format %{ "SLDI    $dst, i2l(positive_int($src1)), $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldic);
    __ clrlsldi($dst$$Register, $src1$$Register, 0x20, $src2$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

instruct arShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src1, USE src2);
  predicate(false);

  format %{ "SRAW    $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_sraw);
    __ sraw($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Register Arithmetic Shift Right
instruct arShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (RShiftI src1 src2));
  ins_cost(DEFAULT_COST*2);
  expand %{
    uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %}
    iRegIdst tmpI;
    maskI_reg_imm(tmpI, src2, mask);
    arShiftI_reg_reg(dst, src1, tmpI);
  %}
%}

// Register Arithmetic Shift Right Immediate
instruct arShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  format %{ "SRAWI   $dst, $src1, ($src2 & 0x1f)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_srawi);
    __ srawi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f);
  %}
  ins_pipe(pipe_class_default);
%}

instruct arShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src1, USE src2);
  predicate(false);

  format %{ "SRAD    $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_srad);
    __ srad($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Register Shift Right Arithmetic Long
instruct arShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
  match(Set dst (RShiftL src1 src2));
  ins_cost(DEFAULT_COST*2);

  expand %{
    uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %}
    iRegIdst tmpI;
    maskI_reg_imm(tmpI, src2, mask);
    arShiftL_regL_regI(dst, src1, tmpI);
  %}
%}

// Register Shift Right Immediate
instruct arShiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  format %{ "SRADI   $dst, $src1, ($src2 & 0x3f)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_sradi);
    __ sradi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
  %}
  ins_pipe(pipe_class_default);
%}

// RShiftL + ConvL2I
instruct convL2I_arShiftL_regL_immI(iRegIdst dst, iRegLsrc src1, immI src2) %{
  match(Set dst (ConvL2I (RShiftL src1 src2)));

  format %{ "SRADI   $dst, $src1, ($src2 & 0x3f) \t// long + l2i" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_sradi);
    __ sradi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
  %}
  ins_pipe(pipe_class_default);
%}

instruct urShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src1, USE src2);
  predicate(false);

  format %{ "SRW     $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_srw);
    __ srw($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Register Shift Right
instruct urShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (URShiftI src1 src2));
  ins_cost(DEFAULT_COST*2);

  expand %{
    uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %}
    iRegIdst tmpI;
    maskI_reg_imm(tmpI, src2, mask);
    urShiftI_reg_reg(dst, src1, tmpI);
  %}
%}

// Register Shift Right Immediate
instruct urShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  format %{ "SRWI    $dst, $src1, ($src2 & 0x1f)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm);
    __ srwi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f);
  %}
  ins_pipe(pipe_class_default);
%}

instruct urShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src1, USE src2);
  predicate(false);

  format %{ "SRD     $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_srd);
    __ srd($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Register Shift Right
instruct urShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
  match(Set dst (URShiftL src1 src2));
  ins_cost(DEFAULT_COST*2);

  expand %{
    uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %}
    iRegIdst tmpI;
    maskI_reg_imm(tmpI, src2, mask);
    urShiftL_regL_regI(dst, src1, tmpI);
  %}
%}

// Register Shift Right Immediate
instruct urShiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  format %{ "SRDI    $dst, $src1, ($src2 & 0x3f)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
  %}
  ins_pipe(pipe_class_default);
%}

// URShiftL + ConvL2I.
instruct convL2I_urShiftL_regL_immI(iRegIdst dst, iRegLsrc src1, immI src2) %{
  match(Set dst (ConvL2I (URShiftL src1 src2)));

  format %{ "SRDI    $dst, $src1, ($src2 & 0x3f) \t// long + l2i" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
  %}
  ins_pipe(pipe_class_default);
%}

// Register Shift Right Immediate with a CastP2X
instruct shrP_convP2X_reg_imm6(iRegLdst dst, iRegP_N2P src1, uimmI6 src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  format %{ "SRDI    $dst, $src1, $src2 \t// Cast ptr $src1 to long and shift" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
  %}
  ins_pipe(pipe_class_default);
%}

instruct sxtI_reg(iRegIdst dst, iRegIsrc src) %{
  match(Set dst (ConvL2I (ConvI2L src)));

  format %{ "EXTSW   $dst, $src \t// int->int" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_extsw);
    __ extsw($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

//----------Rotate Instructions------------------------------------------------

// Rotate Left by 8-bit immediate
instruct rotlI_reg_immi8(iRegIdst dst, iRegIsrc src, immI8 lshift, immI8 rshift) %{
  match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));

  format %{ "ROTLWI  $dst, $src, $lshift" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm);
    __ rotlwi($dst$$Register, $src$$Register, $lshift$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// Rotate Right by 8-bit immediate
instruct rotrI_reg_immi8(iRegIdst dst, iRegIsrc src, immI8 rshift, immI8 lshift) %{
  match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));

  format %{ "ROTRWI  $dst, $rshift" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm);
    __ rotrwi($dst$$Register, $src$$Register, $rshift$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

//----------Floating Point Arithmetic Instructions-----------------------------

// Add float single precision
instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (AddF src1 src2));

  format %{ "FADDS   $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fadds);
    __ fadds($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Add float double precision
instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (AddD src1 src2));

  format %{ "FADD    $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fadd);
    __ fadd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Sub float single precision
instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (SubF src1 src2));

  format %{ "FSUBS   $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fsubs);
    __ fsubs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Sub float double precision
instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (SubD src1 src2));
  format %{ "FSUB    $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fsub);
    __ fsub($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Mul float single precision
instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (MulF src1 src2));
  format %{ "FMULS   $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fmuls);
    __ fmuls($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Mul float double precision
instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (MulD src1 src2));
  format %{ "FMUL    $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fmul);
    __ fmul($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Div float single precision
instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (DivF src1 src2));
  format %{ "FDIVS   $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fdivs);
    __ fdivs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Div float double precision
instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (DivD src1 src2));
  format %{ "FDIV    $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fdiv);
    __ fdiv($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Absolute float single precision
instruct absF_reg(regF dst, regF src) %{
  match(Set dst (AbsF src));
  format %{ "FABS    $dst, $src \t// float" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fabs);
    __ fabs($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Absolute float double precision
instruct absD_reg(regD dst, regD src) %{
  match(Set dst (AbsD src));
  format %{ "FABS    $dst, $src \t// double" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fabs);
    __ fabs($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

instruct negF_reg(regF dst, regF src) %{
  match(Set dst (NegF src));
  format %{ "FNEG    $dst, $src \t// float" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fneg);
    __ fneg($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

instruct negD_reg(regD dst, regD src) %{
  match(Set dst (NegD src));
  format %{ "FNEG    $dst, $src \t// double" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fneg);
    __ fneg($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// AbsF + NegF.
instruct negF_absF_reg(regF dst, regF src) %{
  match(Set dst (NegF (AbsF src)));
  format %{ "FNABS   $dst, $src \t// float" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fnabs);
    __ fnabs($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// AbsD + NegD.
instruct negD_absD_reg(regD dst, regD src) %{
  match(Set dst (NegD (AbsD src)));
  format %{ "FNABS   $dst, $src \t// double" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fnabs);
    __ fnabs($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// VM_Version::has_fsqrt() decides if this node will be used.
// Sqrt float double precision
instruct sqrtD_reg(regD dst, regD src) %{
  match(Set dst (SqrtD src));
  format %{ "FSQRT   $dst, $src" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fsqrt);
    __ fsqrt($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Single-precision sqrt.
instruct sqrtF_reg(regF dst, regF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
  predicate(VM_Version::has_fsqrts());
  ins_cost(DEFAULT_COST);

  format %{ "FSQRTS  $dst, $src" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fsqrts);
    __ fsqrts($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

instruct roundDouble_nop(regD dst) %{
  match(Set dst (RoundDouble dst));
  ins_cost(0);

  format %{ " -- \t// RoundDouble not needed - empty" %}
  size(0);
  // PPC results are already "rounded" (i.e., normal-format IEEE).
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

instruct roundFloat_nop(regF dst) %{
  match(Set dst (RoundFloat dst));
  ins_cost(0);

  format %{ " -- \t// RoundFloat not needed - empty" %}
  size(0);
  // PPC results are already "rounded" (i.e., normal-format IEEE).
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

//----------Logical Instructions-----------------------------------------------

// And Instructions

// Register And
instruct andI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (AndI src1 src2));
  format %{ "AND     $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_and);
    __ andr($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate And
instruct andI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2, flagsRegCR0 cr0) %{
  match(Set dst (AndI src1 src2));
  effect(KILL cr0);

  format %{ "ANDI    $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_andi_);
    // FIXME: avoid andi_ ?
    __ andi_($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate And where the immediate is a negative power of 2.
instruct andI_reg_immInegpow2(iRegIdst dst, iRegIsrc src1, immInegpow2 src2) %{
  match(Set dst (AndI src1 src2));
  format %{ "ANDWI   $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
    __ clrrdi($dst$$Register, $src1$$Register, log2_long((jlong)(julong)(juint)-($src2$$constant)));
  %}
  ins_pipe(pipe_class_default);
%}

instruct andI_reg_immIpow2minus1(iRegIdst dst, iRegIsrc src1, immIpow2minus1 src2) %{
  match(Set dst (AndI src1 src2));
  format %{ "ANDWI   $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ clrldi($dst$$Register, $src1$$Register, 64-log2_long((((jlong) $src2$$constant)+1)));
  %}
  ins_pipe(pipe_class_default);
%}

instruct andI_reg_immIpowerOf2(iRegIdst dst, iRegIsrc src1, immIpowerOf2 src2) %{
  match(Set dst (AndI src1 src2));
  predicate(UseRotateAndMaskInstructionsPPC64);
  format %{ "ANDWI   $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm);
    __ rlwinm($dst$$Register, $src1$$Register, 0,
              (31-log2_long((jlong) $src2$$constant)) & 0x1f, (31-log2_long((jlong) $src2$$constant)) & 0x1f);
  %}
  ins_pipe(pipe_class_default);
%}

// Register And Long
instruct andL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (AndL src1 src2));
  ins_cost(DEFAULT_COST);

  format %{ "AND     $dst, $src1, $src2 \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_and);
    __ andr($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate And long
instruct andL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 src2, flagsRegCR0 cr0) %{
  match(Set dst (AndL src1 src2));
  effect(KILL cr0);
  ins_cost(DEFAULT_COST);

  format %{ "ANDI    $dst, $src1, $src2 \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_andi_);
    // FIXME: avoid andi_ ?
    __ andi_($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate And Long where the immediate is a negative power of 2.
instruct andL_reg_immLnegpow2(iRegLdst dst, iRegLsrc src1, immLnegpow2 src2) %{
  match(Set dst (AndL src1 src2));
  format %{ "ANDDI   $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
    __ clrrdi($dst$$Register, $src1$$Register, log2_long((jlong)-$src2$$constant));
  %}
  ins_pipe(pipe_class_default);
%}

instruct andL_reg_immLpow2minus1(iRegLdst dst, iRegLsrc src1, immLpow2minus1 src2) %{
  match(Set dst (AndL src1 src2));
  format %{ "ANDDI   $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ clrldi($dst$$Register, $src1$$Register, 64-log2_long((((jlong) $src2$$constant)+1)));
  %}
  ins_pipe(pipe_class_default);
%}

// AndL + ConvL2I.
instruct convL2I_andL_reg_immLpow2minus1(iRegIdst dst, iRegLsrc src1, immLpow2minus1 src2) %{
  match(Set dst (ConvL2I (AndL src1 src2)));
  ins_cost(DEFAULT_COST);

  format %{ "ANDDI   $dst, $src1, $src2 \t// long + l2i" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ clrldi($dst$$Register, $src1$$Register, 64-log2_long((((jlong) $src2$$constant)+1)));
  %}
  ins_pipe(pipe_class_default);
%}

// Or Instructions

// Register Or
instruct orI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (OrI src1 src2));
  format %{ "OR      $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Expand does not work with above instruct. (??)
instruct orI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  // no match-rule
  effect(DEF dst, USE src1, USE src2);
  format %{ "OR      $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

instruct tree_orI_orI_orI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
  match(Set dst (OrI (OrI (OrI src1 src2) src3) src4));
  ins_cost(DEFAULT_COST*3);

  expand %{
    // FIXME: we should do this in the ideal world.
    iRegIdst tmp1;
    iRegIdst tmp2;
    orI_reg_reg(tmp1, src1, src2);
    orI_reg_reg_2(tmp2, src3, src4); // Adlc complains about orI_reg_reg.
    orI_reg_reg(dst, tmp1, tmp2);
  %}
%}

// Immediate Or
instruct orI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2) %{
  match(Set dst (OrI src1 src2));
  format %{ "ORI     $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ori);
    __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF);
  %}
  ins_pipe(pipe_class_default);
%}

// Register Or Long
instruct orL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (OrL src1 src2));
  ins_cost(DEFAULT_COST);

  size(4);
  format %{ "OR      $dst, $src1, $src2 \t// long" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// OrL + ConvL2I.
instruct orI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (ConvL2I (OrL src1 src2)));
  ins_cost(DEFAULT_COST);

  format %{ "OR      $dst, $src1, $src2 \t// long + l2i" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate Or long
instruct orL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 con) %{
  match(Set dst (OrL src1 con));
  ins_cost(DEFAULT_COST);

  format %{ "ORI     $dst, $src1, $con \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ori);
    __ ori($dst$$Register, $src1$$Register, ($con$$constant) & 0xFFFF);
  %}
  ins_pipe(pipe_class_default);
%}

// Xor Instructions

// Register Xor
instruct xorI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (XorI src1 src2));
  format %{ "XOR     $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_xor);
    __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Expand does not work with above instruct. (??)
instruct xorI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  // no match-rule
  effect(DEF dst, USE src1, USE src2);
  format %{ "XOR     $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_xor);
    __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

instruct tree_xorI_xorI_xorI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
  match(Set dst (XorI (XorI (XorI src1 src2) src3) src4));
  ins_cost(DEFAULT_COST*3);

  expand %{
    // FIXME: we should do this in the ideal world.
    iRegIdst tmp1;
    iRegIdst tmp2;
    xorI_reg_reg(tmp1, src1, src2);
    xorI_reg_reg_2(tmp2, src3, src4); // Adlc complains about xorI_reg_reg.
    xorI_reg_reg(dst, tmp1, tmp2);
  %}
%}

// Immediate Xor
instruct xorI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2) %{
  match(Set dst (XorI src1 src2));
  format %{ "XORI    $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_xori);
    __ xori($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// Register Xor Long
instruct xorL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (XorL src1 src2));
  ins_cost(DEFAULT_COST);

  format %{ "XOR     $dst, $src1, $src2 \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_xor);
    __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// XorL + ConvL2I.
instruct xorI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (ConvL2I (XorL src1 src2)));
  ins_cost(DEFAULT_COST);

  format %{ "XOR     $dst, $src1, $src2 \t// long + l2i" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_xor);
    __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate Xor Long
instruct xorL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 src2) %{
  match(Set dst (XorL src1 src2));
  ins_cost(DEFAULT_COST);

  format %{ "XORI    $dst, $src1, $src2 \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_xori);
    __ xori($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

instruct notI_reg(iRegIdst dst, iRegIsrc src1, immI_minus1 src2) %{
  match(Set dst (XorI src1 src2));
  ins_cost(DEFAULT_COST);

  format %{ "NOT     $dst, $src1 ($src2)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_nor);
    __ nor($dst$$Register, $src1$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

instruct notL_reg(iRegLdst dst, iRegLsrc src1, immL_minus1 src2) %{
  match(Set dst (XorL src1 src2));
  ins_cost(DEFAULT_COST);

  format %{ "NOT     $dst, $src1 ($src2) \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_nor);
    __ nor($dst$$Register, $src1$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// And-complement
instruct andcI_reg_reg(iRegIdst dst, iRegIsrc src1, immI_minus1 src2, iRegIsrc src3) %{
  match(Set dst (AndI (XorI src1 src2) src3));
  ins_cost(DEFAULT_COST);

  format %{ "ANDW    $dst, xori($src1, $src2), $src3" %}
  size(4);
  ins_encode( enc_andc(dst, src3, src1) );
  ins_pipe(pipe_class_default);
%}

// And-complement
instruct andcL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src1, USE src2);
  predicate(false);

  format %{ "ANDC    $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_andc);
    __ andc($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

//----------Moves between int/long and float/double----------------------------
//
// The following rules move values from int/long registers/stack-locations
// to float/double registers/stack-locations and vice versa, without doing any
// conversions. These rules are used to implement the bit-conversion methods
// of java.lang.Float etc., e.g.
//   int   floatToIntBits(float value)
//   float intBitsToFloat(int bits)
//
// Notes on the implementation on ppc64:
// We only provide rules which move between a register and a stack-location,
// because we always have to go through memory when moving between a float
// register and an integer register.

//---------- Chain stack slots between similar types --------

// These are needed so that the rules below can match.

// Load integer from stack slot
instruct stkI_to_regI(iRegIdst dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);

  format %{ "LWZ     $dst, $src" %}
  size(4);
  ins_encode( enc_lwz(dst, src) );
  ins_pipe(pipe_class_memory);
%}

// Store integer to stack slot
instruct regI_to_stkI(stackSlotI dst, iRegIsrc src) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);

  format %{ "STW     $src, $dst \t// stk" %}
  size(4);
  ins_encode( enc_stw(src, dst) ); // rs=rt
  ins_pipe(pipe_class_memory);
%}

// Load long from stack slot
instruct stkL_to_regL(iRegLdst dst, stackSlotL src) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);

  format %{ "LD      $dst, $src \t// long" %}
  size(4);
  ins_encode( enc_ld(dst, src) );
  ins_pipe(pipe_class_memory);
%}

// Store long to stack slot
instruct regL_to_stkL(stackSlotL dst, iRegLsrc src) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);

  format %{ "STD     $src, $dst \t// long" %}
  size(4);
  ins_encode( enc_std(src, dst) ); // rs=rt
  ins_pipe(pipe_class_memory);
%}

//----------Moves between int and float

// Move float value from float stack-location to integer register.
instruct moveF2I_stack_reg(iRegIdst dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  ins_cost(MEMORY_REF_COST);

  format %{ "LWZ     $dst, $src \t// MoveF2I" %}
  size(4);
  ins_encode( enc_lwz(dst, src) );
  ins_pipe(pipe_class_memory);
%}

// Move float value from float register to integer stack-location.
instruct moveF2I_reg_stack(stackSlotI dst, regF src) %{
  match(Set dst (MoveF2I src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STFS    $src, $dst \t// MoveF2I" %}
  size(4);
  ins_encode( enc_stfs(src, dst) );
  ins_pipe(pipe_class_memory);
%}

// Move integer value from integer stack-location to float register.
instruct moveI2F_stack_reg(regF dst, stackSlotI src) %{
  match(Set dst (MoveI2F src));
  ins_cost(MEMORY_REF_COST);

  format %{ "LFS     $dst, $src \t// MoveI2F" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lfs);
    int Idisp = $src$$disp + frame_slots_bias($src$$base, ra_);
    __ lfs($dst$$FloatRegister, Idisp, $src$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Move integer value from integer register to float stack-location.
instruct moveI2F_reg_stack(stackSlotF dst, iRegIsrc src) %{
  match(Set dst (MoveI2F src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STW     $src, $dst \t// MoveI2F" %}
  size(4);
  ins_encode( enc_stw(src, dst) );
  ins_pipe(pipe_class_memory);
%}

//----------Moves between long and float

instruct moveF2L_reg_stack(stackSlotL dst, regF src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "storeD  $src, $dst \t// STACK" %}
  size(4);
  ins_encode( enc_stfd(src, dst) );
  ins_pipe(pipe_class_default);
%}

//----------Moves between long and double

// Move double value from double stack-location to long register.
instruct moveD2L_stack_reg(iRegLdst dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "LD      $dst, $src \t// MoveD2L" %}
  ins_encode( enc_ld(dst, src) );
  ins_pipe(pipe_class_memory);
%}

// Move double value from double register to long stack-location.
instruct moveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(MEMORY_REF_COST);

  format %{ "STFD    $src, $dst \t// MoveD2L" %}
  size(4);
  ins_encode( enc_stfd(src, dst) );
  ins_pipe(pipe_class_memory);
%}

// Move long value from long stack-location to double register.
instruct moveL2D_stack_reg(regD dst, stackSlotL src) %{
  match(Set dst (MoveL2D src));
  ins_cost(MEMORY_REF_COST);

  format %{ "LFD     $dst, $src \t// MoveL2D" %}
  size(4);
  ins_encode( enc_lfd(dst, src) );
  ins_pipe(pipe_class_memory);
%}

// Move long value from long register to double stack-location.
instruct moveL2D_reg_stack(stackSlotD dst, iRegLsrc src) %{
  match(Set dst (MoveL2D src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STD     $src, $dst \t// MoveL2D" %}
  size(4);
  ins_encode( enc_std(src, dst) );
  ins_pipe(pipe_class_memory);
%}

//----------Register Move Instructions-----------------------------------------

// Replicate for Superword

instruct moveReg(iRegLdst dst, iRegIsrc src) %{
  predicate(false);
  effect(DEF dst, USE src);

  format %{ "MR      $dst, $src \t// replicate " %}
  // variable size, 0 or 4.
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ mr_if_needed($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

//----------Cast instructions (Java-level type cast)---------------------------

// Cast Long to Pointer for unsafe natives.
instruct castX2P(iRegPdst dst, iRegLsrc src) %{
  match(Set dst (CastX2P src));

  format %{ "MR      $dst, $src \t// Long->Ptr" %}
  // variable size, 0 or 4.
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ mr_if_needed($dst$$Register, $src$$Register);
  %}
 ins_pipe(pipe_class_default);
%}

// Cast Pointer to Long for unsafe natives.
instruct castP2X(iRegLdst dst, iRegP_N2P src) %{
  match(Set dst (CastP2X src));

  format %{ "MR      $dst, $src \t// Ptr->Long" %}
  // variable size, 0 or 4.
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ mr_if_needed($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

instruct castPP(iRegPdst dst) %{
  match(Set dst (CastPP dst));
  format %{ " -- \t// castPP of $dst" %}
  size(0);
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

instruct castII(iRegIdst dst) %{
  match(Set dst (CastII dst));
  format %{ " -- \t// castII of $dst" %}
  size(0);
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

instruct checkCastPP(iRegPdst dst) %{
  match(Set dst (CheckCastPP dst));
  format %{ " -- \t// checkcastPP of $dst" %}
  size(0);
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

//----------Convert instructions-----------------------------------------------

// Convert to boolean.

// int_to_bool(src) : { 1   if src != 0
//                    { 0   else
//
// strategy:
// 1) Count leading zeros of 32 bit-value src,
//    this returns 32 (0b10.0000) iff src == 0 and <32 otherwise.
// 2) Shift 5 bits to the right, result is 0b1 iff src == 0, 0b0 otherwise.
// 3) Xori the result to get 0b1 if src != 0 and 0b0 if src == 0.

// convI2Bool
instruct convI2Bool_reg__cntlz_Ex(iRegIdst dst, iRegIsrc src) %{
  match(Set dst (Conv2B src));
  predicate(UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  expand %{
    immI shiftAmount %{ 0x5 %}
    uimmI16 mask %{ 0x1 %}
    iRegIdst tmp1;
    iRegIdst tmp2;
    countLeadingZerosI(tmp1, src);
    urShiftI_reg_imm(tmp2, tmp1, shiftAmount);
    xorI_reg_uimm16(dst, tmp2, mask);
  %}
%}

instruct convI2Bool_reg__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx) %{
  match(Set dst (Conv2B src));
  effect(TEMP crx);
  predicate(!UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  format %{ "CMPWI   $crx, $src, #0 \t// convI2B"
            "LI      $dst, #0\n\t"
            "BEQ     $crx, done\n\t"
            "LI      $dst, #1\n"
            "done:" %}
  size(16);
  ins_encode( enc_convI2B_regI__cmove(dst, src, crx, 0x0, 0x1) );
  ins_pipe(pipe_class_compare);
%}

// ConvI2B + XorI
instruct xorI_convI2Bool_reg_immIvalue1__cntlz_Ex(iRegIdst dst, iRegIsrc src, immI_1 mask) %{
  match(Set dst (XorI (Conv2B src) mask));
  predicate(UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  expand %{
    immI shiftAmount %{ 0x5 %}
    iRegIdst tmp1;
    countLeadingZerosI(tmp1, src);
    urShiftI_reg_imm(dst, tmp1, shiftAmount);
  %}
%}

instruct xorI_convI2Bool_reg_immIvalue1__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx, immI_1 mask) %{
  match(Set dst (XorI (Conv2B src) mask));
  effect(TEMP crx);
  predicate(!UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  format %{ "CMPWI   $crx, $src, #0 \t// Xor(convI2B($src), $mask)"
            "LI      $dst, #1\n\t"
            "BEQ     $crx, done\n\t"
            "LI      $dst, #0\n"
            "done:" %}
  size(16);
  ins_encode( enc_convI2B_regI__cmove(dst, src, crx, 0x1, 0x0) );
  ins_pipe(pipe_class_compare);
%}

// AndI 0b0..010..0 + ConvI2B
instruct convI2Bool_andI_reg_immIpowerOf2(iRegIdst dst, iRegIsrc src, immIpowerOf2 mask) %{
  match(Set dst (Conv2B (AndI src mask)));
  predicate(UseRotateAndMaskInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  format %{ "RLWINM  $dst, $src, $mask \t// convI2B(AndI($src, $mask))" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm);
    __ rlwinm($dst$$Register, $src$$Register, (32-log2_long((jlong)$mask$$constant)) & 0x1f, 31, 31);
  %}
  ins_pipe(pipe_class_default);
%}

// Convert pointer to boolean.
//
// ptr_to_bool(src) : { 1   if src != 0
//                    { 0   else
//
// strategy:
// 1) Count leading zeros of 64 bit-value src,
//    this returns 64 (0b100.0000) iff src == 0 and <64 otherwise.
// 2) Shift 6 bits to the right, result is 0b1 iff src == 0, 0b0 otherwise.
// 3) Xori the result to get 0b1 if src != 0 and 0b0 if src == 0.

// ConvP2B
instruct convP2Bool_reg__cntlz_Ex(iRegIdst dst, iRegP_N2P src) %{
  match(Set dst (Conv2B src));
  predicate(UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  expand %{
    immI shiftAmount %{ 0x6 %}
    uimmI16 mask %{ 0x1 %}
    iRegIdst tmp1;
    iRegIdst tmp2;
    countLeadingZerosP(tmp1, src);
    urShiftI_reg_imm(tmp2, tmp1, shiftAmount);
    xorI_reg_uimm16(dst, tmp2, mask);
  %}
%}

instruct convP2Bool_reg__cmove(iRegIdst dst, iRegP_N2P src, flagsReg crx) %{
  match(Set dst (Conv2B src));
  effect(TEMP crx);
  predicate(!UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  format %{ "CMPDI   $crx, $src, #0 \t// convP2B"
            "LI      $dst, #0\n\t"
            "BEQ     $crx, done\n\t"
            "LI      $dst, #1\n"
            "done:" %}
  size(16);
  ins_encode( enc_convP2B_regP__cmove(dst, src, crx, 0x0, 0x1) );
  ins_pipe(pipe_class_compare);
%}

// ConvP2B + XorI
instruct xorI_convP2Bool_reg__cntlz_Ex(iRegIdst dst, iRegP_N2P src, immI_1 mask) %{
  match(Set dst (XorI (Conv2B src) mask));
  predicate(UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  expand %{
    immI shiftAmount %{ 0x6 %}
    iRegIdst tmp1;
    countLeadingZerosP(tmp1, src);
    urShiftI_reg_imm(dst, tmp1, shiftAmount);
  %}
%}

instruct xorI_convP2Bool_reg_immIvalue1__cmove(iRegIdst dst, iRegP_N2P src, flagsReg crx, immI_1 mask) %{
  match(Set dst (XorI (Conv2B src) mask));
  effect(TEMP crx);
  predicate(!UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  format %{ "CMPDI   $crx, $src, #0 \t// XorI(convP2B($src), $mask)"
            "LI      $dst, #1\n\t"
            "BEQ     $crx, done\n\t"
            "LI      $dst, #0\n"
            "done:" %}
  size(16);
  ins_encode( enc_convP2B_regP__cmove(dst, src, crx, 0x1, 0x0) );
  ins_pipe(pipe_class_compare);
%}

// if src1 < src2, return -1 else return 0
instruct cmpLTMask_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (CmpLTMask src1 src2));
  ins_cost(DEFAULT_COST*4);

  expand %{
    iRegLdst src1s;
    iRegLdst src2s;
    iRegLdst diff;
    convI2L_reg(src1s, src1); // Ensure proper sign extension.
    convI2L_reg(src2s, src2); // Ensure proper sign extension.
    subL_reg_reg(diff, src1s, src2s);
    // Need to consider >=33 bit result, therefore we need signmaskL.
    signmask64I_regL(dst, diff);
  %}
%}

instruct cmpLTMask_reg_immI0(iRegIdst dst, iRegIsrc src1, immI_0 src2) %{
  match(Set dst (CmpLTMask src1 src2)); // if src1 < src2, return -1 else return 0
  format %{ "SRAWI   $dst, $src1, $src2 \t// CmpLTMask" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_srawi);
    __ srawi($dst$$Register, $src1$$Register, 0x1f);
  %}
  ins_pipe(pipe_class_default);
%}

//----------Arithmetic Conversion Instructions---------------------------------

// Convert to Byte  -- nop
// Convert to Short -- nop

// Convert to Int

instruct convB2I_reg(iRegIdst dst, iRegIsrc src, immI_24 amount) %{
  match(Set dst (RShiftI (LShiftI src amount) amount));
  format %{ "EXTSB   $dst, $src \t// byte->int" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_extsb);
    __ extsb($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// LShiftI 16 + RShiftI 16 converts short to int.
instruct convS2I_reg(iRegIdst dst, iRegIsrc src, immI_16 amount) %{
  match(Set dst (RShiftI (LShiftI src amount) amount));
  format %{ "EXTSH   $dst, $src \t// short->int" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_extsh);
    __ extsh($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// ConvL2I + ConvI2L: Sign extend int in long register.
instruct sxtI_L2L_reg(iRegLdst dst, iRegLsrc src) %{
  match(Set dst (ConvI2L (ConvL2I src)));

  format %{ "EXTSW   $dst, $src \t// long->long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_extsw);
    __ extsw($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

instruct convL2I_reg(iRegIdst dst, iRegLsrc src) %{
  match(Set dst (ConvL2I src));
  format %{ "MR      $dst, $src \t// long->int" %}
  // variable size, 0 or 4
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ mr_if_needed($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

instruct convD2IRaw_regD(regD dst, regD src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "FCTIWZ $dst, $src \t// convD2I, $src != NaN" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fctiwz);;
    __ fctiwz($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

instruct cmovI_bso_stackSlotL(iRegIdst dst, flagsReg crx, stackSlotL src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE crx, USE src);
  predicate(false);

  ins_variable_size_depending_on_alignment(true);

  format %{ "cmovI   $crx, $dst, $src" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT(InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
  ins_encode( enc_cmove_bso_stackSlotL(dst, crx, src) );
  ins_pipe(pipe_class_default);
%}

instruct cmovI_bso_stackSlotL_conLvalue0_Ex(iRegIdst dst, flagsReg crx, stackSlotL mem) %{
  // no match-rule, false predicate
  effect(DEF dst, USE crx, USE mem);
  predicate(false);

  format %{ "CmovI   $dst, $crx, $mem \t// postalloc expanded" %}
  postalloc_expand %{
    //
    // replaces
    //
    //   region  dst  crx  mem
    //    \       |    |   /
    //     dst=cmovI_bso_stackSlotL_conLvalue0
    //
    // with
    //
    //   region  dst
    //    \       /
    //     dst=loadConI16(0)
    //      |
    //      ^  region  dst  crx  mem
    //      |   \       |    |    /
    //      dst=cmovI_bso_stackSlotL
    //

    // Create new nodes.
    MachNode *m1 = new (C) loadConI16Node();
    MachNode *m2 = new (C) cmovI_bso_stackSlotLNode();

    // inputs for new nodes
    m1->add_req(n_region);
    m2->add_req(n_region, n_crx, n_mem);

    // precedences for new nodes
    m2->add_prec(m1);

    // operands for new nodes
    m1->_opnds[0] = op_dst;
    m1->_opnds[1] = new (C) immI16Oper(0);

    m2->_opnds[0] = op_dst;
    m2->_opnds[1] = op_crx;
    m2->_opnds[2] = op_mem;

    // registers for new nodes
    ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
    ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst

    // Insert new nodes.
    nodes->push(m1);
    nodes->push(m2);
  %}
%}

// Double to Int conversion, NaN is mapped to 0.
instruct convD2I_reg_ExEx(iRegIdst dst, regD src) %{
  match(Set dst (ConvD2I src));
  ins_cost(DEFAULT_COST);

  expand %{
    regD tmpD;
    stackSlotL tmpS;
    flagsReg crx;
    cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convD2IRaw_regD(tmpD, src);                         // Convert float to int (speculated).
    moveD2L_reg_stack(tmpS, tmpD);                      // Store float to stack (speculated).
    cmovI_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
  %}
%}

instruct convF2IRaw_regF(regF dst, regF src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "FCTIWZ $dst, $src \t// convF2I, $src != NaN" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fctiwz);
    __ fctiwz($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Float to Int conversion, NaN is mapped to 0.
instruct convF2I_regF_ExEx(iRegIdst dst, regF src) %{
  match(Set dst (ConvF2I src));
  ins_cost(DEFAULT_COST);

  expand %{
    regF tmpF;
    stackSlotL tmpS;
    flagsReg crx;
    cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convF2IRaw_regF(tmpF, src);                         // Convert float to int (speculated).
    moveF2L_reg_stack(tmpS, tmpF);                      // Store float to stack (speculated).
    cmovI_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
  %}
%}

// Convert to Long

instruct convI2L_reg(iRegLdst dst, iRegIsrc src) %{
  match(Set dst (ConvI2L src));
  format %{ "EXTSW   $dst, $src \t// int->long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_extsw);
    __ extsw($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Zero-extend: convert unsigned int to long (convUI2L).
instruct zeroExtendL_regI(iRegLdst dst, iRegIsrc src, immL_32bits mask) %{
  match(Set dst (AndL (ConvI2L src) mask));
  ins_cost(DEFAULT_COST);

  format %{ "CLRLDI  $dst, $src, #32 \t// zero-extend int to long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ clrldi($dst$$Register, $src$$Register, 32);
  %}
  ins_pipe(pipe_class_default);
%}

// Zero-extend: convert unsigned int to long in long register.
instruct zeroExtendL_regL(iRegLdst dst, iRegLsrc src, immL_32bits mask) %{
  match(Set dst (AndL src mask));
  ins_cost(DEFAULT_COST);

  format %{ "CLRLDI  $dst, $src, #32 \t// zero-extend int to long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ clrldi($dst$$Register, $src$$Register, 32);
  %}
  ins_pipe(pipe_class_default);
%}

instruct convF2LRaw_regF(regF dst, regF src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "FCTIDZ $dst, $src \t// convF2L, $src != NaN" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fctiwz);
    __ fctidz($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

instruct cmovL_bso_stackSlotL(iRegLdst dst, flagsReg crx, stackSlotL src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE crx, USE src);
  predicate(false);

  ins_variable_size_depending_on_alignment(true);

  format %{ "cmovL   $crx, $dst, $src" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
  ins_encode( enc_cmove_bso_stackSlotL(dst, crx, src) );
  ins_pipe(pipe_class_default);
%}

instruct cmovL_bso_stackSlotL_conLvalue0_Ex(iRegLdst dst, flagsReg crx, stackSlotL mem) %{
  // no match-rule, false predicate
  effect(DEF dst, USE crx, USE mem);
  predicate(false);

  format %{ "CmovL   $dst, $crx, $mem \t// postalloc expanded" %}
  postalloc_expand %{
    //
    // replaces
    //
    //   region  dst  crx  mem
    //    \       |    |   /
    //     dst=cmovL_bso_stackSlotL_conLvalue0
    //
    // with
    //
    //   region  dst
    //    \       /
    //     dst=loadConL16(0)
    //      |
    //      ^  region  dst  crx  mem
    //      |   \       |    |    /
    //      dst=cmovL_bso_stackSlotL
    //

    // Create new nodes.
    MachNode *m1 = new (C) loadConL16Node();
    MachNode *m2 = new (C) cmovL_bso_stackSlotLNode();

    // inputs for new nodes
    m1->add_req(n_region);
    m2->add_req(n_region, n_crx, n_mem);
    m2->add_prec(m1);

    // operands for new nodes
    m1->_opnds[0] = op_dst;
    m1->_opnds[1] = new (C) immL16Oper(0);
    m2->_opnds[0] = op_dst;
    m2->_opnds[1] = op_crx;
    m2->_opnds[2] = op_mem;

    // registers for new nodes
    ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
    ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst

    // Insert new nodes.
    nodes->push(m1);
    nodes->push(m2);
  %}
%}

// Float to Long conversion, NaN is mapped to 0.
instruct convF2L_reg_ExEx(iRegLdst dst, regF src) %{
  match(Set dst (ConvF2L src));
  ins_cost(DEFAULT_COST);

  expand %{
    regF tmpF;
    stackSlotL tmpS;
    flagsReg crx;
    cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convF2LRaw_regF(tmpF, src);                         // Convert float to long (speculated).
    moveF2L_reg_stack(tmpS, tmpF);                      // Store float to stack (speculated).
    cmovL_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
  %}
%}

instruct convD2LRaw_regD(regD dst, regD src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "FCTIDZ $dst, $src \t// convD2L $src != NaN" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fctiwz);
    __ fctidz($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Double to Long conversion, NaN is mapped to 0.
instruct convD2L_reg_ExEx(iRegLdst dst, regD src) %{
  match(Set dst (ConvD2L src));
  ins_cost(DEFAULT_COST);

  expand %{
    regD tmpD;
    stackSlotL tmpS;
    flagsReg crx;
    cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convD2LRaw_regD(tmpD, src);                         // Convert float to long (speculated).
    moveD2L_reg_stack(tmpS, tmpD);                      // Store float to stack (speculated).
    cmovL_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
  %}
%}

// Convert to Float

// Placed here as needed in expand.
instruct convL2DRaw_regD(regD dst, regD src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "FCFID $dst, $src \t// convL2D" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fcfid);
    __ fcfid($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Placed here as needed in expand.
instruct convD2F_reg(regF dst, regD src) %{
  match(Set dst (ConvD2F src));
  format %{ "FRSP    $dst, $src \t// convD2F" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_frsp);
    __ frsp($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Integer to Float conversion.
instruct convI2F_ireg_Ex(regF dst, iRegIsrc src) %{
  match(Set dst (ConvI2F src));
  predicate(!VM_Version::has_fcfids());
  ins_cost(DEFAULT_COST);

  expand %{
    iRegLdst tmpL;
    stackSlotL tmpS;
    regD tmpD;
    regD tmpD2;
    convI2L_reg(tmpL, src);              // Sign-extension int to long.
    regL_to_stkL(tmpS, tmpL);            // Store long to stack.
    moveL2D_stack_reg(tmpD, tmpS);       // Load long into double register.
    convL2DRaw_regD(tmpD2, tmpD);        // Convert to double.
    convD2F_reg(dst, tmpD2);             // Convert double to float.
  %}
%}

instruct convL2FRaw_regF(regF dst, regD src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "FCFIDS $dst, $src \t// convL2F" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fcfid);
    __ fcfids($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Integer to Float conversion. Special version for Power7.
instruct convI2F_ireg_fcfids_Ex(regF dst, iRegIsrc src) %{
  match(Set dst (ConvI2F src));
  predicate(VM_Version::has_fcfids());
  ins_cost(DEFAULT_COST);

  expand %{
    iRegLdst tmpL;
    stackSlotL tmpS;
    regD tmpD;
    convI2L_reg(tmpL, src);              // Sign-extension int to long.
    regL_to_stkL(tmpS, tmpL);            // Store long to stack.
    moveL2D_stack_reg(tmpD, tmpS);       // Load long into double register.
    convL2FRaw_regF(dst, tmpD);          // Convert to float.
  %}
%}

// L2F to avoid runtime call.
instruct convL2F_ireg_fcfids_Ex(regF dst, iRegLsrc src) %{
  match(Set dst (ConvL2F src));
  predicate(VM_Version::has_fcfids());
  ins_cost(DEFAULT_COST);

  expand %{
    stackSlotL tmpS;
    regD tmpD;
    regL_to_stkL(tmpS, src);             // Store long to stack.
    moveL2D_stack_reg(tmpD, tmpS);       // Load long into double register.
    convL2FRaw_regF(dst, tmpD);          // Convert to float.
  %}
%}

// Moved up as used in expand.
//instruct convD2F_reg(regF dst, regD src) %{%}

// Convert to Double

// Integer to Double conversion.
instruct convI2D_reg_Ex(regD dst, iRegIsrc src) %{
  match(Set dst (ConvI2D src));
  ins_cost(DEFAULT_COST);

  expand %{
    iRegLdst tmpL;
    stackSlotL tmpS;
    regD tmpD;
    convI2L_reg(tmpL, src);              // Sign-extension int to long.
    regL_to_stkL(tmpS, tmpL);            // Store long to stack.
    moveL2D_stack_reg(tmpD, tmpS);       // Load long into double register.
    convL2DRaw_regD(dst, tmpD);          // Convert to double.
  %}
%}

// Long to Double conversion
instruct convL2D_reg_Ex(regD dst, stackSlotL src) %{
  match(Set dst (ConvL2D src));
  ins_cost(DEFAULT_COST + MEMORY_REF_COST);

  expand %{
    regD tmpD;
    moveL2D_stack_reg(tmpD, src);
    convL2DRaw_regD(dst, tmpD);
  %}
%}

instruct convF2D_reg(regD dst, regF src) %{
  match(Set dst (ConvF2D src));
  format %{ "FMR     $dst, $src \t// float->double" %}
  // variable size, 0 or 4
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fmr);
    __ fmr_if_needed($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

//----------Control Flow Instructions------------------------------------------
// Compare Instructions

// Compare Integers
instruct cmpI_reg_reg(flagsReg crx, iRegIsrc src1, iRegIsrc src2) %{
  match(Set crx (CmpI src1 src2));
  size(4);
  format %{ "CMPW    $crx, $src1, $src2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmp);
    __ cmpw($crx$$CondRegister, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

instruct cmpI_reg_imm16(flagsReg crx, iRegIsrc src1, immI16 src2) %{
  match(Set crx (CmpI src1 src2));
  format %{ "CMPWI   $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmpi);
    __ cmpwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}

// (src1 & src2) == 0?
instruct testI_reg_imm(flagsRegCR0 cr0, iRegIsrc src1, uimmI16 src2, immI_0 zero) %{
  match(Set cr0 (CmpI (AndI src1 src2) zero));
  // r0 is killed
  format %{ "ANDI    R0, $src1, $src2 \t// BTST int" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_andi_);
    // FIXME: avoid andi_ ?
    __ andi_(R0, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}

instruct cmpL_reg_reg(flagsReg crx, iRegLsrc src1, iRegLsrc src2) %{
  match(Set crx (CmpL src1 src2));
  format %{ "CMPD    $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmp);
    __ cmpd($crx$$CondRegister, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

instruct cmpL_reg_imm16(flagsReg crx, iRegLsrc src1, immL16 src2) %{
  match(Set crx (CmpL src1 src2));
  format %{ "CMPDI   $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmpi);
    __ cmpdi($crx$$CondRegister, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}

instruct testL_reg_reg(flagsRegCR0 cr0, iRegLsrc src1, iRegLsrc src2, immL_0 zero) %{
  match(Set cr0 (CmpL (AndL src1 src2) zero));
  // r0 is killed
  format %{ "AND     R0, $src1, $src2 \t// BTST long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_and_);
    __ and_(R0, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

instruct testL_reg_imm(flagsRegCR0 cr0, iRegLsrc src1, uimmL16 src2, immL_0 zero) %{
  match(Set cr0 (CmpL (AndL src1 src2) zero));
  // r0 is killed
  format %{ "ANDI    R0, $src1, $src2 \t// BTST long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_andi_);
    // FIXME: avoid andi_ ?
    __ andi_(R0, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}

instruct cmovI_conIvalueMinus1_conIvalue1(iRegIdst dst, flagsReg crx) %{
  // no match-rule, false predicate
  effect(DEF dst, USE crx);
  predicate(false);

  ins_variable_size_depending_on_alignment(true);

  format %{ "cmovI   $crx, $dst, -1, 0, +1" %}
  // Worst case is branch + move + branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORTInsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 20 : 16);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
    Label done;
    // li(Rdst, 0);              // equal -> 0
    __ beq($crx$$CondRegister, done);
    __ li($dst$$Register, 1);    // greater -> +1
    __ bgt($crx$$CondRegister, done);
    __ li($dst$$Register, -1);   // unordered or less -> -1
    // TODO: PPC port__ endgroup_if_needed(_size == 20);
    __ bind(done);
  %}
  ins_pipe(pipe_class_compare);
%}

instruct cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(iRegIdst dst, flagsReg crx) %{
  // no match-rule, false predicate
  effect(DEF dst, USE crx);
  predicate(false);

  format %{ "CmovI    $crx, $dst, -1, 0, +1 \t// postalloc expanded" %}
  postalloc_expand %{
    //
    // replaces
    //
    //   region  crx
    //    \       |
    //     dst=cmovI_conIvalueMinus1_conIvalue0_conIvalue1
    //
    // with
    //
    //   region
    //    \
    //     dst=loadConI16(0)
    //      |
    //      ^  region  crx
    //      |   \       |
    //      dst=cmovI_conIvalueMinus1_conIvalue1
    //

    // Create new nodes.
    MachNode *m1 = new (C) loadConI16Node();
    MachNode *m2 = new (C) cmovI_conIvalueMinus1_conIvalue1Node();

    // inputs for new nodes
    m1->add_req(n_region);
    m2->add_req(n_region, n_crx);
    m2->add_prec(m1);

    // operands for new nodes
    m1->_opnds[0] = op_dst;
    m1->_opnds[1] = new (C) immI16Oper(0);
    m2->_opnds[0] = op_dst;
    m2->_opnds[1] = op_crx;

    // registers for new nodes
    ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
    ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst

    // Insert new nodes.
    nodes->push(m1);
    nodes->push(m2);
  %}
%}

// Manifest a CmpL3 result in an integer register. Very painful.
// This is the test to avoid.
// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_reg_ExEx(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (CmpL3 src1 src2));
  ins_cost(DEFAULT_COST*5+BRANCH_COST);

  expand %{
    flagsReg tmp1;
    cmpL_reg_reg(tmp1, src1, src2);
    cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(dst, tmp1);
  %}
%}

// Implicit range checks.
// A range check in the ideal world has one of the following shapes:
//  - (If le (CmpU length index)), (IfTrue  throw exception)
//  - (If lt (CmpU index length)), (IfFalse throw exception)
//
// Match range check 'If le (CmpU length index)'.
instruct rangeCheck_iReg_uimm15(cmpOp cmp, iRegIsrc src_length, uimmI15 index, label labl) %{
  match(If cmp (CmpU src_length index));
  effect(USE labl);
  predicate(TrapBasedRangeChecks &&
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le &&
            PROB_UNLIKELY(_leaf->as_If()->_prob) >= PROB_ALWAYS &&
            (Matcher::branches_to_uncommon_trap(_leaf)));

  ins_is_TrapBasedCheckNode(true);

  format %{ "TWI     $index $cmp $src_length \t// RangeCheck => trap $labl" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_twi);
    if ($cmp$$cmpcode == 0x1 /* less_equal */) {
      __ trap_range_check_le($src_length$$Register, $index$$constant);
    } else {
      // Both successors are uncommon traps, probability is 0.
      // Node got flipped during fixup flow.
      assert($cmp$$cmpcode == 0x9, "must be greater");
      __ trap_range_check_g($src_length$$Register, $index$$constant);
    }
  %}
  ins_pipe(pipe_class_trap);
%}

// Match range check 'If lt (CmpU index length)'.
instruct rangeCheck_iReg_iReg(cmpOp cmp, iRegIsrc src_index, iRegIsrc src_length, label labl) %{
  match(If cmp (CmpU src_index src_length));
  effect(USE labl);
  predicate(TrapBasedRangeChecks &&
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt &&
            _leaf->as_If()->_prob >= PROB_ALWAYS &&
            (Matcher::branches_to_uncommon_trap(_leaf)));

  ins_is_TrapBasedCheckNode(true);

  format %{ "TW      $src_index $cmp $src_length \t// RangeCheck => trap $labl" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_tw);
    if ($cmp$$cmpcode == 0x0 /* greater_equal */) {
      __ trap_range_check_ge($src_index$$Register, $src_length$$Register);
    } else {
      // Both successors are uncommon traps, probability is 0.
      // Node got flipped during fixup flow.
      assert($cmp$$cmpcode == 0x8, "must be less");
      __ trap_range_check_l($src_index$$Register, $src_length$$Register);
    }
  %}
  ins_pipe(pipe_class_trap);
%}

// Match range check 'If lt (CmpU index length)'.
instruct rangeCheck_uimm15_iReg(cmpOp cmp, iRegIsrc src_index, uimmI15 length, label labl) %{
  match(If cmp (CmpU src_index length));
  effect(USE labl);
  predicate(TrapBasedRangeChecks &&
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt &&
            _leaf->as_If()->_prob >= PROB_ALWAYS &&
            (Matcher::branches_to_uncommon_trap(_leaf)));

  ins_is_TrapBasedCheckNode(true);

  format %{ "TWI     $src_index $cmp $length \t// RangeCheck => trap $labl" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_twi);
    if ($cmp$$cmpcode == 0x0 /* greater_equal */) {
      __ trap_range_check_ge($src_index$$Register, $length$$constant);
    } else {
      // Both successors are uncommon traps, probability is 0.
      // Node got flipped during fixup flow.
      assert($cmp$$cmpcode == 0x8, "must be less");
      __ trap_range_check_l($src_index$$Register, $length$$constant);
    }
  %}
  ins_pipe(pipe_class_trap);
%}

instruct compU_reg_reg(flagsReg crx, iRegIsrc src1, iRegIsrc src2) %{
  match(Set crx (CmpU src1 src2));
  format %{ "CMPLW   $crx, $src1, $src2 \t// unsigned" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmpl);
    __ cmplw($crx$$CondRegister, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

instruct compU_reg_uimm16(flagsReg crx, iRegIsrc src1, uimmI16 src2) %{
  match(Set crx (CmpU src1 src2));
  size(4);
  format %{ "CMPLWI  $crx, $src1, $src2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmpli);
    __ cmplwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}

// Implicit zero checks (more implicit null checks).
// No constant pool entries required.
instruct zeroCheckN_iReg_imm0(cmpOp cmp, iRegNsrc value, immN_0 zero, label labl) %{
  match(If cmp (CmpN value zero));
  effect(USE labl);
  predicate(TrapBasedNullChecks &&
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne &&
            _leaf->as_If()->_prob >= PROB_LIKELY_MAG(4) &&
            Matcher::branches_to_uncommon_trap(_leaf));
  ins_cost(1);

  ins_is_TrapBasedCheckNode(true);

  format %{ "TDI     $value $cmp $zero \t// ZeroCheckN => trap $labl" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_tdi);
    if ($cmp$$cmpcode == 0xA) {
      __ trap_null_check($value$$Register);
    } else {
      // Both successors are uncommon traps, probability is 0.
      // Node got flipped during fixup flow.
      assert($cmp$$cmpcode == 0x2 , "must be equal(0xA) or notEqual(0x2)");
      __ trap_null_check($value$$Register, Assembler::traptoGreaterThanUnsigned);
    }
  %}
  ins_pipe(pipe_class_trap);
%}

// Compare narrow oops.
instruct cmpN_reg_reg(flagsReg crx, iRegNsrc src1, iRegNsrc src2) %{
  match(Set crx (CmpN src1 src2));

  size(4);
  ins_cost(DEFAULT_COST);
  format %{ "CMPLW   $crx, $src1, $src2 \t// compressed ptr" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmpl);
    __ cmplw($crx$$CondRegister, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

instruct cmpN_reg_imm0(flagsReg crx, iRegNsrc src1, immN_0 src2) %{
  match(Set crx (CmpN src1 src2));
  // Make this more expensive than zeroCheckN_iReg_imm0.
  ins_cost(DEFAULT_COST);

  format %{ "CMPLWI  $crx, $src1, $src2 \t// compressed ptr" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmpli);
    __ cmplwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}

// Implicit zero checks (more implicit null checks).
// No constant pool entries required.
instruct zeroCheckP_reg_imm0(cmpOp cmp, iRegP_N2P value, immP_0 zero, label labl) %{
  match(If cmp (CmpP value zero));
  effect(USE labl);
  predicate(TrapBasedNullChecks &&
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne &&
            _leaf->as_If()->_prob >= PROB_LIKELY_MAG(4) &&
            Matcher::branches_to_uncommon_trap(_leaf));

  ins_is_TrapBasedCheckNode(true);

  format %{ "TDI     $value $cmp $zero \t// ZeroCheckP => trap $labl" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_tdi);
    if ($cmp$$cmpcode == 0xA) {
      __ trap_null_check($value$$Register);
    } else {
      // Both successors are uncommon traps, probability is 0.
      // Node got flipped during fixup flow.
      assert($cmp$$cmpcode == 0x2 , "must be equal(0xA) or notEqual(0x2)");
      __ trap_null_check($value$$Register, Assembler::traptoGreaterThanUnsigned);
    }
  %}
  ins_pipe(pipe_class_trap);
%}

// Compare Pointers
instruct cmpP_reg_reg(flagsReg crx, iRegP_N2P src1, iRegP_N2P src2) %{
  match(Set crx (CmpP src1 src2));
  format %{ "CMPLD   $crx, $src1, $src2 \t// ptr" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmpl);
    __ cmpld($crx$$CondRegister, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

// Used in postalloc expand.
instruct cmpP_reg_imm16(flagsReg crx, iRegPsrc src1, immL16 src2) %{
  // This match rule prevents reordering of node before a safepoint.
  // This only makes sense if this instructions is used exclusively
  // for the expansion of EncodeP!
  match(Set crx (CmpP src1 src2));
  predicate(false);

  format %{ "CMPDI   $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmpi);
    __ cmpdi($crx$$CondRegister, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}

//----------Float Compares----------------------------------------------------

instruct cmpFUnordered_reg_reg(flagsReg crx, regF src1, regF src2) %{
  // no match-rule, false predicate
  effect(DEF crx, USE src1, USE src2);
  predicate(false);

  format %{ "cmpFUrd $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fcmpu);
    __ fcmpu($crx$$CondRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

instruct cmov_bns_less(flagsReg crx) %{
  // no match-rule, false predicate
  effect(DEF crx);
  predicate(false);

  ins_variable_size_depending_on_alignment(true);

  format %{ "cmov    $crx" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT(InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 16 : 12);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmovecr);
    Label done;
    __ bns($crx$$CondRegister, done);        // not unordered -> keep crx
    __ li(R0, 0);
    __ cmpwi($crx$$CondRegister, R0, 1);     // unordered -> set crx to 'less'
    // TODO PPC port __ endgroup_if_needed(_size == 16);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);
%}

// Compare floating, generate condition code.
instruct cmpF_reg_reg_Ex(flagsReg crx, regF src1, regF src2) %{
  // FIXME: should we match 'If cmp (CmpF src1 src2))' ??
  //
  // The following code sequence occurs a lot in mpegaudio:
  //
  // block BXX:
  // 0: instruct cmpFUnordered_reg_reg (cmpF_reg_reg-0):
  //    cmpFUrd CCR6, F11, F9
  // 4: instruct cmov_bns_less (cmpF_reg_reg-1):
  //    cmov CCR6
  // 8: instruct branchConSched:
  //    B_FARle CCR6, B56  P=0.500000 C=-1.000000
  match(Set crx (CmpF src1 src2));
  ins_cost(DEFAULT_COST+BRANCH_COST);

  format %{ "CmpF    $crx, $src1, $src2 \t// postalloc expanded" %}
  postalloc_expand %{
    //
    // replaces
    //
    //   region  src1  src2
    //    \       |     |
    //     crx=cmpF_reg_reg
    //
    // with
    //
    //   region  src1  src2
    //    \       |     |
    //     crx=cmpFUnordered_reg_reg
    //      |
    //      ^  region
    //      |   \
    //      crx=cmov_bns_less
    //

    // Create new nodes.
    MachNode *m1 = new (C) cmpFUnordered_reg_regNode();
    MachNode *m2 = new (C) cmov_bns_lessNode();

    // inputs for new nodes
    m1->add_req(n_region, n_src1, n_src2);
    m2->add_req(n_region);
    m2->add_prec(m1);

    // operands for new nodes
    m1->_opnds[0] = op_crx;
    m1->_opnds[1] = op_src1;
    m1->_opnds[2] = op_src2;
    m2->_opnds[0] = op_crx;

    // registers for new nodes
    ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
    ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx

    // Insert new nodes.
    nodes->push(m1);
    nodes->push(m2);
  %}
%}

// Compare float, generate -1,0,1
instruct cmpF3_reg_reg_ExEx(iRegIdst dst, regF src1, regF src2) %{
  match(Set dst (CmpF3 src1 src2));
  ins_cost(DEFAULT_COST*5+BRANCH_COST);

  expand %{
    flagsReg tmp1;
    cmpFUnordered_reg_reg(tmp1, src1, src2);
    cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(dst, tmp1);
  %}
%}

instruct cmpDUnordered_reg_reg(flagsReg crx, regD src1, regD src2) %{
  // no match-rule, false predicate
  effect(DEF crx, USE src1, USE src2);
  predicate(false);

  format %{ "cmpFUrd $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fcmpu);
    __ fcmpu($crx$$CondRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

instruct cmpD_reg_reg_Ex(flagsReg crx, regD src1, regD src2) %{
  match(Set crx (CmpD src1 src2));
  ins_cost(DEFAULT_COST+BRANCH_COST);

  format %{ "CmpD    $crx, $src1, $src2 \t// postalloc expanded" %}
  postalloc_expand %{
    //
    // replaces
    //
    //   region  src1  src2
    //    \       |     |
    //     crx=cmpD_reg_reg
    //
    // with
    //
    //   region  src1  src2
    //    \       |     |
    //     crx=cmpDUnordered_reg_reg
    //      |
    //      ^  region
    //      |   \
    //      crx=cmov_bns_less
    //

    // create new nodes
    MachNode *m1 = new (C) cmpDUnordered_reg_regNode();
    MachNode *m2 = new (C) cmov_bns_lessNode();

    // inputs for new nodes
    m1->add_req(n_region, n_src1, n_src2);
    m2->add_req(n_region);
    m2->add_prec(m1);

    // operands for new nodes
    m1->_opnds[0] = op_crx;
    m1->_opnds[1] = op_src1;
    m1->_opnds[2] = op_src2;
    m2->_opnds[0] = op_crx;

    // registers for new nodes
    ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
    ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx

    // Insert new nodes.
    nodes->push(m1);
    nodes->push(m2);
  %}
%}

// Compare double, generate -1,0,1
instruct cmpD3_reg_reg_ExEx(iRegIdst dst, regD src1, regD src2) %{
  match(Set dst (CmpD3 src1 src2));
  ins_cost(DEFAULT_COST*5+BRANCH_COST);

  expand %{
    flagsReg tmp1;
    cmpDUnordered_reg_reg(tmp1, src1, src2);
    cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(dst, tmp1);
  %}
%}

//----------Branches---------------------------------------------------------
// Jump

// Direct Branch.
instruct branch(label labl) %{
  match(Goto);
  effect(USE labl);
  ins_cost(BRANCH_COST);

  format %{ "B       $labl" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_b);
     Label d;    // dummy
     __ bind(d);
     Label* p = $labl$$label;
     // `p' is `NULL' when this encoding class is used only to
     // determine the size of the encoded instruction.
     Label& l = (NULL == p)? d : *(p);
     __ b(l);
  %}
  ins_pipe(pipe_class_default);
%}

// Conditional Near Branch
instruct branchCon(cmpOp cmp, flagsReg crx, label lbl) %{
  // Same match rule as `branchConFar'.
  match(If cmp crx);
  effect(USE lbl);
  ins_cost(BRANCH_COST);

  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  ins_short_branch(1);

  format %{ "B$cmp     $crx, $lbl" %}
  size(4);
  ins_encode( enc_bc(crx, cmp, lbl) );
  ins_pipe(pipe_class_default);
%}

// This is for cases when the ppc64 `bc' instruction does not
// reach far enough. So we emit a far branch here, which is more
// expensive.
//
// Conditional Far Branch
instruct branchConFar(cmpOp cmp, flagsReg crx, label lbl) %{
  // Same match rule as `branchCon'.
  match(If cmp crx);
  effect(USE crx, USE lbl);
  predicate(!false /* TODO: PPC port HB_Schedule*/);
  // Higher cost than `branchCon'.
  ins_cost(5*BRANCH_COST);

  // This is not a short variant of a branch, but the long variant.
  ins_short_branch(0);

  format %{ "B_FAR$cmp $crx, $lbl" %}
  size(8);
  ins_encode( enc_bc_far(crx, cmp, lbl) );
  ins_pipe(pipe_class_default);
%}

// Conditional Branch used with Power6 scheduler (can be far or short).
instruct branchConSched(cmpOp cmp, flagsReg crx, label lbl) %{
  // Same match rule as `branchCon'.
  match(If cmp crx);
  effect(USE crx, USE lbl);
  predicate(false /* TODO: PPC port HB_Schedule*/);
  // Higher cost than `branchCon'.
  ins_cost(5*BRANCH_COST);

  // Actually size doesn't depend on alignment but on shortening.
  ins_variable_size_depending_on_alignment(true);
  // long variant.
  ins_short_branch(0);

  format %{ "B_FAR$cmp $crx, $lbl" %}
  size(8); // worst case
  ins_encode( enc_bc_short_far(crx, cmp, lbl) );
  ins_pipe(pipe_class_default);
%}

instruct branchLoopEnd(cmpOp cmp, flagsReg crx, label labl) %{
  match(CountedLoopEnd cmp crx);
  effect(USE labl);
  ins_cost(BRANCH_COST);

  // short variant.
  ins_short_branch(1);

  format %{ "B$cmp     $crx, $labl \t// counted loop end" %}
  size(4);
  ins_encode( enc_bc(crx, cmp, labl) );
  ins_pipe(pipe_class_default);
%}

instruct branchLoopEndFar(cmpOp cmp, flagsReg crx, label labl) %{
  match(CountedLoopEnd cmp crx);
  effect(USE labl);
  predicate(!false /* TODO: PPC port HB_Schedule */);
  ins_cost(BRANCH_COST);

  // Long variant.
  ins_short_branch(0);

  format %{ "B_FAR$cmp $crx, $labl \t// counted loop end" %}
  size(8);
  ins_encode( enc_bc_far(crx, cmp, labl) );
  ins_pipe(pipe_class_default);
%}

// Conditional Branch used with Power6 scheduler (can be far or short).
instruct branchLoopEndSched(cmpOp cmp, flagsReg crx, label labl) %{
  match(CountedLoopEnd cmp crx);
  effect(USE labl);
  predicate(false /* TODO: PPC port HB_Schedule */);
  // Higher cost than `branchCon'.
  ins_cost(5*BRANCH_COST);

  // Actually size doesn't depend on alignment but on shortening.
  ins_variable_size_depending_on_alignment(true);
  // Long variant.
  ins_short_branch(0);

  format %{ "B_FAR$cmp $crx, $labl \t// counted loop end" %}
  size(8); // worst case
  ins_encode( enc_bc_short_far(crx, cmp, labl) );
  ins_pipe(pipe_class_default);
%}

// ============================================================================
// Java runtime operations, intrinsics and other complex operations.

// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// not zero for a miss or zero for a hit. The encoding ALSO sets flags.
//
// GL TODO: Improve this.
// - result should not be a TEMP
// - Add match rule as on sparc avoiding additional Cmp.
instruct partialSubtypeCheck(iRegPdst result, iRegP_N2P subklass, iRegP_N2P superklass,
                             iRegPdst tmp_klass, iRegPdst tmp_arrayptr) %{
  match(Set result (PartialSubtypeCheck subklass superklass));
  effect(TEMP result, TEMP tmp_klass, TEMP tmp_arrayptr);
  ins_cost(DEFAULT_COST*10);

  format %{ "PartialSubtypeCheck $result = ($subklass instanceOf $superklass) tmp: $tmp_klass, $tmp_arrayptr" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    __ check_klass_subtype_slow_path($subklass$$Register, $superklass$$Register, $tmp_arrayptr$$Register,
                                     $tmp_klass$$Register, NULL, $result$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// inlined locking and unlocking

instruct cmpFastLock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
  match(Set crx (FastLock oop box));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
  // TODO PPC port predicate(!UseNewFastLockPPC64 || UseBiasedLocking);

  format %{ "FASTLOCK  $oop, $box, $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
                                 $tmp3$$Register, $tmp1$$Register, $tmp2$$Register);
    // If locking was successfull, crx should indicate 'EQ'.
    // The compiler generates a branch to the runtime call to
    // _complete_monitor_locking_Java for the case where crx is 'NE'.
  %}
  ins_pipe(pipe_class_compare);
%}

instruct cmpFastUnlock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
  match(Set crx (FastUnlock oop box));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);

  format %{ "FASTUNLOCK  $oop, $box, $tmp1, $tmp2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
                                   $tmp3$$Register, $tmp1$$Register, $tmp2$$Register);
    // If unlocking was successfull, crx should indicate 'EQ'.
    // The compiler generates a branch to the runtime call to
    // _complete_monitor_unlocking_Java for the case where crx is 'NE'.
  %}
  ins_pipe(pipe_class_compare);
%}

// Align address.
instruct align_addr(iRegPdst dst, iRegPsrc src, immLnegpow2 mask) %{
  match(Set dst (CastX2P (AndL (CastP2X src) mask)));

  format %{ "ANDDI   $dst, $src, $mask \t// next aligned address" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
    __ clrrdi($dst$$Register, $src$$Register, log2_long((jlong)-$mask$$constant));
  %}
  ins_pipe(pipe_class_default);
%}

// Array size computation.
instruct array_size(iRegLdst dst, iRegPsrc end, iRegPsrc start) %{
  match(Set dst (SubL (CastP2X end) (CastP2X start)));

  format %{ "SUB     $dst, $end, $start \t// array size in bytes" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_subf);
    __ subf($dst$$Register, $start$$Register, $end$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Clear-array with dynamic array-size.
instruct inlineCallClearArray(rarg1RegL cnt, rarg2RegP base, Universe dummy, regCTR ctr) %{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL ctr);
  ins_cost(MEMORY_REF_COST);

  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.

  format %{ "ClearArray $cnt, $base" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    __ clear_memory_doubleword($base$$Register, $cnt$$Register); // kills cnt, base, R0
  %}
  ins_pipe(pipe_class_default);
%}

// String_IndexOf for needle of length 1.
//
// Match needle into immediate operands: no loadConP node needed. Saves one
// register and two instructions over string_indexOf_imm1Node.
//
// Assumes register result differs from all input registers.
//
// Preserves registers haystack, haycnt
// Kills     registers tmp1, tmp2
// Defines   registers result
//
// Use dst register classes if register gets killed, as it is the case for tmp registers!
//
// Unfortunately this does not match too often. In many situations the AddP is used
// by several nodes, even several StrIndexOf nodes, breaking the match tree.
instruct string_indexOf_imm1_char(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
                                  immP needleImm, immL offsetImm, immI_1 needlecntImm,
                                  iRegIdst tmp1, iRegIdst tmp2,
                                  flagsRegCR0 cr0, flagsRegCR1 cr1) %{
  predicate(SpecialStringIndexOf);  // type check implicit by parameter type, See Matcher::match_rule_supported
  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm)));

  effect(TEMP result, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1);

  ins_cost(150);
  format %{ "String IndexOf CSCL1 $haystack[0..$haycnt], $needleImm+$offsetImm[0..$needlecntImm]"
            "-> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}

  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    immPOper *needleOper = (immPOper *)$needleImm;
    const TypeOopPtr *t = needleOper->type()->isa_oopptr();
    ciTypeArray* needle_values = t->const_oop()->as_type_array();  // Pointer to live char *

    __ string_indexof_1($result$$Register,
                        $haystack$$Register, $haycnt$$Register,
                        R0, needle_values->char_at(0),
                        $tmp1$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

// String_IndexOf for needle of length 1.
//
// Special case requires less registers and emits less instructions.
//
// Assumes register result differs from all input registers.
//
// Preserves registers haystack, haycnt
// Kills     registers tmp1, tmp2, needle
// Defines   registers result
//
// Use dst register classes if register gets killed, as it is the case for tmp registers!
instruct string_indexOf_imm1(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
                             rscratch2RegP needle, immI_1 needlecntImm,
                             iRegIdst tmp1, iRegIdst tmp2,
                             flagsRegCR0 cr0, flagsRegCR1 cr1) %{
  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
  effect(USE_KILL needle, /* TDEF needle, */ TEMP result,
         TEMP tmp1, TEMP tmp2);
  // Required for EA: check if it is still a type_array.
  predicate(SpecialStringIndexOf && n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
  ins_cost(180);

  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.

  format %{ "String IndexOf SCL1 $haystack[0..$haycnt], $needle[0..$needlecntImm]"
            " -> $result \t// KILL $haycnt, $needle, $tmp1, $tmp2, $cr0, $cr1" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    Node *ndl = in(operand_index($needle));  // The node that defines needle.
    ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
    guarantee(needle_values, "sanity");
    if (needle_values != NULL) {
      __ string_indexof_1($result$$Register,
                          $haystack$$Register, $haycnt$$Register,
                          R0, needle_values->char_at(0),
                          $tmp1$$Register, $tmp2$$Register);
    } else {
      __ string_indexof_1($result$$Register,
                          $haystack$$Register, $haycnt$$Register,
                          $needle$$Register, 0,
                          $tmp1$$Register, $tmp2$$Register);
    }
  %}
  ins_pipe(pipe_class_compare);
%}

// String_IndexOf.
//
// Length of needle as immediate. This saves instruction loading constant needle
// length.
// @@@ TODO Specify rules for length < 8 or so, and roll out comparison of needle
// completely or do it in vector instruction. This should save registers for
// needlecnt and needle.
//
// Assumes register result differs from all input registers.
// Overwrites haycnt, needlecnt.
// Use dst register classes if register gets killed, as it is the case for tmp registers!
instruct string_indexOf_imm(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
                            iRegPsrc needle, uimmI15 needlecntImm,
                            iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
                            flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6) %{
  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
  effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP result,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6);
  // Required for EA: check if it is still a type_array.
  predicate(SpecialStringIndexOf && n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
  ins_cost(250);

  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.

  format %{ "String IndexOf SCL $haystack[0..$haycnt], $needle[0..$needlecntImm]"
            " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $cr0, $cr1" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    Node *ndl = in(operand_index($needle));  // The node that defines needle.
    ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();

    __ string_indexof($result$$Register,
                      $haystack$$Register, $haycnt$$Register,
                      $needle$$Register, needle_values, $tmp5$$Register, $needlecntImm$$constant,
                      $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

// StrIndexOf node.
//
// Assumes register result differs from all input registers.
// Overwrites haycnt, needlecnt.
// Use dst register classes if register gets killed, as it is the case for tmp registers!
instruct string_indexOf(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, iRegPsrc needle, rscratch2RegI needlecnt,
                        iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4,
                        flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6) %{
  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
  effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/
         TEMP result,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6);
  predicate(SpecialStringIndexOf);  // See Matcher::match_rule_supported.
  ins_cost(300);

  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.

  format %{ "String IndexOf $haystack[0..$haycnt], $needle[0..$needlecnt]"
             " -> $result \t// KILL $haycnt, $needlecnt, $tmp1, $tmp2, $tmp3, $tmp4, $cr0, $cr1" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    __ string_indexof($result$$Register,
                      $haystack$$Register, $haycnt$$Register,
                      $needle$$Register, NULL, $needlecnt$$Register, 0,  // needlecnt not constant.
                      $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

// String equals with immediate.
instruct string_equals_imm(iRegPsrc str1, iRegPsrc str2, uimmI15 cntImm, iRegIdst result,
                           iRegPdst tmp1, iRegPdst tmp2,
                           flagsRegCR0 cr0, flagsRegCR6 cr6, regCTR ctr) %{
  match(Set result (StrEquals (Binary str1 str2) cntImm));
  effect(TEMP result, TEMP tmp1, TEMP tmp2,
         KILL cr0, KILL cr6, KILL ctr);
  predicate(SpecialStringEquals);  // See Matcher::match_rule_supported.
  ins_cost(250);

  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.

  format %{ "String Equals SCL [0..$cntImm]($str1),[0..$cntImm]($str2)"
            " -> $result \t// KILL $cr0, $cr6, $ctr, TEMP $result, $tmp1, $tmp2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    __ char_arrays_equalsImm($str1$$Register, $str2$$Register, $cntImm$$constant,
                             $result$$Register, $tmp1$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

// String equals.
// Use dst register classes if register gets killed, as it is the case for TEMP operands!
instruct string_equals(iRegPsrc str1, iRegPsrc str2, iRegIsrc cnt, iRegIdst result,
                       iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3, iRegPdst tmp4, iRegPdst tmp5,
                       flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
         KILL cr0, KILL cr1, KILL cr6, KILL ctr);
  predicate(SpecialStringEquals);  // See Matcher::match_rule_supported.
  ins_cost(300);

  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.

  format %{ "String Equals [0..$cnt]($str1),[0..$cnt]($str2) -> $result"
            " \t// KILL $cr0, $cr1, $cr6, $ctr, TEMP $result, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    __ char_arrays_equals($str1$$Register, $str2$$Register, $cnt$$Register, $result$$Register,
                          $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, $tmp5$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

// String compare.
// Char[] pointers are passed in.
// Use dst register classes if register gets killed, as it is the case for TEMP operands!
instruct string_compare(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
                        iRegPdst tmp, flagsRegCR0 cr0, regCTR ctr) %{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL cnt1, USE_KILL cnt2, USE_KILL str1, USE_KILL str2, TEMP result, TEMP tmp, KILL cr0, KILL ctr);
  ins_cost(300);

  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.

  format %{ "String Compare $str1[0..$cnt1], $str2[0..$cnt2] -> $result"
            " \t// TEMP $tmp, $result KILLs $str1, $cnt1, $str2, $cnt2, $cr0, $ctr" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    __ string_compare($str1$$Register, $str2$$Register, $cnt1$$Register, $cnt2$$Register,
                      $result$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

//---------- Min/Max Instructions ---------------------------------------------

instruct minI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (MinI src1 src2));
  ins_cost(DEFAULT_COST*6);

  expand %{
    iRegLdst src1s;
    iRegLdst src2s;
    iRegLdst diff;
    iRegLdst sm;
    iRegLdst doz; // difference or zero
    convI2L_reg(src1s, src1); // Ensure proper sign extension.
    convI2L_reg(src2s, src2); // Ensure proper sign extension.
    subL_reg_reg(diff, src2s, src1s);
    // Need to consider >=33 bit result, therefore we need signmaskL.
    signmask64L_regL(sm, diff);
    andL_reg_reg(doz, diff, sm); // <=0
    addI_regL_regL(dst, doz, src1s);
  %}
%}

instruct maxI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (MaxI src1 src2));
  ins_cost(DEFAULT_COST*6);

  expand %{
    iRegLdst src1s;
    iRegLdst src2s;
    iRegLdst diff;
    iRegLdst sm;
    iRegLdst doz; // difference or zero
    convI2L_reg(src1s, src1); // Ensure proper sign extension.
    convI2L_reg(src2s, src2); // Ensure proper sign extension.
    subL_reg_reg(diff, src2s, src1s);
    // Need to consider >=33 bit result, therefore we need signmaskL.
    signmask64L_regL(sm, diff);
    andcL_reg_reg(doz, diff, sm); // >=0
    addI_regL_regL(dst, doz, src1s);
  %}
%}

//---------- Population Count Instructions ------------------------------------

// Popcnt for Power7.
instruct popCountI(iRegIdst dst, iRegIsrc src) %{
  match(Set dst (PopCountI src));
  predicate(UsePopCountInstruction && VM_Version::has_popcntw());
  ins_cost(DEFAULT_COST);

  format %{ "POPCNTW $dst, $src" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_popcntb);
    __ popcntw($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Popcnt for Power7.
instruct popCountL(iRegIdst dst, iRegLsrc src) %{
  predicate(UsePopCountInstruction && VM_Version::has_popcntw());
  match(Set dst (PopCountL src));
  ins_cost(DEFAULT_COST);

  format %{ "POPCNTD $dst, $src" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_popcntb);
    __ popcntd($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

instruct countLeadingZerosI(iRegIdst dst, iRegIsrc src) %{
  match(Set dst (CountLeadingZerosI src));
  predicate(UseCountLeadingZerosInstructionsPPC64);  // See Matcher::match_rule_supported.
  ins_cost(DEFAULT_COST);

  format %{ "CNTLZW  $dst, $src" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cntlzw);
    __ cntlzw($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

instruct countLeadingZerosL(iRegIdst dst, iRegLsrc src) %{
  match(Set dst (CountLeadingZerosL src));
  predicate(UseCountLeadingZerosInstructionsPPC64);  // See Matcher::match_rule_supported.
  ins_cost(DEFAULT_COST);

  format %{ "CNTLZD  $dst, $src" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cntlzd);
    __ cntlzd($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

instruct countLeadingZerosP(iRegIdst dst, iRegPsrc src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "CNTLZD  $dst, $src" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cntlzd);
    __ cntlzd($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

instruct countTrailingZerosI_Ex(iRegIdst dst, iRegIsrc src) %{
  match(Set dst (CountTrailingZerosI src));
  predicate(UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  expand %{
    immI16 imm1 %{ (int)-1 %}
    immI16 imm2 %{ (int)32 %}
    immI_minus1 m1 %{ -1 %}
    iRegIdst tmpI1;
    iRegIdst tmpI2;
    iRegIdst tmpI3;
    addI_reg_imm16(tmpI1, src, imm1);
    andcI_reg_reg(tmpI2, src, m1, tmpI1);
    countLeadingZerosI(tmpI3, tmpI2);
    subI_imm16_reg(dst, imm2, tmpI3);
  %}
%}

instruct countTrailingZerosL_Ex(iRegIdst dst, iRegLsrc src) %{
  match(Set dst (CountTrailingZerosL src));
  predicate(UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  expand %{
    immL16 imm1 %{ (long)-1 %}
    immI16 imm2 %{ (int)64 %}
    iRegLdst tmpL1;
    iRegLdst tmpL2;
    iRegIdst tmpL3;
    addL_reg_imm16(tmpL1, src, imm1);
    andcL_reg_reg(tmpL2, tmpL1, src);
    countLeadingZerosL(tmpL3, tmpL2);
    subI_imm16_reg(dst, imm2, tmpL3);
 %}
%}

// Expand nodes for byte_reverse_int.
instruct insrwi_a(iRegIdst dst, iRegIsrc src, immI16 pos, immI16 shift) %{
  effect(DEF dst, USE src, USE pos, USE shift);
  predicate(false);

  format %{ "INSRWI  $dst, $src, $pos, $shift" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rlwimi);
    __ insrwi($dst$$Register, $src$$Register, $shift$$constant, $pos$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// As insrwi_a, but with USE_DEF.
instruct insrwi(iRegIdst dst, iRegIsrc src, immI16 pos, immI16 shift) %{
  effect(USE_DEF dst, USE src, USE pos, USE shift);
  predicate(false);

  format %{ "INSRWI  $dst, $src, $pos, $shift" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rlwimi);
    __ insrwi($dst$$Register, $src$$Register, $shift$$constant, $pos$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// Just slightly faster than java implementation.
instruct bytes_reverse_int_Ex(iRegIdst dst, iRegIsrc src) %{
  match(Set dst (ReverseBytesI src));
  predicate(UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  expand %{
    immI16 imm24 %{ (int) 24 %}
    immI16 imm16 %{ (int) 16 %}
    immI16  imm8 %{ (int)  8 %}
    immI16  imm4 %{ (int)  4 %}
    immI16  imm0 %{ (int)  0 %}
    iRegLdst tmpI1;
    iRegLdst tmpI2;
    iRegLdst tmpI3;

    urShiftI_reg_imm(tmpI1, src, imm24);
    insrwi_a(dst, tmpI1, imm24, imm8);
    urShiftI_reg_imm(tmpI2, src, imm16);
    insrwi(dst, tmpI2, imm8, imm16);
    urShiftI_reg_imm(tmpI3, src, imm8);
    insrwi(dst, tmpI3, imm8, imm8);
    insrwi(dst, src, imm0, imm8);
  %}
%}

//---------- Replicate Vector Instructions ------------------------------------

// Insrdi does replicate if src == dst.
instruct repl32(iRegLdst dst) %{
  predicate(false);
  effect(USE_DEF dst);

  format %{ "INSRDI  $dst, #0, $dst, #32 \t// replicate" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldimi);
    __ insrdi($dst$$Register, $dst$$Register, 32, 0);
  %}
  ins_pipe(pipe_class_default);
%}

// Insrdi does replicate if src == dst.
instruct repl48(iRegLdst dst) %{
  predicate(false);
  effect(USE_DEF dst);

  format %{ "INSRDI  $dst, #0, $dst, #48 \t// replicate" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldimi);
    __ insrdi($dst$$Register, $dst$$Register, 48, 0);
  %}
  ins_pipe(pipe_class_default);
%}

// Insrdi does replicate if src == dst.
instruct repl56(iRegLdst dst) %{
  predicate(false);
  effect(USE_DEF dst);

  format %{ "INSRDI  $dst, #0, $dst, #56 \t// replicate" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldimi);
    __ insrdi($dst$$Register, $dst$$Register, 56, 0);
  %}
  ins_pipe(pipe_class_default);
%}

instruct repl8B_reg_Ex(iRegLdst dst, iRegIsrc src) %{
  match(Set dst (ReplicateB src));
  predicate(n->as_Vector()->length() == 8);
  expand %{
    moveReg(dst, src);
    repl56(dst);
    repl48(dst);
    repl32(dst);
  %}
%}

instruct repl8B_immI0(iRegLdst dst, immI_0 zero) %{
  match(Set dst (ReplicateB zero));
  predicate(n->as_Vector()->length() == 8);
  format %{ "LI      $dst, #0 \t// replicate8B" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ li($dst$$Register, (int)((short)($zero$$constant & 0xFFFF)));
  %}
  ins_pipe(pipe_class_default);
%}

instruct repl8B_immIminus1(iRegLdst dst, immI_minus1 src) %{
  match(Set dst (ReplicateB src));
  predicate(n->as_Vector()->length() == 8);
  format %{ "LI      $dst, #-1 \t// replicate8B" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
  %}
  ins_pipe(pipe_class_default);
%}

instruct repl4S_reg_Ex(iRegLdst dst, iRegIsrc src) %{
  match(Set dst (ReplicateS src));
  predicate(n->as_Vector()->length() == 4);
  expand %{
    moveReg(dst, src);
    repl48(dst);
    repl32(dst);
  %}
%}

instruct repl4S_immI0(iRegLdst dst, immI_0 zero) %{
  match(Set dst (ReplicateS zero));
  predicate(n->as_Vector()->length() == 4);
  format %{ "LI      $dst, #0 \t// replicate4C" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ li($dst$$Register, (int)((short)($zero$$constant & 0xFFFF)));
  %}
  ins_pipe(pipe_class_default);
%}

instruct repl4S_immIminus1(iRegLdst dst, immI_minus1 src) %{
  match(Set dst (ReplicateS src));
  predicate(n->as_Vector()->length() == 4);
  format %{ "LI      $dst, -1 \t// replicate4C" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
  %}
  ins_pipe(pipe_class_default);
%}

instruct repl2I_reg_Ex(iRegLdst dst, iRegIsrc src) %{
  match(Set dst (ReplicateI src));
  predicate(n->as_Vector()->length() == 2);
  ins_cost(2 * DEFAULT_COST);
  expand %{
    moveReg(dst, src);
    repl32(dst);
  %}
%}

instruct repl2I_immI0(iRegLdst dst, immI_0 zero) %{
  match(Set dst (ReplicateI zero));
  predicate(n->as_Vector()->length() == 2);
  format %{ "LI      $dst, #0 \t// replicate4C" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ li($dst$$Register, (int)((short)($zero$$constant & 0xFFFF)));
  %}
  ins_pipe(pipe_class_default);
%}

instruct repl2I_immIminus1(iRegLdst dst, immI_minus1 src) %{
  match(Set dst (ReplicateI src));
  predicate(n->as_Vector()->length() == 2);
  format %{ "LI      $dst, -1 \t// replicate4C" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
  %}
  ins_pipe(pipe_class_default);
%}

// Move float to int register via stack, replicate.
instruct repl2F_reg_Ex(iRegLdst dst, regF src) %{
  match(Set dst (ReplicateF src));
  predicate(n->as_Vector()->length() == 2);
  ins_cost(2 * MEMORY_REF_COST + DEFAULT_COST);
  expand %{
    stackSlotL tmpS;
    iRegIdst tmpI;
    moveF2I_reg_stack(tmpS, src);   // Move float to stack.
    moveF2I_stack_reg(tmpI, tmpS);  // Move stack to int reg.
    moveReg(dst, tmpI);             // Move int to long reg.
    repl32(dst);                    // Replicate bitpattern.
  %}
%}

// Replicate scalar constant to packed float values in Double register
instruct repl2F_immF_Ex(iRegLdst dst, immF src) %{
  match(Set dst (ReplicateF src));
  predicate(n->as_Vector()->length() == 2);
  ins_cost(5 * DEFAULT_COST);

  format %{ "LD      $dst, offset, $constanttablebase\t// load replicated float $src $src from table, postalloc expanded" %}
  postalloc_expand( postalloc_expand_load_replF_constant(dst, src, constanttablebase) );
%}

// Replicate scalar zero constant to packed float values in Double register
instruct repl2F_immF0(iRegLdst dst, immF_0 zero) %{
  match(Set dst (ReplicateF zero));
  predicate(n->as_Vector()->length() == 2);

  format %{ "LI      $dst, #0 \t// replicate2F" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ li($dst$$Register, 0x0);
  %}
  ins_pipe(pipe_class_default);
%}

// ============================================================================
// Safepoint Instruction

instruct safePoint_poll(iRegPdst poll) %{
  match(SafePoint poll);
  predicate(LoadPollAddressFromThread);

  // It caused problems to add the effect that r0 is killed, but this
  // effect no longer needs to be mentioned, since r0 is not contained
  // in a reg_class.

  format %{ "LD      R0, #0, $poll \t// Safepoint poll for GC" %}
  size(4);
  ins_encode( enc_poll(0x0, poll) );
  ins_pipe(pipe_class_default);
%}

// Safepoint without per-thread support. Load address of page to poll
// as constant.
// Rscratch2RegP is R12.
// LoadConPollAddr node is added in pd_post_matching_hook(). It must be
// a seperate node so that the oop map is at the right location.
instruct safePoint_poll_conPollAddr(rscratch2RegP poll) %{
  match(SafePoint poll);
  predicate(!LoadPollAddressFromThread);

  // It caused problems to add the effect that r0 is killed, but this
  // effect no longer needs to be mentioned, since r0 is not contained
  // in a reg_class.

  format %{ "LD      R0, #0, R12 \t// Safepoint poll for GC" %}
  ins_encode( enc_poll(0x0, poll) );
  ins_pipe(pipe_class_default);
%}

// ============================================================================
// Call Instructions

// Call Java Static Instruction

// Schedulable version of call static node.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);
  predicate(!((CallStaticJavaNode*)n)->is_method_handle_invoke());
  ins_cost(CALL_COST);

  ins_num_consts(3 /* up to 3 patchable constants: inline cache, 2 call targets. */);

  format %{ "CALL,static $meth \t// ==> " %}
  size(4);
  ins_encode( enc_java_static_call(meth) );
  ins_pipe(pipe_class_call);
%}

// Schedulable version of call static node.
instruct CallStaticJavaDirectHandle(method meth) %{
  match(CallStaticJava);
  effect(USE meth);
  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
  ins_cost(CALL_COST);

  ins_num_consts(3 /* up to 3 patchable constants: inline cache, 2 call targets. */);

  format %{ "CALL,static $meth \t// ==> " %}
  ins_encode( enc_java_handle_call(meth) );
  ins_pipe(pipe_class_call);
%}

// Call Java Dynamic Instruction

// Used by postalloc expand of CallDynamicJavaDirectSchedEx (actual call).
// Loading of IC was postalloc expanded. The nodes loading the IC are reachable
// via fields ins_field_load_ic_hi_node and ins_field_load_ic_node.
// The call destination must still be placed in the constant pool.
instruct CallDynamicJavaDirectSched(method meth) %{
  match(CallDynamicJava); // To get all the data fields we need ...
  effect(USE meth);
  predicate(false);       // ... but never match.

  ins_field_load_ic_hi_node(loadConL_hiNode*);
  ins_field_load_ic_node(loadConLNode*);
  ins_num_consts(1 /* 1 patchable constant: call destination */);

  format %{ "BL        \t// dynamic $meth ==> " %}
  size(4);
  ins_encode( enc_java_dynamic_call_sched(meth) );
  ins_pipe(pipe_class_call);
%}

// Schedulable (i.e. postalloc expanded) version of call dynamic java.
// We use postalloc expanded calls if we use inline caches
// and do not update method data.
//
// This instruction has two constants: inline cache (IC) and call destination.
// Loading the inline cache will be postalloc expanded, thus leaving a call with
// one constant.
instruct CallDynamicJavaDirectSched_Ex(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);
  predicate(UseInlineCaches);
  ins_cost(CALL_COST);

  ins_num_consts(2 /* 2 patchable constants: inline cache, call destination. */);

  format %{ "CALL,dynamic $meth \t// postalloc expanded" %}
  postalloc_expand( postalloc_expand_java_dynamic_call_sched(meth, constanttablebase) );
%}

// Compound version of call dynamic java
// We use postalloc expanded calls if we use inline caches
// and do not update method data.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);
  predicate(!UseInlineCaches);
  ins_cost(CALL_COST);

  // Enc_java_to_runtime_call needs up to 4 constants (method data oop).
  ins_num_consts(4);

  format %{ "CALL,dynamic $meth \t// ==> " %}
  ins_encode( enc_java_dynamic_call(meth, constanttablebase) );
  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction

instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime);
  effect(USE meth);
  ins_cost(CALL_COST);

  // Enc_java_to_runtime_call needs up to 3 constants: call target,
  // env for callee, C-toc.
  ins_num_consts(3);

  format %{ "CALL,runtime" %}
  ins_encode( enc_java_to_runtime_call(meth) );
  ins_pipe(pipe_class_call);
%}

// Call Leaf

// Used by postalloc expand of CallLeafDirect_Ex (mtctr).
instruct CallLeafDirect_mtctr(iRegLdst dst, iRegLsrc src) %{
  effect(DEF dst, USE src);

  ins_num_consts(1);

  format %{ "MTCTR   $src" %}
  size(4);
  ins_encode( enc_leaf_call_mtctr(src) );
  ins_pipe(pipe_class_default);
%}

// Used by postalloc expand of CallLeafDirect_Ex (actual call).
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);   // To get the data all the data fields we need ...
  effect(USE meth);
  predicate(false);  // but never match.

  format %{ "BCTRL     \t// leaf call $meth ==> " %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_bctrl);
    __ bctrl();
  %}
  ins_pipe(pipe_class_call);
%}

// postalloc expand of CallLeafDirect.
// Load adress to call from TOC, then bl to it.
instruct CallLeafDirect_Ex(method meth) %{
  match(CallLeaf);
  effect(USE meth);
  ins_cost(CALL_COST);

  // Postalloc_expand_java_to_runtime_call needs up to 3 constants: call target,
  // env for callee, C-toc.
  ins_num_consts(3);

  format %{ "CALL,runtime leaf $meth \t// postalloc expanded" %}
  postalloc_expand( postalloc_expand_java_to_runtime_call(meth, constanttablebase) );
%}

// Call runtime without safepoint - same as CallLeaf.
// postalloc expand of CallLeafNoFPDirect.
// Load adress to call from TOC, then bl to it.
instruct CallLeafNoFPDirect_Ex(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);
  ins_cost(CALL_COST);

  // Enc_java_to_runtime_call needs up to 3 constants: call target,
  // env for callee, C-toc.
  ins_num_consts(3);

  format %{ "CALL,runtime leaf nofp $meth \t// postalloc expanded" %}
  postalloc_expand( postalloc_expand_java_to_runtime_call(meth, constanttablebase) );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(iRegPdstNoScratch jump_target, inline_cache_regP method_oop) %{
  match(TailCall jump_target method_oop);
  ins_cost(CALL_COST);

  format %{ "MTCTR   $jump_target \t// $method_oop holds method oop\n\t"
            "BCTR         \t// tail call" %}
  size(8);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    __ mtctr($jump_target$$Register);
    __ bctr();
  %}
  ins_pipe(pipe_class_call);
%}

// Return Instruction
instruct Ret() %{
  match(Return);
  format %{ "BLR      \t// branch to link register" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_blr);
    // LR is restored in MachEpilogNode. Just do the RET here.
    __ blr();
  %}
  ins_pipe(pipe_class_default);
%}

// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a
// "restore" before this instruction (in Epilogue), we need to materialize it
// in %i0.
instruct tailjmpInd(iRegPdstNoScratch jump_target, rarg1RegP ex_oop) %{
  match(TailJump jump_target ex_oop);
  ins_cost(CALL_COST);

  format %{ "LD      R4_ARG2 = LR\n\t"
            "MTCTR   $jump_target\n\t"
            "BCTR     \t// TailJump, exception oop: $ex_oop" %}
  size(12);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    __ ld(R4_ARG2/* issuing pc */, _abi(lr), R1_SP);
    __ mtctr($jump_target$$Register);
    __ bctr();
  %}
  ins_pipe(pipe_class_call);
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException(rarg1RegP ex_oop) %{
  match(Set ex_oop (CreateEx));
  ins_cost(0);

  format %{ " -- \t// exception oop; no code emitted" %}
  size(0);
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

// Rethrow exception: The exception oop will come in the first
// argument position. Then JUMP (not call) to the rethrow stub code.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "Jmp     rethrow_stub" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    cbuf.set_insts_mark();
    __ b64_patchable((address)OptoRuntime::rethrow_stub(), relocInfo::runtime_call_type);
  %}
  ins_pipe(pipe_class_call);
%}

// Die now.
instruct ShouldNotReachHere() %{
  match(Halt);
  ins_cost(CALL_COST);

  format %{ "ShouldNotReachHere" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_tdi);
    __ trap_should_not_reach_here();
  %}
  ins_pipe(pipe_class_default);
%}

// This name is KNOWN by the ADLC and cannot be changed.  The ADLC
// forces a 'TypeRawPtr::BOTTOM' output type for this guy.
// Get a DEF on threadRegP, no costs, no encoding, use
// 'ins_should_rematerialize(true)' to avoid spilling.
instruct tlsLoadP(threadRegP dst) %{
  match(Set dst (ThreadLocal));
  ins_cost(0);

  ins_should_rematerialize(true);

  format %{ " -- \t// $dst=Thread::current(), empty" %}
  size(0);
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_empty);
%}

//---Some PPC specific nodes---------------------------------------------------

// Stop a group.
instruct endGroup() %{
  ins_cost(0);

  ins_is_nop(true);

  format %{ "End Bundle (ori r1, r1, 0)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_endgroup);
    __ endgroup();
  %}
  ins_pipe(pipe_class_default);
%}

// Nop instructions

instruct fxNop() %{
  ins_cost(0);

  ins_is_nop(true);

  format %{ "fxNop" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fmr);
    __ nop();
  %}
  ins_pipe(pipe_class_default);
%}

instruct fpNop0() %{
  ins_cost(0);

  ins_is_nop(true);

  format %{ "fpNop0" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fmr);
    __ fpnop0();
  %}
  ins_pipe(pipe_class_default);
%}

instruct fpNop1() %{
  ins_cost(0);

  ins_is_nop(true);

  format %{ "fpNop1" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fmr);
    __ fpnop1();
  %}
  ins_pipe(pipe_class_default);
%}

instruct brNop0() %{
  ins_cost(0);
  size(4);
  format %{ "brNop0" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_mcrf);
    __ brnop0();
  %}
  ins_is_nop(true);
  ins_pipe(pipe_class_default);
%}

instruct brNop1() %{
  ins_cost(0);

  ins_is_nop(true);

  format %{ "brNop1" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_mcrf);
    __ brnop1();
  %}
  ins_pipe(pipe_class_default);
%}

instruct brNop2() %{
  ins_cost(0);

  ins_is_nop(true);

  format %{ "brNop2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_mcrf);
    __ brnop2();
  %}
  ins_pipe(pipe_class_default);
%}

//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceeding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(eRegI dst, eRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceeded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, eRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(eRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

peephole %{
  peepmatch ( loadL storeL );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeL( 1.mem 1.mem 1.src ) );
%}

peephole %{
  peepmatch ( loadP storeP );
  peepconstraint ( 1.src == 0.dst, 1.dst == 0.mem );
  peepreplace ( storeP( 1.dst 1.dst 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.