Mercurial > hg > truffle
diff src/cpu/ppc/vm/ppc.ad @ 14445:67fa91961822
8029940: PPC64 (part 122): C2 compiler port
Reviewed-by: kvn
author | goetz |
---|---|
date | Wed, 11 Dec 2013 00:06:11 +0100 |
parents | |
children | ad6695638a35 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/ppc/vm/ppc.ad Wed Dec 11 00:06:11 2013 +0100 @@ -0,0 +1,12059 @@ +// +// Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved. +// Copyright 2012, 2013 SAP AG. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +// +// PPC64 Architecture Description File +// + +//----------REGISTER DEFINITION BLOCK------------------------------------------ +// This information is used by the matcher and the register allocator to +// describe individual registers and classes of registers within the target +// architecture. +register %{ +//----------Architecture Description Register Definitions---------------------- +// General Registers +// "reg_def" name (register save type, C convention save type, +// ideal register type, encoding); +// +// Register Save Types: +// +// NS = No-Save: The register allocator assumes that these registers +// can be used without saving upon entry to the method, & +// that they do not need to be saved at call sites. 
+// +// SOC = Save-On-Call: The register allocator assumes that these registers +// can be used without saving upon entry to the method, +// but that they must be saved at call sites. +// These are called "volatiles" on ppc. +// +// SOE = Save-On-Entry: The register allocator assumes that these registers +// must be saved before using them upon entry to the +// method, but they do not need to be saved at call +// sites. +// These are called "nonvolatiles" on ppc. +// +// AS = Always-Save: The register allocator assumes that these registers +// must be saved before using them upon entry to the +// method, & that they must be saved at call sites. +// +// Ideal Register Type is used to determine how to save & restore a +// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get +// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. +// +// The encoding number is the actual bit-pattern placed into the opcodes. +// +// PPC64 register definitions, based on the 64-bit PowerPC ELF ABI +// Supplement Version 1.7 as of 2003-10-29. +// +// For each 64-bit register we must define two registers: the register +// itself, e.g. R3, and a corresponding virtual other (32-bit-)'half', +// e.g. R3_H, which is needed by the allocator, but is not used +// for stores, loads, etc. + +// ---------------------------- +// Integer/Long Registers +// ---------------------------- + + // PPC64 has 32 64-bit integer registers. 
  // Each Rn_H / Fn_H entry below is the virtual second half of Rn / Fn that
  // the allocator needs: it always uses encoding 99 and the VMReg slot that
  // follows the one of its base register.

  // types: v = volatile, nv = non-volatile, s = system
  reg_def R0   ( SOC, SOC, Op_RegI,  0, R0->as_VMReg()         );  // v   used in prologs
  reg_def R0_H ( SOC, SOC, Op_RegI, 99, R0->as_VMReg()->next() );
  reg_def R1   ( NS,  NS,  Op_RegI,  1, R1->as_VMReg()         );  // s   SP
  reg_def R1_H ( NS,  NS,  Op_RegI, 99, R1->as_VMReg()->next() );
  reg_def R2   ( SOC, SOC, Op_RegI,  2, R2->as_VMReg()         );  // v   TOC
  reg_def R2_H ( SOC, SOC, Op_RegI, 99, R2->as_VMReg()->next() );
  reg_def R3   ( SOC, SOC, Op_RegI,  3, R3->as_VMReg()         );  // v   iarg1 & iret
  reg_def R3_H ( SOC, SOC, Op_RegI, 99, R3->as_VMReg()->next() );
  reg_def R4   ( SOC, SOC, Op_RegI,  4, R4->as_VMReg()         );  // v   iarg2
  reg_def R4_H ( SOC, SOC, Op_RegI, 99, R4->as_VMReg()->next() );
  reg_def R5   ( SOC, SOC, Op_RegI,  5, R5->as_VMReg()         );  // v   iarg3
  reg_def R5_H ( SOC, SOC, Op_RegI, 99, R5->as_VMReg()->next() );
  reg_def R6   ( SOC, SOC, Op_RegI,  6, R6->as_VMReg()         );  // v   iarg4
  reg_def R6_H ( SOC, SOC, Op_RegI, 99, R6->as_VMReg()->next() );
  reg_def R7   ( SOC, SOC, Op_RegI,  7, R7->as_VMReg()         );  // v   iarg5
  reg_def R7_H ( SOC, SOC, Op_RegI, 99, R7->as_VMReg()->next() );
  reg_def R8   ( SOC, SOC, Op_RegI,  8, R8->as_VMReg()         );  // v   iarg6
  reg_def R8_H ( SOC, SOC, Op_RegI, 99, R8->as_VMReg()->next() );
  reg_def R9   ( SOC, SOC, Op_RegI,  9, R9->as_VMReg()         );  // v   iarg7
  reg_def R9_H ( SOC, SOC, Op_RegI, 99, R9->as_VMReg()->next() );
  reg_def R10  ( SOC, SOC, Op_RegI, 10, R10->as_VMReg()        );  // v   iarg8
  reg_def R10_H( SOC, SOC, Op_RegI, 99, R10->as_VMReg()->next());
  reg_def R11  ( SOC, SOC, Op_RegI, 11, R11->as_VMReg()        );  // v   ENV / scratch
  reg_def R11_H( SOC, SOC, Op_RegI, 99, R11->as_VMReg()->next());
  reg_def R12  ( SOC, SOC, Op_RegI, 12, R12->as_VMReg()        );  // v   scratch
  reg_def R12_H( SOC, SOC, Op_RegI, 99, R12->as_VMReg()->next());
  reg_def R13  ( NS,  NS,  Op_RegI, 13, R13->as_VMReg()        );  // s   system thread id
  reg_def R13_H( NS,  NS,  Op_RegI, 99, R13->as_VMReg()->next());
  reg_def R14  ( SOC, SOE, Op_RegI, 14, R14->as_VMReg()        );  // nv
  reg_def R14_H( SOC, SOE, Op_RegI, 99, R14->as_VMReg()->next());
  reg_def R15  ( SOC, SOE, Op_RegI, 15, R15->as_VMReg()        );  // nv
  reg_def R15_H( SOC, SOE, Op_RegI, 99, R15->as_VMReg()->next());
  reg_def R16  ( SOC, SOE, Op_RegI, 16, R16->as_VMReg()        );  // nv
  reg_def R16_H( SOC, SOE, Op_RegI, 99, R16->as_VMReg()->next());
  reg_def R17  ( SOC, SOE, Op_RegI, 17, R17->as_VMReg()        );  // nv
  reg_def R17_H( SOC, SOE, Op_RegI, 99, R17->as_VMReg()->next());
  reg_def R18  ( SOC, SOE, Op_RegI, 18, R18->as_VMReg()        );  // nv
  reg_def R18_H( SOC, SOE, Op_RegI, 99, R18->as_VMReg()->next());
  reg_def R19  ( SOC, SOE, Op_RegI, 19, R19->as_VMReg()        );  // nv
  reg_def R19_H( SOC, SOE, Op_RegI, 99, R19->as_VMReg()->next());
  reg_def R20  ( SOC, SOE, Op_RegI, 20, R20->as_VMReg()        );  // nv
  reg_def R20_H( SOC, SOE, Op_RegI, 99, R20->as_VMReg()->next());
  reg_def R21  ( SOC, SOE, Op_RegI, 21, R21->as_VMReg()        );  // nv
  reg_def R21_H( SOC, SOE, Op_RegI, 99, R21->as_VMReg()->next());
  reg_def R22  ( SOC, SOE, Op_RegI, 22, R22->as_VMReg()        );  // nv
  reg_def R22_H( SOC, SOE, Op_RegI, 99, R22->as_VMReg()->next());
  reg_def R23  ( SOC, SOE, Op_RegI, 23, R23->as_VMReg()        );  // nv
  reg_def R23_H( SOC, SOE, Op_RegI, 99, R23->as_VMReg()->next());
  reg_def R24  ( SOC, SOE, Op_RegI, 24, R24->as_VMReg()        );  // nv
  reg_def R24_H( SOC, SOE, Op_RegI, 99, R24->as_VMReg()->next());
  reg_def R25  ( SOC, SOE, Op_RegI, 25, R25->as_VMReg()        );  // nv
  reg_def R25_H( SOC, SOE, Op_RegI, 99, R25->as_VMReg()->next());
  reg_def R26  ( SOC, SOE, Op_RegI, 26, R26->as_VMReg()        );  // nv
  reg_def R26_H( SOC, SOE, Op_RegI, 99, R26->as_VMReg()->next());
  reg_def R27  ( SOC, SOE, Op_RegI, 27, R27->as_VMReg()        );  // nv
  reg_def R27_H( SOC, SOE, Op_RegI, 99, R27->as_VMReg()->next());
  reg_def R28  ( SOC, SOE, Op_RegI, 28, R28->as_VMReg()        );  // nv
  reg_def R28_H( SOC, SOE, Op_RegI, 99, R28->as_VMReg()->next());
  reg_def R29  ( SOC, SOE, Op_RegI, 29, R29->as_VMReg()        );  // nv
  reg_def R29_H( SOC, SOE, Op_RegI, 99, R29->as_VMReg()->next());
  reg_def R30  ( SOC, SOE, Op_RegI, 30, R30->as_VMReg()        );  // nv
  reg_def R30_H( SOC, SOE, Op_RegI, 99, R30->as_VMReg()->next());
  reg_def R31  ( SOC, SOE, Op_RegI, 31, R31->as_VMReg()        );  // nv
  reg_def R31_H( SOC, SOE, Op_RegI, 99, R31->as_VMReg()->next());


// ----------------------------
// Float/Double Registers
// ----------------------------

  // Double Registers
  // The rules of ADL require that double registers be defined in pairs.
  // Each pair must be two 32-bit values, but not necessarily a pair of
  // single float registers. In each pair, ADLC-assigned register numbers
  // must be adjacent, with the lower number even. Finally, when the
  // CPU stores such a register pair to memory, the word associated with
  // the lower ADLC-assigned number must be stored to the lower address.

  // PPC64 has 32 64-bit floating-point registers. Each can store a single
  // or double precision floating-point value.

  // types: v = volatile, nv = non-volatile, s = system
  reg_def F0   ( SOC, SOC, Op_RegF,  0, F0->as_VMReg()         );  // v   scratch
  reg_def F0_H ( SOC, SOC, Op_RegF, 99, F0->as_VMReg()->next() );
  reg_def F1   ( SOC, SOC, Op_RegF,  1, F1->as_VMReg()         );  // v   farg1 & fret
  reg_def F1_H ( SOC, SOC, Op_RegF, 99, F1->as_VMReg()->next() );
  reg_def F2   ( SOC, SOC, Op_RegF,  2, F2->as_VMReg()         );  // v   farg2
  reg_def F2_H ( SOC, SOC, Op_RegF, 99, F2->as_VMReg()->next() );
  reg_def F3   ( SOC, SOC, Op_RegF,  3, F3->as_VMReg()         );  // v   farg3
  reg_def F3_H ( SOC, SOC, Op_RegF, 99, F3->as_VMReg()->next() );
  reg_def F4   ( SOC, SOC, Op_RegF,  4, F4->as_VMReg()         );  // v   farg4
  reg_def F4_H ( SOC, SOC, Op_RegF, 99, F4->as_VMReg()->next() );
  reg_def F5   ( SOC, SOC, Op_RegF,  5, F5->as_VMReg()         );  // v   farg5
  reg_def F5_H ( SOC, SOC, Op_RegF, 99, F5->as_VMReg()->next() );
  reg_def F6   ( SOC, SOC, Op_RegF,  6, F6->as_VMReg()         );  // v   farg6
  reg_def F6_H ( SOC, SOC, Op_RegF, 99, F6->as_VMReg()->next() );
  reg_def F7   ( SOC, SOC, Op_RegF,  7, F7->as_VMReg()         );  // v   farg7
  reg_def F7_H ( SOC, SOC, Op_RegF, 99, F7->as_VMReg()->next() );
  reg_def F8   ( SOC, SOC, Op_RegF,  8, F8->as_VMReg()         );  // v   farg8
  reg_def F8_H ( SOC, SOC, Op_RegF, 99, F8->as_VMReg()->next() );
  reg_def F9   ( SOC, SOC, Op_RegF,  9, F9->as_VMReg()         );  // v   farg9
  reg_def F9_H ( SOC, SOC, Op_RegF, 99, F9->as_VMReg()->next() );
  reg_def F10  ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()        );  // v   farg10
  reg_def F10_H( SOC, SOC, Op_RegF, 99, F10->as_VMReg()->next());
  reg_def F11  ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()        );  // v   farg11
  reg_def F11_H( SOC, SOC, Op_RegF, 99, F11->as_VMReg()->next());
  reg_def F12  ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()        );  // v   farg12
  reg_def F12_H( SOC, SOC, Op_RegF, 99, F12->as_VMReg()->next());
  reg_def F13  ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()        );  // v   farg13
  reg_def F13_H( SOC, SOC, Op_RegF, 99, F13->as_VMReg()->next());
  reg_def F14  ( SOC, SOE, Op_RegF, 14, F14->as_VMReg()        );  // nv
  reg_def F14_H( SOC, SOE, Op_RegF, 99, F14->as_VMReg()->next());
  reg_def F15  ( SOC, SOE, Op_RegF, 15, F15->as_VMReg()        );  // nv
  reg_def F15_H( SOC, SOE, Op_RegF, 99, F15->as_VMReg()->next());
  reg_def F16  ( SOC, SOE, Op_RegF, 16, F16->as_VMReg()        );  // nv
  reg_def F16_H( SOC, SOE, Op_RegF, 99, F16->as_VMReg()->next());
  reg_def F17  ( SOC, SOE, Op_RegF, 17, F17->as_VMReg()        );  // nv
  reg_def F17_H( SOC, SOE, Op_RegF, 99, F17->as_VMReg()->next());
  reg_def F18  ( SOC, SOE, Op_RegF, 18, F18->as_VMReg()        );  // nv
  reg_def F18_H( SOC, SOE, Op_RegF, 99, F18->as_VMReg()->next());
  reg_def F19  ( SOC, SOE, Op_RegF, 19, F19->as_VMReg()        );  // nv
  reg_def F19_H( SOC, SOE, Op_RegF, 99, F19->as_VMReg()->next());
  reg_def F20  ( SOC, SOE, Op_RegF, 20, F20->as_VMReg()        );  // nv
  reg_def F20_H( SOC, SOE, Op_RegF, 99, F20->as_VMReg()->next());
  reg_def F21  ( SOC, SOE, Op_RegF, 21, F21->as_VMReg()        );  // nv
  reg_def F21_H( SOC, SOE, Op_RegF, 99, F21->as_VMReg()->next());
  reg_def F22  ( SOC, SOE, Op_RegF, 22, F22->as_VMReg()        );  // nv
  reg_def F22_H( SOC, SOE, Op_RegF, 99, F22->as_VMReg()->next());
  reg_def F23  ( SOC, SOE, Op_RegF, 23, F23->as_VMReg()        );  // nv
  reg_def F23_H( SOC, SOE, Op_RegF, 99, F23->as_VMReg()->next());
  reg_def F24  ( SOC, SOE, Op_RegF, 24, F24->as_VMReg()        );  // nv
  reg_def F24_H( SOC, SOE, Op_RegF, 99, F24->as_VMReg()->next());
  reg_def F25  ( SOC, SOE, Op_RegF, 25, F25->as_VMReg()        );  // nv
  reg_def F25_H( SOC, SOE, Op_RegF, 99, F25->as_VMReg()->next());
  reg_def F26  ( SOC, SOE, Op_RegF, 26, F26->as_VMReg()        );  // nv
  reg_def F26_H( SOC, SOE, Op_RegF, 99, F26->as_VMReg()->next());
  reg_def F27  ( SOC, SOE, Op_RegF, 27, F27->as_VMReg()        );  // nv
  reg_def F27_H( SOC, SOE, Op_RegF, 99, F27->as_VMReg()->next());
  reg_def F28  ( SOC, SOE, Op_RegF, 28, F28->as_VMReg()        );  // nv
  reg_def F28_H( SOC, SOE, Op_RegF, 99, F28->as_VMReg()->next());
  reg_def F29  ( SOC, SOE, Op_RegF, 29, F29->as_VMReg()        );  // nv
  reg_def F29_H( SOC, SOE, Op_RegF, 99, F29->as_VMReg()->next());
  reg_def F30  ( SOC, SOE, Op_RegF, 30, F30->as_VMReg()        );  // nv
  reg_def F30_H( SOC, SOE, Op_RegF, 99, F30->as_VMReg()->next());
  reg_def F31  ( SOC, SOE, Op_RegF, 31, F31->as_VMReg()        );  // nv
  reg_def F31_H( SOC, SOE, Op_RegF, 99, F31->as_VMReg()->next());

// ----------------------------
// Special Registers
// ----------------------------

// Condition Codes Flag Registers

  // PPC64 has 8 condition code "registers" which are all contained
  // in the CR register.

  // types: v = volatile, nv = non-volatile, s = system
  // NOTE(review): all eight fields are declared SOC/SOC below, including the
  // ones marked "nv"; presumably the marker records the ABI classification
  // only -- confirm.
  reg_def CCR0(SOC, SOC, Op_RegFlags, 0, CCR0->as_VMReg());  // v
  reg_def CCR1(SOC, SOC, Op_RegFlags, 1, CCR1->as_VMReg());  // v
  reg_def CCR2(SOC, SOC, Op_RegFlags, 2, CCR2->as_VMReg());  // nv
  reg_def CCR3(SOC, SOC, Op_RegFlags, 3, CCR3->as_VMReg());  // nv
  reg_def CCR4(SOC, SOC, Op_RegFlags, 4, CCR4->as_VMReg());  // nv
  reg_def CCR5(SOC, SOC, Op_RegFlags, 5, CCR5->as_VMReg());  // v
  reg_def CCR6(SOC, SOC, Op_RegFlags, 6, CCR6->as_VMReg());  // v
  reg_def CCR7(SOC, SOC, Op_RegFlags, 7, CCR7->as_VMReg());  // v

  // Special registers of PPC64

  reg_def SR_XER(    SOC, SOC, Op_RegP, 0, SR_XER->as_VMReg());     // v
  reg_def SR_LR(     SOC, SOC, Op_RegP, 1, SR_LR->as_VMReg());      // v
  reg_def SR_CTR(    SOC, SOC, Op_RegP, 2, SR_CTR->as_VMReg());     // v
  reg_def SR_VRSAVE( SOC, SOC, Op_RegP, 3, SR_VRSAVE->as_VMReg());  // v
  reg_def SR_SPEFSCR(SOC, SOC, Op_RegP, 4, SR_SPEFSCR->as_VMReg()); // v
  reg_def SR_PPR(    SOC, SOC, Op_RegP, 5, SR_PPR->as_VMReg());     // v


// ----------------------------
// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry. Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

// It's worth about 1% on SPEC geomean to get this right.

// Chunk0, chunk1, and chunk2 form the MachRegisterNumbers enumeration
// in adGlobals_ppc64.hpp which defines the <register>_num values, e.g.
// R3_num. Therefore, R3_num may not be (and in reality is not)
// the same as R3->encoding()! Furthermore, we cannot make any
// assumptions on ordering, e.g. R3_num may be less than R2_num.
// Additionally, the function
//   static enum RC rc_class(OptoReg::Name reg )
// maps a given <register>_num value to its chunk type (except for flags)
// and its current implementation relies on chunk0 and chunk1 having a
// size of 64 each.

// If you change this allocation class, please have a look at the
// default values for the parameters RoundRobinIntegerRegIntervalStart
// and RoundRobinFloatRegIntervalStart

alloc_class chunk0 (
  // Chunk0 contains *all* 64 integer register halves.

  // "non-volatile" registers
  // (R16 is not listed here; it appears with the special registers below.)
  R14, R14_H,
  R15, R15_H,
  R17, R17_H,
  R18, R18_H,
  R19, R19_H,
  R20, R20_H,
  R21, R21_H,
  R22, R22_H,
  R23, R23_H,
  R24, R24_H,
  R25, R25_H,
  R26, R26_H,
  R27, R27_H,
  R28, R28_H,
  R29, R29_H,
  R30, R30_H,
  R31, R31_H,

  // scratch/special registers
  R11, R11_H,
  R12, R12_H,

  // argument registers
  R10, R10_H,
  R9,  R9_H,
  R8,  R8_H,
  R7,  R7_H,
  R6,  R6_H,
  R5,  R5_H,
  R4,  R4_H,
  R3,  R3_H,

  // special registers, not available for allocation
  R16, R16_H,     // R16_thread
  R13, R13_H,     // system thread id
  R2,  R2_H,      // may be used for TOC
  R1,  R1_H,      // SP
  R0,  R0_H       // R0 (scratch)
);

// If you change this allocation class, please have a look at the
// default values for the parameters RoundRobinIntegerRegIntervalStart
// and RoundRobinFloatRegIntervalStart

alloc_class chunk1 (
  // Chunk1 contains *all* 64 floating-point register halves.

  // scratch register
  F0,  F0_H,

  // argument registers
  F13, F13_H,
  F12, F12_H,
  F11, F11_H,
  F10, F10_H,
  F9,  F9_H,
  F8,  F8_H,
  F7,  F7_H,
  F6,  F6_H,
  F5,  F5_H,
  F4,  F4_H,
  F3,  F3_H,
  F2,  F2_H,
  F1,  F1_H,

  // non-volatile registers
  F14, F14_H,
  F15, F15_H,
  F16, F16_H,
  F17, F17_H,
  F18, F18_H,
  F19, F19_H,
  F20, F20_H,
  F21, F21_H,
  F22, F22_H,
  F23, F23_H,
  F24, F24_H,
  F25, F25_H,
  F26, F26_H,
  F27, F27_H,
  F28, F28_H,
  F29, F29_H,
  F30, F30_H,
  F31, F31_H
);

alloc_class chunk2 (
  // Chunk2 contains *all* 8 condition code registers.

  CCR0,
  CCR1,
  CCR2,
  CCR3,
  CCR4,
  CCR5,
  CCR6,
  CCR7
);

alloc_class chunk3 (
  // special registers
  // These registers are not allocated, but used for nodes generated by postalloc expand.
  SR_XER,
  SR_LR,
  SR_CTR,
  SR_VRSAVE,
  SR_SPEFSCR,
  SR_PPR
);

//-------Architecture Description Register Classes-----------------------

// Several register classes are automatically defined based upon
// information in this architecture description.

// 1) reg_class inline_cache_reg           ( as defined in frame section )
// 2) reg_class compiler_method_oop_reg    ( as defined in frame section )
// 3) reg_class interpreter_method_oop_reg ( as defined in frame section )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// ----------------------------
// 32 Bit Register Classes
// ----------------------------

// We specify registers twice, once as read/write, and once read-only.
// We use the read-only registers for source operands. With this, we
// can include preset read only registers in this class, as a hard-coded
// '0'-register. (We used to simulate this on ppc.)

// 32 bit registers that can be read and written i.e. these registers
// can be dest (or src) of normal instructions.
reg_class bits32_reg_rw(
/*R0*/              // R0
/*R1*/              // SP
  R2,               // TOC
  R3,
  R4,
  R5,
  R6,
  R7,
  R8,
  R9,
  R10,
  R11,
  R12,
/*R13*/             // system thread id
  R14,
  R15,
/*R16*/             // R16_thread
  R17,
  R18,
  R19,
  R20,
  R21,
  R22,
  R23,
  R24,
  R25,
  R26,
  R27,
  R28,
/*R29*/             // global TOC
/*R30*/             // Narrow Oop Base
  R31
);

// 32 bit registers that can only be read i.e. these registers can
// only be src of all instructions.
// The member list is the same as in bits32_reg_rw above.
reg_class bits32_reg_ro(
/*R0*/              // R0
/*R1*/              // SP
  R2,               // TOC (separating comma added; every sibling list has one)
  R3,
  R4,
  R5,
  R6,
  R7,
  R8,
  R9,
  R10,
  R11,
  R12,
/*R13*/             // system thread id
  R14,
  R15,
/*R16*/             // R16_thread
  R17,
  R18,
  R19,
  R20,
  R21,
  R22,
  R23,
  R24,
  R25,
  R26,
  R27,
  R28,
/*R29*/
/*R30*/             // Narrow Oop Base
  R31
);

// Complement-required-in-pipeline operands for narrow oops.
// This class leaves out R23..R28, which form bits32_reg_ro_complement.
reg_class bits32_reg_ro_not_complement (
/*R0*/              // R0
  R1,               // SP
  R2,               // TOC
  R3,
  R4,
  R5,
  R6,
  R7,
  R8,
  R9,
  R10,
  R11,
  R12,
/*R13,*/            // system thread id
  R14,
  R15,
  R16,              // R16_thread
  R17,
  R18,
  R19,
  R20,
  R21,
  R22,
/*R23,
  R24,
  R25,
  R26,
  R27,
  R28,*/
/*R29,*/            // TODO: let allocator handle TOC!!
/*R30,*/
  R31
);

// Complement-required-in-pipeline operands for narrow oops.
// See 64-bit declaration.
reg_class bits32_reg_ro_complement (
  R23,
  R24,
  R25,
  R26,
  R27,
  R28
);

// Single-register classes: each one restricts an operand to exactly the
// one 32-bit register named in its list.
reg_class rscratch1_bits32_reg(R11);
reg_class rscratch2_bits32_reg(R12);
reg_class rarg1_bits32_reg(R3);
reg_class rarg2_bits32_reg(R4);
reg_class rarg3_bits32_reg(R5);
reg_class rarg4_bits32_reg(R6);

// ----------------------------
// 64 Bit Register Classes
// ----------------------------
// 64-bit build means 64-bit pointers means hi/lo pairs

// 64-bit counterparts of the single-register classes above; each lists the
// register together with its _H half.
reg_class rscratch1_bits64_reg(R11_H, R11);
reg_class rscratch2_bits64_reg(R12_H, R12);
reg_class rarg1_bits64_reg(R3_H, R3);
reg_class rarg2_bits64_reg(R4_H, R4);
reg_class rarg3_bits64_reg(R5_H, R5);
reg_class rarg4_bits64_reg(R6_H, R6);
// Thread register, 'written' by tlsLoadP, see there.
reg_class thread_bits64_reg(R16_H, R16);

reg_class r19_bits64_reg(R19_H, R19);

// 64 bit registers that can be read and written i.e. these registers
// can be dest (or src) of normal instructions.
reg_class bits64_reg_rw(
/*R0_H,  R0*/     // R0
/*R1_H,  R1*/     // SP
  R2_H,  R2,      // TOC
  R3_H,  R3,
  R4_H,  R4,
  R5_H,  R5,
  R6_H,  R6,
  R7_H,  R7,
  R8_H,  R8,
  R9_H,  R9,
  R10_H, R10,
  R11_H, R11,
  R12_H, R12,
/*R13_H, R13*/   // system thread id
  R14_H, R14,
  R15_H, R15,
/*R16_H, R16*/   // R16_thread
  R17_H, R17,
  R18_H, R18,
  R19_H, R19,
  R20_H, R20,
  R21_H, R21,
  R22_H, R22,
  R23_H, R23,
  R24_H, R24,
  R25_H, R25,
  R26_H, R26,
  R27_H, R27,
  R28_H, R28,
/*R29_H, R29*/
/*R30_H, R30*/
  R31_H, R31
);

// 64 bit registers used excluding r2, r11 and r12
// Used to hold the TOC to avoid collisions with expanded LeafCall which uses
// r2, r11 and r12 internally.
reg_class bits64_reg_leaf_call(
/*R0_H,  R0*/     // R0
/*R1_H,  R1*/     // SP
/*R2_H,  R2*/     // TOC
  R3_H,  R3,
  R4_H,  R4,
  R5_H,  R5,
  R6_H,  R6,
  R7_H,  R7,
  R8_H,  R8,
  R9_H,  R9,
  R10_H, R10,
/*R11_H, R11*/
/*R12_H, R12*/
/*R13_H, R13*/   // system thread id
  R14_H, R14,
  R15_H, R15,
/*R16_H, R16*/   // R16_thread
  R17_H, R17,
  R18_H, R18,
  R19_H, R19,
  R20_H, R20,
  R21_H, R21,
  R22_H, R22,
  R23_H, R23,
  R24_H, R24,
  R25_H, R25,
  R26_H, R26,
  R27_H, R27,
  R28_H, R28,
/*R29_H, R29*/
/*R30_H, R30*/
  R31_H, R31
);

// 64 bit registers used excluding r2, r11, r12 and r19.
// Used to hold the TOC to avoid collisions with expanded DynamicCall
// which uses r19 as inline cache internally and expanded LeafCall which uses
// r2, r11 and r12 internally.
reg_class bits64_constant_table_base(
/*R0_H,  R0*/     // R0
/*R1_H,  R1*/     // SP
/*R2_H,  R2*/     // TOC
  R3_H,  R3,
  R4_H,  R4,
  R5_H,  R5,
  R6_H,  R6,
  R7_H,  R7,
  R8_H,  R8,
  R9_H,  R9,
  R10_H, R10,
/*R11_H, R11*/
/*R12_H, R12*/
/*R13_H, R13*/   // system thread id
  R14_H, R14,
  R15_H, R15,
/*R16_H, R16*/   // R16_thread
  R17_H, R17,
  R18_H, R18,
/*R19_H, R19*/
  R20_H, R20,
  R21_H, R21,
  R22_H, R22,
  R23_H, R23,
  R24_H, R24,
  R25_H, R25,
  R26_H, R26,
  R27_H, R27,
  R28_H, R28,
/*R29_H, R29*/
/*R30_H, R30*/
  R31_H, R31
);

// 64 bit registers that can only be read i.e. these registers can
// only be src of all instructions.
reg_class bits64_reg_ro(
/*R0_H,  R0*/     // R0
  R1_H,  R1,
  R2_H,  R2,      // TOC
  R3_H,  R3,
  R4_H,  R4,
  R5_H,  R5,
  R6_H,  R6,
  R7_H,  R7,
  R8_H,  R8,
  R9_H,  R9,
  R10_H, R10,
  R11_H, R11,
  R12_H, R12,
/*R13_H, R13*/   // system thread id
  R14_H, R14,
  R15_H, R15,
  R16_H, R16,    // R16_thread
  R17_H, R17,
  R18_H, R18,
  R19_H, R19,
  R20_H, R20,
  R21_H, R21,
  R22_H, R22,
  R23_H, R23,
  R24_H, R24,
  R25_H, R25,
  R26_H, R26,
  R27_H, R27,
  R28_H, R28,
/*R29_H, R29*/   // TODO: let allocator handle TOC!!
/*R30_H, R30,*/
  R31_H, R31
);

// Complement-required-in-pipeline operands.
// This class leaves out R23..R28, which form bits64_reg_ro_complement.
reg_class bits64_reg_ro_not_complement (
/*R0_H,  R0*/     // R0
  R1_H,  R1,      // SP
  R2_H,  R2,      // TOC
  R3_H,  R3,
  R4_H,  R4,
  R5_H,  R5,
  R6_H,  R6,
  R7_H,  R7,
  R8_H,  R8,
  R9_H,  R9,
  R10_H, R10,
  R11_H, R11,
  R12_H, R12,
/*R13_H, R13*/   // system thread id
  R14_H, R14,
  R15_H, R15,
  R16_H, R16,    // R16_thread
  R17_H, R17,
  R18_H, R18,
  R19_H, R19,
  R20_H, R20,
  R21_H, R21,
  R22_H, R22,
/*R23_H, R23,
  R24_H, R24,
  R25_H, R25,
  R26_H, R26,
  R27_H, R27,
  R28_H, R28,*/
/*R29_H, R29*/   // TODO: let allocator handle TOC!!
/*R30_H, R30,*/
  R31_H, R31
);

// Complement-required-in-pipeline operands.
// This register mask is used for the trap instructions that implement
// the null checks on AIX. The trap instruction first computes the
// complement of the value it shall trap on. Because of this, the
// instruction can not be scheduled in the same cycle as another
// instruction reading the normal value of the same register. So we
// force the value to check into 'bits64_reg_ro_not_complement'
// and then copy it to 'bits64_reg_ro_complement' for the trap.
reg_class bits64_reg_ro_complement (
  R23_H, R23,
  R24_H, R24,
  R25_H, R25,
  R26_H, R26,
  R27_H, R27,
  R28_H, R28
);


// ----------------------------
// Special Class for Condition Code Flags Register

reg_class int_flags(
/*CCR0*/             // scratch
/*CCR1*/             // scratch
/*CCR2*/             // nv!
/*CCR3*/             // nv!
/*CCR4*/             // nv!
  CCR5,
  CCR6,
  CCR7
);

// Single-register classes for specific condition fields and for CTR.
reg_class int_flags_CR0(CCR0);
reg_class int_flags_CR1(CCR1);
reg_class int_flags_CR6(CCR6);
reg_class ctr_reg(SR_CTR);

// ----------------------------
// Float Register Classes
// ----------------------------

reg_class flt_reg(
/*F0*/              // scratch
  F1,
  F2,
  F3,
  F4,
  F5,
  F6,
  F7,
  F8,
  F9,
  F10,
  F11,
  F12,
  F13,
  F14,              // nv!
  F15,              // nv!
  F16,              // nv!
  F17,              // nv!
  F18,              // nv!
  F19,              // nv!
  F20,              // nv!
  F21,              // nv!
  F22,              // nv!
  F23,              // nv!
  F24,              // nv!
  F25,              // nv!
  F26,              // nv!
  F27,              // nv!
  F28,              // nv!
  F29,              // nv!
  F30,              // nv!
  F31               // nv!
);

// Double precision float registers have virtual `high halves' that
// are needed by the allocator.
reg_class dbl_reg(
/*F0,  F0_H*/     // scratch
  F1,  F1_H,
  F2,  F2_H,
  F3,  F3_H,
  F4,  F4_H,
  F5,  F5_H,
  F6,  F6_H,
  F7,  F7_H,
  F8,  F8_H,
  F9,  F9_H,
  F10, F10_H,
  F11, F11_H,
  F12, F12_H,
  F13, F13_H,
  F14, F14_H,    // nv!
  F15, F15_H,    // nv!
  F16, F16_H,    // nv!
  F17, F17_H,    // nv!
  F18, F18_H,    // nv!
  F19, F19_H,    // nv!
  F20, F20_H,    // nv!
  F21, F21_H,    // nv!
  F22, F22_H,    // nv!
  F23, F23_H,    // nv!
  F24, F24_H,    // nv!
  F25, F25_H,    // nv!
  F26, F26_H,    // nv!
  F27, F27_H,    // nv!
  F28, F28_H,    // nv!
  F29, F29_H,    // nv!
  F30, F30_H,    // nv!
  F31, F31_H     // nv!
);

 %}

//----------DEFINITION BLOCK---------------------------------------------------
// Define name --> value mappings to inform the ADLC of an integer valued name
// Current support includes integer values in the range [0, 0x7FFFFFFF]
// Format:
//        int_def  <name>         ( <int_value>, <expression>);
// Generated Code in ad_<arch>.hpp
//        #define  <name>   (<expression>)
//        // value == <int_value>
// Generated code in ad_<arch>.cpp adlc_verification()
//        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
//
definitions %{
  // The default cost (of an ALU instruction).
  int_def DEFAULT_COST_LOW        (     30,      30);
  int_def DEFAULT_COST            (    100,     100);
  int_def HUGE_COST               (1000000, 1000000);

  // Memory refs
  int_def MEMORY_REF_COST_LOW     (    200, DEFAULT_COST * 2);
  int_def MEMORY_REF_COST         (    300, DEFAULT_COST * 3);

  // Branches are even more expensive.
+ int_def BRANCH_COST ( 900, DEFAULT_COST * 9); + int_def CALL_COST ( 1300, DEFAULT_COST * 13); +%} + + +//----------SOURCE BLOCK------------------------------------------------------- +// This is a block of C++ code which provides values, functions, and +// definitions necessary in the rest of the architecture description. +source_hpp %{ + // Returns true if Node n is followed by a MemBar node that + // will do an acquire. If so, this node must not do the acquire + // operation. + bool followed_by_acquire(const Node *n); +%} + +source %{ + +// Optimize load-acquire. +// +// Check if acquire is unnecessary due to following operation that does +// acquire anyways. +// Walk the pattern: +// +// n: Load.acq +// | +// MemBarAcquire +// | | +// Proj(ctrl) Proj(mem) +// | | +// MemBarRelease/Volatile +// +bool followed_by_acquire(const Node *load) { + assert(load->is_Load(), "So far implemented only for loads."); + + // Find MemBarAcquire. + const Node *mba = NULL; + for (DUIterator_Fast imax, i = load->fast_outs(imax); i < imax; i++) { + const Node *out = load->fast_out(i); + if (out->Opcode() == Op_MemBarAcquire) { + if (out->in(0) == load) continue; // Skip control edge, membar should be found via precedence edge. + mba = out; + break; + } + } + if (!mba) return false; + + // Find following MemBar node. + // + // The following node must be reachable by control AND memory + // edge to assure no other operations are in between the two nodes. + // + // So first get the Proj node, mem_proj, to use it to iterate forward. 
+ Node *mem_proj = NULL; + for (DUIterator_Fast imax, i = mba->fast_outs(imax); i < imax; i++) { + mem_proj = mba->fast_out(i); // Throw out-of-bounds if proj not found + assert(mem_proj->is_Proj(), "only projections here"); + ProjNode *proj = mem_proj->as_Proj(); + if (proj->_con == TypeFunc::Memory && + !Compile::current()->node_arena()->contains(mem_proj)) // Unmatched old-space only + break; + } + assert(mem_proj->as_Proj()->_con == TypeFunc::Memory, "Graph broken"); + + // Search MemBar behind Proj. If there are other memory operations + // behind the Proj we lost. + for (DUIterator_Fast jmax, j = mem_proj->fast_outs(jmax); j < jmax; j++) { + Node *x = mem_proj->fast_out(j); + // Proj might have an edge to a store or load node which precedes the membar. + if (x->is_Mem()) return false; + + // On PPC64 release and volatile are implemented by an instruction + // that also has acquire semantics. I.e. there is no need for an + // acquire before these. + int xop = x->Opcode(); + if (xop == Op_MemBarRelease || xop == Op_MemBarVolatile) { + // Make sure we're not missing Call/Phi/MergeMem by checking + // control edges. The control edge must directly lead back + // to the MemBarAcquire + Node *ctrl_proj = x->in(0); + if (ctrl_proj->is_Proj() && ctrl_proj->in(0) == mba) { + return true; + } + } + } + + return false; +} + +#define __ _masm. + +// Tertiary op of a LoadP or StoreP encoding. +#define REGP_OP true + +// **************************************************************************** + +// REQUIRED FUNCTIONALITY + +// !!!!! Special hack to get all type of calls to specify the byte offset +// from the start of the call to the point where the return address +// will point. + +// PPC port: Removed use of lazy constant construct. + +int MachCallStaticJavaNode::ret_addr_offset() { + // It's only a single branch-and-link instruction. 
+ return 4; +} + +int MachCallDynamicJavaNode::ret_addr_offset() { + // Offset is 4 with postalloc expanded calls (bl is one instruction). We use + // postalloc expanded calls if we use inline caches and do not update method data. + if (UseInlineCaches) + return 4; + + int vtable_index = this->_vtable_index; + if (vtable_index < 0) { + // Must be invalid_vtable_index, not nonvirtual_vtable_index. + assert(vtable_index == Method::invalid_vtable_index, "correct sentinel value"); + return 12; + } else { + assert(!UseInlineCaches, "expect vtable calls only if not using ICs"); + return 24; + } +} + +int MachCallRuntimeNode::ret_addr_offset() { + return 40; +} + +//============================================================================= + +// condition code conversions + +static int cc_to_boint(int cc) { + return Assembler::bcondCRbiIs0 | (cc & 8); +} + +static int cc_to_inverse_boint(int cc) { + return Assembler::bcondCRbiIs0 | (8-(cc & 8)); +} + +static int cc_to_biint(int cc, int flags_reg) { + return (flags_reg << 2) | (cc & 3); +} + +//============================================================================= + +// Compute padding required for nodes which need alignment. The padding +// is the number of bytes (not instructions) which will be inserted before +// the instruction. The padding must match the size of a NOP instruction. 
+ +int string_indexOf_imm1_charNode::compute_padding(int current_offset) const { + return (3*4-current_offset)&31; +} + +int string_indexOf_imm1Node::compute_padding(int current_offset) const { + return (2*4-current_offset)&31; +} + +int string_indexOf_immNode::compute_padding(int current_offset) const { + return (3*4-current_offset)&31; +} + +int string_indexOfNode::compute_padding(int current_offset) const { + return (1*4-current_offset)&31; +} + +int string_compareNode::compute_padding(int current_offset) const { + return (4*4-current_offset)&31; +} + +int string_equals_immNode::compute_padding(int current_offset) const { + if (opnd_array(3)->constant() < 16) return 0; // Don't insert nops for short version (loop completely unrolled). + return (2*4-current_offset)&31; +} + +int string_equalsNode::compute_padding(int current_offset) const { + return (7*4-current_offset)&31; +} + +int inlineCallClearArrayNode::compute_padding(int current_offset) const { + return (2*4-current_offset)&31; +} + +//============================================================================= + +// Indicate if the safepoint node needs the polling page as an input. +bool SafePointNode::needs_polling_address_input() { + // The address is loaded from thread by a seperate node. + return true; +} + +//============================================================================= + +// Emit an interrupt that is caught by the debugger (for debugging compiler). 
+void emit_break(CodeBuffer &cbuf) { + MacroAssembler _masm(&cbuf); + __ illtrap(); +} + +#ifndef PRODUCT +void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + st->print("BREAKPOINT"); +} +#endif + +void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + emit_break(cbuf); +} + +uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + +//============================================================================= + +void emit_nop(CodeBuffer &cbuf) { + MacroAssembler _masm(&cbuf); + __ nop(); +} + +static inline void emit_long(CodeBuffer &cbuf, int value) { + *((int*)(cbuf.insts_end())) = value; + cbuf.set_insts_end(cbuf.insts_end() + BytesPerInstWord); +} + +//============================================================================= + +// Emit a trampoline stub for a call to a target which is too far away. +// +// code sequences: +// +// call-site: +// branch-and-link to <destination> or <trampoline stub> +// +// Related trampoline stub for this call-site in the stub section: +// load the call target from the constant pool +// branch via CTR (LR/link still points to the call-site above) + +const uint trampoline_stub_size = 6 * BytesPerInstWord; + +void emit_trampoline_stub(MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset) { + // Start the stub. + address stub = __ start_a_stub(Compile::MAX_stubs_size/2); + if (stub == NULL) { + Compile::current()->env()->record_out_of_memory_failure(); + return; + } + + // For java_to_interp stubs we use R11_scratch1 as scratch register + // and in call trampoline stubs we use R12_scratch2. This way we + // can distinguish them (see is_NativeCallTrampolineStub_at()). + Register reg_scratch = R12_scratch2; + + // Create a trampoline stub relocation which relates this trampoline stub + // with the call instruction at insts_call_instruction_offset in the + // instructions code-section. 
+ __ relocate(trampoline_stub_Relocation::spec(__ code()->insts()->start() + insts_call_instruction_offset)); + const int stub_start_offset = __ offset(); + + // Now, create the trampoline stub's code: + // - load the TOC + // - load the call target from the constant pool + // - call + __ calculate_address_from_global_toc(reg_scratch, __ method_toc()); + __ ld_largeoffset_unchecked(reg_scratch, destination_toc_offset, reg_scratch, false); + __ mtctr(reg_scratch); + __ bctr(); + + const address stub_start_addr = __ addr_at(stub_start_offset); + + // FIXME: Assert that the trampoline stub can be identified and patched. + + // Assert that the encoded destination_toc_offset can be identified and that it is correct. + assert(destination_toc_offset == NativeCallTrampolineStub_at(stub_start_addr)->destination_toc_offset(), + "encoded offset into the constant pool must match"); + // Trampoline_stub_size should be good. + assert((uint)(__ offset() - stub_start_offset) <= trampoline_stub_size, "should be good size"); + assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline"); + + // End the stub. + __ end_a_stub(); +} + +// Size of trampoline stub, this doesn't need to be accurate but it must +// be larger or equal to the real size of the stub. +// Used for optimization in Compile::Shorten_branches. +uint size_call_trampoline() { + return trampoline_stub_size; +} + +// Number of relocation entries needed by trampoline stub. +// Used for optimization in Compile::Shorten_branches. +uint reloc_call_trampoline() { + return 5; +} + +//============================================================================= + +// Emit an inline branch-and-link call and a related trampoline stub. 
+// +// code sequences: +// +// call-site: +// branch-and-link to <destination> or <trampoline stub> +// +// Related trampoline stub for this call-site in the stub section: +// load the call target from the constant pool +// branch via CTR (LR/link still points to the call-site above) +// + +typedef struct { + int insts_call_instruction_offset; + int ret_addr_offset; +} EmitCallOffsets; + +// Emit a branch-and-link instruction that branches to a trampoline. +// - Remember the offset of the branch-and-link instruction. +// - Add a relocation at the branch-and-link instruction. +// - Emit a branch-and-link. +// - Remember the return pc offset. +EmitCallOffsets emit_call_with_trampoline_stub(MacroAssembler &_masm, address entry_point, relocInfo::relocType rtype) { + EmitCallOffsets offsets = { -1, -1 }; + const int start_offset = __ offset(); + offsets.insts_call_instruction_offset = __ offset(); + + // No entry point given, use the current pc. + if (entry_point == NULL) entry_point = __ pc(); + + if (!Compile::current()->in_scratch_emit_size()) { + // Put the entry point as a constant into the constant pool. + const address entry_point_toc_addr = __ address_constant(entry_point, RelocationHolder::none); + const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr); + + // Emit the trampoline stub which will be related to the branch-and-link below. + emit_trampoline_stub(_masm, entry_point_toc_offset, offsets.insts_call_instruction_offset); + __ relocate(rtype); + } + + // Note: At this point we do not have the address of the trampoline + // stub, and the entry point might be too far away for bl, so __ pc() + // serves as dummy and the bl will be patched later. + __ bl((address) __ pc()); + + offsets.ret_addr_offset = __ offset() - start_offset; + + return offsets; +} + +//============================================================================= + +// Factory for creating loadConL* nodes for large/small constant pool. 
+ +static inline jlong replicate_immF(float con) { + // Replicate float con 2 times and pack into vector. + int val = *((int*)&con); + jlong lval = val; + lval = (lval << 32) | (lval & 0xFFFFFFFFl); + return lval; +} + +//============================================================================= + +const RegMask& MachConstantBaseNode::_out_RegMask = BITS64_CONSTANT_TABLE_BASE_mask(); +int Compile::ConstantTable::calculate_table_base_offset() const { + return 0; // absolute addressing, no offset +} + +bool MachConstantBaseNode::requires_postalloc_expand() const { return true; } +void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) { + Compile *C = ra_->C; + + iRegPdstOper *op_dst = new (C) iRegPdstOper(); + MachNode *m1 = new (C) loadToc_hiNode(); + MachNode *m2 = new (C) loadToc_loNode(); + + m1->add_req(NULL); + m2->add_req(NULL, m1); + m1->_opnds[0] = op_dst; + m2->_opnds[0] = op_dst; + m2->_opnds[1] = op_dst; + ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); + ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); + nodes->push(m1); + nodes->push(m2); +} + +void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { + // Is postalloc expanded. 
+ ShouldNotReachHere(); +} + +uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { + return 0; +} + +#ifndef PRODUCT +void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { + st->print("-- \t// MachConstantBaseNode (empty encoding)"); +} +#endif + +//============================================================================= + +#ifndef PRODUCT +void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + Compile* C = ra_->C; + const long framesize = C->frame_slots() << LogBytesPerInt; + + st->print("PROLOG\n\t"); + if (C->need_stack_bang(framesize)) { + st->print("stack_overflow_check\n\t"); + } + + if (!false /* TODO: PPC port C->is_frameless_method()*/) { + st->print("save return pc\n\t"); + st->print("push frame %d\n\t", -framesize); + } +} +#endif + +// Macro used instead of the common __ to emulate the pipes of PPC. +// Instead of e.g. __ ld(...) one hase to write ___(ld) ld(...) This enables the +// micro scheduler to cope with "hand written" assembler like in the prolog. Though +// still no scheduling of this code is possible, the micro scheduler is aware of the +// code and can update its internal data. The following mechanism is used to achieve this: +// The micro scheduler calls size() of each compound node during scheduling. size() does a +// dummy emit and only during this dummy emit C->hb_scheduling() is not NULL. +#if 0 // TODO: PPC port +#define ___(op) if (UsePower6SchedulerPPC64 && C->hb_scheduling()) \ + C->hb_scheduling()->_pdScheduling->PdEmulatePipe(ppc64Opcode_##op); \ + _masm. +#define ___stop if (UsePower6SchedulerPPC64 && C->hb_scheduling()) \ + C->hb_scheduling()->_pdScheduling->PdEmulatePipe(archOpcode_none) +#define ___advance if (UsePower6SchedulerPPC64 && C->hb_scheduling()) \ + C->hb_scheduling()->_pdScheduling->advance_offset +#else +#define ___(op) if (UsePower6SchedulerPPC64) \ + Unimplemented(); \ + _masm. 
+#define ___stop if (UsePower6SchedulerPPC64) \ + Unimplemented() +#define ___advance if (UsePower6SchedulerPPC64) \ + Unimplemented() +#endif + +void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Compile* C = ra_->C; + MacroAssembler _masm(&cbuf); + + const long framesize = ((long)C->frame_slots()) << LogBytesPerInt; + assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment"); + + const bool method_is_frameless = false /* TODO: PPC port C->is_frameless_method()*/; + + const Register return_pc = R20; // Must match return_addr() in frame section. + const Register callers_sp = R21; + const Register push_frame_temp = R22; + const Register toc_temp = R23; + assert_different_registers(R11, return_pc, callers_sp, push_frame_temp, toc_temp); + + if (method_is_frameless) { + // Add nop at beginning of all frameless methods to prevent any + // oop instructions from getting overwritten by make_not_entrant + // (patching attempt would fail). + ___(nop) nop(); + } else { + // Get return pc. + ___(mflr) mflr(return_pc); + } + + // Calls to C2R adapters often do not accept exceptional returns. + // We require that their callers must bang for them. But be + // careful, because some VM calls (such as call site linkage) can + // use several kilobytes of stack. But the stack safety zone should + // account for that. See bugs 4446381, 4468289, 4497237. + if (C->need_stack_bang(framesize) && UseStackBanging) { + // Unfortunately we cannot use the function provided in + // assembler.cpp as we have to emulate the pipes. So I had to + // insert the code of generate_stack_overflow_check(), see + // assembler.cpp for some illuminative comments. + const int page_size = os::vm_page_size(); + int bang_end = StackShadowPages*page_size; + + // This is how far the previous frame's stack banging extended. 
+ const int bang_end_safe = bang_end; + + if (framesize > page_size) { + bang_end += framesize; + } + + int bang_offset = bang_end_safe; + + while (bang_offset <= bang_end) { + // Need at least one stack bang at end of shadow zone. + + // Again I had to copy code, this time from assembler_ppc64.cpp, + // bang_stack_with_offset - see there for comments. + + // Stack grows down, caller passes positive offset. + assert(bang_offset > 0, "must bang with positive offset"); + + long stdoffset = -bang_offset; + + if (Assembler::is_simm(stdoffset, 16)) { + // Signed 16 bit offset, a simple std is ok. + if (UseLoadInstructionsForStackBangingPPC64) { + ___(ld) ld(R0, (int)(signed short)stdoffset, R1_SP); + } else { + ___(std) std(R0, (int)(signed short)stdoffset, R1_SP); + } + } else if (Assembler::is_simm(stdoffset, 31)) { + // Use largeoffset calculations for addis & ld/std. + const int hi = MacroAssembler::largeoffset_si16_si16_hi(stdoffset); + const int lo = MacroAssembler::largeoffset_si16_si16_lo(stdoffset); + + Register tmp = R11; + ___(addis) addis(tmp, R1_SP, hi); + if (UseLoadInstructionsForStackBangingPPC64) { + ___(ld) ld(R0, lo, tmp); + } else { + ___(std) std(R0, lo, tmp); + } + } else { + ShouldNotReachHere(); + } + + bang_offset += page_size; + } + // R11 trashed + } // C->need_stack_bang(framesize) && UseStackBanging + + unsigned int bytes = (unsigned int)framesize; + long offset = Assembler::align_addr(bytes, frame::alignment_in_bytes); + ciMethod *currMethod = C -> method(); + + // Optimized version for most common case. + if (UsePower6SchedulerPPC64 && + !method_is_frameless && Assembler::is_simm((int)(-(_abi(lr) + offset)), 16) && + !(false /* ConstantsALot TODO: PPC port*/)) { + ___(or) mr(callers_sp, R1_SP); + ___(addi) addi(R1_SP, R1_SP, -offset); + ___stop; // Emulator won't recognize dependency. + ___(std) std(return_pc, _abi(lr) + offset, R1_SP); + ___(std) std(callers_sp, 0, R1_SP); + return; + } + + if (!method_is_frameless) { + // Get callers sp. 
+ ___(or) mr(callers_sp, R1_SP); + + // Push method's frame, modifies SP. + assert(Assembler::is_uimm(framesize, 32U), "wrong type"); + // The ABI is already accounted for in 'framesize' via the + // 'out_preserve' area. + Register tmp = push_frame_temp; + // Had to insert code of push_frame((unsigned int)framesize, push_frame_temp). + if (Assembler::is_simm(-offset, 16)) { + ___(stdu) stdu(R1_SP, -offset, R1_SP); + } else { + long x = -offset; + // Had to insert load_const(tmp, -offset). + ___(addis) lis( tmp, (int)((signed short)(((x >> 32) & 0xffff0000) >> 16))); + ___(ori) ori( tmp, tmp, ((x >> 32) & 0x0000ffff)); + ___(rldicr) sldi(tmp, tmp, 32); + ___(oris) oris(tmp, tmp, (x & 0xffff0000) >> 16); + ___(ori) ori( tmp, tmp, (x & 0x0000ffff)); + + ___(stdux) stdux(R1_SP, R1_SP, tmp); + } + } +#if 0 // TODO: PPC port + // For testing large constant pools, emit a lot of constants to constant pool. + // "Randomize" const_size. + if (ConstantsALot) { + const int num_consts = const_size(); + for (int i = 0; i < num_consts; i++) { + __ long_constant(0xB0B5B00BBABE); + } + } +#endif + if (!method_is_frameless) { + // Save return pc. + ___(std) std(return_pc, _abi(lr), callers_sp); + } +} +#undef ___ +#undef ___stop + +uint MachPrologNode::size(PhaseRegAlloc *ra_) const { + // Variable size. determine dynamically. + return MachNode::size(ra_); +} + +int MachPrologNode::reloc() const { + // Return number of relocatable values contained in this instruction. + return 1; // 1 reloc entry for load_const(toc). 
+} + +//============================================================================= + +#ifndef PRODUCT +void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + Compile* C = ra_->C; + + st->print("EPILOG\n\t"); + st->print("restore return pc\n\t"); + st->print("pop frame\n\t"); + + if (do_polling() && C->is_method_compilation()) { + st->print("touch polling page\n\t"); + } +} +#endif + +void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Compile* C = ra_->C; + MacroAssembler _masm(&cbuf); + + const long framesize = ((long)C->frame_slots()) << LogBytesPerInt; + assert(framesize >= 0, "negative frame-size?"); + + const bool method_needs_polling = do_polling() && C->is_method_compilation(); + const bool method_is_frameless = false /* TODO: PPC port C->is_frameless_method()*/; + const Register return_pc = R11; + const Register polling_page = R12; + + if (!method_is_frameless) { + // Restore return pc relative to callers' sp. + __ ld(return_pc, ((int)framesize) + _abi(lr), R1_SP); + } + + if (method_needs_polling) { + if (LoadPollAddressFromThread) { + // TODO: PPC port __ ld(polling_page, in_bytes(JavaThread::poll_address_offset()), R16_thread); + Unimplemented(); + } else { + __ load_const_optimized(polling_page, (long)(address) os::get_polling_page()); // TODO: PPC port: get_standard_polling_page() + } + } + + if (!method_is_frameless) { + // Move return pc to LR. + __ mtlr(return_pc); + // Pop frame (fixed frame-size). + __ addi(R1_SP, R1_SP, (int)framesize); + } + + if (method_needs_polling) { + // We need to mark the code position where the load from the safepoint + // polling page was emitted as relocInfo::poll_return_type here. + __ relocate(relocInfo::poll_return_type); + __ load_from_polling_page(polling_page); + } +} + +uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { + // Variable size. Determine dynamically. 
+ return MachNode::size(ra_); +} + +int MachEpilogNode::reloc() const { + // Return number of relocatable values contained in this instruction. + return 1; // 1 for load_from_polling_page. +} + +const Pipeline * MachEpilogNode::pipeline() const { + return MachNode::pipeline_class(); +} + +// This method seems to be obsolete. It is declared in machnode.hpp +// and defined in all *.ad files, but it is never called. Should we +// get rid of it? +int MachEpilogNode::safepoint_offset() const { + assert(do_polling(), "no return for this epilog node"); + return 0; +} + +#if 0 // TODO: PPC port +void MachLoadPollAddrLateNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { + MacroAssembler _masm(&cbuf); + if (LoadPollAddressFromThread) { + _masm.ld(R11, in_bytes(JavaThread::poll_address_offset()), R16_thread); + } else { + _masm.nop(); + } +} + +uint MachLoadPollAddrLateNode::size(PhaseRegAlloc* ra_) const { + if (LoadPollAddressFromThread) { + return 4; + } else { + return 4; + } +} + +#ifndef PRODUCT +void MachLoadPollAddrLateNode::format(PhaseRegAlloc* ra_, outputStream* st) const { + st->print_cr(" LD R11, PollAddressOffset, R16_thread \t// LoadPollAddressFromThread"); +} +#endif + +const RegMask &MachLoadPollAddrLateNode::out_RegMask() const { + return RSCRATCH1_BITS64_REG_mask(); +} +#endif // PPC port + +// ============================================================================= + +// Figure out which register class each belongs in: rc_int, rc_float or +// rc_stack. +enum RC { rc_bad, rc_int, rc_float, rc_stack }; + +static enum RC rc_class(OptoReg::Name reg) { + // Return the register class for the given register. The given register + // reg is a <register>_num value, which is an index into the MachRegisterNumbers + // enumeration in adGlobals_ppc64.hpp. + + if (reg == OptoReg::Bad) return rc_bad; + + // We have 64 integer register halves, starting at index 0. 
+ if (reg < 64) return rc_int; + + // We have 64 floating-point register halves, starting at index 64. + if (reg < 64+64) return rc_float; + + // Between float regs & stack are the flags regs. + assert(OptoReg::is_stack(reg), "blow up if spilling flags"); + + return rc_stack; +} + +static int ld_st_helper(CodeBuffer *cbuf, const char *op_str, uint opcode, int reg, int offset, + bool do_print, Compile* C, outputStream *st) { + + assert(opcode == Assembler::LD_OPCODE || + opcode == Assembler::STD_OPCODE || + opcode == Assembler::LWZ_OPCODE || + opcode == Assembler::STW_OPCODE || + opcode == Assembler::LFD_OPCODE || + opcode == Assembler::STFD_OPCODE || + opcode == Assembler::LFS_OPCODE || + opcode == Assembler::STFS_OPCODE, + "opcode not supported"); + + if (cbuf) { + int d = + (Assembler::LD_OPCODE == opcode || Assembler::STD_OPCODE == opcode) ? + Assembler::ds(offset+0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/) + : Assembler::d1(offset+0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/); // Makes no difference in opt build. + emit_long(*cbuf, opcode | Assembler::rt(Matcher::_regEncode[reg]) | d | Assembler::ra(R1_SP)); + } +#ifndef PRODUCT + else if (do_print) { + st->print("%-7s %s, [R1_SP + #%d+%d] \t// spill copy", + op_str, + Matcher::regName[reg], + offset, 0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/); + } +#endif + return 4; // size +} + +uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const { + Compile* C = ra_->C; + + // Get registers to move. 
+ OptoReg::Name src_hi = ra_->get_reg_second(in(1)); + OptoReg::Name src_lo = ra_->get_reg_first(in(1)); + OptoReg::Name dst_hi = ra_->get_reg_second(this); + OptoReg::Name dst_lo = ra_->get_reg_first(this); + + enum RC src_hi_rc = rc_class(src_hi); + enum RC src_lo_rc = rc_class(src_lo); + enum RC dst_hi_rc = rc_class(dst_hi); + enum RC dst_lo_rc = rc_class(dst_lo); + + assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register"); + if (src_hi != OptoReg::Bad) + assert((src_lo&1)==0 && src_lo+1==src_hi && + (dst_lo&1)==0 && dst_lo+1==dst_hi, + "expected aligned-adjacent pairs"); + // Generate spill code! + int size = 0; + + if (src_lo == dst_lo && src_hi == dst_hi) + return size; // Self copy, no move. + + // -------------------------------------- + // Memory->Memory Spill. Use R0 to hold the value. + if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { + int src_offset = ra_->reg2offset(src_lo); + int dst_offset = ra_->reg2offset(dst_lo); + if (src_hi != OptoReg::Bad) { + assert(src_hi_rc==rc_stack && dst_hi_rc==rc_stack, + "expected same type of move for high parts"); + size += ld_st_helper(cbuf, "LD ", Assembler::LD_OPCODE, R0_num, src_offset, !do_size, C, st); + if (!cbuf && !do_size) st->print("\n\t"); + size += ld_st_helper(cbuf, "STD ", Assembler::STD_OPCODE, R0_num, dst_offset, !do_size, C, st); + } else { + size += ld_st_helper(cbuf, "LWZ ", Assembler::LWZ_OPCODE, R0_num, src_offset, !do_size, C, st); + if (!cbuf && !do_size) st->print("\n\t"); + size += ld_st_helper(cbuf, "STW ", Assembler::STW_OPCODE, R0_num, dst_offset, !do_size, C, st); + } + return size; + } + + // -------------------------------------- + // Check for float->int copy; requires a trip through memory. + if (src_lo_rc == rc_float && dst_lo_rc == rc_int) { + Unimplemented(); + } + + // -------------------------------------- + // Check for integer reg-reg copy. 
+ if (src_lo_rc == rc_int && dst_lo_rc == rc_int) { + Register Rsrc = as_Register(Matcher::_regEncode[src_lo]); + Register Rdst = as_Register(Matcher::_regEncode[dst_lo]); + size = (Rsrc != Rdst) ? 4 : 0; + + if (cbuf) { + MacroAssembler _masm(cbuf); + if (size) { + __ mr(Rdst, Rsrc); + } + } +#ifndef PRODUCT + else if (!do_size) { + if (size) { + st->print("%-7s %s, %s \t// spill copy", "MR", Matcher::regName[dst_lo], Matcher::regName[src_lo]); + } else { + st->print("%-7s %s, %s \t// spill copy", "MR-NOP", Matcher::regName[dst_lo], Matcher::regName[src_lo]); + } + } +#endif + return size; + } + + // Check for integer store. + if (src_lo_rc == rc_int && dst_lo_rc == rc_stack) { + int dst_offset = ra_->reg2offset(dst_lo); + if (src_hi != OptoReg::Bad) { + assert(src_hi_rc==rc_int && dst_hi_rc==rc_stack, + "expected same type of move for high parts"); + size += ld_st_helper(cbuf, "STD ", Assembler::STD_OPCODE, src_lo, dst_offset, !do_size, C, st); + } else { + size += ld_st_helper(cbuf, "STW ", Assembler::STW_OPCODE, src_lo, dst_offset, !do_size, C, st); + } + return size; + } + + // Check for integer load. + if (dst_lo_rc == rc_int && src_lo_rc == rc_stack) { + int src_offset = ra_->reg2offset(src_lo); + if (src_hi != OptoReg::Bad) { + assert(dst_hi_rc==rc_int && src_hi_rc==rc_stack, + "expected same type of move for high parts"); + size += ld_st_helper(cbuf, "LD ", Assembler::LD_OPCODE, dst_lo, src_offset, !do_size, C, st); + } else { + size += ld_st_helper(cbuf, "LWZ ", Assembler::LWZ_OPCODE, dst_lo, src_offset, !do_size, C, st); + } + return size; + } + + // Check for float reg-reg copy. 
+ if (src_lo_rc == rc_float && dst_lo_rc == rc_float) { + if (cbuf) { + MacroAssembler _masm(cbuf); + FloatRegister Rsrc = as_FloatRegister(Matcher::_regEncode[src_lo]); + FloatRegister Rdst = as_FloatRegister(Matcher::_regEncode[dst_lo]); + __ fmr(Rdst, Rsrc); + } +#ifndef PRODUCT + else if (!do_size) { + st->print("%-7s %s, %s \t// spill copy", "FMR", Matcher::regName[dst_lo], Matcher::regName[src_lo]); + } +#endif + return 4; + } + + // Check for float store. + if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) { + int dst_offset = ra_->reg2offset(dst_lo); + if (src_hi != OptoReg::Bad) { + assert(src_hi_rc==rc_float && dst_hi_rc==rc_stack, + "expected same type of move for high parts"); + size += ld_st_helper(cbuf, "STFD", Assembler::STFD_OPCODE, src_lo, dst_offset, !do_size, C, st); + } else { + size += ld_st_helper(cbuf, "STFS", Assembler::STFS_OPCODE, src_lo, dst_offset, !do_size, C, st); + } + return size; + } + + // Check for float load. + if (dst_lo_rc == rc_float && src_lo_rc == rc_stack) { + int src_offset = ra_->reg2offset(src_lo); + if (src_hi != OptoReg::Bad) { + assert(dst_hi_rc==rc_float && src_hi_rc==rc_stack, + "expected same type of move for high parts"); + size += ld_st_helper(cbuf, "LFD ", Assembler::LFD_OPCODE, dst_lo, src_offset, !do_size, C, st); + } else { + size += ld_st_helper(cbuf, "LFS ", Assembler::LFS_OPCODE, dst_lo, src_offset, !do_size, C, st); + } + return size; + } + + // -------------------------------------------------------------------- + // Check for hi bits still needing moving. Only happens for misaligned + // arguments to native calls. + if (src_hi == dst_hi) + return size; // Self copy; no move. 
+ + assert(src_hi_rc != rc_bad && dst_hi_rc != rc_bad, "src_hi & dst_hi cannot be Bad"); + ShouldNotReachHere(); // Unimplemented + return 0; +} + +#ifndef PRODUCT +void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + if (!ra_) + st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx); + else + implementation(NULL, ra_, false, st); +} +#endif + +void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + implementation(&cbuf, ra_, false, NULL); +} + +uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { + return implementation(NULL, ra_, true, NULL); +} + +#if 0 // TODO: PPC port +ArchOpcode MachSpillCopyNode_archOpcode(MachSpillCopyNode *n, PhaseRegAlloc *ra_) { +#ifndef PRODUCT + if (ra_->node_regs_max_index() == 0) return archOpcode_undefined; +#endif + assert(ra_->node_regs_max_index() != 0, ""); + + // Get registers to move. + OptoReg::Name src_hi = ra_->get_reg_second(n->in(1)); + OptoReg::Name src_lo = ra_->get_reg_first(n->in(1)); + OptoReg::Name dst_hi = ra_->get_reg_second(n); + OptoReg::Name dst_lo = ra_->get_reg_first(n); + + enum RC src_lo_rc = rc_class(src_lo); + enum RC dst_lo_rc = rc_class(dst_lo); + + if (src_lo == dst_lo && src_hi == dst_hi) + return ppc64Opcode_none; // Self copy, no move. + + // -------------------------------------- + // Memory->Memory Spill. Use R0 to hold the value. + if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { + return ppc64Opcode_compound; + } + + // -------------------------------------- + // Check for float->int copy; requires a trip through memory. + if (src_lo_rc == rc_float && dst_lo_rc == rc_int) { + Unimplemented(); + } + + // -------------------------------------- + // Check for integer reg-reg copy. 
+ if (src_lo_rc == rc_int && dst_lo_rc == rc_int) { + Register Rsrc = as_Register(Matcher::_regEncode[src_lo]); + Register Rdst = as_Register(Matcher::_regEncode[dst_lo]); + if (Rsrc == Rdst) { + return ppc64Opcode_none; + } else { + return ppc64Opcode_or; + } + } + + // Check for integer store. + if (src_lo_rc == rc_int && dst_lo_rc == rc_stack) { + if (src_hi != OptoReg::Bad) { + return ppc64Opcode_std; + } else { + return ppc64Opcode_stw; + } + } + + // Check for integer load. + if (dst_lo_rc == rc_int && src_lo_rc == rc_stack) { + if (src_hi != OptoReg::Bad) { + return ppc64Opcode_ld; + } else { + return ppc64Opcode_lwz; + } + } + + // Check for float reg-reg copy. + if (src_lo_rc == rc_float && dst_lo_rc == rc_float) { + return ppc64Opcode_fmr; + } + + // Check for float store. + if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) { + if (src_hi != OptoReg::Bad) { + return ppc64Opcode_stfd; + } else { + return ppc64Opcode_stfs; + } + } + + // Check for float load. + if (dst_lo_rc == rc_float && src_lo_rc == rc_stack) { + if (src_hi != OptoReg::Bad) { + return ppc64Opcode_lfd; + } else { + return ppc64Opcode_lfs; + } + } + + // -------------------------------------------------------------------- + // Check for hi bits still needing moving. Only happens for misaligned + // arguments to native calls. + if (src_hi == dst_hi) + return ppc64Opcode_none; // Self copy; no move. + + ShouldNotReachHere(); + return ppc64Opcode_undefined; +} +#endif // PPC port + +#ifndef PRODUCT +void MachNopNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + st->print("NOP \t// %d nops to pad for loops.", _count); +} +#endif + +void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *) const { + MacroAssembler _masm(&cbuf); + // _count contains the number of nops needed for padding. 
+ for (int i = 0; i < _count; i++) { + __ nop(); + } +} + +uint MachNopNode::size(PhaseRegAlloc *ra_) const { + return _count * 4; +} + +#ifndef PRODUCT +void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_reg_first(this); + st->print("ADDI %s, SP, %d \t// box node", Matcher::regName[reg], offset); +} +#endif + +void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); + + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_encode(this); + + if (Assembler::is_simm(offset, 16)) { + __ addi(as_Register(reg), R1, offset); + } else { + ShouldNotReachHere(); + } +} + +uint BoxLockNode::size(PhaseRegAlloc *ra_) const { + // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_). + return 4; +} + +#ifndef PRODUCT +void MachUEPNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + st->print_cr("---- MachUEPNode ----"); + st->print_cr("..."); +} +#endif + +void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + // This is the unverified entry point. + MacroAssembler _masm(&cbuf); + + // Inline_cache contains a klass. + Register ic_klass = as_Register(Matcher::inline_cache_reg_encode()); + Register receiver_klass = R0; // tmp + + assert_different_registers(ic_klass, receiver_klass, R11_scratch1, R3_ARG1); + assert(R11_scratch1 == R11, "need prologue scratch register"); + + // Check for NULL argument if we don't have implicit null checks. + if (!ImplicitNullChecks || !os::zero_page_read_protected()) { + if (TrapBasedNullChecks) { + __ trap_null_check(R3_ARG1); + } else { + Label valid; + __ cmpdi(CCR0, R3_ARG1, 0); + __ bne_predict_taken(CCR0, valid); + // We have a null argument, branch to ic_miss_stub. 
+ __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(), + relocInfo::runtime_call_type); + __ bind(valid); + } + } + // Assume argument is not NULL, load klass from receiver. + __ load_klass(receiver_klass, R3_ARG1); + + if (TrapBasedICMissChecks) { + __ trap_ic_miss_check(receiver_klass, ic_klass); + } else { + Label valid; + __ cmpd(CCR0, receiver_klass, ic_klass); + __ beq_predict_taken(CCR0, valid); + // We have an unexpected klass, branch to ic_miss_stub. + __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(), + relocInfo::runtime_call_type); + __ bind(valid); + } + + // Argument is valid and klass is as expected, continue. +} + +#if 0 // TODO: PPC port +// Optimize UEP code on z (save a load_const() call in main path). +int MachUEPNode::ep_offset() { + return 0; +} +#endif + +uint MachUEPNode::size(PhaseRegAlloc *ra_) const { + // Variable size. Determine dynamically. + return MachNode::size(ra_); +} + +//============================================================================= + +uint size_exception_handler() { + // The exception_handler is a b64_patchable. + return MacroAssembler::b64_patchable_size; +} + +uint size_deopt_handler() { + // The deopt_handler is a bl64_patchable. + return MacroAssembler::bl64_patchable_size; +} + +int emit_exception_handler(CodeBuffer &cbuf) { + MacroAssembler _masm(&cbuf); + + address base = __ start_a_stub(size_exception_handler()); + if (base == NULL) return 0; // CodeBuffer::expand failed + + int offset = __ offset(); + __ b64_patchable((address)OptoRuntime::exception_blob()->content_begin(), + relocInfo::runtime_call_type); + assert(__ offset() - offset == (int)size_exception_handler(), "must be fixed size"); + __ end_a_stub(); + + return offset; +} + +// The deopt_handler is like the exception handler, but it calls to +// the deoptimization blob instead of jumping to the exception blob. 
+int emit_deopt_handler(CodeBuffer& cbuf) { + MacroAssembler _masm(&cbuf); + + address base = __ start_a_stub(size_deopt_handler()); + if (base == NULL) return 0; // CodeBuffer::expand failed + + int offset = __ offset(); + __ bl64_patchable((address)SharedRuntime::deopt_blob()->unpack(), + relocInfo::runtime_call_type); + assert(__ offset() - offset == (int) size_deopt_handler(), "must be fixed size"); + __ end_a_stub(); + + return offset; +} + +//============================================================================= + +// Use a frame slots bias for frameless methods if accessing the stack. +static int frame_slots_bias(int reg_enc, PhaseRegAlloc* ra_) { + if (as_Register(reg_enc) == R1_SP) { + return 0; // TODO: PPC port ra_->C->frame_slots_sp_bias_in_bytes(); + } + return 0; +} + +const bool Matcher::match_rule_supported(int opcode) { + if (!has_match_rule(opcode)) + return false; + + switch (opcode) { + case Op_CountLeadingZerosI: + case Op_CountLeadingZerosL: + case Op_CountTrailingZerosI: + case Op_CountTrailingZerosL: + if (!UseCountLeadingZerosInstructionsPPC64) + return false; + break; + + case Op_PopCountI: + case Op_PopCountL: + return (UsePopCountInstruction && VM_Version::has_popcntw()); + + case Op_StrComp: + return SpecialStringCompareTo; + case Op_StrEquals: + return SpecialStringEquals; + case Op_StrIndexOf: + return SpecialStringIndexOf; + } + + return true; // Per default match rules are supported. +} + +int Matcher::regnum_to_fpu_offset(int regnum) { + // No user for this method? + Unimplemented(); + return 999; +} + +const bool Matcher::convL2FSupported(void) { + // fcfids can do the conversion (>= Power7). + // fcfid + frsp showed rounding problem when result should be 0x3f800001. + return VM_Version::has_fcfids(); // False means that conversion is done by runtime call. +} + +// Vector width in bytes. +const int Matcher::vector_width_in_bytes(BasicType bt) { + assert(MaxVectorSize == 8, ""); + return 8; +} + +// Vector ideal reg. 
+const int Matcher::vector_ideal_reg(int size) { + assert(MaxVectorSize == 8 && size == 8, ""); + return Op_RegL; +} + +const int Matcher::vector_shift_count_ideal_reg(int size) { + fatal("vector shift is not supported"); + return Node::NotAMachineReg; +} + +// Limits on vector size (number of elements) loaded into vector. +const int Matcher::max_vector_size(const BasicType bt) { + assert(is_java_primitive(bt), "only primitive type vectors"); + return vector_width_in_bytes(bt)/type2aelembytes(bt); +} + +const int Matcher::min_vector_size(const BasicType bt) { + return max_vector_size(bt); // Same as max. +} + +// PPC doesn't support misaligned vectors store/load. +const bool Matcher::misaligned_vectors_ok() { + return false; +} + +// RETURNS: whether this branch offset is short enough that a short +// branch can be used. +// +// If the platform does not provide any short branch variants, then +// this method should return `false' for offset 0. +// +// `Compile::Fill_buffer' will decide on basis of this information +// whether to do the pass `Compile::Shorten_branches' at all. +// +// And `Compile::Shorten_branches' will decide on basis of this +// information whether to replace particular branch sites by short +// ones. +bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { + // Is the offset within the range of a ppc64 pc relative branch? + bool b; + + const int safety_zone = 3 * BytesPerInstWord; + b = Assembler::is_simm((offset<0 ? offset-safety_zone : offset+safety_zone), + 29 - 16 + 1 + 2); + return b; +} + +const bool Matcher::isSimpleConstant64(jlong value) { + // Probably always true, even if a temp register is required. + return true; +} +/* TODO: PPC port +// Make a new machine dependent decode node (with its operands). 
+MachTypeNode *Matcher::make_decode_node(Compile *C) { + assert(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0, + "This method is only implemented for unscaled cOops mode so far"); + MachTypeNode *decode = new (C) decodeN_unscaledNode(); + decode->set_opnd_array(0, new (C) iRegPdstOper()); + decode->set_opnd_array(1, new (C) iRegNsrcOper()); + return decode; +} +*/ +// Threshold size for cleararray. +const int Matcher::init_array_short_size = 8 * BytesPerLong; + +// false => size gets scaled to BytesPerLong, ok. +const bool Matcher::init_array_count_is_in_bytes = false; + +// Use conditional move (CMOVL) on Power7. +const int Matcher::long_cmove_cost() { return 0; } // this only makes long cmoves more expensive than int cmoves + +// Suppress CMOVF. Conditional move available (sort of) on PPC64 only from P7 onwards. Not exploited yet. +// fsel doesn't accept a condition register as input, so this would be slightly different. +const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } + +// Power6 requires postalloc expand (see block.cpp for description of postalloc expand). +const bool Matcher::require_postalloc_expand = true; + +// Should the Matcher clone shifts on addressing modes, expecting them to +// be subsumed into complex addressing expressions or compute them into +// registers? True for Intel but false for most RISCs. +const bool Matcher::clone_shift_expressions = false; + +// Do we need to mask the count passed to shift instructions or does +// the cpu only look at the lower 5/6 bits anyway? +// Off, as masks are generated in expand rules where required. +// Constant shift counts are handled in Ideal phase. +const bool Matcher::need_masked_shift_count = false; + +// This affects two different things: +// - how Decode nodes are matched +// - how ImplicitNullCheck opportunities are recognized +// If true, the matcher will try to remove all Decodes and match them +// (as operands) into nodes. 
NullChecks are not prepared to deal with +// Decodes by final_graph_reshaping(). +// If false, final_graph_reshaping() forces the decode behind the Cmp +// for a NullCheck. The matcher matches the Decode node into a register. +// Implicit_null_check optimization moves the Decode along with the +// memory operation back up before the NullCheck. +bool Matcher::narrow_oop_use_complex_address() { + // TODO: PPC port if (MatchDecodeNodes) return true; + return false; +} + +bool Matcher::narrow_klass_use_complex_address() { + NOT_LP64(ShouldNotCallThis()); + assert(UseCompressedClassPointers, "only for compressed klass code"); + // TODO: PPC port if (MatchDecodeNodes) return true; + return false; +} + +// Is it better to copy float constants, or load them directly from memory? +// Intel can load a float constant from a direct address, requiring no +// extra registers. Most RISCs will have to materialize an address into a +// register first, so they would do better to copy the constant from stack. +const bool Matcher::rematerialize_float_constants = false; + +// If CPU can load and store mis-aligned doubles directly then no fixup is +// needed. Else we split the double into 2 integer pieces and move it +// piece-by-piece. Only happens when passing doubles into C code as the +// Java calling convention forces doubles to be aligned. +const bool Matcher::misaligned_doubles_ok = true; + +void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { + Unimplemented(); +} + +// Advertise here if the CPU requires explicit rounding operations +// to implement the UseStrictFP mode. +const bool Matcher::strict_fp_requires_explicit_rounding = false; + +// Do floats take an entire double register or just half? +// +// A float occupies a ppc64 double register. For the allocator, a +// ppc64 double register appears as a pair of float registers. +bool Matcher::float_in_double() { return true; } + +// Do ints take an entire long register or just half? 
+// The relevant question is how the int is callee-saved: +// the whole long is written but de-opt'ing will have to extract +// the relevant 32 bits. +const bool Matcher::int_in_long = true; + +// Constants for c2c and c calling conventions. + +const MachRegisterNumbers iarg_reg[8] = { + R3_num, R4_num, R5_num, R6_num, + R7_num, R8_num, R9_num, R10_num +}; + +const MachRegisterNumbers farg_reg[13] = { + F1_num, F2_num, F3_num, F4_num, + F5_num, F6_num, F7_num, F8_num, + F9_num, F10_num, F11_num, F12_num, + F13_num +}; + +const int num_iarg_registers = sizeof(iarg_reg) / sizeof(iarg_reg[0]); + +const int num_farg_registers = sizeof(farg_reg) / sizeof(farg_reg[0]); + +// Return whether or not this register is ever used as an argument. This +// function is used on startup to build the trampoline stubs in generateOptoStub. +// Registers not mentioned will be killed by the VM call in the trampoline, and +// arguments in those registers not be available to the callee. +bool Matcher::can_be_java_arg(int reg) { + // We return true for all registers contained in iarg_reg[] and + // farg_reg[] and their virtual halves. + // We must include the virtual halves in order to get STDs and LDs + // instead of STWs and LWs in the trampoline stubs. 
+ + if ( reg == R3_num || reg == R3_H_num + || reg == R4_num || reg == R4_H_num + || reg == R5_num || reg == R5_H_num + || reg == R6_num || reg == R6_H_num + || reg == R7_num || reg == R7_H_num + || reg == R8_num || reg == R8_H_num + || reg == R9_num || reg == R9_H_num + || reg == R10_num || reg == R10_H_num) + return true; + + if ( reg == F1_num || reg == F1_H_num + || reg == F2_num || reg == F2_H_num + || reg == F3_num || reg == F3_H_num + || reg == F4_num || reg == F4_H_num + || reg == F5_num || reg == F5_H_num + || reg == F6_num || reg == F6_H_num + || reg == F7_num || reg == F7_H_num + || reg == F8_num || reg == F8_H_num + || reg == F9_num || reg == F9_H_num + || reg == F10_num || reg == F10_H_num + || reg == F11_num || reg == F11_H_num + || reg == F12_num || reg == F12_H_num + || reg == F13_num || reg == F13_H_num) + return true; + + return false; +} + +bool Matcher::is_spillable_arg(int reg) { + return can_be_java_arg(reg); +} + +bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { + return false; +} + +// Register for DIVI projection of divmodI. +RegMask Matcher::divI_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for MODI projection of divmodI. +RegMask Matcher::modI_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for DIVL projection of divmodL. +RegMask Matcher::divL_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for MODL projection of divmodL. 
// Not expected to be called; the empty mask only satisfies the signature.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Returns an empty register mask.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return RegMask();
}

// Result projection of the int mathExact nodes uses the RARG4_BITS64_REG mask.
const RegMask Matcher::mathExactI_result_proj_mask() {
  return RARG4_BITS64_REG_mask();
}

// Result projection of the long mathExact nodes uses the same mask as the int variant.
const RegMask Matcher::mathExactL_result_proj_mask() {
  return RARG4_BITS64_REG_mask();
}

// Flags projection of the int mathExact nodes uses the INT_FLAGS mask.
const RegMask Matcher::mathExactI_flags_proj_mask() {
  return INT_FLAGS_mask();
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams. Encoding classes are parameterized macros used by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction. Operands specify their base encoding interface with the
// interface keyword. Four interfaces are currently supported:
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
// operand to generate a function which returns its register number when
// queried. CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried. MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried. COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e. the encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
//
// Instructions specify two basic values for encoding. Again, a function
// is available to check if the constant displacement is an oop.
// They use the
// ins_encode keyword to specify their encoding classes (which must be
// a sequence of enc_class names, and their parameters, specified in
// the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode. Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Placeholder encoding for mach nodes that have no real encoding yet:
  // emits the assembler's "unimplemented" sequence with a fixed message.
  enc_class enc_unimplemented %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    MacroAssembler _masm(&cbuf);
    __ unimplemented("Unimplemented mach node encoding in AD file.", 13);
  %}

  // Marks encodings that were never exercised. Only ASSERT builds emit
  // anything; in other builds the body is empty.
  enc_class enc_untested %{
#ifdef ASSERT
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    MacroAssembler _masm(&cbuf);
    __ untested("Untested mach node encoding in AD file.");
#else
    // TODO: PPC port $archOpcode(ppc64Opcode_none);
#endif
  %}

  // lbz load from base + displacement. The displacement is adjusted by
  // frame_slots_bias() for the base register.
  enc_class enc_lbz(iRegIdst dst, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lbz);
    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lbz($dst$$Register, Idisp, $mem$$base$$Register);
  %}

  // Load acquire.
  // Same load as enc_lbz, followed by twi_0 on the loaded register and isync.
  enc_class enc_lbz_ac(iRegIdst dst, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lbz($dst$$Register, Idisp, $mem$$base$$Register);
    __ twi_0($dst$$Register);
    __ isync();
  %}

  // lhz load from base + displacement (displacement biased as in enc_lbz).
  enc_class enc_lhz(iRegIdst dst, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lhz);

    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lhz($dst$$Register, Idisp, $mem$$base$$Register);
  %}

  // Load acquire.
  // lhz load followed by twi_0 on the loaded register and isync.
  enc_class enc_lhz_ac(iRegIdst dst, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);

    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lhz($dst$$Register, Idisp, $mem$$base$$Register);
    __ twi_0($dst$$Register);
    __ isync();
  %}

  // lwz load from base + displacement; the displacement is adjusted by
  // frame_slots_bias() for the base register.
  enc_class enc_lwz(iRegIdst dst, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lwz);

    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lwz($dst$$Register, Idisp, $mem$$base$$Register);
  %}

  // Load acquire.
  // Same load as enc_lwz, followed by twi_0 on the loaded register and isync.
  enc_class enc_lwz_ac(iRegIdst dst, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);

    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lwz($dst$$Register, Idisp, $mem$$base$$Register);
    __ twi_0($dst$$Register);
    __ isync();
  %}

  // ld load from base + displacement; asserts that the displacement is a
  // multiple of 4.
  enc_class enc_ld(iRegLdst dst, memoryAlg4 mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ld);
    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    // Operand 'ds' requires 4-alignment.
    assert((Idisp & 0x3) == 0, "unaligned offset");
    __ ld($dst$$Register, Idisp, $mem$$base$$Register);
  %}

  // Load acquire.
  // Same load as enc_ld, followed by twi_0 on the loaded register and isync.
  enc_class enc_ld_ac(iRegLdst dst, memoryAlg4 mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    // Operand 'ds' requires 4-alignment.
    assert((Idisp & 0x3) == 0, "unaligned offset");
    __ ld($dst$$Register, Idisp, $mem$$base$$Register);
    __ twi_0($dst$$Register);
    __ isync();
  %}

  // lfd load into a float register from base + displacement.
  // NOTE(review): the operand class is spelled RegF here but regF in the
  // postalloc_expand_load_float_constant enc_class -- confirm which is intended.
  enc_class enc_lfd(RegF dst, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lfd);
    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lfd($dst$$FloatRegister, Idisp, $mem$$base$$Register);
  %}

  // Loads a long constant with a single ld relative to the toc register.
  // Outside of scratch emission the constant is created via long_constant()
  // and its TOC offset is used; during scratch emission the offset stays 0.
  enc_class enc_load_long_constL(iRegLdst dst, immL src, iRegLdst toc) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ld);

    MacroAssembler _masm(&cbuf);
    int toc_offset = 0;

    if (!ra_->C->in_scratch_emit_size()) {
      address const_toc_addr;
      // Create a non-oop constant, no relocation needed.
      // If it is an IC, it has a virtual_call_Relocation.
      const_toc_addr = __ long_constant((jlong)$src$$constant);

      // Get the constant's TOC offset.
      toc_offset = __ offset_to_method_toc(const_toc_addr);

      // Keep the current instruction offset in mind.
      ((loadConLNode*)this)->_cbuf_insts_offset = __ offset();
    }

    __ ld($dst$$Register, toc_offset, $toc$$Register);
  %}

  // High part of a two-instruction long constant load: addis with the high
  // half of the constant's TOC offset. Outside of scratch emission the offset
  // is computed and stored in the node first; the addis then reads the node's
  // _const_toc_offset field.
  enc_class enc_load_long_constL_hi(iRegLdst dst, iRegLdst toc, immL src) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addis);

    MacroAssembler _masm(&cbuf);

    if (!ra_->C->in_scratch_emit_size()) {
      address const_toc_addr;
      // Create a non-oop constant, no relocation needed.
      // If it is an IC, it has a virtual_call_Relocation.
      const_toc_addr = __ long_constant((jlong)$src$$constant);

      // Get the constant's TOC offset.
      const int toc_offset = __ offset_to_method_toc(const_toc_addr);
      // Store the toc offset of the constant.
      ((loadConL_hiNode*)this)->_const_toc_offset = toc_offset;

      // Also keep the current instruction offset in mind.
      ((loadConL_hiNode*)this)->_cbuf_insts_offset = __ offset();
    }

    __ addis($dst$$Register, $toc$$Register, MacroAssembler::largeoffset_si16_si16_hi(_const_toc_offset));
  %}

%} // encode

source %{

// Result of loadConLNodesTuple_create: either the hi/lo pair used for a
// large constant pool or the single node used for a small one. The unused
// members are NULL; _last is the node that produces the final value.
typedef struct {
  loadConL_hiNode *_large_hi;
  loadConL_loNode *_large_lo;
  loadConLNode *_small;
  MachNode *_last;
} loadConLNodesTuple;

// Creates the node(s) that load the long constant immSrc relative to toc and
// assigns them the given register pair. large_constant_pool is currently
// hard-wired to true, so the hi/lo pair is always produced.
loadConLNodesTuple loadConLNodesTuple_create(Compile *C, PhaseRegAlloc *ra_, Node *toc, immLOper *immSrc,
                                             OptoReg::Name reg_second, OptoReg::Name reg_first) {
  loadConLNodesTuple nodes;

  const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
  if (large_constant_pool) {
    // Create new nodes.
    loadConL_hiNode *m1 = new (C) loadConL_hiNode();
    loadConL_loNode *m2 = new (C) loadConL_loNode();

    // inputs for new nodes
    m1->add_req(NULL, toc);
    m2->add_req(NULL, m1);

    // operands for new nodes
    m1->_opnds[0] = new (C) iRegLdstOper(); // dst
    m1->_opnds[1] = immSrc; // src
    m1->_opnds[2] = new (C) iRegPdstOper(); // toc
    m2->_opnds[0] = new (C) iRegLdstOper(); // dst
    m2->_opnds[1] = immSrc; // src
    m2->_opnds[2] = new (C) iRegLdstOper(); // base

    // Initialize ins_attrib TOC fields.
    m1->_const_toc_offset = -1;
    m2->_const_toc_offset_hi_node = m1;

    // Initialize ins_attrib instruction offset.
    m1->_cbuf_insts_offset = -1;

    // register allocation for new nodes
    ra_->set_pair(m1->_idx, reg_second, reg_first);
    ra_->set_pair(m2->_idx, reg_second, reg_first);

    // Create result.
    nodes._large_hi = m1;
    nodes._large_lo = m2;
    nodes._small = NULL;
    nodes._last = nodes._large_lo;
    assert(m2->bottom_type()->isa_long(), "must be long");
  } else {
    loadConLNode *m2 = new (C) loadConLNode();

    // inputs for new nodes
    m2->add_req(NULL, toc);

    // operands for new nodes
    m2->_opnds[0] = new (C) iRegLdstOper(); // dst
    m2->_opnds[1] = immSrc; // src
    m2->_opnds[2] = new (C) iRegPdstOper(); // toc

    // Initialize ins_attrib instruction offset.
    m2->_cbuf_insts_offset = -1;

    // register allocation for new nodes
    ra_->set_pair(m2->_idx, reg_second, reg_first);

    // Create result.
    nodes._large_hi = NULL;
    nodes._large_lo = NULL;
    nodes._small = m2;
    nodes._last = nodes._small;
    assert(m2->bottom_type()->isa_long(), "must be long");
  }

  return nodes;
}

%} // source

encode %{
  // Postalloc expand emitter for loading a long constant from the method's TOC.
  // Enc_class needed as constanttablebase is not supported by postalloc
  // expand.
  enc_class postalloc_expand_load_long_constant(iRegLdst dst, immL src, iRegLdst toc) %{
    // Create new nodes.
    loadConLNodesTuple loadConLNodes =
      loadConLNodesTuple_create(C, ra_, n_toc, op_src,
                                ra_->get_reg_second(this), ra_->get_reg_first(this));

    // Push new nodes.
    // The hi node (if any) goes first, then the node producing the final value.
    if (loadConLNodes._large_hi) nodes->push(loadConLNodes._large_hi);
    if (loadConLNodes._last) nodes->push(loadConLNodes._last);

    // some asserts
    assert(nodes->length() >= 1, "must have created at least 1 node");
    assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long");
  %}

  // Loads a pointer constant with a single ld relative to the toc register.
  // Oop and metadata constants get a relocation; other pointers are stored
  // as plain long constants. During scratch emission the offset stays 0.
  enc_class enc_load_long_constP(iRegLdst dst, immP src, iRegLdst toc) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ld);

    MacroAssembler _masm(&cbuf);
    int toc_offset = 0;

    if (!ra_->C->in_scratch_emit_size()) {
      intptr_t val = $src$$constant;
      relocInfo::relocType constant_reloc = $src->constant_reloc(); // src
      address const_toc_addr;
      if (constant_reloc == relocInfo::oop_type) {
        // Create an oop constant and a corresponding relocation.
        AddressLiteral a = __ allocate_oop_address((jobject)val);
        const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
        __ relocate(a.rspec());
      } else if (constant_reloc == relocInfo::metadata_type) {
        // Create a metadata constant and a corresponding relocation.
        AddressLiteral a = __ allocate_metadata_address((Metadata *)val);
        const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
        __ relocate(a.rspec());
      } else {
        // Create a non-oop constant, no relocation needed.
        const_toc_addr = __ long_constant((jlong)$src$$constant);
      }

      // Get the constant's TOC offset.
      toc_offset = __ offset_to_method_toc(const_toc_addr);
    }

    __ ld($dst$$Register, toc_offset, $toc$$Register);
  %}

  // High part of a two-instruction pointer constant load: addis with the high
  // half of the constant's TOC offset. Outside of scratch emission the offset
  // is computed and stored in the node first; the addis then reads the node's
  // _const_toc_offset field.
  enc_class enc_load_long_constP_hi(iRegLdst dst, immP src, iRegLdst toc) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addis);

    MacroAssembler _masm(&cbuf);
    if (!ra_->C->in_scratch_emit_size()) {
      intptr_t val = $src$$constant;
      relocInfo::relocType constant_reloc = $src->constant_reloc(); // src
      address const_toc_addr;
      if (constant_reloc == relocInfo::oop_type) {
        // Create an oop constant and a corresponding relocation.
        AddressLiteral a = __ allocate_oop_address((jobject)val);
        const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
        __ relocate(a.rspec());
      } else if (constant_reloc == relocInfo::metadata_type) {
        // Create a metadata constant and a corresponding relocation.
        AddressLiteral a = __ allocate_metadata_address((Metadata *)val);
        const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
        __ relocate(a.rspec());
      } else { // non-oop pointers, e.g. card mark base, heap top
        // Create a non-oop constant, no relocation needed.
        const_toc_addr = __ long_constant((jlong)$src$$constant);
      }

      // Get the constant's TOC offset.
      const int toc_offset = __ offset_to_method_toc(const_toc_addr);
      // Store the toc offset of the constant.
      ((loadConP_hiNode*)this)->_const_toc_offset = toc_offset;
    }

    __ addis($dst$$Register, $toc$$Register, MacroAssembler::largeoffset_si16_si16_hi(_const_toc_offset));
  %}

  // Postalloc expand emitter for loading a ptr constant from the method's TOC.
  // Enc_class needed as constanttablebase is not supported by postalloc
  // expand.
  // large_constant_pool is currently hard-wired to true, so the hi/lo pair
  // is always produced.
  enc_class postalloc_expand_load_ptr_constant(iRegPdst dst, immP src, iRegLdst toc) %{
    const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
    if (large_constant_pool) {
      // Create new nodes.
      loadConP_hiNode *m1 = new (C) loadConP_hiNode();
      loadConP_loNode *m2 = new (C) loadConP_loNode();

      // inputs for new nodes
      m1->add_req(NULL, n_toc);
      m2->add_req(NULL, m1);

      // operands for new nodes
      m1->_opnds[0] = new (C) iRegPdstOper(); // dst
      m1->_opnds[1] = op_src; // src
      m1->_opnds[2] = new (C) iRegPdstOper(); // toc
      m2->_opnds[0] = new (C) iRegPdstOper(); // dst
      m2->_opnds[1] = op_src; // src
      m2->_opnds[2] = new (C) iRegLdstOper(); // base

      // Initialize ins_attrib TOC fields.
      m1->_const_toc_offset = -1;
      m2->_const_toc_offset_hi_node = m1;

      // Register allocation for new nodes.
      ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
      ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));

      nodes->push(m1);
      nodes->push(m2);
      assert(m2->bottom_type()->isa_ptr(), "must be ptr");
    } else {
      loadConPNode *m2 = new (C) loadConPNode();

      // inputs for new nodes
      m2->add_req(NULL, n_toc);

      // operands for new nodes
      m2->_opnds[0] = new (C) iRegPdstOper(); // dst
      m2->_opnds[1] = op_src; // src
      m2->_opnds[2] = new (C) iRegPdstOper(); // toc

      // Register allocation for new nodes.
      ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));

      nodes->push(m2);
      assert(m2->bottom_type()->isa_ptr(), "must be ptr");
    }
  %}

  // Postalloc expand emitter for loading a float constant relative to the toc.
  // Enc_class needed as constanttablebase is not supported by postalloc
  // expand.
  // large_constant_pool is currently hard-wired to true, so loadConFCompNode
  // is always chosen.
  enc_class postalloc_expand_load_float_constant(regF dst, immF src, iRegLdst toc) %{
    bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;

    MachNode *m2;
    if (large_constant_pool) {
      m2 = new (C) loadConFCompNode();
    } else {
      m2 = new (C) loadConFNode();
    }
    // inputs for new nodes
    m2->add_req(NULL, n_toc);

    // operands for new nodes
    m2->_opnds[0] = op_dst;
    m2->_opnds[1] = op_src;
    m2->_opnds[2] = new (C) iRegPdstOper(); // constanttablebase

    // register allocation for new nodes
    ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    nodes->push(m2);
  %}

  // Postalloc expand emitter for loading a double constant relative to the toc.
  // Enc_class needed as constanttablebase is not supported by postalloc
  // expand.
  // large_constant_pool is currently hard-wired to true, so loadConDCompNode
  // is always chosen.
  enc_class postalloc_expand_load_double_constant(regD dst, immD src, iRegLdst toc) %{
    bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;

    MachNode *m2;
    if (large_constant_pool) {
      m2 = new (C) loadConDCompNode();
    } else {
      m2 = new (C) loadConDNode();
    }
    // inputs for new nodes
    m2->add_req(NULL, n_toc);

    // operands for new nodes
    m2->_opnds[0] = op_dst;
    m2->_opnds[1] = op_src;
    m2->_opnds[2] = new (C) iRegPdstOper(); // constanttablebase

    // register allocation for new nodes
    ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    nodes->push(m2);
  %}

  // stw store to base + displacement; the displacement is adjusted by
  // frame_slots_bias() for the base register.
  enc_class enc_stw(iRegIsrc src, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_stw);
    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ stw($src$$Register, Idisp, $mem$$base$$Register);
  %}

  // std store to base + displacement; asserts that the displacement is a
  // multiple of 4.
  enc_class enc_std(iRegIsrc src, memoryAlg4 mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_std);
    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    // Operand 'ds' requires 4-alignment.
    assert((Idisp & 0x3) == 0, "unaligned offset");
    __ std($src$$Register, Idisp, $mem$$base$$Register);
  %}

  // stfs store of a float register to base + displacement.
  enc_class enc_stfs(RegF src, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_stfs);
    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ stfs($src$$FloatRegister, Idisp, $mem$$base$$Register);
  %}

  // stfd store of a float register to base + displacement.
  enc_class enc_stfd(RegF src, memory mem) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_stfd);
    MacroAssembler _masm(&cbuf);
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ stfd($src$$FloatRegister, Idisp, $mem$$base$$Register);
  %}

  // Use release_store for card-marking to ensure that previous
  // oop-stores are visible before the card-mark change.
  // As written, the StoreStore barrier is emitted unconditionally and a zero
  // byte is stored to the card address.
  enc_class enc_cms_card_mark(memory mem, iRegLdst releaseFieldAddr) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    // FIXME: Implement this as a cmove and use a fixed condition code
    // register which is written on every transition to compiled code,
    // e.g. in call-stub and when returning from runtime stubs.
    //
    // Proposed code sequence for the cmove implementation:
    //
    // Label skip_release;
    // __ beq(CCRfixed, skip_release);
    // __ release();
    // __ bind(skip_release);
    // __ stb(card mark);

    MacroAssembler _masm(&cbuf);
    // NOTE(review): this label is only bound inside disabled code below, and
    // the disabled branch targets 'skip_release' rather than 'skip_storestore';
    // the names need to be reconciled before the #if 0 sections are enabled.
    Label skip_storestore;

#if 0 // TODO: PPC port
    // Check CMSCollectorCardTableModRefBSExt::_requires_release and do the
    // StoreStore barrier conditionally.
    __ lwz(R0, 0, $releaseFieldAddr$$Register);
    __ cmpwi(CCR0, R0, 0);
    __ beq_predict_taken(CCR0, skip_release);
#endif
    __ li(R0, 0);
    __ membar(Assembler::StoreStore);
#if 0 // TODO: PPC port
    __ bind(skip_storestore);
#endif

    // Do the store.
    // Without an index register the displacement form is used; with an index
    // register the indexed form is used and the displacement must be 0.
    if ($mem$$index == 0) {
      __ stb(R0, $mem$$disp, $mem$$base$$Register);
    } else {
      assert(0 == $mem$$disp, "no displacement possible with indexed load/stores on ppc");
      __ stbx(R0, $mem$$base$$Register, $mem$$index$$Register);
    }
  %}

  // Postalloc expand for encoding an oop that may be null. Builds a compare
  // of src against 0 plus subtract-base and shift nodes, using a conditional
  // set (with isel) or a conditional subtract (without isel).
  enc_class postalloc_expand_encode_oop(iRegNdst dst, iRegPdst src, flagsReg crx) %{

    if (VM_Version::has_isel()) {
      // use isel instruction with Power 7
      cmpP_reg_imm16Node *n_compare = new (C) cmpP_reg_imm16Node();
      encodeP_subNode *n_sub_base = new (C) encodeP_subNode();
      encodeP_shiftNode *n_shift = new (C) encodeP_shiftNode();
      cond_set_0_oopNode *n_cond_set = new (C) cond_set_0_oopNode();

      n_compare->add_req(n_region, n_src);
      n_compare->_opnds[0] = op_crx;
      n_compare->_opnds[1] = op_src;
      n_compare->_opnds[2] = new (C) immL16Oper(0);

      n_sub_base->add_req(n_region, n_src);
      n_sub_base->_opnds[0] = op_dst;
      n_sub_base->_opnds[1] = op_src;
      n_sub_base->_bottom_type = _bottom_type;

      n_shift->add_req(n_region, n_sub_base);
      n_shift->_opnds[0] = op_dst;
      n_shift->_opnds[1] = op_dst;
      n_shift->_bottom_type = _bottom_type;

      n_cond_set->add_req(n_region, n_compare, n_shift);
      n_cond_set->_opnds[0] = op_dst;
      n_cond_set->_opnds[1] = op_crx;
      n_cond_set->_opnds[2] = op_dst;
      n_cond_set->_bottom_type = _bottom_type;

      // The compare writes the flags register of crx; all other nodes write
      // the register pair of the node being expanded.
      ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
      ra_->set_pair(n_sub_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
      ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
      ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));

      nodes->push(n_compare);
      nodes->push(n_sub_base);
      nodes->push(n_shift);
      nodes->push(n_cond_set);

    } else {
      // before Power 7
      moveRegNode *n_move = new (C) moveRegNode();
      cmpP_reg_imm16Node *n_compare = new (C) cmpP_reg_imm16Node();
      encodeP_shiftNode *n_shift = new (C) encodeP_shiftNode();
      cond_sub_baseNode *n_sub_base = new (C) cond_sub_baseNode();

      n_move->add_req(n_region, n_src);
      n_move->_opnds[0] = op_dst;
      n_move->_opnds[1] = op_src;
      ra_->set_oop(n_move, true); // Until here, 'n_move' still produces an oop.

      n_compare->add_req(n_region, n_src);
      // Precedence edge from the compare to the move.
      n_compare->add_prec(n_move);

      n_compare->_opnds[0] = op_crx;
      n_compare->_opnds[1] = op_src;
      n_compare->_opnds[2] = new (C) immL16Oper(0);

      n_sub_base->add_req(n_region, n_compare, n_src);
      n_sub_base->_opnds[0] = op_dst;
      n_sub_base->_opnds[1] = op_crx;
      n_sub_base->_opnds[2] = op_src;
      n_sub_base->_bottom_type = _bottom_type;

      n_shift->add_req(n_region, n_sub_base);
      n_shift->_opnds[0] = op_dst;
      n_shift->_opnds[1] = op_dst;
      n_shift->_bottom_type = _bottom_type;

      ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
      ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
      ra_->set_pair(n_sub_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
      ra_->set_pair(n_move->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));

      nodes->push(n_move);
      nodes->push(n_compare);
      nodes->push(n_sub_base);
      nodes->push(n_shift);
    }

    assert(!(ra_->is_oop(this)), "sanity"); // This is not supposed to be GC'ed.
  %}

  // Postalloc expand for encoding an oop known to be non-null: subtract
  // node followed by shift node, no compare.
  enc_class postalloc_expand_encode_oop_not_null(iRegNdst dst, iRegPdst src) %{

    encodeP_subNode *n1 = new (C) encodeP_subNode();
    n1->add_req(n_region, n_src);
    n1->_opnds[0] = op_dst;
    n1->_opnds[1] = op_src;
    n1->_bottom_type = _bottom_type;

    encodeP_shiftNode *n2 = new (C) encodeP_shiftNode();
    n2->add_req(n_region, n1);
    n2->_opnds[0] = op_dst;
    n2->_opnds[1] = op_dst;
    n2->_bottom_type = _bottom_type;
    ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));

    nodes->push(n1);
    nodes->push(n2);
    assert(!(ra_->is_oop(this)), "sanity"); // This is not supposed to be GC'ed.
  %}

  // Postalloc expand for decoding a narrow oop that may be null. Builds a
  // compare of src against the narrow null constant and a shift node, then
  // either add-base plus conditional set (with isel) or a conditional
  // add-base (without isel). The final node is marked as producing an oop.
  enc_class postalloc_expand_decode_oop(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
    decodeN_shiftNode *n_shift = new (C) decodeN_shiftNode();
    cmpN_reg_imm0Node *n_compare = new (C) cmpN_reg_imm0Node();

    n_compare->add_req(n_region, n_src);
    n_compare->_opnds[0] = op_crx;
    n_compare->_opnds[1] = op_src;
    n_compare->_opnds[2] = new (C) immN_0Oper(TypeNarrowOop::NULL_PTR);

    n_shift->add_req(n_region, n_src);
    n_shift->_opnds[0] = op_dst;
    n_shift->_opnds[1] = op_src;
    n_shift->_bottom_type = _bottom_type;

    if (VM_Version::has_isel()) {
      // use isel instruction with Power 7

      decodeN_addNode *n_add_base = new (C) decodeN_addNode();
      n_add_base->add_req(n_region, n_shift);
      n_add_base->_opnds[0] = op_dst;
      n_add_base->_opnds[1] = op_dst;
      n_add_base->_bottom_type = _bottom_type;

      cond_set_0_ptrNode *n_cond_set = new (C) cond_set_0_ptrNode();
      n_cond_set->add_req(n_region, n_compare, n_add_base);
      n_cond_set->_opnds[0] = op_dst;
      n_cond_set->_opnds[1] = op_crx;
      n_cond_set->_opnds[2] = op_dst;
      n_cond_set->_bottom_type = _bottom_type;

      assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
      ra_->set_oop(n_cond_set, true);

      ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
      ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
      ra_->set_pair(n_add_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
      ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));

      nodes->push(n_compare);
      nodes->push(n_shift);
      nodes->push(n_add_base);
      nodes->push(n_cond_set);

    } else {
      // before Power 7
      cond_add_baseNode *n_add_base = new (C) cond_add_baseNode();

      n_add_base->add_req(n_region, n_compare, n_shift);
      n_add_base->_opnds[0] = op_dst;
      n_add_base->_opnds[1] = op_crx;
      n_add_base->_opnds[2] = op_dst;
      n_add_base->_bottom_type = _bottom_type;

      assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
      ra_->set_oop(n_add_base, true);

      ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
      ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
      ra_->set_pair(n_add_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));

      nodes->push(n_compare);
      nodes->push(n_shift);
      nodes->push(n_add_base);
    }
  %}

  // Postalloc expand for decoding a narrow oop known to be non-null: shift
  // node followed by add node, no compare. The add node is marked as
  // producing an oop.
  enc_class postalloc_expand_decode_oop_not_null(iRegPdst dst, iRegNsrc src) %{
    decodeN_shiftNode *n1 = new (C) decodeN_shiftNode();
    n1->add_req(n_region, n_src);
    n1->_opnds[0] = op_dst;
    n1->_opnds[1] = op_src;
    n1->_bottom_type = _bottom_type;

    decodeN_addNode *n2 = new (C) decodeN_addNode();
    n2->add_req(n_region, n1);
    n2->_opnds[0] = op_dst;
    n2->_opnds[1] = op_dst;
    n2->_bottom_type = _bottom_type;
    ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));

    assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
    ra_->set_oop(n2, true);

    nodes->push(n1);
    nodes->push(n2);
  %}

  // Conditional register move implemented as a branch around an mr:
  // the branch skips the move when the condition does not hold.
  enc_class enc_cmove_reg(iRegIdst dst, flagsReg crx, iRegIsrc src, cmpOp cmp) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);

    MacroAssembler _masm(&cbuf);
    int cc = $cmp$$cmpcode;
    int flags_reg = $crx$$reg;
    Label done;
    assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
    // Branch if not (cmp crx).
    __ bc(cc_to_inverse_boint(cc), cc_to_biint(cc, flags_reg), done);
    __ mr($dst$$Register, $src$$Register);
    // TODO PPC port __ endgroup_if_needed(_size == 12);
    __ bind(done);
  %}

  // NOTE(review): this definition continues past the end of this excerpt;
  // the visible head is kept unchanged.
  enc_class enc_cmove_imm(iRegIdst dst, flagsReg crx, immI16 src, cmpOp cmp) %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);

    MacroAssembler _masm(&cbuf);
    Label done;
    assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
    // Branch if not (cmp crx).
+ __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done); + __ li($dst$$Register, $src$$constant); + // TODO PPC port __ endgroup_if_needed(_size == 12); + __ bind(done); + %} + + // New atomics. + enc_class enc_GetAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + + MacroAssembler _masm(&cbuf); + Register Rtmp = R0; + Register Rres = $res$$Register; + Register Rsrc = $src$$Register; + Register Rptr = $mem_ptr$$Register; + bool RegCollision = (Rres == Rsrc) || (Rres == Rptr); + Register Rold = RegCollision ? Rtmp : Rres; + + Label Lretry; + __ bind(Lretry); + __ lwarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update()); + __ add(Rtmp, Rsrc, Rold); + __ stwcx_(Rtmp, Rptr); + if (UseStaticBranchPredictionInCompareAndSwapPPC64) { + __ bne_predict_not_taken(CCR0, Lretry); + } else { + __ bne( CCR0, Lretry); + } + if (RegCollision) __ subf(Rres, Rsrc, Rtmp); + __ fence(); + %} + + enc_class enc_GetAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + + MacroAssembler _masm(&cbuf); + Register Rtmp = R0; + Register Rres = $res$$Register; + Register Rsrc = $src$$Register; + Register Rptr = $mem_ptr$$Register; + bool RegCollision = (Rres == Rsrc) || (Rres == Rptr); + Register Rold = RegCollision ? 
Rtmp : Rres; + + Label Lretry; + __ bind(Lretry); + __ ldarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update()); + __ add(Rtmp, Rsrc, Rold); + __ stdcx_(Rtmp, Rptr); + if (UseStaticBranchPredictionInCompareAndSwapPPC64) { + __ bne_predict_not_taken(CCR0, Lretry); + } else { + __ bne( CCR0, Lretry); + } + if (RegCollision) __ subf(Rres, Rsrc, Rtmp); + __ fence(); + %} + + enc_class enc_GetAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + + MacroAssembler _masm(&cbuf); + Register Rtmp = R0; + Register Rres = $res$$Register; + Register Rsrc = $src$$Register; + Register Rptr = $mem_ptr$$Register; + bool RegCollision = (Rres == Rsrc) || (Rres == Rptr); + Register Rold = RegCollision ? Rtmp : Rres; + + Label Lretry; + __ bind(Lretry); + __ lwarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update()); + __ stwcx_(Rsrc, Rptr); + if (UseStaticBranchPredictionInCompareAndSwapPPC64) { + __ bne_predict_not_taken(CCR0, Lretry); + } else { + __ bne( CCR0, Lretry); + } + if (RegCollision) __ mr(Rres, Rtmp); + __ fence(); + %} + + enc_class enc_GetAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + + MacroAssembler _masm(&cbuf); + Register Rtmp = R0; + Register Rres = $res$$Register; + Register Rsrc = $src$$Register; + Register Rptr = $mem_ptr$$Register; + bool RegCollision = (Rres == Rsrc) || (Rres == Rptr); + Register Rold = RegCollision ? Rtmp : Rres; + + Label Lretry; + __ bind(Lretry); + __ ldarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update()); + __ stdcx_(Rsrc, Rptr); + if (UseStaticBranchPredictionInCompareAndSwapPPC64) { + __ bne_predict_not_taken(CCR0, Lretry); + } else { + __ bne( CCR0, Lretry); + } + if (RegCollision) __ mr(Rres, Rtmp); + __ fence(); + %} + + // This enc_class is needed so that scheduler gets proper + // input mapping for latency computation. 
+  enc_class enc_andc(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_andc);
+    MacroAssembler _masm(&cbuf);
+    __ andc($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+
+  // Int-to-boolean conversion via compare and branch: dst is loaded with
+  // 'zero' and, unless src compares equal to 0 (32-bit compare), overwritten
+  // with 'notzero'.
+  enc_class enc_convI2B_regI__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+
+    MacroAssembler _masm(&cbuf);
+
+    Label done;
+    __ cmpwi($crx$$CondRegister, $src$$Register, 0);
+    __ li($dst$$Register, $zero$$constant);
+    __ beq($crx$$CondRegister, done);
+    __ li($dst$$Register, $notzero$$constant);
+    __ bind(done);
+  %}
+
+  // Pointer-to-boolean conversion: same scheme as enc_convI2B_regI__cmove,
+  // but with a 64-bit compare (cmpdi) of src against 0.
+  enc_class enc_convP2B_regP__cmove(iRegIdst dst, iRegPsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+
+    MacroAssembler _masm(&cbuf);
+
+    Label done;
+    __ cmpdi($crx$$CondRegister, $src$$Register, 0);
+    __ li($dst$$Register, $zero$$constant);
+    __ beq($crx$$CondRegister, done);
+    __ li($dst$$Register, $notzero$$constant);
+    __ bind(done);
+  %}
+
+  // Conditional load of a long from a stack slot: the 'ld' into dst is
+  // skipped when the 'bso' branch on crx is taken.
+  enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsReg crx, stackSlotL mem ) %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
+
+    MacroAssembler _masm(&cbuf);
+    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
+    Label done;
+    __ bso($crx$$CondRegister, done);
+    __ ld($dst$$Register, Idisp, $mem$$base$$Register);
+    // TODO PPC port __ endgroup_if_needed(_size == 12);
+    __ bind(done);
+  %}
+
+  // Conditional branch to 'lbl'. When static branch prediction for uncommon
+  // paths is enabled, a taken/not-taken hint is derived from the branch
+  // probability _prob.
+  enc_class enc_bc(flagsReg crx, cmpOp cmp, Label lbl) %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_bc);
+
+    MacroAssembler _masm(&cbuf);
+    Label d;   // dummy
+    __ bind(d);
+    Label* p = ($lbl$$label);
+    // `p' is `NULL' when this encoding class is used only to
+    // determine the size of the encoded instruction.
+    Label& l = (NULL == p)? d : *(p);
+    int cc = $cmp$$cmpcode;
+    int flags_reg = $crx$$reg;
+    assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
+    int bhint = Assembler::bhintNoHint;
+
+    if (UseStaticBranchPredictionForUncommonPathsPPC64) {
+      if (_prob <= PROB_NEVER) {
+        bhint = Assembler::bhintIsNotTaken;
+      } else if (_prob >= PROB_ALWAYS) {
+        bhint = Assembler::bhintIsTaken;
+      }
+    }
+
+    __ bc(Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)),
+          cc_to_biint(cc, flags_reg),
+          l);
+  %}
+
+  // Far variant of enc_bc: same hint computation, emitted through bc_far.
+  enc_class enc_bc_far(flagsReg crx, cmpOp cmp, Label lbl) %{
+    // The scheduler doesn't know about branch shortening, so we set the opcode
+    // to ppc64Opcode_bc in order to hide this detail from the scheduler.
+    // TODO: PPC port $archOpcode(ppc64Opcode_bc);
+
+    MacroAssembler _masm(&cbuf);
+    Label d;    // dummy
+    __ bind(d);
+    Label* p = ($lbl$$label);
+    // `p' is `NULL' when this encoding class is used only to
+    // determine the size of the encoded instruction.
+    Label& l = (NULL == p)? d : *(p);
+    int cc = $cmp$$cmpcode;
+    int flags_reg = $crx$$reg;
+    int bhint = Assembler::bhintNoHint;
+
+    if (UseStaticBranchPredictionForUncommonPathsPPC64) {
+      if (_prob <= PROB_NEVER) {
+        bhint = Assembler::bhintIsNotTaken;
+      } else if (_prob >= PROB_ALWAYS) {
+        bhint = Assembler::bhintIsTaken;
+      }
+    }
+
+    // Tell the conditional far branch to optimize itself when being relocated.
+    __ bc_far(Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)),
+              cc_to_biint(cc, flags_reg),
+              l,
+              MacroAssembler::bc_far_optimize_on_relocate);
+  %}
+
+  // Branch used with Power6 scheduling (can be shortened without changing the node).
+  // Not ported yet: the emitting code is disabled and the encoding ends in
+  // Unimplemented().
+  enc_class enc_bc_short_far(flagsReg crx, cmpOp cmp, Label lbl) %{
+    // The scheduler doesn't know about branch shortening, so we set the opcode
+    // to ppc64Opcode_bc in order to hide this detail from the scheduler.
+    // TODO: PPC port $archOpcode(ppc64Opcode_bc);
+
+    MacroAssembler _masm(&cbuf);
+    Label d;   // dummy
+    __ bind(d);
+    Label* p = ($lbl$$label);
+    // `p' is `NULL' when this encoding class is used only to
+    // determine the size of the encoded instruction.
+    Label& l = (NULL == p)? d : *(p);
+    int cc = $cmp$$cmpcode;
+    int flags_reg = $crx$$reg;
+    int bhint = Assembler::bhintNoHint;
+
+    if (UseStaticBranchPredictionForUncommonPathsPPC64) {
+      if (_prob <= PROB_NEVER) {
+        bhint = Assembler::bhintIsNotTaken;
+      } else if (_prob >= PROB_ALWAYS) {
+        bhint = Assembler::bhintIsTaken;
+      }
+    }
+
+#if 0 // TODO: PPC port
+    if (_size == 8) {
+      // Tell the conditional far branch to optimize itself when being relocated.
+      __ bc_far(Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)),
+                cc_to_biint(cc, flags_reg),
+                l,
+                MacroAssembler::bc_far_optimize_on_relocate);
+    } else {
+      __ bc    (Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)),
+                cc_to_biint(cc, flags_reg),
+                l);
+    }
+#endif
+    Unimplemented();
+  %}
+
+  // Postalloc expand emitter for loading a replicated float constant from
+  // the method's TOC.
+  // Enc_class needed as constanttablebase is not supported by postalloc
+  // expand.
+  enc_class postalloc_expand_load_replF_constant(iRegLdst dst, immF src, iRegLdst toc) %{
+    // Create new nodes.
+
+    // Make an operand with the bit pattern to load as float.
+    immLOper *op_repl = new (C) immLOper((jlong)replicate_immF(op_src->constantF()));
+
+    loadConLNodesTuple loadConLNodes =
+      loadConLNodesTuple_create(C, ra_, n_toc, op_repl,
+                                ra_->get_reg_second(this), ra_->get_reg_first(this));
+
+    // Push new nodes.
+    if (loadConLNodes._large_hi) nodes->push(loadConLNodes._large_hi);
+    if (loadConLNodes._last)     nodes->push(loadConLNodes._last);
+
+    assert(nodes->length() >= 1, "must have created at least 1 node");
+    assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long");
+  %}
+
+  // This enc_class is needed so that scheduler gets proper
+  // input mapping for latency computation.
+  enc_class enc_poll(immI dst, iRegLdst poll) %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_ld);
+    // Fake operand dst needed for PPC scheduler.
+    assert($dst$$constant == 0x0, "dst must be 0x0");
+
+    MacroAssembler _masm(&cbuf);
+    // Mark the code position where the load from the safepoint
+    // polling page was emitted as relocInfo::poll_type.
+    __ relocate(relocInfo::poll_type);
+    __ load_from_polling_page($poll$$Register);
+  %}
+
+  // A Java static call or a runtime call.
+  //
+  // Branch-and-link relative to a trampoline.
+  // The trampoline loads the target address and does a long branch to there.
+  // In case we call java, the trampoline branches to an interpreter_stub
+  // which loads the inline cache and the real call target from the constant pool.
+  //
+  // This basically looks like this:
+  //
+  // >>>> consts      -+  -+
+  //                   |   |- offset1
+  // [call target1]    | <-+
+  // [IC cache]        |- offset2
+  // [call target2] <--+
+  //
+  // <<<< consts
+  // >>>> insts
+  //
+  // bl offset16               -+  -+             ??? // How many bits available?
+  //                            |   |
+  // <<<< insts                 |   |
+  // >>>> stubs                 |   |
+  //                            |   |- trampoline_stub_Reloc
+  // trampoline stub:           | <-+
+  //   r2 = toc                 |
+  //   r2 = [r2 + offset1]      |       // Load call target1 from const section
+  //   mtctr r2                 |
+  //   bctr                     |- static_stub_Reloc
+  // comp_to_interp_stub:   <---+
+  //   r1 = toc
+  //   ICreg = [r1 + IC_offset]         // Load IC from const section
+  //   r1    = [r1 + offset2]           // Load call target2 from const section
+  //   mtctr r1
+  //   bctr
+  //
+  // <<<< stubs
+  //
+  // The call instruction in the code either
+  // - Branches directly to a compiled method if the offset is encodable in instruction.
+  // - Branches to the trampoline stub if the offset to the compiled method is not encodable.
+  // - Branches to the compiled_to_interp stub if the target is interpreted.
+  //
+  // Further there are three relocations from the loads to the constants in
+  // the constant section.
+  //
+  // Usage of r1 and r2 in the stubs allows to distinguish them.
+  enc_class enc_java_static_call(method meth) %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_bl);
+
+    MacroAssembler _masm(&cbuf);
+    address entry_point = (address)$meth$$method;
+
+    if (!_method) {
+      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
+      emit_call_with_trampoline_stub(_masm, entry_point, relocInfo::runtime_call_type);
+    } else {
+      // Remember the offset not the address.
+      const int start_offset = __ offset();
+      // The trampoline stub.
+      if (!Compile::current()->in_scratch_emit_size()) {
+        // No entry point given, use the current pc.
+        // Make sure branch fits into
+        if (entry_point == 0) entry_point = __ pc();
+
+        // Put the entry point as a constant into the constant pool.
+        const address entry_point_toc_addr = __ address_constant(entry_point, RelocationHolder::none);
+        const int     entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
+
+        // Emit the trampoline stub which will be related to the branch-and-link below.
+        emit_trampoline_stub(_masm, entry_point_toc_offset, start_offset);
+        // Stop emitting if the compilation has been marked as failing, as
+        // enc_java_dynamic_call_sched does after emitting its trampoline stub.
+        if (Compile::current()->env()->failing())
+          return;
+        __ relocate(_optimized_virtual ?
+                    relocInfo::opt_virtual_call_type : relocInfo::static_call_type);
+      }
+
+      // The real call.
+      // Note: At this point we do not have the address of the trampoline
+      // stub, and the entry point might be too far away for bl, so __ pc()
+      // serves as dummy and the bl will be patched later.
+      cbuf.set_insts_mark();
+      __ bl(__ pc());  // Emits a relocation.
+
+      // The stub for call to interpreter.
+      CompiledStaticCall::emit_to_interp_stub(cbuf);
+    }
+  %}
+
+  // Emit a method handle call.
+  //
+  // Method handle calls from compiled to compiled are going thru a
+  // c2i -> i2c adapter, extending the frame for their arguments. The
+  // caller however, returns directly to the compiled callee, that has
+  // to cope with the extended frame. We restore the original frame by
+  // loading the callers sp and adding the calculated framesize.
+  enc_class enc_java_handle_call(method meth) %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+
+    MacroAssembler _masm(&cbuf);
+    address entry_point = (address)$meth$$method;
+
+    // Remember the offset not the address.
+    const int start_offset = __ offset();
+    // The trampoline stub.
+    if (!ra_->C->in_scratch_emit_size()) {
+      // No entry point given, use the current pc.
+      // Make sure branch fits into
+      if (entry_point == 0) entry_point = __ pc();
+
+      // Put the entry point as a constant into the constant pool.
+      const address entry_point_toc_addr = __ address_constant(entry_point, RelocationHolder::none);
+      const int     entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
+
+      // Emit the trampoline stub which will be related to the branch-and-link below.
+      emit_trampoline_stub(_masm, entry_point_toc_offset, start_offset);
+      // Stop emitting if the compilation has been marked as failing, as
+      // enc_java_dynamic_call_sched does after emitting its trampoline stub.
+      if (ra_->C->env()->failing())
+        return;
+      assert(_optimized_virtual, "methodHandle call should be a virtual call");
+      __ relocate(relocInfo::opt_virtual_call_type);
+    }
+
+    // The real call.
+    // Note: At this point we do not have the address of the trampoline
+    // stub, and the entry point might be too far away for bl, so __ pc()
+    // serves as dummy and the bl will be patched later.
+    cbuf.set_insts_mark();
+    __ bl(__ pc());  // Emits a relocation.
+
+    assert(_method, "execute next statement conditionally");
+    // The stub for call to interpreter.
+    CompiledStaticCall::emit_to_interp_stub(cbuf);
+
+    // Restore original sp.
+    __ ld(R11_scratch1, 0, R1_SP); // Load caller sp.
+    const long framesize = ra_->C->frame_slots() << LogBytesPerInt;
+    unsigned int bytes = (unsigned int)framesize;
+    long offset = Assembler::align_addr(bytes, frame::alignment_in_bytes);
+    // Use a single addi when -offset fits into a signed 16-bit immediate.
+    if (Assembler::is_simm(-offset, 16)) {
+      __ addi(R1_SP, R11_scratch1, -offset);
+    } else {
+      __ load_const_optimized(R12_scratch2, -offset);
+      __ add(R1_SP, R11_scratch1, R12_scratch2);
+    }
+#ifdef ASSERT
+  __ ld(R12_scratch2, 0, R1_SP); // Load from unextended_sp.
+  __ cmpd(CCR0, R11_scratch1, R12_scratch2);
+  __ asm_assert_eq("backlink changed", 0x8000);
+#endif
+    // If fails should store backlink before unextending.
+
+    if (ra_->C->env()->failing())
+      return;
+  %}
+
+  // Second node of expanded dynamic call - the call.
+  enc_class enc_java_dynamic_call_sched(method meth) %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_bl);
+
+    MacroAssembler _masm(&cbuf);
+
+    if (!ra_->C->in_scratch_emit_size()) {
+      // Create a call trampoline stub for the given method.
+      const address entry_point = !($meth$$method) ? 0 : (address)$meth$$method;
+      const address entry_point_const = __ address_constant(entry_point, RelocationHolder::none);
+      const int entry_point_const_toc_offset = __ offset_to_method_toc(entry_point_const);
+      emit_trampoline_stub(_masm, entry_point_const_toc_offset, __ offset());
+
+      if (ra_->C->env()->failing())
+        return;
+
+      // Build relocation at call site with ic position as data.
+      assert((_load_ic_hi_node != NULL && _load_ic_node == NULL) ||
+             (_load_ic_hi_node == NULL && _load_ic_node != NULL),
+             "must have one, but can't have both");
+      assert((_load_ic_hi_node != NULL && _load_ic_hi_node->_cbuf_insts_offset != -1) ||
+             (_load_ic_node != NULL    && _load_ic_node->_cbuf_insts_offset != -1),
+             "must contain instruction offset");
+      const int virtual_call_oop_addr_offset = _load_ic_hi_node != NULL
+        ? _load_ic_hi_node->_cbuf_insts_offset
+        : _load_ic_node->_cbuf_insts_offset;
+      const address virtual_call_oop_addr = __ addr_at(virtual_call_oop_addr_offset);
+      assert(MacroAssembler::is_load_const_from_method_toc_at(virtual_call_oop_addr),
+             "should be load from TOC");
+
+      __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr));
+    }
+
+    // At this point I do not have the address of the trampoline stub,
+    // and the entry point might be too far away for bl. Pc() serves
+    // as dummy and bl will be patched later.
+    __ bl((address) __ pc());
+  %}
+
+  // postalloc expand emitter for virtual calls.
+  // Expands into the node(s) loading the inline cache constant into R19 and
+  // a CallDynamicJavaDirectSchedNode that takes over the fields, inputs and
+  // operands of this node.
+  enc_class postalloc_expand_java_dynamic_call_sched(method meth, iRegLdst toc) %{
+    // Toc is in return address field, though not accessible via postalloc_expand
+    // functionality. The expansion refers to it as n_toc.
+
+    // Create the nodes for loading the IC from the TOC.
+    loadConLNodesTuple loadConLNodes_IC =
+      loadConLNodesTuple_create(C, ra_, n_toc, new (C) immLOper((jlong)Universe::non_oop_word()),
+                                OptoReg::Name(R19_H_num), OptoReg::Name(R19_num));
+
+    // Create the call node.
+    CallDynamicJavaDirectSchedNode *call = new (C) CallDynamicJavaDirectSchedNode();
+    call->_method_handle_invoke = _method_handle_invoke;
+    call->_vtable_index      = _vtable_index;
+    call->_method            = _method;
+    call->_bci               = _bci;
+    call->_optimized_virtual = _optimized_virtual;
+    call->_tf                = _tf;
+    call->_entry_point       = _entry_point;
+    call->_cnt               = _cnt;
+    call->_argsize           = _argsize;
+    call->_oop_map           = _oop_map;
+    call->_jvms              = _jvms;
+    call->_jvmadj            = _jvmadj;
+    call->_in_rms            = _in_rms;
+    call->_nesting           = _nesting;
+
+    // New call needs all inputs of old call.
+    // Req...
+    for (uint i = 0; i < req(); ++i) {
+      if (i != TypeFunc::ReturnAdr) {
+        call->add_req(in(i));
+      } else {
+        // The expanded node does not need toc any more.
+        call->add_req(C->top());
+      }
+    }
+    // ...as well as prec
+    for (uint i = req(); i < len() ; ++i) {
+      call->add_prec(in(i));
+    }
+
+    // The cache must come before the call, but it's not a req edge.
+    // GL: actually it should be a req edge to express that the
+    // register must be live in the Call. But as R19 is declared to be
+    // the inline_cache_reg that's fine.
+    call->add_prec(loadConLNodes_IC._last);
+    // Remember nodes loading the inline cache into r19.
+    call->_load_ic_hi_node = loadConLNodes_IC._large_hi;
+    call->_load_ic_node    = loadConLNodes_IC._small;
+
+    // Operands for new nodes.
+    call->_opnds[0] = _opnds[0];
+    call->_opnds[1] = _opnds[1];
+
+    // Only the inline cache is associated with a register.
+    assert(Matcher::inline_cache_reg() == OptoReg::Name(R19_num), "ic reg should be R19");
+
+    // Push new nodes.
+    if (loadConLNodes_IC._large_hi) nodes->push(loadConLNodes_IC._large_hi);
+    if (loadConLNodes_IC._last)     nodes->push(loadConLNodes_IC._last);
+    nodes->push(call);
+  %}
+
+  // Compound version of call dynamic
+  // Not ported yet: the emitting code is disabled and the encoding ends in
+  // Unimplemented().
+  enc_class enc_java_dynamic_call(method meth, iRegLdst toc) %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    MacroAssembler _masm(&cbuf);
+    int start_offset = __ offset();
+
+    Register Rtoc = (ra_) ? $constanttablebase : R2_TOC;
+#if 0
+    if (_vtable_index < 0) {
+      // Must be invalid_vtable_index, not nonvirtual_vtable_index.
+      assert(_vtable_index == Method::invalid_vtable_index, "correct sentinel value");
+      Register ic_reg = as_Register(Matcher::inline_cache_reg_encode());
+      AddressLiteral oop = __ allocate_metadata_address((Metadata *)Universe::non_oop_word());
+
+      address virtual_call_oop_addr = __ pc();
+      __ load_const_from_method_toc(ic_reg, oop, Rtoc);
+      // CALL to fixup routine. Fixup routine uses ScopeDesc info
+      // to determine who we intended to call.
+      __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr));
+      emit_call_with_trampoline_stub(_masm, (address)$meth$$method, relocInfo::none);
+      assert(((MachCallDynamicJavaNode*)this)->ret_addr_offset() == __ offset() - start_offset,
+             "Fix constant in ret_addr_offset()");
+    } else {
+      assert(!UseInlineCaches, "expect vtable calls only if not using ICs");
+      // Go thru the vtable. Get receiver klass. Receiver already
+      // checked for non-null. If we'll go thru a C2I adapter, the
+      // interpreter expects method in R19_method.
+
+      __ load_klass(R11_scratch1, R3);
+
+      int entry_offset = InstanceKlass::vtable_start_offset() + _vtable_index * vtableEntry::size();
+      int v_off = entry_offset * wordSize + vtableEntry::method_offset_in_bytes();
+      __ li(R19_method, v_off);
+      __ ldx(R19_method/*method oop*/, R19_method/*method offset*/, R11_scratch1/*class*/);
+      // NOTE: for vtable dispatches, the vtable entry will never be
+      // null. However it may very well end up in handle_wrong_method
+      // if the method is abstract for the particular class.
+      __ ld(R11_scratch1, in_bytes(Method::from_compiled_offset()), R19_method);
+      // Call target. Either compiled code or C2I adapter.
+      __ mtctr(R11_scratch1);
+      __ bctrl();
+      if (((MachCallDynamicJavaNode*)this)->ret_addr_offset() != __ offset() - start_offset) {
+        tty->print(" %d, %d\n", ((MachCallDynamicJavaNode*)this)->ret_addr_offset(),__ offset() - start_offset);
+      }
+      assert(((MachCallDynamicJavaNode*)this)->ret_addr_offset() == __ offset() - start_offset,
+             "Fix constant in ret_addr_offset()");
+    }
+#endif
+    Unimplemented();  // ret_addr_offset not yet fixed. Depends on compressed oops (load klass!).
+  %}
+
+  // a runtime call
+  enc_class enc_java_to_runtime_call (method meth) %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+
+    MacroAssembler _masm(&cbuf);
+    const address start_pc = __ pc();
+
+    // The function we're going to call.
+    // A default-constructed descriptor stands in when no method is given.
+    FunctionDescriptor fdtemp;
+    const FunctionDescriptor* fd = !($meth$$method) ? &fdtemp : (FunctionDescriptor*)$meth$$method;
+
+    Register Rtoc = R12_scratch2;
+    // Calculate the method's TOC.
+    __ calculate_address_from_global_toc(Rtoc, __ method_toc());
+    // Put entry, env, toc into the constant pool, this needs up to 3 constant
+    // pool entries; call_c_using_toc will optimize the call.
+    __ call_c_using_toc(fd, relocInfo::runtime_call_type, Rtoc);
+
+    // Check the ret_addr_offset.
+    assert(((MachCallRuntimeNode*)this)->ret_addr_offset() ==  __ last_calls_return_pc() - start_pc,
+           "Fix constant in ret_addr_offset()");
+  %}
+
+  // Move to ctr for leaf call.
+  // This enc_class is needed so that scheduler gets proper
+  // input mapping for latency computation.
+  enc_class enc_leaf_call_mtctr(iRegLsrc src) %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_mtctr);
+    MacroAssembler _masm(&cbuf);
+    __ mtctr($src$$Register);
+  %}
+
+  // postalloc expand emitter for runtime leaf calls.
+  // Expands into constant loads for the function descriptor's entry (R12),
+  // env (R11) and toc (R2), an mtctr node fed by the entry load, and a
+  // CallLeafDirectNode that takes over the fields and inputs of this node.
+  enc_class postalloc_expand_java_to_runtime_call(method meth, iRegLdst toc) %{
+    // Get the struct that describes the function we are about to call.
+    FunctionDescriptor* fd = (FunctionDescriptor*) this->entry_point();
+    assert(fd, "need fd here");
+    // new nodes
+    loadConLNodesTuple loadConLNodes_Entry;
+    loadConLNodesTuple loadConLNodes_Env;
+    loadConLNodesTuple loadConLNodes_Toc;
+    MachNode         *mtctr = NULL;
+    MachCallLeafNode *call  = NULL;
+
+    // Create nodes and operands for loading the entry point.
+    loadConLNodes_Entry = loadConLNodesTuple_create(C, ra_, n_toc, new (C) immLOper((jlong) fd->entry()),
+                                                    OptoReg::Name(R12_H_num), OptoReg::Name(R12_num));
+
+
+    // Create nodes and operands for loading the env pointer.
+    if (fd->env() != NULL) {
+      loadConLNodes_Env = loadConLNodesTuple_create(C, ra_, n_toc, new (C) immLOper((jlong) fd->env()),
+                                                    OptoReg::Name(R11_H_num), OptoReg::Name(R11_num));
+    } else {
+      // No env pointer: a single 16-bit immediate load of 0 into R11 suffices.
+      loadConLNodes_Env._large_hi = NULL;
+      loadConLNodes_Env._large_lo = NULL;
+      loadConLNodes_Env._small    = NULL;
+      loadConLNodes_Env._last = new (C) loadConL16Node();
+      loadConLNodes_Env._last->_opnds[0] = new (C) iRegLdstOper();
+      loadConLNodes_Env._last->_opnds[1] = new (C) immL16Oper(0);
+      ra_->set_pair(loadConLNodes_Env._last->_idx, OptoReg::Name(R11_H_num), OptoReg::Name(R11_num));
+    }
+
+    // Create nodes and operands for loading the Toc point.
+    loadConLNodes_Toc = loadConLNodesTuple_create(C, ra_, n_toc, new (C) immLOper((jlong) fd->toc()),
+                                                  OptoReg::Name(R2_H_num), OptoReg::Name(R2_num));
+    // mtctr node
+    mtctr = new (C) CallLeafDirect_mtctrNode();
+
+    assert(loadConLNodes_Entry._last != NULL, "entry must exist");
+    mtctr->add_req(0, loadConLNodes_Entry._last);
+
+    mtctr->_opnds[0] = new (C) iRegLdstOper();
+    mtctr->_opnds[1] = new (C) iRegLdstOper();
+
+    // call node
+    call = new (C) CallLeafDirectNode();
+
+    call->_opnds[0] = _opnds[0];
+    call->_opnds[1] = new (C) methodOper((intptr_t) fd->entry()); // may get set later
+
+    // Make the new call node look like the old one.
+    call->_name        = _name;
+    call->_tf          = _tf;
+    call->_entry_point = _entry_point;
+    call->_cnt         = _cnt;
+    call->_argsize     = _argsize;
+    call->_oop_map     = _oop_map;
+    guarantee(!_jvms, "You must clone the jvms and adapt the offsets by fix_jvms().");
+    call->_jvms        = NULL;
+    call->_jvmadj      = _jvmadj;
+    call->_in_rms      = _in_rms;
+    call->_nesting     = _nesting;
+
+
+    // New call needs all inputs of old call.
+    // Req...
+    for (uint i = 0; i < req(); ++i) {
+      if (i != TypeFunc::ReturnAdr) {
+        call->add_req(in(i));
+      } else {
+        // put the mtctr where ReturnAdr would be
+        call->add_req(mtctr);
+      }
+    }
+
+    // These must be required edges, as the registers are live up to
+    // the call. Else the constants are handled as kills.
+    call->add_req(loadConLNodes_Env._last);
+    call->add_req(loadConLNodes_Toc._last);
+
+    // ...as well as prec
+    for (uint i = req(); i < len(); ++i) {
+      call->add_prec(in(i));
+    }
+
+    // registers
+    ra_->set1(mtctr->_idx, OptoReg::Name(SR_CTR_num));
+
+    // Insert the new nodes.
+    if (loadConLNodes_Entry._large_hi) nodes->push(loadConLNodes_Entry._large_hi);
+    if (loadConLNodes_Entry._last)     nodes->push(loadConLNodes_Entry._last);
+    if (loadConLNodes_Env._large_hi)   nodes->push(loadConLNodes_Env._large_hi);
+    if (loadConLNodes_Env._last)       nodes->push(loadConLNodes_Env._last);
+    if (loadConLNodes_Toc._large_hi)   nodes->push(loadConLNodes_Toc._large_hi);
+    if (loadConLNodes_Toc._last)       nodes->push(loadConLNodes_Toc._last);
+    nodes->push(mtctr);
+    nodes->push(call);
+  %}
+%}
+
+//----------FRAME--------------------------------------------------------------
+// Definition of frame structure and management information.
+
+frame %{
+  // What direction does stack grow in (assumed to be same for native & Java).
+  stack_direction(TOWARDS_LOW);
+
+  // These two registers define part of the calling convention between
+  // compiled code and the interpreter.
+
+  // Inline Cache Register or methodOop for I2C.
+  inline_cache_reg(R19); // R19_method
+
+  // Method Oop Register when calling interpreter.
+  interpreter_method_oop_reg(R19); // R19_method
+
+  // Optional: name the operand used by cisc-spilling to access
+  // [stack_pointer + offset].
+  cisc_spilling_operand_name(indOffset);
+
+  // Number of stack slots consumed by a Monitor enter.
+  sync_stack_slots((frame::jit_monitor_size / VMRegImpl::stack_slot_size));
+
+  // Compiled code's Frame Pointer.
+  frame_pointer(R1); // R1_SP
+
+  // Interpreter stores its frame pointer in a register which is
+  // stored to the stack by I2CAdaptors. I2CAdaptors convert from
+  // interpreted java to compiled java.
+  //
+  // R14_state holds pointer to caller's cInterpreter.
+  interpreter_frame_pointer(R14); // R14_state
+
+  stack_alignment(frame::alignment_in_bytes);
+
+  in_preserve_stack_slots((frame::jit_in_preserve_size / VMRegImpl::stack_slot_size));
+
+  // Number of outgoing stack slots killed above the
+  // out_preserve_stack_slots for calls to C. Supports the var-args
+  // backing area for register parms.
+  //
+  varargs_C_out_slots_killed(((frame::abi_112_size - frame::jit_out_preserve_size) / VMRegImpl::stack_slot_size));
+
+  // The after-PROLOG location of the return address. Location of
+  // return address specifies a type (REG or STACK) and a number
+  // representing the register number (i.e. - use a register name) or
+  // stack slot.
+  //
+  // A: Link register is stored in stack slot ...
+  // M:  ... but it's in the caller's frame according to PPC-64 ABI.
+  // J: Therefore, we make sure that the link register is also in R11_scratch1
+  //    at the end of the prolog.
+  // B: We use R20, now.
+  //return_addr(REG R20);
+
+  // G: After reading the comments made by all the luminaries on their
+  //    failure to tell the compiler where the return address really is,
+  //    I hardly dare to try myself. However, I'm convinced it's in slot
+  //    4 what apparently works and saves us some spills.
+  return_addr(STACK 4);
+
+  // This is the body of the function
+  //
+  // void Matcher::calling_convention(OptoRegPair* sig, // array of ideal regs
+  //                                  uint length,      // length of array
+  //                                  bool is_outgoing)
+  //
+  // The `sig' array is to be updated. sig[j] represents the location
+  // of the j-th argument, either a register or a stack slot.
+
+  // Comment taken from i486.ad:
+  // Body of function which returns an integer array locating
+  // arguments either in registers or in stack slots. Passed an array
+  // of ideal registers called "sig" and a "length" count. Stack-slot
+  // offsets are based on outgoing arguments, i.e. a CALLER setting up
+  // arguments for a CALLEE. Incoming stack arguments are
+  // automatically biased by the preserve_stack_slots field above.
+  calling_convention %{
+    // No difference between ingoing/outgoing. Just pass false.
+    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
+  %}
+
+  // Comment taken from i486.ad:
+  // Body of function which returns an integer array locating
+  // arguments either in registers or in stack slots. Passed an array
+  // of ideal registers called "sig" and a "length" count. Stack-slot
+  // offsets are based on outgoing arguments, i.e. a CALLER setting up
+  // arguments for a CALLEE. Incoming stack arguments are
+  // automatically biased by the preserve_stack_slots field above.
+  c_calling_convention %{
+    // This is obviously always outgoing.
+    // C argument in register AND stack slot.
+    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
+  %}
+
+  // Location of native (C/C++) and interpreter return values. This
+  // is specified to be the same as Java. In the 32-bit VM, long
+  // values are actually returned from native calls in O0:O1 and
+  // returned to the interpreter in I0:I1. The copying to and from
+  // the register pairs is done by the appropriate call and epilog
+  // opcodes. This simplifies the register allocator.
+  c_return_value %{
+    assert((ideal_reg >= Op_RegI && ideal_reg <= Op_RegL) ||
+            (ideal_reg == Op_RegN && Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0),
+            "only return normal values");
+    // The tables are indexed by ideal register type; the order must match
+    // the enum listed below.
+    // enum names from opcodes.hpp:    Op_Node Op_Set Op_RegN       Op_RegI       Op_RegP       Op_RegF       Op_RegD       Op_RegL
+    static int typeToRegLo[Op_RegL+1] = { 0,   0,     R3_num,       R3_num,       R3_num,       F1_num,       F1_num,       R3_num };
+    static int typeToRegHi[Op_RegL+1] = { 0,   0,     OptoReg::Bad, R3_H_num,     R3_H_num,     OptoReg::Bad, F1_H_num,     R3_H_num };
+    return OptoRegPair(typeToRegHi[ideal_reg], typeToRegLo[ideal_reg]);
+  %}
+
+  // Location of compiled Java return values.  Same as C
+  return_value %{
+    assert((ideal_reg >= Op_RegI && ideal_reg <= Op_RegL) ||
+            (ideal_reg == Op_RegN && Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0),
+            "only return normal values");
+    // The tables are indexed by ideal register type; the order must match
+    // the enum listed below.
+    // enum names from opcodes.hpp:    Op_Node Op_Set Op_RegN       Op_RegI       Op_RegP       Op_RegF       Op_RegD       Op_RegL
+    static int typeToRegLo[Op_RegL+1] = { 0,   0,     R3_num,       R3_num,       R3_num,       F1_num,       F1_num,       R3_num };
+    static int typeToRegHi[Op_RegL+1] = { 0,   0,     OptoReg::Bad, R3_H_num,     R3_H_num,     OptoReg::Bad, F1_H_num,     R3_H_num };
+    return OptoRegPair(typeToRegHi[ideal_reg], typeToRegLo[ideal_reg]);
+  %}
+%}
+
+
+//----------ATTRIBUTES---------------------------------------------------------
+
+//----------Operand Attributes-------------------------------------------------
+op_attrib op_cost(1);          // Required cost attribute.
+
+//----------Instruction Attributes---------------------------------------------
+
+// Cost attribute. required.
+ins_attrib ins_cost(DEFAULT_COST);
+
+// Is this instruction a non-matching short branch variant of some
+// long branch? Not required.
+ins_attrib ins_short_branch(0);
+
+// This instruction does implicit checks at the given machine-instruction offset
+// (optional attribute).
ins_attrib ins_implicit_check_offset(-1);  // TODO: PPC port

// NOTE(review): the two attributes below carry no description in this file;
// both default to 'true'. Their meaning has to be confirmed against the
// shared compiler code that reads them.
ins_attrib ins_implicit_check_follows_matched_true_path(true);
ins_attrib ins_is_TrapBasedCheckNode(true);

// Number of constants.
// This instruction uses the given number of constants
// (optional attribute).
// This is needed to determine in time whether the constant pool will
// exceed 4000 entries. Before postalloc_expand the overall number of constants
// is determined. It's also used to compute the constant pool size
// in Output().
ins_attrib ins_num_consts(0);

// Required alignment attribute (must be a power of 2) specifies the
// alignment that some part of the instruction (not necessarily the
// start) requires. If > 1, a compute_padding() function must be
// provided for the instruction.
ins_attrib ins_alignment(1);

// Enforce/prohibit rematerializations.
// - If an instruction is attributed with 'ins_cannot_rematerialize(true)'
//   then rematerialization of that instruction is prohibited and the
//   instruction's value will be spilled if necessary.
//   Causes MachNode::rematerialize() to return false.
// - If an instruction is attributed with 'ins_should_rematerialize(true)'
//   then rematerialization should be enforced and a copy of the instruction
//   should be inserted if possible; rematerialization is not guaranteed.
//   Note: this may result in rematerializations in front of every use.
//   Allows MachNode::rematerialize() to return true.
// (optional attribute)
ins_attrib ins_cannot_rematerialize(false);
ins_attrib ins_should_rematerialize(false);

// Instruction has variable size depending on alignment.
ins_attrib ins_variable_size_depending_on_alignment(false);

// Instruction is a nop.
ins_attrib ins_is_nop(false);

// Instruction is mapped to a MachIfFastLock node (instead of MachFastLock).
ins_attrib ins_use_mach_if_fast_lock_node(false);

// Field for the toc offset of a constant.
//
// This is needed if the toc offset is not encodable as an immediate in
// the PPC load instruction. If so, the upper (hi) bits of the offset are
// added to the toc, and from this a load with immediate is performed.
// With postalloc expand, we get two nodes that require the same offset
// but which don't know about each other. The offset is only known
// when the constant is added to the constant pool during emitting.
// It is generated in the 'hi'-node adding the upper bits, and saved
// in this node. The 'lo'-node has a link to the 'hi'-node and reads
// the offset from there when it gets encoded.
ins_attrib ins_field_const_toc_offset(0);
ins_attrib ins_field_const_toc_offset_hi_node(0);

// A field that can hold the instruction's offset in the code buffer.
// Set in the node's emitter.
ins_attrib ins_field_cbuf_insts_offset(-1);

// Fields for referencing a call's load-IC-node.
// If the toc offset cannot be encoded as an immediate in a load, we
// use two nodes.
ins_attrib ins_field_load_ic_hi_node(0);
ins_attrib ins_field_load_ic_node(0);

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct
// parsing in the ADLC because operands constitute user defined types
// which are used in instruction definitions.
//
// Formats are generated automatically for constants and base registers.
//----------Simple Operands----------------------------------------------------
// Immediate Operands

// Integer Immediate: 32-bit
operand immI() %{
  match(ConI);
  op_cost(40);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 8-bit (accepted by Assembler::is_simm(value, 8)).
operand immI8() %{
  predicate(Assembler::is_simm(n->get_int(), 8));
  op_cost(0);
  match(ConI);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 16-bit
operand immI16() %{
  predicate(Assembler::is_simm(n->get_int(), 16));
  op_cost(0);
  match(ConI);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 32-bit, where lowest 16 bits are 0x0000
// (and at least one of the upper 16 bits is set).
operand immIhi16() %{
  predicate(((n->get_int() & 0xffff0000) != 0) && ((n->get_int() & 0xffff) == 0));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate whose negation passes is_power_of_2_long().
// NOTE(review): the negation is done on the int value before the casts;
// confirm the intended behavior for the most negative int.
operand immInegpow2() %{
  predicate(is_power_of_2_long((jlong) (julong) (juint) (-(n->get_int()))));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate for which (value + 1) passes is_power_of_2_long().
operand immIpow2minus1() %{
  predicate(is_power_of_2_long((((jlong) (n->get_int()))+1)));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate that passes is_power_of_2_long() after being cast
// through juint/julong/jlong.
operand immIpowerOf2() %{
  predicate(is_power_of_2_long((((jlong) (julong) (juint) (n->get_int())))));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned Integer Immediate: the values 0-31
operand uimmI5() %{
  predicate(Assembler::is_uimm(n->get_int(), 5));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned Integer Immediate: 6-bit
operand uimmI6() %{
  predicate(Assembler::is_uimm(n->get_int(), 6));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned Integer Immediate: 6-bit int, greater than or equal to 32
operand uimmI6_ge32() %{
  predicate(Assembler::is_uimm(n->get_int(), 6) && n->get_int() >= 32);
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned Integer Immediate: 15-bit
operand uimmI15() %{
  predicate(Assembler::is_uimm(n->get_int(), 15));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned Integer Immediate: 16-bit
operand uimmI16() %{
  predicate(Assembler::is_uimm(n->get_int(), 16));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'int 0'.
operand immI_0() %{
  predicate(n->get_int() == 0);
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'int 1'.
operand immI_1() %{
  predicate(n->get_int() == 1);
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'int -1'.
operand immI_minus1() %{
  predicate(n->get_int() == -1);
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// int value 16.
operand immI_16() %{
  predicate(n->get_int() == 16);
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// int value 24.
operand immI_24() %{
  predicate(n->get_int() == 24);
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Compressed oops constants
// Pointer Immediate
operand immN() %{
  match(ConN);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immN_0() %{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Compressed klass constants
operand immNKlass() %{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// This operand can be used to avoid matching of an instruct
// with chain rule (its predicate is always false).
operand immNKlass_NM() %{
  match(ConNKlass);
  predicate(false);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate: 64-bit
operand immP() %{
  match(ConP);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Operand to avoid match of loadConP.
// This operand can be used to avoid matching of an instruct
// with chain rule.
operand immP_NM() %{
  match(ConP);
  predicate(false);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'pointer 0'.
operand immP_0() %{
  predicate(n->get_ptr() == 0);
  match(ConP);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// pointer 0x0 or 0x1
operand immP_0or1() %{
  predicate((n->get_ptr() == 0) || (n->get_ptr() == 1));
  match(ConP);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: 64-bit
operand immL() %{
  match(ConL);
  op_cost(40);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: 16-bit
operand immL16() %{
  predicate(Assembler::is_simm(n->get_long(), 16));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: 16-bit, 4-aligned
operand immL16Alg4() %{
  predicate(Assembler::is_simm(n->get_long(), 16) && ((n->get_long() & 0x3) == 0));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: 32-bit, where lowest 16 bits are 0x0000.
operand immL32hi16() %{
  predicate(Assembler::is_simm(n->get_long(), 32) && ((n->get_long() & 0xffffL) == 0L));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: 32-bit
operand immL32() %{
  predicate(Assembler::is_simm(n->get_long(), 32));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: 64-bit, where highest 16 bits are not 0x0000.
// (The predicate additionally requires the lower 48 bits to be zero.)
operand immLhighest16() %{
  predicate((n->get_long() & 0xffff000000000000L) != 0L && (n->get_long() & 0x0000ffffffffffffL) == 0L);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate whose negation passes is_power_of_2_long().
operand immLnegpow2() %{
  predicate(is_power_of_2_long((jlong)-(n->get_long())));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate for which (value + 1) passes is_power_of_2_long();
// the all-ones value is explicitly excluded.
operand immLpow2minus1() %{
  predicate(is_power_of_2_long((((jlong) (n->get_long()))+1)) &&
            (n->get_long() != (jlong)0xffffffffffffffffL));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'long 0'.
operand immL_0() %{
  predicate(n->get_long() == 0L);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'long -1'.
operand immL_minus1() %{
  predicate(n->get_long() == -1L);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned Long Immediate: 16-bit
operand uimmL16() %{
  predicate(Assembler::is_uimm(n->get_long(), 16));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF() %{
  match(ConF);
  op_cost(40);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'float +0.0'.
// The fpclassify/signbit tests reject -0.0, which also compares equal to 0.
operand immF_0() %{
  predicate((n->getf() == 0) &&
            (fpclassify(n->getf()) == FP_ZERO) && (signbit(n->getf()) == 0));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immD() %{
  match(ConD);
  op_cost(40);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Register Operands
// Integer Destination Register
// See definition of reg_class bits32_reg_rw.
operand iRegIdst() %{
  constraint(ALLOC_IN_RC(bits32_reg_rw));
  match(RegI);
  match(rscratch1RegI);
  match(rscratch2RegI);
  match(rarg1RegI);
  match(rarg2RegI);
  match(rarg3RegI);
  match(rarg4RegI);
  format %{ %}
  interface(REG_INTER);
%}

// Integer Source Register
// See definition of reg_class bits32_reg_ro.
operand iRegIsrc() %{
  constraint(ALLOC_IN_RC(bits32_reg_ro));
  match(RegI);
  match(rscratch1RegI);
  match(rscratch2RegI);
  match(rarg1RegI);
  match(rarg2RegI);
  match(rarg3RegI);
  match(rarg4RegI);
  format %{ %}
  interface(REG_INTER);
%}

// Integer operands pinned to one register class each
// (scratch registers 1-2, argument registers 1-4).
operand rscratch1RegI() %{
  constraint(ALLOC_IN_RC(rscratch1_bits32_reg));
  match(iRegIdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rscratch2RegI() %{
  constraint(ALLOC_IN_RC(rscratch2_bits32_reg));
  match(iRegIdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg1RegI() %{
  constraint(ALLOC_IN_RC(rarg1_bits32_reg));
  match(iRegIdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg2RegI() %{
  constraint(ALLOC_IN_RC(rarg2_bits32_reg));
  match(iRegIdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg3RegI() %{
  constraint(ALLOC_IN_RC(rarg3_bits32_reg));
  match(iRegIdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg4RegI() %{
  constraint(ALLOC_IN_RC(rarg4_bits32_reg));
  match(iRegIdst);
  format %{ %}
  interface(REG_INTER);
%}

// Long operands pinned to argument registers 1-4.
operand rarg1RegL() %{
  constraint(ALLOC_IN_RC(rarg1_bits64_reg));
  match(iRegLdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg2RegL() %{
  constraint(ALLOC_IN_RC(rarg2_bits64_reg));
  match(iRegLdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg3RegL() %{
  constraint(ALLOC_IN_RC(rarg3_bits64_reg));
  match(iRegLdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg4RegL() %{
  constraint(ALLOC_IN_RC(rarg4_bits64_reg));
  match(iRegLdst);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer Destination Register
// See definition of reg_class bits64_reg_rw.
operand iRegPdst() %{
  constraint(ALLOC_IN_RC(bits64_reg_rw));
  match(RegP);
  match(rscratch1RegP);
  match(rscratch2RegP);
  match(rarg1RegP);
  match(rarg2RegP);
  match(rarg3RegP);
  match(rarg4RegP);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer Destination Register
// Operand not using r11 and r12 (killed in epilog).
operand iRegPdstNoScratch() %{
  constraint(ALLOC_IN_RC(bits64_reg_leaf_call));
  match(RegP);
  match(rarg1RegP);
  match(rarg2RegP);
  match(rarg3RegP);
  match(rarg4RegP);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer Source Register
// See definition of reg_class bits64_reg_ro.
operand iRegPsrc() %{
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(RegP);
  match(iRegPdst);
  match(rscratch1RegP);
  match(rscratch2RegP);
  match(rarg1RegP);
  match(rarg2RegP);
  match(rarg3RegP);
  match(rarg4RegP);
  match(threadRegP);
  format %{ %}
  interface(REG_INTER);
%}

// Thread operand.
operand threadRegP() %{
  constraint(ALLOC_IN_RC(thread_bits64_reg));
  match(iRegPdst);
  format %{ "R16" %}
  interface(REG_INTER);
%}

// Pointer operands pinned to one register class each
// (scratch registers 1-2, argument registers 1-4).
operand rscratch1RegP() %{
  constraint(ALLOC_IN_RC(rscratch1_bits64_reg));
  match(iRegPdst);
  format %{ "R11" %}
  interface(REG_INTER);
%}

operand rscratch2RegP() %{
  constraint(ALLOC_IN_RC(rscratch2_bits64_reg));
  match(iRegPdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg1RegP() %{
  constraint(ALLOC_IN_RC(rarg1_bits64_reg));
  match(iRegPdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg2RegP() %{
  constraint(ALLOC_IN_RC(rarg2_bits64_reg));
  match(iRegPdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg3RegP() %{
  constraint(ALLOC_IN_RC(rarg3_bits64_reg));
  match(iRegPdst);
  format %{ %}
  interface(REG_INTER);
%}

operand rarg4RegP() %{
  constraint(ALLOC_IN_RC(rarg4_bits64_reg));
  match(iRegPdst);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow (compressed) Pointer Source Register, allocated in bits32_reg_ro.
operand iRegNsrc() %{
  constraint(ALLOC_IN_RC(bits32_reg_ro));
  match(RegN);
  match(iRegNdst);

  format %{ %}
  interface(REG_INTER);
%}

// Narrow (compressed) Pointer Destination Register, allocated in bits32_reg_rw.
operand iRegNdst() %{
  constraint(ALLOC_IN_RC(bits32_reg_rw));
  match(RegN);

  format %{ %}
  interface(REG_INTER);
%}

// Long Destination Register
// See definition of reg_class bits64_reg_rw.
operand iRegLdst() %{
  constraint(ALLOC_IN_RC(bits64_reg_rw));
  match(RegL);
  match(rscratch1RegL);
  match(rscratch2RegL);
  format %{ %}
  interface(REG_INTER);
%}

// Long Source Register
// See definition of reg_class bits64_reg_ro.
operand iRegLsrc() %{
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(RegL);
  match(iRegLdst);
  match(rscratch1RegL);
  match(rscratch2RegL);
  format %{ %}
  interface(REG_INTER);
%}

// Special operand for ConvL2I.
// NOTE(review): 'interface(REG_INTER)' has no trailing ';' here, unlike the
// plain register operands above -- confirm this is intended.
operand iRegL2Isrc(iRegLsrc reg) %{
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(ConvL2I reg);
  format %{ "ConvL2I($reg)" %}
  interface(REG_INTER)
%}

// Long operands pinned to scratch registers 1 and 2.
operand rscratch1RegL() %{
  constraint(ALLOC_IN_RC(rscratch1_bits64_reg));
  match(RegL);
  format %{ %}
  interface(REG_INTER);
%}

operand rscratch2RegL() %{
  constraint(ALLOC_IN_RC(rscratch2_bits64_reg));
  match(RegL);
  format %{ %}
  interface(REG_INTER);
%}

// Condition Code Flag Registers
operand flagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ %}
  interface(REG_INTER);
%}

// Condition Code Flag Register CR0
operand flagsRegCR0() %{
  constraint(ALLOC_IN_RC(int_flags_CR0));
  match(RegFlags);
  format %{ "CR0" %}
  interface(REG_INTER);
%}

// Condition Code Flag Register CR1
operand flagsRegCR1() %{
  constraint(ALLOC_IN_RC(int_flags_CR1));
  match(RegFlags);
  format %{ "CR1" %}
  interface(REG_INTER);
%}

// Condition Code Flag Register CR6
operand flagsRegCR6() %{
  constraint(ALLOC_IN_RC(int_flags_CR6));
  match(RegFlags);
  format %{ "CR6" %}
  interface(REG_INTER);
%}

// Count register (CTR) operand.
operand regCTR() %{
  constraint(ALLOC_IN_RC(ctr_reg));
  // RegFlags should work. Introducing a RegSpecial type would cause a
  // lot of changes.
  match(RegFlags);
  format %{"SR_CTR" %}
  interface(REG_INTER);
%}

// Double Register
operand regD() %{
  constraint(ALLOC_IN_RC(dbl_reg));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Float Register
operand regF() %{
  constraint(ALLOC_IN_RC(flt_reg));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Special Registers

// Method Register
operand inline_cache_regP(iRegPdst reg) %{
  constraint(ALLOC_IN_RC(r19_bits64_reg)); // inline_cache_reg
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

operand compiler_method_oop_regP(iRegPdst reg) %{
  constraint(ALLOC_IN_RC(rscratch1_bits64_reg)); // compiler_method_oop_reg
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

operand interpreter_method_oop_regP(iRegPdst reg) %{
  constraint(ALLOC_IN_RC(r19_bits64_reg)); // interpreter_method_oop_reg
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Operands to remove register moves in unscaled mode.
// Match read/write registers with an EncodeP node if neither shift nor add are required.
// Currently disabled: the predicate starts with 'false' (see TODO).
// NOTE(review): 'interface(REG_INTER)' has no trailing ';' in iRegP2N and
// iRegN2P -- confirm this is intended.
operand iRegP2N(iRegPsrc reg) %{
  predicate(false /* TODO: PPC port MatchDecodeNodes*/&& Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(EncodeP reg);
  format %{ "$reg" %}
  interface(REG_INTER)
%}

// Counterpart for DecodeN / DecodeNKlass; also disabled by a 'false' predicate.
operand iRegN2P(iRegNsrc reg) %{
  predicate(false /* TODO: PPC port MatchDecodeNodes*/);
  constraint(ALLOC_IN_RC(bits32_reg_ro));
  match(DecodeN reg);
  match(DecodeNKlass reg);
  format %{ "$reg" %}
  interface(REG_INTER)
%}

//----------Complex Operands---------------------------------------------------
// Indirect Memory Reference
operand indirect(iRegPsrc reg) %{
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(reg);
  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x0);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect with Offset
operand indOffset16(iRegPsrc reg, immL16 offset) %{
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(AddP reg offset);
  op_cost(100);
  format %{ "[$reg + $offset]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x0);
    scale(0x0);
    disp($offset);
  %}
%}

// Indirect with 4-aligned Offset
operand indOffset16Alg4(iRegPsrc reg, immL16Alg4 offset) %{
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(AddP reg offset);
  op_cost(100);
  format %{ "[$reg + $offset]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x0);
    scale(0x0);
    disp($offset);
  %}
%}

//----------Complex Operands for Compressed OOPs-------------------------------
// Compressed OOPs with narrow_oop_shift == 0.
// Indirect Memory Reference, compressed OOP
// (disabled: predicate is 'false', see TODO)
operand indirectNarrow(iRegNsrc reg) %{
  predicate(false /* TODO: PPC port MatchDecodeNodes*/);
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(DecodeN reg);
  match(DecodeNKlass reg);
  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x0);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect with Offset, compressed OOP
// (disabled: predicate is 'false', see TODO)
operand indOffset16Narrow(iRegNsrc reg, immL16 offset) %{
  predicate(false /* TODO: PPC port MatchDecodeNodes*/);
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(AddP (DecodeN reg) offset);
  match(AddP (DecodeNKlass reg) offset);
  op_cost(100);
  format %{ "[$reg + $offset]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x0);
    scale(0x0);
    disp($offset);
  %}
%}

// Indirect with 4-aligned Offset, compressed OOP
// (disabled: predicate is 'false', see TODO)
operand indOffset16NarrowAlg4(iRegNsrc reg, immL16Alg4 offset) %{
  predicate(false /* TODO: PPC port MatchDecodeNodes*/);
  constraint(ALLOC_IN_RC(bits64_reg_ro));
  match(AddP (DecodeN reg) offset);
  match(AddP (DecodeNKlass reg) offset);
  op_cost(100);
  format %{ "[$reg + $offset]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x0);
    scale(0x0);
    disp($offset);
  %}
%}

//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand
//
// This operand is used for loading and storing temporary values on
// the stack where a match requires a value to flow through memory.
// All five stack-slot operands share one shape: base register encoding 0x1
// (R1_SP), no index or scale, and the stack slot as displacement.
operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  //match(RegI);
  format %{ "[sp+$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1);   // R1_SP
    index(0x0);
    scale(0x0);
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  //match(RegL);
  format %{ "[sp+$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1);   // R1_SP
    index(0x0);
    scale(0x0);
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  //match(RegP);
  format %{ "[sp+$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1);   // R1_SP
    index(0x0);
    scale(0x0);
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  //match(RegF);
  format %{ "[sp+$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1);   // R1_SP
    index(0x0);
    scale(0x0);
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  //match(RegD);
  format %{ "[sp+$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1);   // R1_SP
    index(0x0);
    scale(0x0);
    disp($reg);  // Stack Offset
  %}
%}

// Operands for expressing Control Flow
// NOTE: Label is a predefined operand which should not be redefined in
// the AD file. It is generically handled within the ADLC.

//----------Conditional Branch Operands----------------------------------------
// Comparison Op
//
// This is the operation of the comparison, and is limited to the
// following set of codes: L (<), LE (<=), G (>), GE (>=), E (==), NE
// (!=).
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below.

// When used for floating point comparisons: unordered same as less.
operand cmpOp() %{
  match(Bool);
  format %{ "" %}
  interface(COND_INTER) %{
                           // BO only encodes bit 4 of bcondCRbiIsX, as bits 1-3 are always '100'.
                           //           BO          &  BI
    equal(0xA);            // 10 10:   bcondCRbiIs1 & Condition::equal
    not_equal(0x2);        // 00 10:   bcondCRbiIs0 & Condition::equal
    less(0x8);             // 10 00:   bcondCRbiIs1 & Condition::less
    greater_equal(0x0);    // 00 00:   bcondCRbiIs0 & Condition::less
    less_equal(0x1);       // 00 01:   bcondCRbiIs0 & Condition::greater
    greater(0x9);          // 10 01:   bcondCRbiIs1 & Condition::greater
    overflow(0xB);         // 10 11:   bcondCRbiIs1 & Condition::summary_overflow
    no_overflow(0x3);      // 00 11:   bcondCRbiIs0 & Condition::summary_overflow
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify
// separate instructions for every form of operand when the
// instruction accepts multiple operand types with the same basic
// encoding and format. The classic case of this is memory operands.
// Indirect is not included since its use is limited to Compare & Swap.
// NOTE(review): the opclasses below do list 'indirect', so the sentence
// above looks stale -- confirm.

opclass memory(indirect, indOffset16 /*, indIndex, tlsReference*/, indirectNarrow, indOffset16Narrow);
// Memory operand where offsets are 4-aligned. Required for ld, std.
opclass memoryAlg4(indirect, indOffset16Alg4, indirectNarrow, indOffset16NarrowAlg4);
opclass indirectMemory(indirect, indirectNarrow);

// Special opclass for I and ConvL2I.
opclass iRegIsrc_iRegL2Isrc(iRegIsrc, iRegL2Isrc);

// Operand classes to match encode and decode. iRegN_P2N is only used
// for storeN. I have never seen an encode node elsewhere.
opclass iRegN_P2N(iRegNsrc, iRegP2N);
opclass iRegP_N2P(iRegPsrc, iRegN2P);

//----------PIPELINE-----------------------------------------------------------

pipeline %{

// See J.M.Tendler et al. "Power4 system microarchitecture", IBM
// J. Res. & Dev., No. 1, Jan. 2002.

//----------ATTRIBUTES---------------------------------------------------------
attributes %{

  // Power4 instructions are of fixed length.
  fixed_size_instructions;

  // TODO: if `bundle' means number of instructions fetched
  // per cycle, this is 8. If `bundle' means Power4 `group', that is
  // max instructions issued per cycle, this is 5.
  max_instructions_per_bundle = 8;

  // A Power4 instruction is 4 bytes long.
  instruction_unit_size = 4;

  // The Power4 processor fetches 64 bytes...
  instruction_fetch_unit_size = 64;

  // ...in one line
  // NOTE(review): no trailing ';' on the next attribute, unlike its
  // siblings -- confirm ADLC accepts this as intended.
  instruction_fetch_units = 1

  // Unused, list one so that array generated by adlc is not empty.
  // Aix compiler chokes if _nop_count = 0.
  nops(fxNop);
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine
resources(
   PPC_BR,         // branch unit
   PPC_CR,         // condition unit
   PPC_FX1,        // integer arithmetic unit 1
   PPC_FX2,        // integer arithmetic unit 2
   PPC_LDST1,      // load/store unit 1
   PPC_LDST2,      // load/store unit 2
   PPC_FP1,        // float arithmetic unit 1
   PPC_FP2,        // float arithmetic unit 2
   PPC_LDST = PPC_LDST1 | PPC_LDST2,
   PPC_FX = PPC_FX1 | PPC_FX2,
   PPC_FP = PPC_FP1 | PPC_FP2
 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline
pipe_desc(
   // Power4 longest pipeline path
   PPC_IF,   // instruction fetch
   PPC_IC,
   //PPC_BP, // branch prediction
   PPC_D0,   // decode
   PPC_D1,   // decode
   PPC_D2,   // decode
   PPC_D3,   // decode
   PPC_Xfer1,
   PPC_GD,   // group definition
   PPC_MP,   // map
   PPC_ISS,  // issue
   PPC_RF,   // resource fetch
   PPC_EX1,  // execute (all units)
   PPC_EX2,  // execute (FP, LDST)
   PPC_EX3,  // execute (FP, LDST)
   PPC_EX4,  // execute (FP)
   PPC_EX5,  // execute (FP)
   PPC_EX6,  // execute (FP)
   PPC_WB,   // write back
   PPC_Xfer2,
   PPC_CP
 );

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Simple pipeline classes.
// Each class below is a single instruction with a fixed latency; none of
// them references the resources or stages declared above.

// Default pipeline class.
pipe_class pipe_class_default() %{
  single_instruction;
  fixed_latency(2);
%}

// Pipeline class for empty instructions.
pipe_class pipe_class_empty() %{
  single_instruction;
  fixed_latency(0);
%}

// Pipeline class for compares.
pipe_class pipe_class_compare() %{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for traps.
pipe_class pipe_class_trap() %{
  single_instruction;
  fixed_latency(100);
%}

// Pipeline class for memory operations.
pipe_class pipe_class_memory() %{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call.
pipe_class pipe_class_call() %{
  single_instruction;
  fixed_latency(100);
%}

// Define the class for the Nop node.
define %{
   MachNop = pipe_class_default;
%}

%}

//----------INSTRUCTIONS-------------------------------------------------------

// Naming of instructions:
//   opA_operB / opA_operB_operC:
//     Operation 'op' with one or two source operands 'oper'. Result
//     type is A, source operand types are B and C.
//     Iff A == B == C, B and C are left out.
//
// The instructions are ordered according to the following scheme:
//  - loads
//  - load constants
//  - prefetch
//  - store
//  - encode/decode
//  - membar
//  - conditional moves
//  - compare & swap
//  - arithmetic and logic operations
//    * int: Add, Sub, Mul, Div, Mod
//    * int: lShift, arShift, urShift, rot
//    * float: Add, Sub, Mul, Div
//    * and, or, xor ...
//  - register moves: float <-> int, reg <-> stack, repl
//  - cast (high level type cast, XtoP, castPP, castII, not_null etc.)
//  - conv (low level type cast requiring bit changes (sign extend etc.))
//  - compares, range & zero checks.
//  - branches
//  - complex operations, intrinsics, min, max, replicate
//  - lock
//  - Calls
//
// If there are similar instructions with different types they are sorted:
// int before float
// small before big
// signed before unsigned
// e.g., loadS before loadUS before loadI before loadF.


//----------Load/Store Instructions--------------------------------------------

//----------Load Instructions--------------------------------------------------

// Converts byte to int (sign extension via EXTSB).
// As convB2I_reg, but without match rule.  The match rule of convB2I_reg
// reuses the 'amount' operand, but adlc expects that operand specification
// and operands in match rule are equivalent.
instruct convB2I_reg_2(iRegIdst dst, iRegIsrc src) %{
  effect(DEF dst, USE src);
  format %{ "EXTSB $dst, $src \t// byte->int" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_extsb);
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ extsb(Rdst, Rsrc);
  %}
  ins_pipe(pipe_class_default);
%}

// Zero-extending byte load (LBZ) from an indirect address.
// Carries a match rule but predicate(false), so it is never selected
// by the matcher itself.
instruct loadUB_indirect(iRegIdst dst, indirectMemory mem) %{
  // match-rule, false predicate
  match(Set dst (LoadB mem));
  predicate(false);

  format %{ "LBZ $dst, $mem" %}
  size(4);
  ins_encode( enc_lbz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Same as loadUB_indirect, with the TWI/ISYNC sequence appended
// (3 instructions, 12 bytes).
instruct loadUB_indirect_ac(iRegIdst dst, indirectMemory mem) %{
  // match-rule, false predicate
  match(Set dst (LoadB mem));
  predicate(false);

  format %{ "LBZ $dst, $mem\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_lbz_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Byte (8bit signed). LoadB = LoadUB + ConvUB2B.
// Signed byte load, indirect addressing: expands into a zero-extending
// byte load into a temporary followed by a sign extension.
// Selected when the load is unordered or followed_by_acquire(n) holds.
instruct loadB_indirect_Ex(iRegIdst dst, indirectMemory mem) %{
  match(Set dst (LoadB mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST + DEFAULT_COST);
  expand %{
    iRegIdst tmp;
    loadUB_indirect(tmp, mem);
    convB2I_reg_2(dst, tmp);
  %}
%}

// Same match rule without predicate and with higher cost; expands to the
// "_ac" variant of the byte load.
instruct loadB_indirect_ac_Ex(iRegIdst dst, indirectMemory mem) %{
  match(Set dst (LoadB mem));
  ins_cost(3*MEMORY_REF_COST + DEFAULT_COST);
  expand %{
    iRegIdst tmp;
    loadUB_indirect_ac(tmp, mem);
    convB2I_reg_2(dst, tmp);
  %}
%}

// Zero-extending byte load (LBZ) with 16-bit offset addressing.
// predicate(false) keeps the matcher from selecting it directly.
instruct loadUB_indOffset16(iRegIdst dst, indOffset16 mem) %{
  // match-rule, false predicate
  match(Set dst (LoadB mem));
  predicate(false);

  format %{ "LBZ $dst, $mem" %}
  size(4);
  ins_encode( enc_lbz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Same as loadUB_indOffset16, with the TWI/ISYNC sequence appended
// (3 instructions, 12 bytes).
instruct loadUB_indOffset16_ac(iRegIdst dst, indOffset16 mem) %{
  // match-rule, false predicate
  match(Set dst (LoadB mem));
  predicate(false);

  format %{ "LBZ $dst, $mem\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_lbz_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Byte (8bit signed). LoadB = LoadUB + ConvUB2B.
instruct loadB_indOffset16_Ex(iRegIdst dst, indOffset16 mem) %{
  match(Set dst (LoadB mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST + DEFAULT_COST);

  expand %{
    iRegIdst tmp;
    loadUB_indOffset16(tmp, mem);
    convB2I_reg_2(dst, tmp);
  %}
%}

// Same match rule without predicate and with higher cost; expands to the
// "_ac" variant of the byte load.
instruct loadB_indOffset16_ac_Ex(iRegIdst dst, indOffset16 mem) %{
  match(Set dst (LoadB mem));
  ins_cost(3*MEMORY_REF_COST + DEFAULT_COST);

  expand %{
    iRegIdst tmp;
    loadUB_indOffset16_ac(tmp, mem);
    convB2I_reg_2(dst, tmp);
  %}
%}

// Load Unsigned Byte (8bit UNsigned) into an int reg.
instruct loadUB(iRegIdst dst, memory mem) %{
  match(Set dst (LoadUB mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LBZ $dst, $mem \t// byte, zero-extend to int" %}
  size(4);
  ins_encode( enc_lbz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Unsigned Byte (8bit UNsigned) acquire.
// No predicate, three times the cost of the plain variant.
instruct loadUB_ac(iRegIdst dst, memory mem) %{
  match(Set dst (LoadUB mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LBZ $dst, $mem \t// byte, zero-extend to int, acquire\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_lbz_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Unsigned Byte (8bit UNsigned) into a Long Register.
// The predicate inspects the LoadUB node below the ConvI2L.
instruct loadUB2L(iRegLdst dst, memory mem) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  predicate(_kids[0]->_leaf->as_Load()->is_unordered() || followed_by_acquire(_kids[0]->_leaf));
  ins_cost(MEMORY_REF_COST);

  format %{ "LBZ $dst, $mem \t// byte, zero-extend to long" %}
  size(4);
  ins_encode( enc_lbz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Unsigned Byte (8bit UNsigned) into a Long Register acquire.
instruct loadUB2L_ac(iRegLdst dst, memory mem) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LBZ $dst, $mem \t// byte, zero-extend to long, acquire\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_lbz_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Short (16bit signed)
instruct loadS(iRegIdst dst, memory mem) %{
  match(Set dst (LoadS mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LHA $dst, $mem" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lha);
    // Displacement is the operand's disp plus frame_slots_bias() of its base.
    int disp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lha($dst$$Register, disp, $mem$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Load Short (16bit signed) acquire.
instruct loadS_ac(iRegIdst dst, memory mem) %{
  match(Set dst (LoadS mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LHA $dst, $mem\t acquire\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    // Emits exactly three instructions: the load, twi_0 on the loaded
    // value, and isync.
    int disp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    Register Rdst = $dst$$Register;
    __ lha(Rdst, disp, $mem$$base$$Register);
    __ twi_0(Rdst);
    __ isync();
  %}
  ins_pipe(pipe_class_memory);
%}

// Load Char (16bit unsigned)
instruct loadUS(iRegIdst dst, memory mem) %{
  match(Set dst (LoadUS mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LHZ $dst, $mem" %}
  size(4);
  ins_encode( enc_lhz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Char (16bit unsigned) acquire.
instruct loadUS_ac(iRegIdst dst, memory mem) %{
  match(Set dst (LoadUS mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LHZ $dst, $mem \t// acquire\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_lhz_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Unsigned Short/Char (16bit UNsigned) into a Long Register.
// The predicate inspects the LoadUS node below the ConvI2L.
instruct loadUS2L(iRegLdst dst, memory mem) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  predicate(_kids[0]->_leaf->as_Load()->is_unordered() || followed_by_acquire(_kids[0]->_leaf));
  ins_cost(MEMORY_REF_COST);

  format %{ "LHZ $dst, $mem \t// short, zero-extend to long" %}
  size(4);
  ins_encode( enc_lhz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Unsigned Short/Char (16bit UNsigned) into a Long Register acquire.
instruct loadUS2L_ac(iRegLdst dst, memory mem) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LHZ $dst, $mem \t// short, zero-extend to long, acquire\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_lhz_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Integer.
instruct loadI(iRegIdst dst, memory mem) %{
  match(Set dst (LoadI mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LWZ $dst, $mem" %}
  size(4);
  ins_encode( enc_lwz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Integer acquire.
// No predicate, three times the cost of the plain variant.
instruct loadI_ac(iRegIdst dst, memory mem) %{
  match(Set dst (LoadI mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LWZ $dst, $mem \t// load acquire\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_lwz_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Match loading integer and casting it to unsigned int in
// long register.
// LoadI + ConvI2L + AndL 0xffffffff.
// Only matches unordered loads (the predicate looks at the LoadI two
// levels below the AndL).
instruct loadUI2L(iRegLdst dst, memory mem, immL_32bits mask) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  predicate(_kids[0]->_kids[0]->_leaf->as_Load()->is_unordered());
  ins_cost(MEMORY_REF_COST);

  format %{ "LWZ $dst, $mem \t// zero-extend to long" %}
  size(4);
  ins_encode( enc_lwz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Match loading integer and casting it to long.
// Only matches unordered loads.
instruct loadI2L(iRegLdst dst, memory mem) %{
  match(Set dst (ConvI2L (LoadI mem)));
  predicate(_kids[0]->_leaf->as_Load()->is_unordered());
  ins_cost(MEMORY_REF_COST);

  format %{ "LWA $dst, $mem \t// loadI2L" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lwa);
    int disp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lwa($dst$$Register, disp, $mem$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Match loading integer and casting it to long - acquire.
instruct loadI2L_ac(iRegLdst dst, memory mem) %{
  match(Set dst (ConvI2L (LoadI mem)));
  ins_cost(3*MEMORY_REF_COST);

  // Each line of the sequence is terminated with "\n\t" so the three
  // instructions print on separate lines.
  format %{ "LWA $dst, $mem \t// loadI2L acquire\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    // Emits exactly three instructions: the load, twi_0 on the loaded
    // value, and isync.
    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lwa($dst$$Register, Idisp, $mem$$base$$Register);
    __ twi_0($dst$$Register);
    __ isync();
  %}
  ins_pipe(pipe_class_memory);
%}

// Load Long - aligned
instruct loadL(iRegLdst dst, memoryAlg4 mem) %{
  match(Set dst (LoadL mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LD $dst, $mem \t// long" %}
  size(4);
  ins_encode( enc_ld(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Long - aligned acquire.
instruct loadL_ac(iRegLdst dst, memoryAlg4 mem) %{
  match(Set dst (LoadL mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LD $dst, $mem \t// long acquire\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_ld_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Long - UNaligned
instruct loadL_unaligned(iRegLdst dst, memoryAlg4 mem) %{
  match(Set dst (LoadL_unaligned mem));
  // predicate(...) // Unaligned_ac is not needed (and wouldn't make sense).
  ins_cost(MEMORY_REF_COST);

  format %{ "LD $dst, $mem \t// unaligned long" %}
  size(4);
  ins_encode( enc_ld(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load nodes for superwords

// Load Aligned Packed Byte
// Only matches vector loads whose memory size is 8 bytes.
instruct loadV8(iRegLdst dst, memoryAlg4 mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(MEMORY_REF_COST);

  format %{ "LD $dst, $mem \t// load 8-byte Vector" %}
  size(4);
  ins_encode( enc_ld(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Range, range = array length (=jint)
instruct loadRange(iRegIdst dst, memory mem) %{
  match(Set dst (LoadRange mem));
  ins_cost(MEMORY_REF_COST);

  format %{ "LWZ $dst, $mem \t// range" %}
  size(4);
  ins_encode( enc_lwz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Compressed Pointer
instruct loadN(iRegNdst dst, memory mem) %{
  match(Set dst (LoadN mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LWZ $dst, $mem \t// load compressed ptr" %}
  size(4);
  ins_encode( enc_lwz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Compressed Pointer acquire.
instruct loadN_ac(iRegNdst dst, memory mem) %{
  match(Set dst (LoadN mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LWZ $dst, $mem \t// load acquire compressed ptr\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_lwz_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Compressed Pointer and decode it if narrow_oop_shift == 0.
// Only matches unordered loads, and only while
// Universe::narrow_oop_shift() is 0.
instruct loadN2P_unscaled(iRegPdst dst, memory mem) %{
  match(Set dst (DecodeN (LoadN mem)));
  predicate(_kids[0]->_leaf->as_Load()->is_unordered() && Universe::narrow_oop_shift() == 0);
  ins_cost(MEMORY_REF_COST);

  format %{ "LWZ $dst, $mem \t// DecodeN (unscaled)" %}
  size(4);
  ins_encode( enc_lwz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Pointer
instruct loadP(iRegPdst dst, memoryAlg4 mem) %{
  match(Set dst (LoadP mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LD $dst, $mem \t// ptr" %}
  size(4);
  ins_encode( enc_ld(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Pointer acquire.
// No predicate, three times the cost of the plain variant.
instruct loadP_ac(iRegPdst dst, memoryAlg4 mem) %{
  match(Set dst (LoadP mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LD $dst, $mem \t// ptr acquire\n\t"
            "TWI $dst\n\t"
            "ISYNC" %}
  size(12);
  ins_encode( enc_ld_ac(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// LoadP + CastP2X
// Only matches unordered loads.
instruct loadP2X(iRegLdst dst, memoryAlg4 mem) %{
  match(Set dst (CastP2X (LoadP mem)));
  predicate(_kids[0]->_leaf->as_Load()->is_unordered());
  ins_cost(MEMORY_REF_COST);

  format %{ "LD $dst, $mem \t// ptr + p2x" %}
  size(4);
  ins_encode( enc_ld(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load compressed klass pointer.
instruct loadNKlass(iRegNdst dst, memory mem) %{
  match(Set dst (LoadNKlass mem));
  ins_cost(MEMORY_REF_COST);

  format %{ "LWZ $dst, $mem \t// compressed klass ptr" %}
  size(4);
  ins_encode( enc_lwz(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

//// Load compressed klass and decode it if narrow_klass_shift == 0.
//// TODO: will narrow_klass_shift ever be 0?
//instruct decodeNKlass2Klass(iRegPdst dst, memory mem) %{
//  match(Set dst (DecodeNKlass (LoadNKlass mem)));
//  predicate(false /* TODO: PPC port Universe::narrow_klass_shift() == 0 */);
//  ins_cost(MEMORY_REF_COST);
//
//  format %{ "LWZ $dst, $mem \t// DecodeNKlass (unscaled)" %}
//  size(4);
//  ins_encode( enc_lwz(dst, mem) );
//  ins_pipe(pipe_class_memory);
//%}

// Load Klass Pointer
instruct loadKlass(iRegPdst dst, memoryAlg4 mem) %{
  match(Set dst (LoadKlass mem));
  ins_cost(MEMORY_REF_COST);

  format %{ "LD $dst, $mem \t// klass ptr" %}
  size(4);
  ins_encode( enc_ld(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Float
instruct loadF(regF dst, memory mem) %{
  match(Set dst (LoadF mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LFS $dst, $mem" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lfs);
    // Displacement is the operand's disp plus frame_slots_bias() of its base.
    int disp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    __ lfs($dst$$FloatRegister, disp, $mem$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Load Float acquire.
// Four instructions (16 bytes): the load, a compare of the loaded value
// with itself, a branch to the immediately following label, and ISYNC.
instruct loadF_ac(regF dst, memory mem) %{
  match(Set dst (LoadF mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LFS $dst, $mem \t// acquire\n\t"
            "FCMPU cr0, $dst, $dst\n\t"
            "BNE cr0, next\n"
            "next:\n\t"
            "ISYNC" %}
  size(16);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    int disp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    FloatRegister Rdst = $dst$$FloatRegister;
    Label next;
    __ lfs(Rdst, disp, $mem$$base$$Register);
    __ fcmpu(CCR0, Rdst, Rdst);
    __ bne(CCR0, next);
    __ bind(next);
    __ isync();
  %}
  ins_pipe(pipe_class_memory);
%}

// Load Double - aligned
instruct loadD(regD dst, memory mem) %{
  match(Set dst (LoadD mem));
  predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
  ins_cost(MEMORY_REF_COST);

  format %{ "LFD $dst, $mem" %}
  size(4);
  ins_encode( enc_lfd(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

// Load Double - aligned acquire.
// Same four-instruction shape as the float acquire load.
instruct loadD_ac(regD dst, memory mem) %{
  match(Set dst (LoadD mem));
  ins_cost(3*MEMORY_REF_COST);

  format %{ "LFD $dst, $mem \t// acquire\n\t"
            "FCMPU cr0, $dst, $dst\n\t"
            "BNE cr0, next\n"
            "next:\n\t"
            "ISYNC" %}
  size(16);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    int disp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    FloatRegister Rdst = $dst$$FloatRegister;
    Label next;
    __ lfd(Rdst, disp, $mem$$base$$Register);
    __ fcmpu(CCR0, Rdst, Rdst);
    __ bne(CCR0, next);
    __ bind(next);
    __ isync();
  %}
  ins_pipe(pipe_class_memory);
%}

// Load Double - UNaligned
instruct loadD_unaligned(regD dst, memory mem) %{
  match(Set dst (LoadD_unaligned mem));
  // predicate(...) // Unaligned_ac is not needed (and wouldn't make sense).
  ins_cost(MEMORY_REF_COST);

  format %{ "LFD $dst, $mem" %}
  size(4);
  ins_encode( enc_lfd(dst, mem) );
  ins_pipe(pipe_class_memory);
%}

//----------Constants--------------------------------------------------------

// Load MachConstantTableBase: add hi offset to global toc.
// TODO: Handle hidden register r29 in bundler!
instruct loadToc_hi(iRegLdst dst) %{
  effect(DEF dst);
  ins_cost(DEFAULT_COST);

  format %{ "ADDIS $dst, R29, DISP.hi \t// load TOC hi" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addis);
    __ calculate_address_from_global_toc_hi16only($dst$$Register, __ method_toc());
  %}
  ins_pipe(pipe_class_default);
%}

// Load MachConstantTableBase: add lo offset to global toc.
instruct loadToc_lo(iRegLdst dst, iRegLdst src) %{
  effect(DEF dst, USE src);
  ins_cost(DEFAULT_COST);

  format %{ "ADDI $dst, $src, DISP.lo \t// load TOC lo" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ori);
    __ calculate_address_from_global_toc_lo16only($dst$$Register, __ method_toc());
  %}
  ins_pipe(pipe_class_default);
%}

// Load 16-bit integer constant 0xssss????
instruct loadConI16(iRegIdst dst, immI16 src) %{
  match(Set dst src);

  format %{ "LI $dst, $src" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    // Low 16 bits of the constant, sign-extended through a short cast.
    int simm16 = (int)((short)($src$$constant & 0xFFFF));
    __ li($dst$$Register, simm16);
  %}
  ins_pipe(pipe_class_default);
%}

// Load integer constant 0x????0000
instruct loadConIhi16(iRegIdst dst, immIhi16 src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST);

  format %{ "LIS $dst, $src.hi" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addis);
    // Lis sign extends 16-bit src then shifts it 16 bit to the left.
    int hi16 = (int)((short)(($src$$constant & 0xFFFF0000) >> 16));
    __ lis($dst$$Register, hi16);
  %}
  ins_pipe(pipe_class_default);
%}

// Part 2 of loading 32 bit constant: hi16 is src1 (properly shifted
// and sign extended), this adds the low 16 bits.
instruct loadConI32_lo16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src1, USE src2);
  predicate(false);

  format %{ "ORI $dst, $src1.hi, $src2.lo" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ori);
    int lo16 = ($src2$$constant) & 0xFFFF;
    __ ori($dst$$Register, $src1$$Register, lo16);
  %}
  ins_pipe(pipe_class_default);
%}

// Load an arbitrary int constant: expands into the hi16 load into a
// temporary followed by the lo16 merge.
instruct loadConI_Ex(iRegIdst dst, immI src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST*2);

  expand %{
    // Would like to use $src$$constant.
    immI16 srcLo %{ _opnds[1]->constant() %}
    // srcHi can be 0000 if srcLo sign-extends to a negative number.
    immIhi16 srcHi %{ _opnds[1]->constant() %}
    iRegIdst tmpI;
    loadConIhi16(tmpI, srcHi);
    loadConI32_lo16(dst, tmpI, srcLo);
  %}
%}

// No constant pool entries required.
instruct loadConL16(iRegLdst dst, immL16 src) %{
  match(Set dst src);

  format %{ "LI $dst, $src \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    int simm16 = (int)((short) ($src$$constant & 0xFFFF));
    __ li($dst$$Register, simm16);
  %}
  ins_pipe(pipe_class_default);
%}

// Load long constant 0xssssssss????0000
instruct loadConL32hi16(iRegLdst dst, immL32hi16 src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST);

  format %{ "LIS $dst, $src.hi \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addis);
    int hi16 = (int)((short)(($src$$constant & 0xFFFF0000) >> 16));
    __ lis($dst$$Register, hi16);
  %}
  ins_pipe(pipe_class_default);
%}

// To load a 32 bit constant: merge lower 16 bits into already loaded
// high 16 bits.
instruct loadConL32_lo16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src1, USE src2);
  predicate(false);

  format %{ "ORI $dst, $src1, $src2.lo" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ori);
    int lo16 = ($src2$$constant) & 0xFFFF;
    __ ori($dst$$Register, $src1$$Register, lo16);
  %}
  ins_pipe(pipe_class_default);
%}

// Load 32-bit long constant
// Expands into the hi16 load into a temporary followed by the lo16 merge.
instruct loadConL32_Ex(iRegLdst dst, immL32 src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST*2);

  expand %{
    // Would like to use $src$$constant.
    immL16 srcLo %{ _opnds[1]->constant() /*& 0x0000FFFFL */%}
    // srcHi can be 0000 if srcLo sign-extends to a negative number.
    immL32hi16 srcHi %{ _opnds[1]->constant() /*& 0xFFFF0000L */%}
    iRegLdst tmpL;
    loadConL32hi16(tmpL, srcHi);
    loadConL32_lo16(dst, tmpL, srcLo);
  %}
%}

// Load long constant 0x????000000000000.
// Expands into a hi16 load of the constant shifted right by 32, followed
// by a left shift of the result by 32.
instruct loadConLhighest16_Ex(iRegLdst dst, immLhighest16 src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST);

  expand %{
    immL32hi16 srcHi %{ _opnds[1]->constant() >> 32 /*& 0xFFFF0000L */%}
    immI shift32 %{ 32 %}
    iRegLdst tmpL;
    loadConL32hi16(tmpL, srcHi);
    lshiftL_regL_immI(dst, tmpL, shift32);
  %}
%}

// Expand node for constant pool load: small offset.
instruct loadConL(iRegLdst dst, immL src, iRegLdst toc) %{
  effect(DEF dst, USE src, USE toc);
  ins_cost(MEMORY_REF_COST);

  ins_num_consts(1);
  // Needed so that CallDynamicJavaDirect can compute the address of this
  // instruction for relocation.
  ins_field_cbuf_insts_offset(int);

  format %{ "LD $dst, offset, $toc \t// load long $src from TOC" %}
  size(4);
  ins_encode( enc_load_long_constL(dst, src, toc) );
  ins_pipe(pipe_class_memory);
%}

// Expand node for constant pool load: large offset.
// First half of the large-offset sequence (ADDIS). Declares the
// _const_toc_offset field that the matching "lo" node reads.
instruct loadConL_hi(iRegLdst dst, immL src, iRegLdst toc) %{
  effect(DEF dst, USE src, USE toc);
  predicate(false);

  ins_num_consts(1);
  ins_field_const_toc_offset(int);
  // Needed so that CallDynamicJavaDirect can compute the address of this
  // instruction for relocation.
  ins_field_cbuf_insts_offset(int);

  format %{ "ADDIS $dst, $toc, offset \t// load long $src from TOC (hi)" %}
  size(4);
  ins_encode( enc_load_long_constL_hi(dst, toc, src) );
  ins_pipe(pipe_class_default);
%}

// Expand node for constant pool load: large offset.
// No constant pool entries required.
instruct loadConL_lo(iRegLdst dst, immL src, iRegLdst base) %{
  effect(DEF dst, USE src, USE base);
  predicate(false);

  ins_field_const_toc_offset_hi_node(loadConL_hiNode*);

  format %{ "LD $dst, offset, $base \t// load long $src from TOC (lo)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ld);
    // During scratch emission offset 0 is used; otherwise the offset
    // recorded in the associated hi node is taken.
    int toc_offset = 0;
    if (!ra_->C->in_scratch_emit_size()) {
      toc_offset = _const_toc_offset_hi_node->_const_toc_offset;
    }
    __ ld($dst$$Register, MacroAssembler::largeoffset_si16_si16_lo(toc_offset), $base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Load long constant from constant table. Expand in case of
// offset > 16 bit is needed.
// Adlc adds toc node MachConstantTableBase.
instruct loadConL_Ex(iRegLdst dst, immL src) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);

  format %{ "LD $dst, offset, $constanttablebase\t// load long $src from table, postalloc expanded" %}
  // We can not inline the enc_class for the expand as that does not support constanttablebase.
  postalloc_expand( postalloc_expand_load_long_constant(dst, src, constanttablebase) );
%}

// Load NULL as compressed oop.
instruct loadConN0(iRegNdst dst, immN_0 src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST);

  format %{ "LI $dst, $src \t// compressed ptr" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ li($dst$$Register, 0);
  %}
  ins_pipe(pipe_class_default);
%}

// Load hi part of compressed oop constant.
instruct loadConN_hi(iRegNdst dst, immN src) %{
  effect(DEF dst, USE src);
  ins_cost(DEFAULT_COST);

  format %{ "LIS $dst, $src \t// narrow oop hi" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addis);
    // Bits 16..31 of the constant, sign-extended through a short cast.
    int hi16 = (int)(short)(($src$$constant >> 16) & 0xffff);
    __ lis($dst$$Register, hi16);
  %}
  ins_pipe(pipe_class_default);
%}

// Add lo part of compressed oop constant to already loaded hi part.
instruct loadConN_lo(iRegNdst dst, iRegNsrc src1, immN src2) %{
  effect(DEF dst, USE src1, USE src2);
  ins_cost(DEFAULT_COST);

  format %{ "ORI $dst, $src1, $src2 \t// narrow oop lo" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    assert(__ oop_recorder() != NULL, "this assembler needs an OopRecorder");
    // Record an oop relocation for the constant before emitting the ORI.
    int oop_index = __ oop_recorder()->find_index((jobject)$src2$$constant);
    RelocationHolder rspec = oop_Relocation::spec(oop_index);
    __ relocate(rspec, 1);
    __ ori($dst$$Register, $src1$$Register, $src2$$constant & 0xffff);
  %}
  ins_pipe(pipe_class_default);
%}

// Needed to postalloc expand loadConN: ConN is loaded as ConI
// leaving the upper 32 bits with sign-extension bits.
// This clears these bits: dst = src & 0xFFFFFFFF.
// TODO: Eventually call this maskN_regN_FFFFFFFF.
instruct clearMs32b(iRegNdst dst, iRegNsrc src) %{
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "MASK $dst, $src, 0xFFFFFFFF" %} // mask
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ clrldi($dst$$Register, $src$$Register, 0x20);
  %}
  ins_pipe(pipe_class_default);
%}

// Loading ConN must be postalloc expanded so that edges between
// the nodes are safe. They may not interfere with a safepoint.
// GL TODO: This needs three instructions: better put this into the constant pool.
instruct loadConN_Ex(iRegNdst dst, immN src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST*2);

  format %{ "LoadN $dst, $src \t// postalloc expanded" %}
  postalloc_expand %{
    // Chain of three nodes: hi part, lo part, then clearing of the upper
    // 32 bits. Each node takes the previous one as its input.
    MachNode *m1 = new (C) loadConN_hiNode();
    MachNode *m2 = new (C) loadConN_loNode();
    MachNode *m3 = new (C) clearMs32bNode();
    m1->add_req(NULL);
    m2->add_req(NULL, m1);
    m3->add_req(NULL, m2);
    m1->_opnds[0] = op_dst;
    m1->_opnds[1] = op_src;
    m2->_opnds[0] = op_dst;
    m2->_opnds[1] = op_dst;
    m2->_opnds[2] = op_src;
    m3->_opnds[0] = op_dst;
    m3->_opnds[1] = op_dst;
    // All three nodes get the register pair assigned to this node.
    ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    ra_->set_pair(m3->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    nodes->push(m1);
    nodes->push(m2);
    nodes->push(m3);
  %}
%}

// Load hi part of compressed klass constant.
instruct loadConNKlass_hi(iRegNdst dst, immNKlass src) %{
  effect(DEF dst, USE src);
  ins_cost(DEFAULT_COST);

  format %{ "LIS $dst, $src \t// narrow klass hi" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addis);
    // The constant is a Klass*; its encoded (narrow) value is split here.
    intptr_t Csrc = Klass::encode_klass((Klass *)$src$$constant);
    __ lis($dst$$Register, (int)(short)((Csrc >> 16) & 0xffff));
  %}
  ins_pipe(pipe_class_default);
%}

// This needs a match rule so that build_oop_map knows this is
// not a narrow oop.
instruct loadConNKlass_lo(iRegNdst dst, immNKlass_NM src1, iRegNsrc src2) %{
  match(Set dst src1);
  effect(TEMP src2);
  ins_cost(DEFAULT_COST);

  // The encoding emits ORI with register $src2 and the low 16 bits of the
  // encoded klass constant $src1.
  format %{ "ORI $dst, $src2, $src1 \t// narrow klass lo" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    intptr_t Csrc = Klass::encode_klass((Klass *)$src1$$constant);
    assert(__ oop_recorder() != NULL, "this assembler needs an OopRecorder");
    // Record a metadata relocation for the klass before emitting the ORI.
    int klass_index = __ oop_recorder()->find_index((Klass *)$src1$$constant);
    RelocationHolder rspec = metadata_Relocation::spec(klass_index);

    __ relocate(rspec, 1);
    __ ori($dst$$Register, $src2$$Register, Csrc & 0xffff);
  %}
  ins_pipe(pipe_class_default);
%}

// Loading ConNKlass must be postalloc expanded so that edges between
// the nodes are safe. They may not interfere with a safepoint.
instruct loadConNKlass_Ex(iRegNdst dst, immNKlass src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST*2);

  format %{ "LoadN $dst, $src \t// postalloc expanded" %}
  postalloc_expand %{
    // Load high bits into register. Sign extended.
    MachNode *m1 = new (C) loadConNKlass_hiNode();
    m1->add_req(NULL);
    m1->_opnds[0] = op_dst;
    m1->_opnds[1] = op_src;
    ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    nodes->push(m1);

    // The masking node is only inserted when the encoded klass does not
    // fit into 31 unsigned bits; otherwise the lo node consumes m1 directly.
    MachNode *m2 = m1;
    if (!Assembler::is_uimm((jlong)Klass::encode_klass((Klass *)op_src->constant()), 31)) {
      // Value might be 1-extended. Mask out these bits.
      m2 = new (C) clearMs32bNode();
      m2->add_req(NULL, m1);
      m2->_opnds[0] = op_dst;
      m2->_opnds[1] = op_dst;
      ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
      nodes->push(m2);
    }

    // Merge in the low 16 bits.
    MachNode *m3 = new (C) loadConNKlass_loNode();
    m3->add_req(NULL, m2);
    m3->_opnds[0] = op_dst;
    m3->_opnds[1] = op_src;
    m3->_opnds[2] = op_dst;
    ra_->set_pair(m3->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    nodes->push(m3);
  %}
%}

// 0x1 is used in object initialization (initial object header).
// No constant pool entries required.
instruct loadConP0or1(iRegPdst dst, immP_0or1 src) %{
  match(Set dst src);

  format %{ "LI $dst, $src \t// ptr" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
    __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
  %}
  ins_pipe(pipe_class_default);
%}

// Expand node for constant pool load: small offset.
// The match rule is needed to generate the correct bottom_type(),
// however this node should never match. The use of predicate is not
// possible since ADLC forbids predicates for chain rules. The higher
// costs do not prevent matching in this case. For that reason the
// operand immP_NM with predicate(false) is used.
instruct loadConP(iRegPdst dst, immP_NM src, iRegLdst toc) %{
  match(Set dst src);
  effect(TEMP toc);

  ins_num_consts(1);

  format %{ "LD $dst, offset, $toc \t// load ptr $src from TOC" %}
  size(4);
  ins_encode( enc_load_long_constP(dst, src, toc) );
  ins_pipe(pipe_class_memory);
%}

// Expand node for constant pool load: large offset.
// First half of the large-offset sequence (ADDIS). Declares the
// _const_toc_offset field that the matching "lo" node reads.
instruct loadConP_hi(iRegPdst dst, immP_NM src, iRegLdst toc) %{
  effect(DEF dst, USE src, USE toc);
  predicate(false);

  ins_num_consts(1);
  ins_field_const_toc_offset(int);

  format %{ "ADDIS $dst, $toc, offset \t// load ptr $src from TOC (hi)" %}
  size(4);
  ins_encode( enc_load_long_constP_hi(dst, src, toc) );
  ins_pipe(pipe_class_default);
%}

// Expand node for constant pool load: large offset.
instruct loadConP_lo(iRegPdst dst, immP_NM src, iRegLdst base) %{
  match(Set dst src);
  effect(TEMP base);

  ins_field_const_toc_offset_hi_node(loadConP_hiNode*);

  format %{ "LD $dst, offset, $base \t// load ptr $src from TOC (lo)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ld);
    // During scratch emission offset 0 is used; otherwise the low part of
    // the offset recorded in the associated hi node is taken.
    int lo_offset = 0;
    if (!ra_->C->in_scratch_emit_size()) {
      lo_offset = MacroAssembler::largeoffset_si16_si16_lo(_const_toc_offset_hi_node->_const_toc_offset);
    }
    __ ld($dst$$Register, lo_offset, $base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// Load pointer constant from constant table. Expand in case an
// offset > 16 bit is needed.
// Adlc adds toc node MachConstantTableBase.
instruct loadConP_Ex(iRegPdst dst, immP src) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);

  // This rule does not use "expand" because then
  // the result type is not known to be an Oop.  An ADLC
  // enhancement will be needed to make that work - not worth it!

  // If this instruction rematerializes, it prolongs the live range
  // of the toc node, causing illegal graphs.
  // assert(edge_from_to(_reg_node[reg_lo],def)) fails in verify_good_schedule().
  ins_cannot_rematerialize(true);

  format %{ "LD $dst, offset, $constanttablebase \t// load ptr $src from table, postalloc expanded" %}
  postalloc_expand( postalloc_expand_load_ptr_constant(dst, src, constanttablebase) );
%}

// Expand node for constant pool load: small offset.
+// Single LFS of a float constant, addressed relative to $toc.
+// No match rule: dst/src/toc are wired up through effect() only.
+instruct loadConF(regF dst, immF src, iRegLdst toc) %{
+  effect(DEF dst, USE src, USE toc);
+  ins_cost(MEMORY_REF_COST);
+
+  ins_num_consts(1);
+
+  format %{ "LFS $dst, offset, $toc \t// load float $src from TOC" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_lfs);
+    // Displacement for LFS: offset_to_method_toc() of the address
+    // returned by float_constant().
+    const int toc_offset = __ offset_to_method_toc(__ float_constant($src$$constant));
+    __ lfs($dst$$FloatRegister, toc_offset, $toc$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+// Expand node for constant pool load: large offset.
+// Three instructions (size(12)): bias $toc by the high part, load with
+// the low part as displacement, then subtract the high part again.
+instruct loadConFComp(regF dst, immF src, iRegLdst toc) %{
+  effect(DEF dst, USE src, USE toc);
+  ins_cost(MEMORY_REF_COST);
+
+  ins_num_consts(1);
+
+  format %{ "ADDIS $toc, $toc, offset_hi\n\t"
+            "LFS $dst, offset_lo, $toc \t// load float $src from TOC (hi/lo)\n\t"
+            "ADDIS $toc, $toc, -offset_hi"%}
+  size(12);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    const int toc_offset = __ offset_to_method_toc(__ float_constant($src$$constant));
+    // Round to the nearest multiple of 2^16 for the ADDIS immediate; the
+    // remainder becomes the LFS displacement.
+    const int offset_hi = (toc_offset + (1<<15))>>16;
+    const int offset_lo = toc_offset - offset_hi * (1<<16);
+    Register toc_reg = $toc$$Register;
+
+    __ addis(toc_reg, toc_reg, offset_hi);
+    __ lfs($dst$$FloatRegister, offset_lo, toc_reg);
+    __ addis(toc_reg, toc_reg, -offset_hi);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+// Matcher entry point for float constants; replaced after register
+// allocation by postalloc_expand_load_float_constant.
+// Adlc adds toc node MachConstantTableBase.
+instruct loadConF_Ex(regF dst, immF src) %{
+  match(Set dst src);
+  ins_cost(MEMORY_REF_COST);
+
+  // See loadConP.
+  ins_cannot_rematerialize(true);
+
+  format %{ "LFS $dst, offset, $constanttablebase \t// load $src from table, postalloc expanded" %}
+  postalloc_expand( postalloc_expand_load_float_constant(dst, src, constanttablebase) );
+%}
+
+// Expand node for constant pool load: small offset.
+// Single LFD of a double constant, addressed relative to $toc.
+// No match rule: dst/src/toc are wired up through effect() only.
+instruct loadConD(regD dst, immD src, iRegLdst toc) %{
+  effect(DEF dst, USE src, USE toc);
+  ins_cost(MEMORY_REF_COST);
+
+  ins_num_consts(1);
+
+  format %{ "LFD $dst, offset, $toc \t// load double $src from TOC" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_lfd);
+    // Displacement for LFD: offset_to_method_toc() of the address
+    // returned by double_constant().
+    address double_address = __ double_constant($src$$constant);
+    __ lfd($dst$$FloatRegister, __ offset_to_method_toc(double_address), $toc$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+// Expand node for constant pool load: large offset.
+// Three instructions (size(12)): bias $toc by the high part, load with
+// the low part as displacement, then subtract the high part again.
+instruct loadConDComp(regD dst, immD src, iRegLdst toc) %{
+  effect(DEF dst, USE src, USE toc);
+  ins_cost(MEMORY_REF_COST);
+
+  ins_num_consts(1);
+
+  format %{ "ADDIS $toc, $toc, offset_hi\n\t"
+            "LFD $dst, offset_lo, $toc \t// load double $src from TOC (hi/lo)\n\t"
+            "ADDIS $toc, $toc, -offset_hi" %}
+  size(12);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    const int toc_offset = __ offset_to_method_toc(__ double_constant($src$$constant));
+    // Round to the nearest multiple of 2^16 for the ADDIS immediate; the
+    // remainder becomes the LFD displacement.
+    const int offset_hi = (toc_offset + (1<<15))>>16;
+    const int offset_lo = toc_offset - offset_hi * (1<<16);
+    Register toc_reg = $toc$$Register;
+
+    __ addis(toc_reg, toc_reg, offset_hi);
+    __ lfd($dst$$FloatRegister, offset_lo, toc_reg);
+    __ addis(toc_reg, toc_reg, -offset_hi);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+// Matcher entry point for double constants; replaced after register
+// allocation by postalloc_expand_load_double_constant.
+// Adlc adds toc node MachConstantTableBase.
+instruct loadConD_Ex(regD dst, immD src) %{
+  match(Set dst src);
+  ins_cost(MEMORY_REF_COST);
+
+  // See loadConP.
+  ins_cannot_rematerialize(true);
+
+  format %{ "ConD $dst, offset, $constanttablebase \t// load $src from table, postalloc expanded" %}
+  postalloc_expand( postalloc_expand_load_double_constant(dst, src, constanttablebase) );
+%}
+
+// Prefetch instructions.
+// Must be safe to execute with invalid address (cannot fault).
+
+// PrefetchRead of (mem + src): emits one dcbt with $src and the base
+// register of $mem as operands.
+instruct prefetchr(indirectMemory mem, iRegLsrc src) %{
+  match(PrefetchRead (AddP mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "PREFETCH $mem, 0, $src \t// Prefetch read-many" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_dcbt);
+    __ dcbt($src$$Register, $mem$$base$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+// PrefetchRead of a plain indirect address: dcbt on the base register only.
+instruct prefetchr_no_offset(indirectMemory mem) %{
+  match(PrefetchRead mem);
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "PREFETCH $mem" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_dcbt);
+    __ dcbt($mem$$base$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+// PrefetchWrite of (mem + src): emits one dcbtst with $src and the base
+// register of $mem as operands.
+instruct prefetchw(indirectMemory mem, iRegLsrc src) %{
+  match(PrefetchWrite (AddP mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many (and read)" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_dcbtst);
+    __ dcbtst($src$$Register, $mem$$base$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+// PrefetchWrite of a plain indirect address: dcbtst on the base register only.
+instruct prefetchw_no_offset(indirectMemory mem) %{
+  match(PrefetchWrite mem);
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "PREFETCH $mem" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_dcbtst);
+    __ dcbtst($mem$$base$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+// Special prefetch versions which use the dcbz instruction.
+// PrefetchAllocation of (mem + src) when AllocatePrefetchStyle == 3:
+// emits dcbz instead of a touch instruction.
+instruct prefetch_alloc_zero(indirectMemory mem, iRegLsrc src) %{
+  match(PrefetchAllocation (AddP mem src));
+  predicate(AllocatePrefetchStyle == 3);
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many with zero" %}
+  size(4);
+  ins_encode %{
+    // NOTE(review): the TODO below names dcbtst, but the emitted instruction is dcbz.
+    // TODO: PPC port $archOpcode(ppc64Opcode_dcbtst);
+    __ dcbz($src$$Register, $mem$$base$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+// Same as prefetch_alloc_zero for an address without an added offset.
+instruct prefetch_alloc_zero_no_offset(indirectMemory mem) %{
+  match(PrefetchAllocation mem);
+  predicate(AllocatePrefetchStyle == 3);
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "PREFETCH $mem, 2 \t// Prefetch write-many with zero" %}
+  size(4);
+  ins_encode %{
+    // NOTE(review): the TODO below names dcbtst, but the emitted instruction is dcbz.
+    // TODO: PPC port $archOpcode(ppc64Opcode_dcbtst);
+    __ dcbz($mem$$base$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+// PrefetchAllocation of (mem + src) for every AllocatePrefetchStyle other
+// than 3: emits dcbtst.
+instruct prefetch_alloc(indirectMemory mem, iRegLsrc src) %{
+  match(PrefetchAllocation (AddP mem src));
+  predicate(AllocatePrefetchStyle != 3);
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_dcbtst);
+    __ dcbtst($src$$Register, $mem$$base$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+// Same as prefetch_alloc for an address without an added offset.
+instruct prefetch_alloc_no_offset(indirectMemory mem) %{
+  match(PrefetchAllocation mem);
+  predicate(AllocatePrefetchStyle != 3);
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "PREFETCH $mem, 2 \t// Prefetch write-many" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_dcbtst);
+    __ dcbtst($mem$$base$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+//----------Store Instructions-------------------------------------------------
+
+// Store Byte
+instruct storeB(memory mem, iRegIsrc src) %{
+  match(Set mem (StoreB mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STB $src, $mem \t// byte" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_stb);
+    // The displacement is the operand's disp plus frame_slots_bias() for its base.
+    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
+    __ stb($src$$Register, Idisp, $mem$$base$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+// Store Char/Short
+instruct storeC(memory mem, iRegIsrc src) %{
+  match(Set mem (StoreC mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STH $src, $mem \t// short" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_sth);
+    // The displacement is the operand's disp plus frame_slots_bias() for its base.
+    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
+    __ sth($src$$Register, Idisp, $mem$$base$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+// Store Integer
+instruct storeI(memory mem, iRegIsrc src) %{
+  match(Set mem (StoreI mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STW $src, $mem" %}
+  size(4);
+  ins_encode( enc_stw(src, mem) );
+  ins_pipe(pipe_class_memory);
+%}
+
+// ConvL2I + StoreI.
+// Folds the long-to-int conversion into the store: the same enc_stw
+// encoding is applied directly to the long source register.
+instruct storeI_convL2I(memory mem, iRegLsrc src) %{
+  match(Set mem (StoreI mem (ConvL2I src)));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STW l2i($src), $mem" %}
+  size(4);
+  ins_encode( enc_stw(src, mem) );
+  ins_pipe(pipe_class_memory);
+%}
+
+// Store Long
+instruct storeL(memoryAlg4 mem, iRegLsrc src) %{
+  match(Set mem (StoreL mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STD $src, $mem \t// long" %}
+  size(4);
+  ins_encode( enc_std(src, mem) );
+  ins_pipe(pipe_class_memory);
+%}
+
+// Store super word nodes.
+
+// Store Aligned Packed Byte long register to memory
+// Only StoreVector nodes whose memory_size() is 8 are matched; they are
+// stored with the same enc_std encoding as storeL.
+instruct storeA8B(memoryAlg4 mem, iRegLsrc src) %{
+  predicate(n->as_StoreVector()->memory_size() == 8);
+  match(Set mem (StoreVector mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STD $mem, $src \t// packed8B" %}
+  size(4);
+  ins_encode( enc_std(src, mem) );
+  ins_pipe(pipe_class_memory);
+%}
+
+// Store Compressed Oop
+instruct storeN(memory dst, iRegN_P2N src) %{
+  match(Set dst (StoreN dst src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STW $src, $dst \t// compressed oop" %}
+  size(4);
+  ins_encode( enc_stw(src, dst) );
+  ins_pipe(pipe_class_memory);
+%}
+
+// Store Compressed KLass
+instruct storeNKlass(memory dst, iRegN_P2N src) %{
+  match(Set dst (StoreNKlass dst src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STW $src, $dst \t// compressed klass" %}
+  size(4);
+  ins_encode( enc_stw(src, dst) );
+  ins_pipe(pipe_class_memory);
+%}
+
+// Store Pointer
+instruct storeP(memoryAlg4 dst, iRegPsrc src) %{
+  match(Set dst (StoreP dst src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STD $src, $dst \t// ptr" %}
+  size(4);
+  ins_encode( enc_std(src, dst) );
+  ins_pipe(pipe_class_memory);
+%}
+
+// Store Float
+instruct storeF(memory mem, regF src) %{
+  match(Set mem (StoreF mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STFS $src, $mem" %}
+  size(4);
+  ins_encode( enc_stfs(src, mem) );
+  ins_pipe(pipe_class_memory);
+%}
+
+// Store Double
+instruct storeD(memory mem, regD src) %{
+  match(Set mem (StoreD mem src));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STFD $src, $mem" %}
+  size(4);
+  ins_encode( enc_stfd(src, mem) );
+  ins_pipe(pipe_class_memory);
+%}
+
+//----------Store Instructions With Zeros--------------------------------------
+
+// Card-mark for CMS garbage collection.
+// This cardmark does an optimization so that it must not always
+// do a releasing store. For this, it gets the address of
+// CMSCollectorCardTableModRefBSExt::_requires_release as input.
+// (Using releaseFieldAddr in the match rule is a hack.)
+// predicate(false): this rule is only instantiated by the expand in
+// storeCM_CMS_ExEx below, which passes it the loaded address.
+instruct storeCM_CMS(memory mem, iRegLdst releaseFieldAddr) %{
+  match(Set mem (StoreCM mem releaseFieldAddr));
+  predicate(false);
+  ins_cost(MEMORY_REF_COST);
+
+  // See loadConP.
+  ins_cannot_rematerialize(true);
+
+  format %{ "STB #0, $mem \t// CMS card-mark byte (must be 0!), checking requires_release in [$releaseFieldAddr]" %}
+  ins_encode( enc_cms_card_mark(mem, releaseFieldAddr) );
+  ins_pipe(pipe_class_memory);
+%}
+
+// Card-mark for CMS garbage collection.
+// This cardmark does an optimization so that it must not always
+// do a releasing store. For this, it needs the constant address of
+// CMSCollectorCardTableModRefBSExt::_requires_release.
+// This constant address is split off here by expand so we can use
+// adlc / matcher functionality to load it from the constant section.
+instruct storeCM_CMS_ExEx(memory mem, immI_0 zero) %{
+  match(Set mem (StoreCM mem zero));
+  predicate(UseConcMarkSweepGC);
+
+  expand %{
+    // NOTE(review): the immediate is currently the placeholder 0; the
+    // real address is still a TODO in the initializer below.
+    immL baseImm %{ 0 /* TODO: PPC port (jlong)CMSCollectorCardTableModRefBSExt::requires_release_address() */ %}
+    iRegLdst releaseFieldAddress;
+    loadConL_Ex(releaseFieldAddress, baseImm);
+    storeCM_CMS(mem, releaseFieldAddress);
+  %}
+%}
+
+// Card-mark store used when UseG1GC is set: loads 0 into R0 and stores
+// that byte to $mem (two instructions, size(8)).
+instruct storeCM_G1(memory mem, immI_0 zero) %{
+  match(Set mem (StoreCM mem zero));
+  predicate(UseG1GC);
+  ins_cost(MEMORY_REF_COST);
+
+  ins_cannot_rematerialize(true);
+
+  format %{ "STB #0, $mem \t// CMS card-mark byte store (G1)" %}
+  size(8);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ li(R0, 0);
+    //__ release(); // G1: oops are allowed to get visible after dirty marking
+    // The raw $mem$$disp is used below without frame_slots_bias(), so a
+    // stack-pointer base is rejected.
+    guarantee($mem$$base$$Register != R1_SP, "use frame_slots_bias");
+    __ stb(R0, $mem$$disp, $mem$$base$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+// Convert oop pointer into compressed form.
+
+// Nodes for postalloc expand.
+
+// Shift node for expand.
+// Shifts $src right by narrow_oop_shift() (masked to 6 bits) into $dst.
+instruct encodeP_shift(iRegNdst dst, iRegNsrc src) %{
+  // The match rule is needed to make it a 'MachTypeNode'!
+  match(Set dst (EncodeP src));
+  predicate(false);
+
+  format %{ "SRDI $dst, $src, 3 \t// encode" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
+    __ srdi($dst$$Register, $src$$Register, Universe::narrow_oop_shift() & 0x3f);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Add node for expand.
+// Emits subf with R30 and $src; per the format string R30 is the oop base.
+instruct encodeP_sub(iRegPdst dst, iRegPdst src) %{
+  // The match rule is needed to make it a 'MachTypeNode'!
+  match(Set dst (EncodeP src));
+  predicate(false);
+
+  format %{ "SUB $dst, $src, oop_base \t// encode" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_subf);
+    __ subf($dst$$Register, R30, $src$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Conditional sub base.
+// Branches over the subtraction when $crx signals "equal".
+instruct cond_sub_base(iRegNdst dst, flagsReg crx, iRegPsrc src1) %{
+  // The match rule is needed to make it a 'MachTypeNode'!
+  match(Set dst (EncodeP (Binary crx src1)));
+  predicate(false);
+
+  ins_variable_size_depending_on_alignment(true);
+
+  format %{ "BEQ $crx, done\n\t"
+            "SUB $dst, $src1, R30 \t// encode: subtract base if != NULL\n"
+            "done:" %}
+  // The condition is hard-wired to false for now, so the size is always 8.
+  size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
+    Label done;
+    __ beq($crx$$CondRegister, done);
+    __ subf($dst$$Register, R30, $src1$$Register);
+    // TODO PPC port __ endgroup_if_needed(_size == 12);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Power 7 can use isel instruction
+instruct cond_set_0_oop(iRegNdst dst, flagsReg crx, iRegPsrc src1) %{
+  // The match rule is needed to make it a 'MachTypeNode'!
+  match(Set dst (EncodeP (Binary crx src1)));
+  predicate(false);
+
+  format %{ "CMOVE $dst, $crx eq, 0, $src1 \t// encode: preserve 0" %}
+  size(4);
+  ins_encode %{
+    // This is a Power7 instruction for which no machine description exists.
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ isel_0($dst$$Register, $crx$$CondRegister, Assembler::equal, $src1$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// base != 0
+// 32G aligned narrow oop base.
+// Currently disabled: the predicate is hard-wired to false until the
+// TODO inside it is resolved.
+instruct encodeP_32GAligned(iRegNdst dst, iRegPsrc src) %{
+  match(Set dst (EncodeP src));
+  predicate(false /* TODO: PPC port Universe::narrow_oop_base_disjoint()*/);
+
+  format %{ "EXTRDI $dst, $src, #32, #3 \t// encode with 32G aligned base" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
+    __ rldicl($dst$$Register, $src$$Register, 64-Universe::narrow_oop_shift(), 32);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// shift != 0, base != 0
+// Matches EncodeP nodes whose pointer type is not NotNull; needs $crx as
+// a TEMP and is replaced by postalloc_expand_encode_oop.
+instruct encodeP_Ex(iRegNdst dst, flagsReg crx, iRegPsrc src) %{
+  match(Set dst (EncodeP src));
+  effect(TEMP crx);
+  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull &&
+            Universe::narrow_oop_shift() != 0 &&
+            true /* TODO: PPC port Universe::narrow_oop_base_overlaps()*/);
+
+  format %{ "EncodeP $dst, $crx, $src \t// postalloc expanded" %}
+  postalloc_expand( postalloc_expand_encode_oop(dst, src, crx));
+%}
+
+// shift != 0, base != 0
+// Matches EncodeP nodes whose pointer type is NotNull; no flags register
+// is requested for this variant.
+instruct encodeP_not_null_Ex(iRegNdst dst, iRegPsrc src) %{
+  match(Set dst (EncodeP src));
+  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull &&
+            Universe::narrow_oop_shift() != 0 &&
+            true /* TODO: PPC port Universe::narrow_oop_base_overlaps()*/);
+
+  format %{ "EncodeP $dst, $src\t// $src != Null, postalloc expanded" %}
+  postalloc_expand( postalloc_expand_encode_oop_not_null(dst, src) );
+%}
+
+// shift != 0, base == 0
+// TODO: This is the same as encodeP_shift. Merge!
+// With a zero narrow oop base, encoding is just the right shift.
+instruct encodeP_not_null_base_null(iRegNdst dst, iRegPsrc src) %{
+  match(Set dst (EncodeP src));
+  predicate(Universe::narrow_oop_shift() != 0 &&
+            Universe::narrow_oop_base() ==0);
+
+  format %{ "SRDI $dst, $src, #3 \t// encodeP, $src != NULL" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
+    __ srdi($dst$$Register, $src$$Register, Universe::narrow_oop_shift() & 0x3f);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Compressed OOPs with narrow_oop_shift == 0.
+// shift == 0, base == 0
+instruct encodeP_narrow_oop_shift_0(iRegNdst dst, iRegPsrc src) %{
+  match(Set dst (EncodeP src));
+  predicate(Universe::narrow_oop_shift() == 0);
+
+  format %{ "MR $dst, $src \t// Ptr->Narrow" %}
+  // variable size, 0 or 4.
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_or);
+    __ mr_if_needed($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Decode nodes.
+
+// Shift node for expand.
+// Shifts $src left by narrow_oop_shift() into $dst.
+instruct decodeN_shift(iRegPdst dst, iRegPsrc src) %{
+  // The match rule is needed to make it a 'MachTypeNode'!
+  match(Set dst (DecodeN src));
+  predicate(false);
+
+  format %{ "SLDI $dst, $src, #3 \t// DecodeN" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
+    __ sldi($dst$$Register, $src$$Register, Universe::narrow_oop_shift());
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Add node for expand.
+// Adds R30 to $src; per the format string R30 holds the oop base.
+instruct decodeN_add(iRegPdst dst, iRegPdst src) %{
+  // The match rule is needed to make it a 'MachTypeNode'!
+  match(Set dst (DecodeN src));
+  predicate(false);
+
+  format %{ "ADD $dst, $src, R30 \t// DecodeN, add oop base" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_add);
+    __ add($dst$$Register, $src$$Register, R30);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Conditional add base for expand.
+// Branches over the addition when $crx signals "equal".
+instruct cond_add_base(iRegPdst dst, flagsReg crx, iRegPsrc src1) %{
+  // The match rule is needed to make it a 'MachTypeNode'!
+  // NOTICE that the rule is nonsense - we just have to make sure that:
+  //  - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
+  //  - we have to match 'crx' to avoid an "illegal USE of non-input: flagsReg crx" error in ADLC.
+  match(Set dst (DecodeN (Binary crx src1)));
+  predicate(false);
+
+  ins_variable_size_depending_on_alignment(true);
+
+  format %{ "BEQ $crx, done\n\t"
+            "ADD $dst, $src1, R30 \t// DecodeN: add oop base if $src1 != NULL\n"
+            "done:" %}
+  // The condition is hard-wired to false for now, so the size is always 8.
+  size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling()) */? 12 : 8);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
+    Label done;
+    __ beq($crx$$CondRegister, done);
+    __ add($dst$$Register, $src1$$Register, R30);
+    // TODO PPC port __ endgroup_if_needed(_size == 12);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// isel-based counterpart of cond_add_base's null handling (see format string).
+instruct cond_set_0_ptr(iRegPdst dst, flagsReg crx, iRegPsrc src1) %{
+  // The match rule is needed to make it a 'MachTypeNode'!
+  // NOTICE that the rule is nonsense - we just have to make sure that:
+  //  - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
+  //  - we have to match 'crx' to avoid an "illegal USE of non-input: flagsReg crx" error in ADLC.
+  match(Set dst (DecodeN (Binary crx src1)));
+  predicate(false);
+
+  format %{ "CMOVE $dst, $crx eq, 0, $src1 \t// decode: preserve 0" %}
+  size(4);
+  ins_encode %{
+    // This is a Power7 instruction for which no machine description exists.
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ isel_0($dst$$Register, $crx$$CondRegister, Assembler::equal, $src1$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// shift != 0, base != 0
+// Matches DecodeN nodes whose oop type is neither NotNull nor Constant;
+// needs $crx as a TEMP and is replaced by postalloc_expand_decode_oop.
+instruct decodeN_Ex(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
+  match(Set dst (DecodeN src));
+  predicate((n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
+             n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant) &&
+            Universe::narrow_oop_shift() != 0 &&
+            Universe::narrow_oop_base() != 0);
+  effect(TEMP crx);
+
+  format %{ "DecodeN $dst, $src \t// Kills $crx, postalloc expanded" %}
+  postalloc_expand( postalloc_expand_decode_oop(dst, src, crx) );
+%}
+
+// shift != 0, base == 0
+instruct decodeN_nullBase(iRegPdst dst, iRegNsrc src) %{
+  match(Set dst (DecodeN src));
+  predicate(Universe::narrow_oop_shift() != 0 &&
+            Universe::narrow_oop_base() == 0);
+
+  format %{ "SLDI $dst, $src, #3 \t// DecodeN (zerobased)" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
+    __ sldi($dst$$Register, $src$$Register, Universe::narrow_oop_shift());
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// src != 0, shift != 0, base != 0
+// Complement of decodeN_Ex: matches when the oop type is NotNull or
+// Constant, so no flags register is requested.
+instruct decodeN_notNull_addBase_Ex(iRegPdst dst, iRegNsrc src) %{
+  match(Set dst (DecodeN src));
+  predicate((n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
+             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
+            Universe::narrow_oop_shift() != 0 &&
+            Universe::narrow_oop_base() != 0);
+
+  format %{ "DecodeN $dst, $src \t// $src != NULL, postalloc expanded" %}
+  postalloc_expand( postalloc_expand_decode_oop_not_null(dst, src));
+%}
+
+// Compressed OOPs with narrow_oop_shift == 0.
+instruct decodeN_unscaled(iRegPdst dst, iRegNsrc src) %{
+  match(Set dst (DecodeN src));
+  predicate(Universe::narrow_oop_shift() == 0);
+  ins_cost(DEFAULT_COST);
+
+  format %{ "MR $dst, $src \t// DecodeN (unscaled)" %}
+  // variable size, 0 or 4.
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_or);
+    __ mr_if_needed($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Convert compressed oop into int for vectors alignment masking.
+instruct decodeN2I_unscaled(iRegIdst dst, iRegNsrc src) %{
+  match(Set dst (ConvL2I (CastP2X (DecodeN src))));
+  predicate(Universe::narrow_oop_shift() == 0);
+  ins_cost(DEFAULT_COST);
+
+  format %{ "MR $dst, $src \t// (int)DecodeN (unscaled)" %}
+  // variable size, 0 or 4.
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_or);
+    __ mr_if_needed($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Convert klass pointer into compressed form.
+
+// Nodes for postalloc expand.
+
+// Shift node for expand.
+// Shifts $src right by narrow_klass_shift() into $dst.
+instruct encodePKlass_shift(iRegNdst dst, iRegNsrc src) %{
+  // The match rule is needed to make it a 'MachTypeNode'!
+  match(Set dst (EncodePKlass src));
+  predicate(false);
+
+  format %{ "SRDI $dst, $src, 3 \t// encode" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
+    __ srdi($dst$$Register, $src$$Register, Universe::narrow_klass_shift());
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Add node for expand.
+// Emits subf with $base and $src; the base arrives in a register operand
+// rather than the fixed R30 used by encodeP_sub.
+instruct encodePKlass_sub_base(iRegPdst dst, iRegLsrc base, iRegPdst src) %{
+  // The match rule is needed to make it a 'MachTypeNode'!
+  match(Set dst (EncodePKlass (Binary base src)));
+  predicate(false);
+
+  format %{ "SUB $dst, $base, $src \t// encode" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_subf);
+    __ subf($dst$$Register, $base$$Register, $src$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// base != 0
+// 32G aligned narrow oop base.
+// Klass-pointer counterpart of encodeP_32GAligned: a single rldicl that
+// rotates the klass shift away and keeps the low 32 bits.
+// Currently disabled: the predicate is hard-wired to false until the
+// TODO inside it is resolved.
+instruct encodePKlass_32GAligned(iRegNdst dst, iRegPsrc src) %{
+  match(Set dst (EncodePKlass src));
+  predicate(false /* TODO: PPC port Universe::narrow_klass_base_disjoint()*/);
+
+  format %{ "EXTRDI $dst, $src, #32, #3 \t// encode with 32G aligned base" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
+    // A klass pointer is encoded here, so the klass shift is used, not
+    // the oop shift (consistent with encodePKlass_shift).
+    __ rldicl($dst$$Register, $src$$Register, 64-Universe::narrow_klass_shift(), 32);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// shift != 0, base != 0
+// Postalloc expand into two nodes that both write $dst:
+//   n1: encodePKlass_sub_base(dst, base, src)
+//   n2: encodePKlass_shift(dst, dst), consuming n1
+instruct encodePKlass_not_null_Ex(iRegNdst dst, iRegLsrc base, iRegPsrc src) %{
+  match(Set dst (EncodePKlass (Binary base src)));
+  predicate(false);
+
+  format %{ "EncodePKlass $dst, $src\t// $src != Null, postalloc expanded" %}
+  postalloc_expand %{
+    encodePKlass_sub_baseNode *n1 = new (C) encodePKlass_sub_baseNode();
+    n1->add_req(n_region, n_base, n_src);
+    n1->_opnds[0] = op_dst;
+    n1->_opnds[1] = op_base;
+    n1->_opnds[2] = op_src;
+    n1->_bottom_type = _bottom_type;
+
+    encodePKlass_shiftNode *n2 = new (C) encodePKlass_shiftNode();
+    n2->add_req(n_region, n1);
+    n2->_opnds[0] = op_dst;
+    n2->_opnds[1] = op_dst;
+    n2->_bottom_type = _bottom_type;
+    // Both new nodes get the register pair assigned to this node.
+    ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
+    ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
+
+    nodes->push(n1);
+    nodes->push(n2);
+  %}
+%}
+
+// shift != 0, base != 0
+// Matcher entry point for EncodePKlass: loads narrow_klass_base() as a
+// long constant and hands it to encodePKlass_not_null_Ex.
+instruct encodePKlass_not_null_ExEx(iRegNdst dst, iRegPsrc src) %{
+  match(Set dst (EncodePKlass src));
+  //predicate(Universe::narrow_klass_shift() != 0 &&
+  //          true /* TODO: PPC port Universe::narrow_klass_base_overlaps()*/);
+
+  //format %{ "EncodePKlass $dst, $src\t// $src != Null, postalloc expanded" %}
+  ins_cost(DEFAULT_COST*2);  // Don't count constant.
+  expand %{
+    immL baseImm %{ (jlong)(intptr_t)Universe::narrow_klass_base() %}
+    iRegLdst base;
+    loadConL_Ex(base, baseImm);
+    encodePKlass_not_null_Ex(dst, base, src);
+  %}
+%}
+
+// Decode nodes.
+
+// Shift node for expand.
+// Shifts $src left by narrow_klass_shift() into $dst.
+instruct decodeNKlass_shift(iRegPdst dst, iRegPsrc src) %{
+  // The match rule is needed to make it a 'MachTypeNode'!
+  match(Set dst (DecodeNKlass src));
+  predicate(false);
+
+  format %{ "SLDI $dst, $src, #3 \t// DecodeNKlass" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
+    __ sldi($dst$$Register, $src$$Register, Universe::narrow_klass_shift());
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Add node for expand.
+
+// Adds $base and $src into $dst; the base arrives in a register operand.
+instruct decodeNKlass_add_base(iRegPdst dst, iRegLsrc base, iRegPdst src) %{
+  // The match rule is needed to make it a 'MachTypeNode'!
+  match(Set dst (DecodeNKlass (Binary base src)));
+  predicate(false);
+
+  format %{ "ADD $dst, $base, $src \t// DecodeNKlass, add klass base" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_add);
+    __ add($dst$$Register, $base$$Register, $src$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// src != 0, shift != 0, base != 0
+// Postalloc expand into two nodes that both write $dst:
+//   n1: decodeNKlass_add_base(dst, base, src)
+//   n2: decodeNKlass_shift(dst, dst), consuming n1
+instruct decodeNKlass_notNull_addBase_Ex(iRegPdst dst, iRegLsrc base, iRegNsrc src) %{
+  match(Set dst (DecodeNKlass (Binary base src)));
+  //effect(kill src); // We need a register for the immediate result after shifting.
+  predicate(false);
+
+  format %{ "DecodeNKlass $dst = $base + ($src << 3) \t// $src != NULL, postalloc expanded" %}
+  postalloc_expand %{
+    decodeNKlass_add_baseNode *n1 = new (C) decodeNKlass_add_baseNode();
+    n1->add_req(n_region, n_base, n_src);
+    n1->_opnds[0] = op_dst;
+    n1->_opnds[1] = op_base;
+    n1->_opnds[2] = op_src;
+    n1->_bottom_type = _bottom_type;
+
+    decodeNKlass_shiftNode *n2 = new (C) decodeNKlass_shiftNode();
+    // The shift consumes the result of the add (n1). It must not list
+    // itself as its own input.
+    n2->add_req(n_region, n1);
+    n2->_opnds[0] = op_dst;
+    n2->_opnds[1] = op_dst;
+    n2->_bottom_type = _bottom_type;
+
+    // Both new nodes get the register pair assigned to this node.
+    ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
+    ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
+
+    nodes->push(n1);
+    nodes->push(n2);
+  %}
+%}
+
+// src != 0, shift != 0, base != 0
+instruct decodeNKlass_notNull_addBase_ExEx(iRegPdst dst, iRegNsrc src) %{
+  match(Set dst (DecodeNKlass src));
+  // predicate(Universe::narrow_klass_shift() != 0 &&
+  //           Universe::narrow_klass_base() != 0);
+
+  //format %{ "DecodeNKlass $dst, $src \t// $src != NULL, expanded" %}
+
+  ins_cost(DEFAULT_COST*2);  // Don't count constant.
+  expand %{
+    // We add first, then we shift. Like this, we can get along with one register less.
+    // But we have to load the base pre-shifted.
+    immL baseImm %{ (jlong)((intptr_t)Universe::narrow_klass_base() >> Universe::narrow_klass_shift()) %}
+    iRegLdst base;
+    loadConL_Ex(base, baseImm);
+    decodeNKlass_notNull_addBase_Ex(dst, base, src);
+  %}
+%}
+
+//----------MemBar Instructions-----------------------------------------------
+// Memory barrier flavors
+
+// LoadFence: emits acquire() as one instruction (size(4)).
+instruct membar_acquire() %{
+  match(LoadFence);
+  ins_cost(4*MEMORY_REF_COST);
+
+  format %{ "MEMBAR-acquire" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_lwsync);
+    __ acquire();
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// MemBarAcquire: matched at zero cost and emits nothing.
+instruct unnecessary_membar_acquire() %{
+  match(MemBarAcquire);
+  ins_cost(0);
+
+  format %{ " -- \t// redundant MEMBAR-acquire - empty" %}
+  size(0);
+  ins_encode( /*empty*/ );
+  ins_pipe(pipe_class_default);
+%}
+
+// MemBarAcquireLock: matched at zero cost and emits nothing.
+instruct membar_acquire_lock() %{
+  match(MemBarAcquireLock);
+  ins_cost(0);
+
+  format %{ " -- \t// redundant MEMBAR-acquire - empty (acquire as part of CAS in prior FastLock)" %}
+  size(0);
+  ins_encode( /*empty*/ );
+  ins_pipe(pipe_class_default);
+%}
+
+// MemBarRelease and StoreFence share this rule: emits release().
+instruct membar_release() %{
+  match(MemBarRelease);
+  match(StoreFence);
+  ins_cost(4*MEMORY_REF_COST);
+
+  format %{ "MEMBAR-release" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_lwsync);
+    __ release();
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// MemBarStoreStore: emits membar(Assembler::StoreStore).
+instruct membar_storestore() %{
+  match(MemBarStoreStore);
+  ins_cost(4*MEMORY_REF_COST);
+
+  format %{ "MEMBAR-store-store" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_lwsync);
+    __ membar(Assembler::StoreStore);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// MemBarReleaseLock: matched at zero cost and emits nothing.
+instruct membar_release_lock() %{
+  match(MemBarReleaseLock);
+  ins_cost(0);
+
+  format %{ " -- \t// redundant MEMBAR-release - empty (release in FastUnlock)" %}
+  size(0);
+  ins_encode( /*empty*/ );
+  ins_pipe(pipe_class_default);
+%}
+
+// MemBarVolatile: emits fence().
+instruct membar_volatile() %{
+  match(MemBarVolatile);
+  ins_cost(4*MEMORY_REF_COST);
+
+  format %{ "MEMBAR-volatile" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_sync);
+    __ fence();
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// This optimization is wrong on PPC. The following pattern is not supported:
+//  MemBarVolatile
+//   ^        ^
+//   |        |
+//  CtrlProj MemProj
+//   ^        ^
+//   |        |
+//   |       Load
+//   |
+//  MemBarVolatile
+//
+//  The first MemBarVolatile could get optimized out! According to
+//  Vladimir, this pattern can not occur on Oracle platforms.
+//  However, it does occur on PPC64 (because of membars in
+//  inline_unsafe_load_store).
+//
+// Add this node again if we found a good solution for inline_unsafe_load_store().
+// Don't forget to look at the implementation of post_store_load_barrier again,
+// we did other fixes in that method.
+//instruct unnecessary_membar_volatile() %{
+//  match(MemBarVolatile);
+//  predicate(Matcher::post_store_load_barrier(n));
+//  ins_cost(0);
+//
+//  format %{ " -- \t// redundant MEMBAR-volatile - empty" %}
+//  size(0);
+//  ins_encode( /*empty*/ );
+//  ins_pipe(pipe_class_default);
+//%}
+
+// MemBarCPUOrder: matched at zero cost and emits nothing.
+instruct membar_CPUOrder() %{
+  match(MemBarCPUOrder);
+  ins_cost(0);
+
+  format %{ " -- \t// MEMBAR-CPUOrder - empty: PPC64 processors are self-consistent." %}
+  size(0);
+  ins_encode( /*empty*/ );
+  ins_pipe(pipe_class_default);
+%}
+
+//----------Conditional Move---------------------------------------------------
+
+// Cmove using isel.
+// Selected only when VM_Version::has_isel(); one instruction (size(4)).
+instruct cmovI_reg_isel(cmpOp cmp, flagsReg crx, iRegIdst dst, iRegIsrc src) %{
+  match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
+  predicate(VM_Version::has_isel());
+  ins_cost(DEFAULT_COST);
+
+  format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
+  size(4);
+  ins_encode %{
+    // This is a Power7 instruction for which no machine description
+    // exists. Anyways, the scheduler should be off on Power7.
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // The low two bits of the cmpcode are passed as the condition; bit 3
+    // of its complement is passed as the invert argument.
+    int cc = $cmp$$cmpcode;
+    __ isel($dst$$Register, $crx$$CondRegister,
+            (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Fallback without isel: uses the enc_cmove_reg encoding.
+instruct cmovI_reg(cmpOp cmp, flagsReg crx, iRegIdst dst, iRegIsrc src) %{
+  match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
+  predicate(!VM_Version::has_isel());
+  ins_cost(DEFAULT_COST+BRANCH_COST);
+
+  ins_variable_size_depending_on_alignment(true);
+
+  format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
+  // Worst case is branch + move + stop, no stop without scheduler
+  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
+  ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
+  ins_pipe(pipe_class_default);
+%}
+
+// Conditional move of a 16-bit immediate (immI16) via enc_cmove_imm.
+instruct cmovI_imm(cmpOp cmp, flagsReg crx, iRegIdst dst, immI16 src) %{
+  match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
+  ins_cost(DEFAULT_COST+BRANCH_COST);
+
+  ins_variable_size_depending_on_alignment(true);
+
+  format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
+  // Worst case is branch + move + stop, no stop without scheduler
+  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
+  ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
+  ins_pipe(pipe_class_default);
+%}
+
+// Cmove using isel.
+// Long variant of cmovI_reg_isel; same encoding on long registers.
+instruct cmovL_reg_isel(cmpOp cmp, flagsReg crx, iRegLdst dst, iRegLsrc src) %{
+  match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
+  predicate(VM_Version::has_isel());
+  ins_cost(DEFAULT_COST);
+
+  format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
+  size(4);
+  ins_encode %{
+    // This is a Power7 instruction for which no machine description
+    // exists. Anyways, the scheduler should be off on Power7.
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    int cc = $cmp$$cmpcode;
+    __ isel($dst$$Register, $crx$$CondRegister,
+            (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Fallback without isel: uses the enc_cmove_reg encoding.
+instruct cmovL_reg(cmpOp cmp, flagsReg crx, iRegLdst dst, iRegLsrc src) %{
+  match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
+  predicate(!VM_Version::has_isel());
+  ins_cost(DEFAULT_COST+BRANCH_COST);
+
+  ins_variable_size_depending_on_alignment(true);
+
+  format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
+  // Worst case is branch + move + stop, no stop without scheduler.
+  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
+  ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
+  ins_pipe(pipe_class_default);
+%}
+
+// Conditional move of a 16-bit immediate (immL16) via enc_cmove_imm.
+instruct cmovL_imm(cmpOp cmp, flagsReg crx, iRegLdst dst, immL16 src) %{
+  match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
+  ins_cost(DEFAULT_COST+BRANCH_COST);
+
+  ins_variable_size_depending_on_alignment(true);
+
+  format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
+  // Worst case is branch + move + stop, no stop without scheduler.
+  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
+  ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
+  ins_pipe(pipe_class_default);
+%}
+
+// Cmove using isel.
+// Narrow-oop variant of cmovI_reg_isel; same encoding on narrow registers.
+instruct cmovN_reg_isel(cmpOp cmp, flagsReg crx, iRegNdst dst, iRegNsrc src) %{
+  match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
+  predicate(VM_Version::has_isel());
+  ins_cost(DEFAULT_COST);
+
+  format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
+  size(4);
+  ins_encode %{
+    // This is a Power7 instruction for which no machine description
+    // exists. Anyways, the scheduler should be off on Power7.
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    int cc = $cmp$$cmpcode;
+    __ isel($dst$$Register, $crx$$CondRegister,
+            (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Conditional move for RegN. Only cmov(reg, reg).
+instruct cmovN_reg(cmpOp cmp, flagsReg crx, iRegNdst dst, iRegNsrc src) %{
+  match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
+  predicate(!VM_Version::has_isel());
+  ins_cost(DEFAULT_COST+BRANCH_COST);
+
+  ins_variable_size_depending_on_alignment(true);
+
+  format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
+  // Worst case is branch + move + stop, no stop without scheduler.
+  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
+  ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
+  ins_pipe(pipe_class_default);
+%}
+
+// Conditional move of the narrow null constant (immN_0) via enc_cmove_imm.
+instruct cmovN_imm(cmpOp cmp, flagsReg crx, iRegNdst dst, immN_0 src) %{
+  match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
+  ins_cost(DEFAULT_COST+BRANCH_COST);
+
+  ins_variable_size_depending_on_alignment(true);
+
+  format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
+  // Worst case is branch + move + stop, no stop without scheduler.
+  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
+  ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
+  ins_pipe(pipe_class_default);
+%}
+
+// Cmove using isel.
+instruct cmovP_reg_isel(cmpOp cmp, flagsReg crx, iRegPdst dst, iRegPsrc src) %{
+  match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
+  predicate(VM_Version::has_isel());
+  ins_cost(DEFAULT_COST);
+
+  format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %}
+  size(4);
+  ins_encode %{
+    // This is a Power7 instruction for which no machine description
+    // exists. Anyways, the scheduler should be off on Power7.
+ // TODO: PPC port $archOpcode(ppc64Opcode_compound); + int cc = $cmp$$cmpcode; + __ isel($dst$$Register, $crx$$CondRegister, + (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register); + %} + ins_pipe(pipe_class_default); +%} + +instruct cmovP_reg(cmpOp cmp, flagsReg crx, iRegPdst dst, iRegP_N2P src) %{ + match(Set dst (CMoveP (Binary cmp crx) (Binary dst src))); + predicate(!VM_Version::has_isel()); + ins_cost(DEFAULT_COST+BRANCH_COST); + + ins_variable_size_depending_on_alignment(true); + + format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %} + // Worst case is branch + move + stop, no stop without scheduler. + size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8); + ins_encode( enc_cmove_reg(dst, crx, src, cmp) ); + ins_pipe(pipe_class_default); +%} + +instruct cmovP_imm(cmpOp cmp, flagsReg crx, iRegPdst dst, immP_0 src) %{ + match(Set dst (CMoveP (Binary cmp crx) (Binary dst src))); + ins_cost(DEFAULT_COST+BRANCH_COST); + + ins_variable_size_depending_on_alignment(true); + + format %{ "CMOVE $cmp, $crx, $dst, $src\n\t" %} + // Worst case is branch + move + stop, no stop without scheduler. + size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8); + ins_encode( enc_cmove_imm(dst, crx, src, cmp) ); + ins_pipe(pipe_class_default); +%} + +instruct cmovF_reg(cmpOp cmp, flagsReg crx, regF dst, regF src) %{ + match(Set dst (CMoveF (Binary cmp crx) (Binary dst src))); + ins_cost(DEFAULT_COST+BRANCH_COST); + + ins_variable_size_depending_on_alignment(true); + + format %{ "CMOVEF $cmp, $crx, $dst, $src\n\t" %} + // Worst case is branch + move + stop, no stop without scheduler. + size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_cmovef); + Label done; + assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding"); + // Branch if not (cmp crx). 
+ __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done); + __ fmr($dst$$FloatRegister, $src$$FloatRegister); + // TODO PPC port __ endgroup_if_needed(_size == 12); + __ bind(done); + %} + ins_pipe(pipe_class_default); +%} + +instruct cmovD_reg(cmpOp cmp, flagsReg crx, regD dst, regD src) %{ + match(Set dst (CMoveD (Binary cmp crx) (Binary dst src))); + ins_cost(DEFAULT_COST+BRANCH_COST); + + ins_variable_size_depending_on_alignment(true); + + format %{ "CMOVEF $cmp, $crx, $dst, $src\n\t" %} + // Worst case is branch + move + stop, no stop without scheduler. + size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_cmovef); + Label done; + assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding"); + // Branch if not (cmp crx). + __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done); + __ fmr($dst$$FloatRegister, $src$$FloatRegister); + // TODO PPC port __ endgroup_if_needed(_size == 12); + __ bind(done); + %} + ins_pipe(pipe_class_default); +%} + +//----------Conditional_store-------------------------------------------------- +// Conditional-store of the updated heap-top. +// Used during allocation of the shared heap. +// Sets flags (EQ) on success. Implemented with a CASA on Sparc. + +// As compareAndSwapL, but return flag register instead of boolean value in +// int register. +// Used by sun/misc/AtomicLongCSImpl.java. +// Mem_ptr must be a memory operand, else this node does not get +// Flag_needs_anti_dependence_check set by adlc. If this is not set this node +// can be rematerialized which leads to errors. 
+// Conditional store of a long. Emits MacroAssembler::cmpxchgd on the address
+// in $mem_ptr with compare value $oldVal and exchange value $newVal; the
+// outcome is delivered in condition register $crx (no boolean result
+// register is passed: 'noreg'). R0 is handed to cmpxchgd as an additional
+// register and no memory barrier is requested (MemBarNone).
+instruct storeLConditional_regP_regL_regL(flagsReg crx, indirect mem_ptr, iRegLsrc oldVal, iRegLsrc newVal) %{
+  match(Set crx (StoreLConditional mem_ptr (Binary oldVal newVal)));
+  // Both references to the memory operand need the '$' prefix, otherwise
+  // adlc prints the literal text "mem_ptr" instead of the operand.
+  format %{ "CMPXCHGD if ($crx = ($oldVal == *$mem_ptr)) *$mem_ptr = $newVal; as bool" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ cmpxchgd($crx$$CondRegister, R0, $oldVal$$Register, $newVal$$Register, $mem_ptr$$Register,
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                noreg, NULL, true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// As compareAndSwapP, but return flag register instead of boolean value in
+// int register.
+// This instruction is matched if UseTLAB is off.
+// Mem_ptr must be a memory operand, else this node does not get
+// Flag_needs_anti_dependence_check set by adlc. If this is not set this node
+// can be rematerialized which leads to errors.
+// The encoding is identical to storeLConditional_regP_regL_regL (cmpxchgd,
+// result in $crx, MemBarNone).
+instruct storePConditional_regP_regP_regP(flagsReg crx, indirect mem_ptr, iRegPsrc oldVal, iRegPsrc newVal) %{
+  match(Set crx (StorePConditional mem_ptr (Binary oldVal newVal)));
+  // '$' added to the second mem_ptr so the operand is substituted.
+  format %{ "CMPXCHGD if ($crx = ($oldVal == *$mem_ptr)) *$mem_ptr = $newVal; as bool" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ cmpxchgd($crx$$CondRegister, R0, $oldVal$$Register, $newVal$$Register, $mem_ptr$$Register,
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                noreg, NULL, true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Implement LoadPLocked. Must be ordered against changes of the memory location
+// by storePConditional.
+// Don't know whether this is ever used.
+// Pointer load encoded by enc_ld_ac (defined elsewhere in this file). The
+// format string and size(12) describe a three-instruction sequence:
+// LD, TWI, ISYNC.
+instruct loadPLocked(iRegPdst dst, memory mem) %{
+  match(Set dst (LoadPLocked mem));
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "LD $dst, $mem \t// loadPLocked\n\t"
+            "TWI $dst\n\t"
+            "ISYNC" %}
+  size(12);
+  ins_encode( enc_ld_ac(dst, mem) );
+  ins_pipe(pipe_class_memory);
+%}
+
+//----------Compare-And-Swap---------------------------------------------------
+
+// CompareAndSwap{P,I,L} have more than one output, therefore "CmpI
+// (CompareAndSwap ...)" or "If (CmpI (CompareAndSwap ..))" cannot be
+// matched.
+//
+// All four rules below call MacroAssembler::cmpxchgw (I, N) or cmpxchgd
+// (L, P) with CCR0 as the condition register, R0 as an additional register,
+// MemBarFenceAfter as barrier mode, and $res as the boolean result register.
+// No size() is given because the emitted length varies (see the
+// "Variable size" remarks).
+
+// 32-bit compare-and-swap; $res receives the boolean outcome.
+instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2) %{
+  match(Set res (CompareAndSwapI mem_ptr (Binary src1 src2)));
+  format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
+    __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
+                MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                $res$$Register, true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Narrow-oop compare-and-swap; uses the word-sized cmpxchgw like the int rule.
+instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2) %{
+  match(Set res (CompareAndSwapN mem_ptr (Binary src1 src2)));
+  format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
+    __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
+                MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                $res$$Register, true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// 64-bit compare-and-swap; cmpxchgd takes an extra argument (NULL here)
+// compared to cmpxchgw.
+instruct compareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2) %{
+  match(Set res (CompareAndSwapL mem_ptr (Binary src1 src2)));
+  format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
+    __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
+                MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                $res$$Register, NULL, true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Pointer compare-and-swap; same encoding as the long rule.
+instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2) %{
+  match(Set res (CompareAndSwapP mem_ptr (Binary src1 src2)));
+  format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
+    __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
+                MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                $res$$Register, NULL, true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// GetAndAdd / GetAndSet. The code sequences live in the enc_GetAndAdd* and
+// enc_GetAndSet* encoding classes defined elsewhere in this file; only the
+// matching is done here.
+
+instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{
+  match(Set res (GetAndAddI mem_ptr src));
+  format %{ "GetAndAddI $res, $mem_ptr, $src" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode( enc_GetAndAddI(res, mem_ptr, src) );
+  ins_pipe(pipe_class_default);
+%}
+
+instruct getAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{
+  match(Set res (GetAndAddL mem_ptr src));
+  format %{ "GetAndAddL $res, $mem_ptr, $src" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode( enc_GetAndAddL(res, mem_ptr, src) );
+  ins_pipe(pipe_class_default);
+%}
+
+instruct getAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{
+  match(Set res (GetAndSetI mem_ptr src));
+  format %{ "GetAndSetI $res, $mem_ptr, $src" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode( enc_GetAndSetI(res, mem_ptr, src) );
+  ins_pipe(pipe_class_default);
+%}
+
+instruct getAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{
+  match(Set res (GetAndSetL mem_ptr src));
+  format %{ "GetAndSetL $res, $mem_ptr, $src" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode( enc_GetAndSetL(res, mem_ptr, src) );
+  ins_pipe(pipe_class_default);
+%}
+
+// Pointer variant: reuses the long encoding enc_GetAndSetL.
+instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src) %{
+  match(Set res (GetAndSetP mem_ptr src));
+  format %{ "GetAndSetP $res, $mem_ptr, $src" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode( enc_GetAndSetL(res, mem_ptr, src) );
+  ins_pipe(pipe_class_default);
+%}
+
+// Narrow-oop variant: reuses the int encoding enc_GetAndSetI.
+instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src) %{
+  match(Set res (GetAndSetN mem_ptr src));
+  format %{ "GetAndSetN $res, $mem_ptr, $src" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode( enc_GetAndSetI(res, mem_ptr, src) );
+  ins_pipe(pipe_class_default);
+%}
+
+//----------Arithmetic Instructions--------------------------------------------
+// Addition Instructions
+
+// PPC has no instruction setting overflow of 32-bit integer.
+//instruct addExactI_rReg(rarg4RegI dst, rRegI src, flagsReg cr) %{
+//  match(AddExactI dst src);
+//  effect(DEF cr);
+//
+//  format %{ "ADD $dst, $dst, $src \t// addExact int, sets $cr" %}
+//  ins_encode( enc_add(dst, dst, src) );
+//  ins_pipe(pipe_class_default);
+//%}
+
+// Register Addition
+// Matches AddI; both sources use the iRegIsrc_iRegL2Isrc operand class.
+instruct addI_reg_reg(iRegIdst dst, iRegIsrc_iRegL2Isrc src1, iRegIsrc_iRegL2Isrc src2) %{
+  match(Set dst (AddI src1 src2));
+  format %{ "ADD $dst, $src1, $src2" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_add);
+    __ add($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Expand does not work with above instruct. (??)
+// Match-less twin of addI_reg_reg with plain iRegIsrc sources; it is only
+// reachable from expand rules (see tree_addI_addI_addI_reg_reg_Ex).
+instruct addI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
+  // no match-rule
+  effect(DEF dst, USE src1, USE src2);
+  format %{ "ADD $dst, $src1, $src2" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_add);
+    __ add($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Rebalances the chain ((src1 + src2) + src3) + src4 into
+// (src1 + src2) + (src3 + src4), so the two inner additions do not depend on
+// each other.
+instruct tree_addI_addI_addI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
+  match(Set dst (AddI (AddI (AddI src1 src2) src3) src4));
+  ins_cost(DEFAULT_COST*3);
+
+  expand %{
+    // FIXME: we should do this in the ideal world.
+    iRegIdst tmp1;
+    iRegIdst tmp2;
+    addI_reg_reg(tmp1, src1, src2);
+    addI_reg_reg_2(tmp2, src3, src4); // Adlc complains about addI_reg_reg.
+    addI_reg_reg(dst, tmp1, tmp2);
+  %}
+%}
+
+// Immediate Addition
+instruct addI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
+  match(Set dst (AddI src1 src2));
+  format %{ "ADDI $dst, $src1, $src2" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
+    __ addi($dst$$Register, $src1$$Register, $src2$$constant);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Immediate Addition with 16-bit shifted operand
+// The constant is shifted right by 16 before being handed to addis.
+instruct addI_reg_immhi16(iRegIdst dst, iRegIsrc src1, immIhi16 src2) %{
+  match(Set dst (AddI src1 src2));
+  format %{ "ADDIS $dst, $src1, $src2" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_addis);
+    __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Long Addition
+instruct addL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
+  match(Set dst (AddL src1 src2));
+  format %{ "ADD $dst, $src1, $src2 \t// long" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_add);
+    __ add($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Expand does not work with above instruct. (??)
+// Match-less twin of addL_reg_reg, used from tree_addL_addL_addL_reg_reg_Ex.
+instruct addL_reg_reg_2(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
+  // no match-rule
+  effect(DEF dst, USE src1, USE src2);
+  format %{ "ADD $dst, $src1, $src2 \t// long" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_add);
+    __ add($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Long counterpart of tree_addI_addI_addI_reg_reg_Ex:
+// ((src1 + src2) + src3) + src4  ->  (src1 + src2) + (src3 + src4).
+instruct tree_addL_addL_addL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2, iRegLsrc src3, iRegLsrc src4) %{
+  match(Set dst (AddL (AddL (AddL src1 src2) src3) src4));
+  ins_cost(DEFAULT_COST*3);
+
+  expand %{
+    // FIXME: we should do this in the ideal world.
+    iRegLdst tmp1;
+    iRegLdst tmp2;
+    addL_reg_reg(tmp1, src1, src2);
+    addL_reg_reg_2(tmp2, src3, src4); // Adlc complains about addL_reg_reg.
+    addL_reg_reg(dst, tmp1, tmp2);
+  %}
+%}
+
+// AddL + ConvL2I.
+// The ConvL2I is folded into the rule: a single add writes the int result.
+instruct addI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
+  match(Set dst (ConvL2I (AddL src1 src2)));
+
+  format %{ "ADD $dst, $src1, $src2 \t// long + l2i" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_add);
+    __ add($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// No constant pool entries required.
+instruct addL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
+  match(Set dst (AddL src1 src2));
+
+  format %{ "ADDI $dst, $src1, $src2" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
+    __ addi($dst$$Register, $src1$$Register, $src2$$constant);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Long Immediate Addition with 16-bit shifted operand.
+// No constant pool entries required.
+instruct addL_reg_immhi16(iRegLdst dst, iRegLsrc src1, immL32hi16 src2) %{
+  match(Set dst (AddL src1 src2));
+
+  format %{ "ADDIS $dst, $src1, $src2" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_addis);
+    __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Pointer Register Addition
+// The base uses operand class iRegP_N2P, the offset is a long register.
+instruct addP_reg_reg(iRegPdst dst, iRegP_N2P src1, iRegLsrc src2) %{
+  match(Set dst (AddP src1 src2));
+  format %{ "ADD $dst, $src1, $src2" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_add);
+    __ add($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Pointer Immediate Addition
+// No constant pool entries required.
+// AddP with a 16-bit immediate offset: a single addi.
+instruct addP_reg_imm16(iRegPdst dst, iRegP_N2P src1, immL16 src2) %{
+  match(Set dst (AddP src1 src2));
+
+  format %{ "ADDI $dst, $src1, $src2" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
+    __ addi($dst$$Register, $src1$$Register, $src2$$constant);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Pointer Immediate Addition with 16-bit shifted operand.
+// No constant pool entries required.
+// The constant is shifted right by 16 before being handed to addis.
+instruct addP_reg_immhi16(iRegPdst dst, iRegP_N2P src1, immL32hi16 src2) %{
+  match(Set dst (AddP src1 src2));
+
+  format %{ "ADDIS $dst, $src1, $src2" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_addis);
+    __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+//---------------------
+// Subtraction Instructions
+
+// Register Subtraction
+// dst = src1 - src2. Note the swapped operand order in the subf call:
+// src2 is passed before src1.
+instruct subI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
+  match(Set dst (SubI src1 src2));
+  format %{ "SUBF $dst, $src2, $src1" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_subf);
+    __ subf($dst$$Register, $src2$$Register, $src1$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Immediate Subtraction
+// The compiler converts "x-c0" into "x+ -c0" (see SubINode::Ideal),
+// so this rule seems to be unused.
+// Implemented as addi with the negated constant.
+// NOTE(review): if immI16 admits -32768, the negated value (32768) no longer
+// fits a signed 16-bit immediate -- confirm against the immI16 operand
+// definition and the range check in Assembler::addi.
+instruct subI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
+  match(Set dst (SubI src1 src2));
+  format %{ "SUBI $dst, $src1, $src2" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
+    __ addi($dst$$Register, $src1$$Register, ($src2$$constant) * (-1));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// SubI from constant (using subfic).
+// dst = src1 - src2 with src1 a 16-bit constant; subfic takes the register
+// operand first and the immediate last.
+instruct subI_imm16_reg(iRegIdst dst, immI16 src1, iRegIsrc src2) %{
+  match(Set dst (SubI src1 src2));
+  format %{ "SUBI $dst, $src1, $src2" %}
+
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_subfic);
+    __ subfic($dst$$Register, $src2$$Register, $src1$$constant);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Turn the sign-bit of an integer into a 32-bit mask, 0x0...0 for
+// positive integers and 0xF...F for negative ones.
+// Helper for expand rules only (no match rule, predicate(false)).
+instruct signmask32I_regI(iRegIdst dst, iRegIsrc src) %{
+  // no match-rule, false predicate
+  effect(DEF dst, USE src);
+  predicate(false);
+
+  format %{ "SRAWI $dst, $src, #31" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_srawi);
+    __ srawi($dst$$Register, $src$$Register, 0x1f);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// AbsI as a branch-free three-instruction sequence:
+//   tmp1 = signmask(src); tmp2 = tmp1 ^ src; dst = tmp2 - tmp1.
+instruct absI_reg_Ex(iRegIdst dst, iRegIsrc src) %{
+  match(Set dst (AbsI src));
+  ins_cost(DEFAULT_COST*3);
+
+  expand %{
+    iRegIdst tmp1;
+    iRegIdst tmp2;
+    signmask32I_regI(tmp1, src);
+    xorI_reg_reg(tmp2, tmp1, src);
+    subI_reg_reg(dst, tmp2, tmp1);
+  %}
+%}
+
+// Integer negation: matches (SubI 0 src2) and emits a single neg.
+instruct negI_regI(iRegIdst dst, immI_0 zero, iRegIsrc src2) %{
+  match(Set dst (SubI zero src2));
+  format %{ "NEG $dst, $src2" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_neg);
+    __ neg($dst$$Register, $src2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Long subtraction
+// dst = src1 - src2; as in subI_reg_reg, src2 is passed to subf before src1.
+instruct subL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
+  match(Set dst (SubL src1 src2));
+  format %{ "SUBF $dst, $src2, $src1 \t// long" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_subf);
+    __ subf($dst$$Register, $src2$$Register, $src1$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// SubL + convL2I.
+// The ConvL2I is folded into the rule: a single subf writes the int result.
+instruct subI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
+  match(Set dst (ConvL2I (SubL src1 src2)));
+
+  format %{ "SUBF $dst, $src2, $src1 \t// long + l2i" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_subf);
+    __ subf($dst$$Register, $src2$$Register, $src1$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Immediate Subtraction
+// The compiler converts "x-c0" into "x+ -c0" (see SubLNode::Ideal),
+// so this rule seems to be unused.
+// No constant pool entries required.
+// Implemented as addi with the negated constant.
+// NOTE(review): if immL16 admits -32768, the negated value (32768) no longer
+// fits a signed 16-bit immediate -- confirm against the immL16 operand
+// definition and the range check in Assembler::addi.
+instruct subL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
+  match(Set dst (SubL src1 src2));
+
+  format %{ "SUBI $dst, $src1, $src2 \t// long" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
+    __ addi($dst$$Register, $src1$$Register, ($src2$$constant) * (-1));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for
+// positive longs and 0xF...F for negative ones.
+// Helper for expand rules only (no match rule, predicate(false)).
+// NOTE(review): the description talks about longs and the encoding is a
+// 64-bit shift (sradi by 63), yet both operands are declared with the int
+// register classes iRegIdst/iRegIsrc -- confirm against the users of this
+// node that this is intended.
+instruct signmask64I_regI(iRegIdst dst, iRegIsrc src) %{
+  // no match-rule, false predicate
+  effect(DEF dst, USE src);
+  predicate(false);
+
+  format %{ "SRADI $dst, $src, #63" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_sradi);
+    __ sradi($dst$$Register, $src$$Register, 0x3f);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Long negation
+// Matches (SubL 0 src2) and emits a single neg.
+instruct negL_reg_reg(iRegLdst dst, immL_0 zero, iRegLsrc src2) %{
+  match(Set dst (SubL zero src2));
+  format %{ "NEG $dst, $src2 \t// long" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_neg);
+    __ neg($dst$$Register, $src2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// NegL + ConvL2I.
+// The ConvL2I is folded into the rule: a single neg writes the int result.
+instruct negI_con0_regL(iRegIdst dst, immL_0 zero, iRegLsrc src2) %{
+  match(Set dst (ConvL2I (SubL zero src2)));
+
+  format %{ "NEG $dst, $src2 \t// long + l2i" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_neg);
+    __ neg($dst$$Register, $src2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Multiplication Instructions
+// Integer Multiplication
+
+// Register Multiplication
+instruct mulI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
+  match(Set dst (MulI src1 src2));
+  ins_cost(DEFAULT_COST);
+
+  format %{ "MULLW $dst, $src1, $src2" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_mullw);
+    __ mullw($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Immediate Multiplication
+instruct mulI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
+  match(Set dst (MulI src1 src2));
+  ins_cost(DEFAULT_COST);
+
+  format %{ "MULLI $dst, $src1, $src2" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_mulli);
+    __ mulli($dst$$Register, $src1$$Register, $src2$$constant);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Long Register Multiplication
+instruct mulL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
+  match(Set dst (MulL src1 src2));
+  ins_cost(DEFAULT_COST);
+
+  // Comma after $dst added so the listing matches the other three-operand
+  // format strings in this file.
+  format %{ "MULLD $dst, $src1, $src2 \t// long" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_mulld);
+    __ mulld($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Multiply high for optimized long division by constant.
+// Matches MulHiL and emits a single mulhd.
+instruct mulHighL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
+  match(Set dst (MulHiL src1 src2));
+  ins_cost(DEFAULT_COST);
+
+  // Comma after $dst added so the listing matches the other three-operand
+  // format strings in this file.
+  format %{ "MULHD $dst, $src1, $src2 \t// long" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_mulhd);
+    __ mulhd($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Immediate Multiplication
+// Long multiply by a 16-bit constant; uses mulli like the int rule.
+instruct mulL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
+  match(Set dst (MulL src1 src2));
+  ins_cost(DEFAULT_COST);
+
+  format %{ "MULLI $dst, $src1, $src2" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_mulli);
+    __ mulli($dst$$Register, $src1$$Register, $src2$$constant);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Integer Division with Immediate -1: Negate.
+instruct divI_reg_immIvalueMinus1(iRegIdst dst, iRegIsrc src1, immI_minus1 src2) %{
+  match(Set dst (DivI src1 src2));
+  ins_cost(DEFAULT_COST);
+
+  format %{ "NEG $dst, $src1 \t// /-1" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_neg);
+    __ neg($dst$$Register, $src1$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Integer Division with constant, but not -1.
+// We should be able to improve this by checking the type of src2.
+// It might well be that src2 is known to be positive.
+// As a match rule this only applies when the predicate holds, i.e. when
+// find_int_con(-1) on the divisor returns something other than -1. The rule
+// is additionally instantiated directly from the expand rules
+// divI_reg_reg_Ex and modI_reg_reg_Ex, which pair it with a fix-up for a
+// divisor of -1.
+instruct divI_reg_regnotMinus1(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
+  match(Set dst (DivI src1 src2));
+  predicate(n->in(2)->find_int_con(-1) != -1); // src2 is a constant, but not -1
+  ins_cost(2*DEFAULT_COST);
+
+  format %{ "DIVW $dst, $src1, $src2 \t// /not-1" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_divw);
+    __ divw($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Expand-only helper (no match rule, predicate(false)): if $crx signals
+// "equal", dst is overwritten with neg(src1); otherwise the bne skips the
+// neg and dst keeps its previous value (hence USE_DEF dst).
+// The size expression is hard-wired to 8 bytes because its condition is the
+// literal 'false'.
+instruct cmovI_bne_negI_reg(iRegIdst dst, flagsReg crx, iRegIsrc src1) %{
+  effect(USE_DEF dst, USE src1, USE crx);
+  predicate(false);
+
+  ins_variable_size_depending_on_alignment(true);
+
+  format %{ "CMOVE $dst, neg($src1), $crx" %}
+  // Worst case is branch + move + stop, no stop without scheduler.
+  size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
+    Label done;
+    __ bne($crx$$CondRegister, done);
+    __ neg($dst$$Register, $src1$$Register);
+    // TODO PPC port __ endgroup_if_needed(_size == 12);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Integer Division with Registers not containing constants.
+// Expands to: compare src2 with -1, divide unconditionally, then replace the
+// quotient by neg(src1) when the compare found src2 == -1.
+instruct divI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
+  match(Set dst (DivI src1 src2));
+  ins_cost(10*DEFAULT_COST);
+
+  expand %{
+    immI16 imm %{ (int)-1 %}
+    flagsReg tmp1;
+    cmpI_reg_imm16(tmp1, src2, imm);          // check src2 == -1
+    divI_reg_regnotMinus1(dst, src1, src2);   // dst = src1 / src2
+    cmovI_bne_negI_reg(dst, tmp1, src1);      // cmove dst = neg(src1) if src2 == -1
+  %}
+%}
+
+// Long Division with Immediate -1: Negate.
+// Matches DivL with an immL_minus1 divisor and emits a single neg.
+instruct divL_reg_immLvalueMinus1(iRegLdst dst, iRegLsrc src1, immL_minus1 src2) %{
+  match(Set dst (DivL src1 src2));
+  ins_cost(DEFAULT_COST);
+
+  format %{ "NEG $dst, $src1 \t// /-1, long" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_neg);
+    __ neg($dst$$Register, $src1$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Long Division with constant, but not -1.
+// As a match rule this only applies when find_long_con(-1L) on the divisor
+// returns something other than -1. The rule is additionally instantiated
+// directly from the expand rules divL_reg_reg_Ex and modL_reg_reg_Ex.
+instruct divL_reg_regnotMinus1(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
+  match(Set dst (DivL src1 src2));
+  predicate(n->in(2)->find_long_con(-1L) != -1L); // Src2 is a constant, but not -1.
+  ins_cost(2*DEFAULT_COST);
+
+  format %{ "DIVD $dst, $src1, $src2 \t// /not-1, long" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_divd);
+    __ divd($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Expand-only helper (no match rule, predicate(false)): if $crx signals
+// "equal", dst is overwritten with neg(src1); otherwise the bne skips the
+// neg and dst keeps its previous value (hence USE_DEF dst).
+// The size expression is hard-wired to 8 bytes because its condition is the
+// literal 'false'.
+instruct cmovL_bne_negL_reg(iRegLdst dst, flagsReg crx, iRegLsrc src1) %{
+  effect(USE_DEF dst, USE src1, USE crx);
+  predicate(false);
+
+  ins_variable_size_depending_on_alignment(true);
+
+  format %{ "CMOVE $dst, neg($src1), $crx" %}
+  // Worst case is branch + move + stop, no stop without scheduler.
+  size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
+    Label done;
+    __ bne($crx$$CondRegister, done);
+    __ neg($dst$$Register, $src1$$Register);
+    // TODO PPC port __ endgroup_if_needed(_size == 12);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Long Division with Registers not containing constants.
+// Expands to: compare src2 with -1, divide unconditionally, then replace the
+// quotient by neg(src1) when the compare found src2 == -1.
+instruct divL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
+  match(Set dst (DivL src1 src2));
+  ins_cost(10*DEFAULT_COST);
+
+  expand %{
+    immL16 imm %{ (int)-1 %}
+    flagsReg tmp1;
+    cmpL_reg_imm16(tmp1, src2, imm);          // check src2 == -1
+    divL_reg_regnotMinus1(dst, src1, src2);   // dst = src1 / src2
+    cmovL_bne_negL_reg(dst, tmp1, src1);      // cmove dst = neg(src1) if src2 == -1
+  %}
+%}
+
+// Integer Remainder with registers.
+// Computed as src1 - src2 * (src1 / src2), where the quotient is produced by
+// the same three-step sequence as in divI_reg_reg_Ex.
+instruct modI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
+  match(Set dst (ModI src1 src2));
+  ins_cost(10*DEFAULT_COST);
+
+  expand %{
+    immI16 imm %{ (int)-1 %}
+    flagsReg tmp1;
+    iRegIdst tmp2;
+    iRegIdst tmp3;
+    cmpI_reg_imm16(tmp1, src2, imm);           // check src2 == -1
+    divI_reg_regnotMinus1(tmp2, src1, src2);   // tmp2 = src1 / src2
+    cmovI_bne_negI_reg(tmp2, tmp1, src1);      // cmove tmp2 = neg(src1) if src2 == -1
+    mulI_reg_reg(tmp3, src2, tmp2);            // tmp3 = src2 * tmp2
+    subI_reg_reg(dst, src1, tmp3);             // dst = src1 - tmp3
+  %}
+%}
+
+// Long Remainder with registers
+// Same scheme as modI_reg_reg_Ex, using the long helper rules.
+// NOTE(review): the cr0 operand is declared but not referenced by any
+// instruction in the expand block -- confirm whether it is still needed.
+instruct modL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
+  match(Set dst (ModL src1 src2));
+  ins_cost(10*DEFAULT_COST);
+
+  expand %{
+    immL16 imm %{ (int)-1 %}
+    flagsReg tmp1;
+    iRegLdst tmp2;
+    iRegLdst tmp3;
+    cmpL_reg_imm16(tmp1, src2, imm);             // check src2 == -1
+    divL_reg_regnotMinus1(tmp2, src1, src2);     // tmp2 = src1 / src2
+    cmovL_bne_negL_reg(tmp2, tmp1, src1);        // cmove tmp2 = neg(src1) if src2 == -1
+    mulL_reg_reg(tmp3, src2, tmp2);              // tmp3 = src2 * tmp2
+    subL_reg_reg(dst, src1, tmp3);               // dst = src1 - tmp3
+  %}
+%}
+
+// Integer Shift Instructions
+
+// Register Shift Left
+
+// Clear all but the lowest #mask bits.
+// Used to normalize shift amounts in registers.
// Helper for the *_Ex shift rules: emits clrldi, which clears the upper
// $mask bits of src so that only the valid shift-count bits remain.
instruct maskI_reg_imm(iRegIdst dst, iRegIsrc src, uimmI6 mask) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src, USE mask);
  predicate(false);

  format %{ "MASK $dst, $src, $mask \t// clear $mask upper bits" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ clrldi($dst$$Register, $src$$Register, $mask$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// Raw 32-bit shift left by register; only used from lShiftI_reg_reg_Ex,
// which masks the shift count first.
instruct lShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src1, USE src2);
  predicate(false);

  format %{ "SLW $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_slw);
    __ slw($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// LShiftI by register: reduce the count to its low 5 bits, then shift.
instruct lShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (LShiftI src1 src2));
  ins_cost(DEFAULT_COST*2);
  expand %{
    uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %}
    iRegIdst tmpI;
    maskI_reg_imm(tmpI, src2, mask);
    lShiftI_reg_reg(dst, src1, tmpI);
  %}
%}

// Register Shift Left Immediate
instruct lShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  format %{ "SLWI $dst, $src1, ($src2 & 0x1f)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm);
    __ slwi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f);
  %}
  ins_pipe(pipe_class_default);
%}

// AndI with negpow2-constant + LShiftI
// maskbits is the shift amount plus log2 of the negated AND constant; once
// it reaches 32 the rule loads the constant 0 instead of rotating.
instruct lShiftI_andI_immInegpow2_imm5(iRegIdst dst, iRegIsrc src1, immInegpow2 src2, uimmI5 src3) %{
  match(Set dst (LShiftI (AndI src1 src2) src3));
  predicate(UseRotateAndMaskInstructionsPPC64);

  format %{ "RLWINM $dst, lShiftI(AndI($src1, $src2), $src3)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm); // FIXME: assert that rlwinm is equal to addi
    long src2 = $src2$$constant;
    long src3 = $src3$$constant;
    long maskbits = src3 + log2_long((jlong) (julong) (juint) -src2);
    if (maskbits >= 32) {
      __ li($dst$$Register, 0); // addi
    } else {
      __ rlwinm($dst$$Register, $src1$$Register, src3 & 0x1f, 0, (31-maskbits) & 0x1f);
    }
  %}
  ins_pipe(pipe_class_default);
%}

// RShiftI + AndI with negpow2-constant + LShiftI
// Shifting right and then left by the same amount nets out to no rotation,
// so rlwinm is emitted with a rotate count of 0 and only the mask applies.
instruct lShiftI_andI_immInegpow2_rShiftI_imm5(iRegIdst dst, iRegIsrc src1, immInegpow2 src2, uimmI5 src3) %{
  match(Set dst (LShiftI (AndI (RShiftI src1 src3) src2) src3));
  predicate(UseRotateAndMaskInstructionsPPC64);

  format %{ "RLWINM $dst, lShiftI(AndI(RShiftI($src1, $src3), $src2), $src3)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm); // FIXME: assert that rlwinm is equal to addi
    long src2 = $src2$$constant;
    long src3 = $src3$$constant;
    long maskbits = src3 + log2_long((jlong) (julong) (juint) -src2);
    if (maskbits >= 32) {
      __ li($dst$$Register, 0); // addi
    } else {
      __ rlwinm($dst$$Register, $src1$$Register, 0, 0, (31-maskbits) & 0x1f);
    }
  %}
  ins_pipe(pipe_class_default);
%}

// Raw 64-bit shift left by register; only used from lShiftL_regL_regI_Ex.
instruct lShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src1, USE src2);
  predicate(false);

  format %{ "SLD $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_sld);
    __ sld($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Register Shift Left
// The count is reduced to its low 6 bits before the 64-bit shift.
instruct lShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
  match(Set dst (LShiftL src1 src2));
  ins_cost(DEFAULT_COST*2);
  expand %{
    uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %}
    iRegIdst tmpI;
    maskI_reg_imm(tmpI, src2, mask);
    lShiftL_regL_regI(dst, src1, tmpI);
  %}
%}

// Register Shift Left Immediate
instruct lshiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));
  format %{ "SLDI $dst, $src1, ($src2 & 0x3f)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
    __ sldi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
  %}
  ins_pipe(pipe_class_default);
%}

// If we shift by 32 bits or more, we need not convert I2L.
instruct lShiftL_regI_immGE32(iRegLdst dst, iRegIsrc src1, uimmI6_ge32 src2) %{
  match(Set dst (LShiftL (ConvI2L src1) src2));
  ins_cost(DEFAULT_COST);

  size(4);
  format %{ "SLDI $dst, i2l($src1), $src2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
    __ sldi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
  %}
  ins_pipe(pipe_class_default);
%}

// Shift a positive int to the left.
// Clrlsldi clears the upper 32 bits and shifts.
instruct scaledPositiveI2L_lShiftL_convI2L_reg_imm6(iRegLdst dst, iRegIsrc src1, uimmI6 src2) %{
  match(Set dst (LShiftL (ConvI2L src1) src2));
  predicate(((ConvI2LNode*)(_kids[0]->_leaf))->type()->is_long()->is_positive_int());

  format %{ "SLDI $dst, i2l(positive_int($src1)), $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldic);
    __ clrlsldi($dst$$Register, $src1$$Register, 0x20, $src2$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// Raw 32-bit arithmetic shift right by register; only used from
// arShiftI_reg_reg_Ex.
instruct arShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src1, USE src2);
  predicate(false);

  format %{ "SRAW $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_sraw);
    __ sraw($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Register Arithmetic Shift Right
instruct arShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (RShiftI src1 src2));
  ins_cost(DEFAULT_COST*2);
  expand %{
    uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %}
    iRegIdst tmpI;
    maskI_reg_imm(tmpI, src2, mask);
    arShiftI_reg_reg(dst, src1, tmpI);
  %}
%}

// Register Arithmetic Shift Right Immediate
instruct arShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  format %{ "SRAWI $dst, $src1, ($src2 & 0x1f)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_srawi);
    __ srawi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f);
  %}
  ins_pipe(pipe_class_default);
%}

// Raw 64-bit arithmetic shift right by register; only used from
// arShiftL_regL_regI_Ex.
instruct arShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src1, USE src2);
  predicate(false);

  format %{ "SRAD $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_srad);
    __ srad($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Register Shift Right Arithmetic Long
instruct arShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
  match(Set dst (RShiftL src1 src2));
  ins_cost(DEFAULT_COST*2);

  expand %{
    uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %}
    iRegIdst tmpI;
    maskI_reg_imm(tmpI, src2, mask);
    arShiftL_regL_regI(dst, src1, tmpI);
  %}
%}

// Register Shift Right Immediate
instruct arShiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  format %{ "SRADI $dst, $src1, ($src2 & 0x3f)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_sradi);
    __ sradi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
  %}
  ins_pipe(pipe_class_default);
%}

// RShiftL + ConvL2I
instruct convL2I_arShiftL_regL_immI(iRegIdst dst, iRegLsrc src1, immI src2) %{
  match(Set dst (ConvL2I (RShiftL src1 src2)));

  format %{ "SRADI $dst, $src1, ($src2 & 0x3f) \t// long + l2i" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_sradi);
    __ sradi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
  %}
  ins_pipe(pipe_class_default);
%}

// Raw 32-bit logical shift right by register; only used from
// urShiftI_reg_reg_Ex.
instruct urShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src1, USE src2);
  predicate(false);

  format %{ "SRW $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_srw);
    __ srw($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Register Shift Right
instruct urShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (URShiftI src1 src2));
  ins_cost(DEFAULT_COST*2);

  expand %{
    uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %}
    iRegIdst tmpI;
    maskI_reg_imm(tmpI, src2, mask);
    urShiftI_reg_reg(dst, src1, tmpI);
  %}
%}

// Register Shift Right Immediate
instruct urShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  format %{ "SRWI $dst, $src1, ($src2 & 0x1f)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm);
    __ srwi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f);
  %}
  ins_pipe(pipe_class_default);
%}

// Raw 64-bit logical shift right by register; only used from
// urShiftL_regL_regI_Ex.
instruct urShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src1, USE src2);
  predicate(false);

  format %{ "SRD $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_srd);
    __ srd($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Register Shift Right
instruct urShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
  match(Set dst (URShiftL src1 src2));
  ins_cost(DEFAULT_COST*2);

  expand %{
    uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %}
    iRegIdst tmpI;
    maskI_reg_imm(tmpI, src2, mask);
    urShiftL_regL_regI(dst, src1, tmpI);
  %}
%}

// Register Shift Right Immediate
instruct urShiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  format %{ "SRDI $dst, $src1, ($src2 & 0x3f)" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
  %}
  ins_pipe(pipe_class_default);
%}

// URShiftL + ConvL2I.
instruct convL2I_urShiftL_regL_immI(iRegIdst dst, iRegLsrc src1, immI src2) %{
  match(Set dst (ConvL2I (URShiftL src1 src2)));

  format %{ "SRDI $dst, $src1, ($src2 & 0x3f) \t// long + l2i" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
  %}
  ins_pipe(pipe_class_default);
%}

// Register Shift Right Immediate with a CastP2X
instruct shrP_convP2X_reg_imm6(iRegLdst dst, iRegP_N2P src1, uimmI6 src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  format %{ "SRDI $dst, $src1, $src2 \t// Cast ptr $src1 to long and shift" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
  %}
  ins_pipe(pipe_class_default);
%}

// ConvI2L followed by ConvL2I collapses into a single sign extension.
instruct sxtI_reg(iRegIdst dst, iRegIsrc src) %{
  match(Set dst (ConvL2I (ConvI2L src)));

  format %{ "EXTSW $dst, $src \t// int->int" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_extsw);
    __ extsw($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

//----------Rotate Instructions------------------------------------------------

// Rotate Left by 8-bit immediate
// The predicate only accepts shift pairs whose sum is a multiple of 32,
// i.e. an OR of a left and a right shift that together form a rotation.
instruct rotlI_reg_immi8(iRegIdst dst, iRegIsrc src, immI8 lshift, immI8 rshift) %{
  match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));

  format %{ "ROTLWI $dst, $src, $lshift" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm);
    __ rotlwi($dst$$Register, $src$$Register, $lshift$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// Rotate Right by 8-bit immediate
// Mirror image of rotlI_reg_immi8 with the operands of the OR swapped.
instruct rotrI_reg_immi8(iRegIdst dst, iRegIsrc src, immI8 rshift, immI8 lshift) %{
  match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));

  // The source register is printed as well, matching rotlI_reg_immi8 and
  // the instruction that is actually emitted.
  format %{ "ROTRWI $dst, $src, $rshift" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm);
    __ rotrwi($dst$$Register, $src$$Register, $rshift$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

//----------Floating Point Arithmetic Instructions-----------------------------

// Add float single precision
instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (AddF src1 src2));

  format %{ "FADDS $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fadds);
    __ fadds($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Add float double precision
instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (AddD src1 src2));

  format %{ "FADD $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fadd);
    __ fadd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Sub float single precision
instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (SubF src1 src2));

  format %{ "FSUBS $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fsubs);
    __ fsubs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Sub float double precision
instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (SubD src1 src2));
  format %{ "FSUB $dst, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fsub);
    __ fsub($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Mul float single precision
instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (MulF src1 src2));

  size(4);
  format %{ "FMULS $dst, $src1, $src2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fmuls);
    __ fmuls($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Mul float double precision
instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (MulD src1 src2));

  size(4);
  format %{ "FMUL $dst, $src1, $src2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fmul);
    __ fmul($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Div float single precision
instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (DivF src1 src2));

  size(4);
  format %{ "FDIVS $dst, $src1, $src2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fdivs);
    __ fdivs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Div float double precision
instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (DivD src1 src2));

  size(4);
  format %{ "FDIV $dst, $src1, $src2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fdiv);
    __ fdiv($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Absolute float single precision
instruct absF_reg(regF dst, regF src) %{
  match(Set dst (AbsF src));

  size(4);
  format %{ "FABS $dst, $src \t// float" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fabs);
    __ fabs($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Absolute float double precision
instruct absD_reg(regD dst, regD src) %{
  match(Set dst (AbsD src));

  size(4);
  format %{ "FABS $dst, $src \t// double" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fabs);
    __ fabs($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Negate float single precision
instruct negF_reg(regF dst, regF src) %{
  match(Set dst (NegF src));

  size(4);
  format %{ "FNEG $dst, $src \t// float" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fneg);
    __ fneg($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Negate float double precision
instruct negD_reg(regD dst, regD src) %{
  match(Set dst (NegD src));

  size(4);
  format %{ "FNEG $dst, $src \t// double" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fneg);
    __ fneg($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// AbsF + NegF.
// The two ideal nodes are folded into a single FNABS.
instruct negF_absF_reg(regF dst, regF src) %{
  match(Set dst (NegF (AbsF src)));

  size(4);
  format %{ "FNABS $dst, $src \t// float" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fnabs);
    __ fnabs($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// AbsD + NegD.
// The two ideal nodes are folded into a single FNABS.
instruct negD_absD_reg(regD dst, regD src) %{
  match(Set dst (NegD (AbsD src)));

  size(4);
  format %{ "FNABS $dst, $src \t// double" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fnabs);
    __ fnabs($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// VM_Version::has_sqrt() decides if this node will be used.
// Sqrt float double precision
instruct sqrtD_reg(regD dst, regD src) %{
  match(Set dst (SqrtD src));

  size(4);
  format %{ "FSQRT $dst, $src" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fsqrt);
    __ fsqrt($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Single-precision sqrt.
// Matches the float -> double -> sqrt -> float chain and emits one FSQRTS.
instruct sqrtF_reg(regF dst, regF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
  ins_cost(DEFAULT_COST);

  size(4);
  format %{ "FSQRTS $dst, $src" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fsqrts);
    __ fsqrts($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// RoundDouble is a no-op: zero cost, zero size, nothing emitted.
instruct roundDouble_nop(regD dst) %{
  match(Set dst (RoundDouble dst));
  ins_cost(0);

  size(0);
  format %{ " -- \t// RoundDouble not needed - empty" %}
  // PPC results are already "rounded" (i.e., normal-format IEEE).
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

// RoundFloat is a no-op: zero cost, zero size, nothing emitted.
instruct roundFloat_nop(regF dst) %{
  match(Set dst (RoundFloat dst));
  ins_cost(0);

  size(0);
  format %{ " -- \t// RoundFloat not needed - empty" %}
  // PPC results are already "rounded" (i.e., normal-format IEEE).
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

//----------Logical Instructions-----------------------------------------------

// And Instructions

// Register And
instruct andI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (AndI src1 src2));

  size(4);
  format %{ "AND $dst, $src1, $src2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_and);
    __ andr($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate And
// Emits andi_, so cr0 is declared as killed.
instruct andI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2, flagsRegCR0 cr0) %{
  match(Set dst (AndI src1 src2));
  effect(KILL cr0);

  size(4);
  format %{ "ANDI $dst, $src1, $src2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_andi_);
    // FIXME: avoid andi_ ?
    __ andi_($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate And where the immediate is a negative power of 2.
// Emitted as clrrdi with a bit count of log2 of the negated constant.
instruct andI_reg_immInegpow2(iRegIdst dst, iRegIsrc src1, immInegpow2 src2) %{
  match(Set dst (AndI src1 src2));

  size(4);
  format %{ "ANDWI $dst, $src1, $src2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
    __ clrrdi($dst$$Register, $src1$$Register, log2_long((jlong)(julong)(juint)-($src2$$constant)));
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate And with a (power of 2) - 1 constant.
// Emitted as clrldi with a bit count of 64 - log2(constant + 1).
instruct andI_reg_immIpow2minus1(iRegIdst dst, iRegIsrc src1, immIpow2minus1 src2) %{
  match(Set dst (AndI src1 src2));

  size(4);
  format %{ "ANDWI $dst, $src1, $src2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ clrldi($dst$$Register, $src1$$Register, 64-log2_long((((jlong) $src2$$constant)+1)));
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate And with a single-bit constant.
// rlwinm gets the same value for mask begin and mask end, so exactly one
// bit position is selected.
instruct andI_reg_immIpowerOf2(iRegIdst dst, iRegIsrc src1, immIpowerOf2 src2) %{
  match(Set dst (AndI src1 src2));
  predicate(UseRotateAndMaskInstructionsPPC64);

  size(4);
  format %{ "ANDWI $dst, $src1, $src2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm);
    const int bit_index = (31-log2_long((jlong) $src2$$constant)) & 0x1f;
    __ rlwinm($dst$$Register, $src1$$Register, 0, bit_index, bit_index);
  %}
  ins_pipe(pipe_class_default);
%}

// Register And Long
instruct andL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (AndL src1 src2));
  ins_cost(DEFAULT_COST);

  size(4);
  format %{ "AND $dst, $src1, $src2 \t// long" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_and);
    __ andr($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate And long
// Emits andi_, so cr0 is declared as killed.
instruct andL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 src2, flagsRegCR0 cr0) %{
  match(Set dst (AndL src1 src2));
  effect(KILL cr0);
  ins_cost(DEFAULT_COST);

  size(4);
  format %{ "ANDI $dst, $src1, $src2 \t// long" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_andi_);
    // FIXME: avoid andi_ ?
    __ andi_($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate And Long where the immediate is a negative power of 2.
instruct andL_reg_immLnegpow2(iRegLdst dst, iRegLsrc src1, immLnegpow2 src2) %{
  match(Set dst (AndL src1 src2));

  size(4);
  format %{ "ANDDI $dst, $src1, $src2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicr);
    __ clrrdi($dst$$Register, $src1$$Register, log2_long((jlong)-$src2$$constant));
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate And Long with a (power of 2) - 1 constant.
instruct andL_reg_immLpow2minus1(iRegLdst dst, iRegLsrc src1, immLpow2minus1 src2) %{
  match(Set dst (AndL src1 src2));

  size(4);
  format %{ "ANDDI $dst, $src1, $src2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ clrldi($dst$$Register, $src1$$Register, 64-log2_long((((jlong) $src2$$constant)+1)));
  %}
  ins_pipe(pipe_class_default);
%}

// AndL + ConvL2I.
// Same encoding as andL_reg_immLpow2minus1, with an int destination.
instruct convL2I_andL_reg_immLpow2minus1(iRegIdst dst, iRegLsrc src1, immLpow2minus1 src2) %{
  match(Set dst (ConvL2I (AndL src1 src2)));
  ins_cost(DEFAULT_COST);

  size(4);
  format %{ "ANDDI $dst, $src1, $src2 \t// long + l2i" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ clrldi($dst$$Register, $src1$$Register, 64-log2_long((((jlong) $src2$$constant)+1)));
  %}
  ins_pipe(pipe_class_default);
%}

// Or Instructions

// Register Or
instruct orI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (OrI src1 src2));

  size(4);
  format %{ "OR $dst, $src1, $src2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Expand does not work with above instruct. (??)
// Match-less twin of orI_reg_reg, used as the second OR in
// tree_orI_orI_orI_reg_reg_Ex.
instruct orI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  // no match-rule
  effect(DEF dst, USE src1, USE src2);

  size(4);
  format %{ "OR $dst, $src1, $src2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Rebalances a left-leaning chain of three ORs into
// (src1 | src2) | (src3 | src4).
instruct tree_orI_orI_orI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
  match(Set dst (OrI (OrI (OrI src1 src2) src3) src4));
  ins_cost(DEFAULT_COST*3);

  expand %{
    // FIXME: we should do this in the ideal world.
    iRegIdst leftOr;
    iRegIdst rightOr;
    orI_reg_reg(leftOr, src1, src2);
    // Adlc complains about orI_reg_reg.
    orI_reg_reg_2(rightOr, src3, src4);
    orI_reg_reg(dst, leftOr, rightOr);
  %}
%}

// Immediate Or
instruct orI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2) %{
  match(Set dst (OrI src1 src2));

  size(4);
  format %{ "ORI $dst, $src1, $src2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ori);
    __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF);
  %}
  ins_pipe(pipe_class_default);
%}

// Register Or Long
instruct orL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (OrL src1 src2));
  ins_cost(DEFAULT_COST);

  format %{ "OR $dst, $src1, $src2 \t// long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// OrL + ConvL2I.
// The long OR is written directly into an int destination register.
instruct orI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (ConvL2I (OrL src1 src2)));
  ins_cost(DEFAULT_COST);

  size(4);
  format %{ "OR $dst, $src1, $src2 \t// long + l2i" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate Or long
instruct orL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 con) %{
  match(Set dst (OrL src1 con));
  ins_cost(DEFAULT_COST);

  size(4);
  format %{ "ORI $dst, $src1, $con \t// long" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_ori);
    __ ori($dst$$Register, $src1$$Register, ($con$$constant) & 0xFFFF);
  %}
  ins_pipe(pipe_class_default);
%}

// Xor Instructions

// Register Xor
instruct xorI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (XorI src1 src2));

  size(4);
  format %{ "XOR $dst, $src1, $src2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_xor);
    __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Expand does not work with above instruct. (??)
// Match-less twin of xorI_reg_reg, used as the second XOR in
// tree_xorI_xorI_xorI_reg_reg_Ex.
instruct xorI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  // no match-rule
  effect(DEF dst, USE src1, USE src2);

  size(4);
  format %{ "XOR $dst, $src1, $src2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_xor);
    __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Rebalances a left-leaning chain of three XORs into
// (src1 ^ src2) ^ (src3 ^ src4).
instruct tree_xorI_xorI_xorI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
  match(Set dst (XorI (XorI (XorI src1 src2) src3) src4));
  ins_cost(DEFAULT_COST*3);

  expand %{
    // FIXME: we should do this in the ideal world.
    iRegIdst leftXor;
    iRegIdst rightXor;
    xorI_reg_reg(leftXor, src1, src2);
    // Adlc complains about xorI_reg_reg.
    xorI_reg_reg_2(rightXor, src3, src4);
    xorI_reg_reg(dst, leftXor, rightXor);
  %}
%}

// Immediate Xor
instruct xorI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2) %{
  match(Set dst (XorI src1 src2));

  size(4);
  format %{ "XORI $dst, $src1, $src2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_xori);
    __ xori($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// Register Xor Long
instruct xorL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (XorL src1 src2));
  ins_cost(DEFAULT_COST);

  size(4);
  format %{ "XOR $dst, $src1, $src2 \t// long" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_xor);
    __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// XorL + ConvL2I.
instruct xorI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (ConvL2I (XorL src1 src2)));
  ins_cost(DEFAULT_COST);

  size(4);
  format %{ "XOR $dst, $src1, $src2 \t// long + l2i" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_xor);
    __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Immediate Xor Long
instruct xorL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 src2) %{
  match(Set dst (XorL src1 src2));
  ins_cost(DEFAULT_COST);

  size(4);
  format %{ "XORI $dst, $src1, $src2 \t// long" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_xori);
    __ xori($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

// Bitwise not: XorI with the constant -1 becomes NOR of src1 with itself.
instruct notI_reg(iRegIdst dst, iRegIsrc src1, immI_minus1 src2) %{
  match(Set dst (XorI src1 src2));
  ins_cost(DEFAULT_COST);

  size(4);
  format %{ "NOT $dst, $src1 ($src2)" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_nor);
    __ nor($dst$$Register, $src1$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Bitwise not, long: XorL with the constant -1 becomes NOR of src1 with itself.
instruct notL_reg(iRegLdst dst, iRegLsrc src1, immL_minus1 src2) %{
  match(Set dst (XorL src1 src2));
  ins_cost(DEFAULT_COST);

  size(4);
  format %{ "NOT $dst, $src1 ($src2) \t// long" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_nor);
    __ nor($dst$$Register, $src1$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// And-complement
// Matches src3 & (src1 ^ -1); the encoding receives src3 first and src1 second.
instruct andcI_reg_reg(iRegIdst dst, iRegIsrc src1, immI_minus1 src2, iRegIsrc src3) %{
  match(Set dst (AndI (XorI src1 src2) src3));
  ins_cost(DEFAULT_COST);

  size(4);
  format %{ "ANDW $dst, xori($src1, $src2), $src3" %}
  ins_encode( enc_andc(dst, src3, src1) );
  ins_pipe(pipe_class_default);
%}

// And-complement
// Long variant without a match rule; it can only be reached from expand rules.
instruct andcL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src1, USE src2);
  predicate(false);

  size(4);
  format %{ "ANDC $dst, $src1, $src2" %}
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_andc);
    __ andc($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

//----------Moves between int/long and float/double----------------------------
//
// The following rules move values from int/long registers/stack-locations
// to float/double registers/stack-locations and vice versa, without doing any
// conversions. These rules are used to implement the bit-conversion methods
// of java.lang.Float etc., e.g.
//   int   floatToIntBits(float value)
//   float intBitsToFloat(int bits)
//
// Notes on the implementation on ppc64:
// We only provide rules which move between a register and a stack-location,
// because we always have to go through memory when moving between a float
// register and an integer register.

//---------- Chain stack slots between similar types --------

// These are needed so that the rules below can match.

// Load integer from stack slot
instruct stkI_to_regI(iRegIdst dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LWZ $dst, $src" %}
  ins_encode( enc_lwz(dst, src) );
  ins_pipe(pipe_class_memory);
%}

// Store integer to stack slot
instruct regI_to_stkI(stackSlotI dst, iRegIsrc src) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "STW $src, $dst \t// stk" %}
  // rs=rt
  ins_encode( enc_stw(src, dst) );
  ins_pipe(pipe_class_memory);
%}

// Load long from stack slot
instruct stkL_to_regL(iRegLdst dst, stackSlotL src) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LD $dst, $src \t// long" %}
  ins_encode( enc_ld(dst, src) );
  ins_pipe(pipe_class_memory);
%}

// Store long to stack slot
instruct regL_to_stkL(stackSlotL dst, iRegLsrc src) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "STD $src, $dst \t// long" %}
  // rs=rt
  ins_encode( enc_std(src, dst) );
  ins_pipe(pipe_class_memory);
%}

//----------Moves between int and float

// Move float value from float stack-location to integer register.
// The float slot is read with an integer load (LWZ), so no conversion occurs.
instruct moveF2I_stack_reg(iRegIdst dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LWZ $dst, $src \t// MoveF2I" %}
  ins_encode( enc_lwz(dst, src) );
  ins_pipe(pipe_class_memory);
%}

// Move float value from float register to integer stack-location.
instruct moveF2I_reg_stack(stackSlotI dst, regF src) %{
  match(Set dst (MoveF2I src));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "STFS $src, $dst \t// MoveF2I" %}
  ins_encode( enc_stfs(src, dst) );
  ins_pipe(pipe_class_memory);
%}

// Move integer value from integer stack-location to float register.
instruct moveI2F_stack_reg(regF dst, stackSlotI src) %{
  match(Set dst (MoveI2F src));
  ins_cost(MEMORY_REF_COST);

  format %{ "LFS $dst, $src \t// MoveI2F" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_lfs);
    // Displacement of the slot: operand displacement plus the frame slot bias
    // for the operand's base register.
    int slot_disp = $src$$disp + frame_slots_bias($src$$base, ra_);
    __ lfs($dst$$FloatRegister, slot_disp, $src$$base$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// MoveI2F, source in an int register: store it (STW) to a float stack slot.
instruct moveI2F_reg_stack(stackSlotF dst, iRegIsrc src) %{
  match(Set dst (MoveI2F src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STW $src, $dst \t// MoveI2F" %}
  size(4);
  ins_encode( enc_stw(src, dst) );
  ins_pipe(pipe_class_memory);
%}

//----------Moves between long and float

// Helper node (no match rule, false predicate): stores a regF operand to a
// long stack slot through enc_stfd.
instruct moveF2L_reg_stack(stackSlotL dst, regF src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "storeD $src, $dst \t// STACK" %}
  size(4);
  ins_encode( enc_stfd(src, dst) );
  ins_pipe(pipe_class_default);
%}

//----------Moves between long and double

// MoveD2L, source already in a double stack slot: load it with LD into the
// long register.
instruct moveD2L_stack_reg(iRegLdst dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  ins_cost(MEMORY_REF_COST);

  format %{ "LD $dst, $src \t// MoveD2L" %}
  size(4);
  ins_encode( enc_ld(dst, src) );
  ins_pipe(pipe_class_memory);
%}

// MoveD2L, source in a double register: store it (STFD) to a long stack slot.
instruct moveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(MEMORY_REF_COST);

  format %{ "STFD $src, $dst \t// MoveD2L" %}
  size(4);
  ins_encode( enc_stfd(src, dst) );
  ins_pipe(pipe_class_memory);
%}

// Move long value from long stack-location to double register.
instruct moveL2D_stack_reg(regD dst, stackSlotL src) %{
  match(Set dst (MoveL2D src));
  ins_cost(MEMORY_REF_COST);

  format %{ "LFD $dst, $src \t// MoveL2D" %}
  size(4);
  ins_encode( enc_lfd(dst, src) );
  ins_pipe(pipe_class_memory);
%}

// MoveL2D, source in a long register: store it (STD) to a double stack slot.
instruct moveL2D_reg_stack(stackSlotD dst, iRegLsrc src) %{
  match(Set dst (MoveL2D src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STD $src, $dst \t// MoveL2D" %}
  size(4);
  ins_encode( enc_std(src, dst) );
  ins_pipe(pipe_class_memory);
%}

//----------Register Move Instructions-----------------------------------------

// Replicate for Superword

// Helper node (false predicate, no match rule): register-to-register move.
instruct moveReg(iRegLdst dst, iRegIsrc src) %{
  predicate(false);
  effect(DEF dst, USE src);

  format %{ "MR $dst, $src \t// replicate " %}
  // variable size, 0 or 4.
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ mr_if_needed($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

//----------Cast instructions (Java-level type cast)---------------------------

// Cast Long to Pointer for unsafe natives.
instruct castX2P(iRegPdst dst, iRegLsrc src) %{
  match(Set dst (CastX2P src));

  format %{ "MR $dst, $src \t// Long->Ptr" %}
  // variable size, 0 or 4.
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ mr_if_needed($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Cast Pointer to Long for unsafe natives.
instruct castP2X(iRegLdst dst, iRegP_N2P src) %{
  match(Set dst (CastP2X src));

  format %{ "MR $dst, $src \t// Ptr->Long" %}
  // variable size, 0 or 4.
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ mr_if_needed($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// The three cast rules below emit no code: size(0) and an empty encoding.

instruct castPP(iRegPdst dst) %{
  match(Set dst (CastPP dst));
  format %{ " -- \t// castPP of $dst" %}
  size(0);
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

instruct castII(iRegIdst dst) %{
  match(Set dst (CastII dst));
  format %{ " -- \t// castII of $dst" %}
  size(0);
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

instruct checkCastPP(iRegPdst dst) %{
  match(Set dst (CheckCastPP dst));
  format %{ " -- \t// checkcastPP of $dst" %}
  size(0);
  ins_encode( /*empty*/ );
  ins_pipe(pipe_class_default);
%}

//----------Convert instructions-----------------------------------------------

// Convert to boolean.

// int_to_bool(src) : { 1   if src != 0
//                    { 0   else
//
// strategy:
// 1) Count leading zeros of 32 bit-value src,
//    this returns 32 (0b10.0000) iff src == 0 and <32 otherwise.
// 2) Shift 5 bits to the right, result is 0b1 iff src == 0, 0b0 otherwise.
// 3) Xori the result to get 0b1 if src != 0 and 0b0 if src == 0.
// convI2Bool
// Count-leading-zeros variant: expands to cntlz, shift right by 5, xor with 1.
instruct convI2Bool_reg__cntlz_Ex(iRegIdst dst, iRegIsrc src) %{
  match(Set dst (Conv2B src));
  predicate(UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  expand %{
    immI shiftAmount %{ 0x5 %}
    uimmI16 mask %{ 0x1 %}
    iRegIdst tmp1;
    iRegIdst tmp2;
    countLeadingZerosI(tmp1, src);
    urShiftI_reg_imm(tmp2, tmp1, shiftAmount);
    xorI_reg_uimm16(dst, tmp2, mask);
  %}
%}

// Compare-and-branch variant, used when count-leading-zeros is disabled.
// Every line of the multi-line format except the last ends in a line break so
// the printed listing shows one instruction per line.
instruct convI2Bool_reg__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx) %{
  match(Set dst (Conv2B src));
  effect(TEMP crx);
  predicate(!UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  format %{ "CMPWI $crx, $src, #0 \t// convI2B\n\t"
            "LI $dst, #0\n\t"
            "BEQ $crx, done\n\t"
            "LI $dst, #1\n"
            "done:" %}
  size(16);
  ins_encode( enc_convI2B_regI__cmove(dst, src, crx, 0x0, 0x1) );
  ins_pipe(pipe_class_compare);
%}

// ConvI2B + XorI
// Xor with 1 cancels the final xor of the plain conversion, so the expansion
// is just cntlz followed by the shift.
instruct xorI_convI2Bool_reg_immIvalue1__cntlz_Ex(iRegIdst dst, iRegIsrc src, immI_1 mask) %{
  match(Set dst (XorI (Conv2B src) mask));
  predicate(UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  expand %{
    immI shiftAmount %{ 0x5 %}
    iRegIdst tmp1;
    countLeadingZerosI(tmp1, src);
    urShiftI_reg_imm(dst, tmp1, shiftAmount);
  %}
%}

// Compare-and-branch variant of ConvI2B + XorI; the two constants passed to
// the encoding are swapped relative to convI2Bool_reg__cmove.
instruct xorI_convI2Bool_reg_immIvalue1__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx, immI_1 mask) %{
  match(Set dst (XorI (Conv2B src) mask));
  effect(TEMP crx);
  predicate(!UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  format %{ "CMPWI $crx, $src, #0 \t// Xor(convI2B($src), $mask)\n\t"
            "LI $dst, #1\n\t"
            "BEQ $crx, done\n\t"
            "LI $dst, #0\n"
            "done:" %}
  size(16);
  ins_encode( enc_convI2B_regI__cmove(dst, src, crx, 0x1, 0x0) );
  ins_pipe(pipe_class_compare);
%}

// AndI 0b0..010..0 + ConvI2B
// A single RLWINM rotates the masked bit into bit position 31 and keeps only
// that bit (mask begin = mask end = 31).
instruct convI2Bool_andI_reg_immIpowerOf2(iRegIdst dst, iRegIsrc src, immIpowerOf2 mask) %{
  match(Set dst (Conv2B (AndI src mask)));
  predicate(UseRotateAndMaskInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  format %{ "RLWINM $dst, $src, $mask \t// convI2B(AndI($src, $mask))" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm);
    __ rlwinm($dst$$Register, $src$$Register, (32-log2_long((jlong)$mask$$constant)) & 0x1f, 31, 31);
  %}
  ins_pipe(pipe_class_default);
%}

// Convert pointer to boolean.
//
// ptr_to_bool(src) : { 1   if src != 0
//                    { 0   else
//
// strategy:
// 1) Count leading zeros of 64 bit-value src,
//    this returns 64 (0b100.0000) iff src == 0 and <64 otherwise.
// 2) Shift 6 bits to the right, result is 0b1 iff src == 0, 0b0 otherwise.
// 3) Xori the result to get 0b1 if src != 0 and 0b0 if src == 0.

// ConvP2B
instruct convP2Bool_reg__cntlz_Ex(iRegIdst dst, iRegP_N2P src) %{
  match(Set dst (Conv2B src));
  predicate(UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  expand %{
    immI shiftAmount %{ 0x6 %}
    uimmI16 mask %{ 0x1 %}
    iRegIdst tmp1;
    iRegIdst tmp2;
    countLeadingZerosP(tmp1, src);
    urShiftI_reg_imm(tmp2, tmp1, shiftAmount);
    xorI_reg_uimm16(dst, tmp2, mask);
  %}
%}

// Compare-and-branch variant of ConvP2B (64-bit compare, CMPDI).
instruct convP2Bool_reg__cmove(iRegIdst dst, iRegP_N2P src, flagsReg crx) %{
  match(Set dst (Conv2B src));
  effect(TEMP crx);
  predicate(!UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  format %{ "CMPDI $crx, $src, #0 \t// convP2B\n\t"
            "LI $dst, #0\n\t"
            "BEQ $crx, done\n\t"
            "LI $dst, #1\n"
            "done:" %}
  size(16);
  ins_encode( enc_convP2B_regP__cmove(dst, src, crx, 0x0, 0x1) );
  ins_pipe(pipe_class_compare);
%}

// ConvP2B + XorI
instruct xorI_convP2Bool_reg__cntlz_Ex(iRegIdst dst, iRegP_N2P src, immI_1 mask) %{
  match(Set dst (XorI (Conv2B src) mask));
  predicate(UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  expand %{
    immI shiftAmount %{ 0x6 %}
    iRegIdst tmp1;
    countLeadingZerosP(tmp1, src);
    urShiftI_reg_imm(dst, tmp1, shiftAmount);
  %}
%}

// Compare-and-branch variant of ConvP2B + XorI.
instruct xorI_convP2Bool_reg_immIvalue1__cmove(iRegIdst dst, iRegP_N2P src, flagsReg crx, immI_1 mask) %{
  match(Set dst (XorI (Conv2B src) mask));
  effect(TEMP crx);
  predicate(!UseCountLeadingZerosInstructionsPPC64);
  ins_cost(DEFAULT_COST);

  format %{ "CMPDI $crx, $src, #0 \t// XorI(convP2B($src), $mask)\n\t"
            "LI $dst, #1\n\t"
            "BEQ $crx, done\n\t"
            "LI $dst, #0\n"
            "done:" %}
  size(16);
  ins_encode( enc_convP2B_regP__cmove(dst, src, crx, 0x1, 0x0) );
  ins_pipe(pipe_class_compare);
%}

// if src1 < src2, return -1 else return 0
instruct cmpLTMask_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
  match(Set dst (CmpLTMask src1 src2));
  ins_cost(DEFAULT_COST*4);

  expand %{
    iRegIdst src1s;
    iRegIdst src2s;
    iRegIdst diff;
    sxtI_reg(src1s, src1); // ensure proper sign extension
    sxtI_reg(src2s, src2); // ensure proper sign extension
    subI_reg_reg(diff, src1s, src2s);
    // Need to consider >=33 bit result, therefore we need signmaskL.
    signmask64I_regI(dst, diff);
  %}
%}

// CmpLTMask against the constant 0.
// The shift count is hard-wired to 31 (0x1f) in the encoding; $src2 in the
// format text is the matched zero constant, not the shift count.
instruct cmpLTMask_reg_immI0(iRegIdst dst, iRegIsrc src1, immI_0 src2) %{
  match(Set dst (CmpLTMask src1 src2)); // if src1 < src2, return -1 else return 0
  format %{ "SRAWI $dst, $src1, $src2 \t// CmpLTMask" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_srawi);
    __ srawi($dst$$Register, $src1$$Register, 0x1f);
  %}
  ins_pipe(pipe_class_default);
%}

//----------Arithmetic Conversion Instructions---------------------------------

// Convert to Byte -- nop
// Convert to Short -- nop

// Convert to Int

// LShiftI 24 + RShiftI 24 converts byte to int.
instruct convB2I_reg(iRegIdst dst, iRegIsrc src, immI_24 amount) %{
  match(Set dst (RShiftI (LShiftI src amount) amount));
  format %{ "EXTSB $dst, $src \t// byte->int" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_extsb);
    __ extsb($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// LShiftI 16 + RShiftI 16 converts short to int.
instruct convS2I_reg(iRegIdst dst, iRegIsrc src, immI_16 amount) %{
  match(Set dst (RShiftI (LShiftI src amount) amount));
  format %{ "EXTSH $dst, $src \t// short->int" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_extsh);
    __ extsh($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// ConvL2I + ConvI2L: Sign extend int in long register.
instruct sxtI_L2L_reg(iRegLdst dst, iRegLsrc src) %{
  match(Set dst (ConvI2L (ConvL2I src)));

  format %{ "EXTSW $dst, $src \t// long->long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_extsw);
    __ extsw($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Long to int: just a register move (mr_if_needed).
instruct convL2I_reg(iRegIdst dst, iRegLsrc src) %{
  match(Set dst (ConvL2I src));
  format %{ "MR $dst, $src \t// long->int" %}
  // variable size, 0 or 4
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_or);
    __ mr_if_needed($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Raw double->int conversion step (FCTIWZ) used by convD2I_reg_ExEx.
// Helper node: no match rule, false predicate.
instruct convD2IRaw_regD(regD dst, regD src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "FCTIWZ $dst, $src \t// convD2I, $src != NaN" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fctiwz);;
    __ fctiwz($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Conditional move of a long stack slot into an int register, keyed on crx.
// Helper node: no match rule, false predicate. Emission is delegated to
// enc_cmove_bso_stackSlotL.
instruct cmovI_bso_stackSlotL(iRegIdst dst, flagsReg crx, stackSlotL src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE crx, USE src);
  predicate(false);

  ins_variable_size_depending_on_alignment(true);

  format %{ "cmovI $crx, $dst, $src" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT(InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
  ins_encode( enc_cmove_bso_stackSlotL(dst, crx, src) );
  ins_pipe(pipe_class_default);
%}

// Postalloc-expanded form: first loads the constant 0 into dst, then runs
// cmovI_bso_stackSlotL on the same dst register.
instruct cmovI_bso_stackSlotL_conLvalue0_Ex(iRegIdst dst, flagsReg crx, stackSlotL mem) %{
  // no match-rule, false predicate
  effect(DEF dst, USE crx, USE mem);
  predicate(false);

  format %{ "CmovI $dst, $crx, $mem \t// postalloc expanded" %}
  postalloc_expand %{
    //
    // replaces
    //
    //   region  dst  crx  mem
    //    \       |    |   /
    //     dst=cmovI_bso_stackSlotL_conLvalue0
    //
    // with
    //
    //   region  dst
    //    \       /
    //     dst=loadConI16(0)
    //      |
    //      ^  region  dst  crx  mem
    //      |   \       |    |    /
    //      dst=cmovI_bso_stackSlotL
    //

    // Create new nodes.
    MachNode *zero_node = new (C) loadConI16Node();
    MachNode *cmov_node = new (C) cmovI_bso_stackSlotLNode();

    // inputs for new nodes
    zero_node->add_req(n_region);
    cmov_node->add_req(n_region, n_crx, n_mem);

    // precedences for new nodes: the cmove is ordered after the zero load
    cmov_node->add_prec(zero_node);

    // operands for new nodes
    zero_node->_opnds[0] = op_dst;
    zero_node->_opnds[1] = new (C) immI16Oper(0);

    cmov_node->_opnds[0] = op_dst;
    cmov_node->_opnds[1] = op_crx;
    cmov_node->_opnds[2] = op_mem;

    // registers for new nodes: both reuse the register pair of this node
    ra_->set_pair(zero_node->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
    ra_->set_pair(cmov_node->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst

    // Insert new nodes.
    nodes->push(zero_node);
    nodes->push(cmov_node);
  %}
%}

// Double to Int conversion, NaN is mapped to 0.
instruct convD2I_reg_ExEx(iRegIdst dst, regD src) %{
  match(Set dst (ConvD2I src));
  ins_cost(DEFAULT_COST);

  expand %{
    regD tmpD;
    stackSlotL tmpS;
    flagsReg crx;
    cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convD2IRaw_regD(tmpD, src);                         // Convert double to int (speculated).
    moveD2L_reg_stack(tmpS, tmpD);                      // Store raw result to stack (speculated).
    cmovI_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
  %}
%}

// Raw float->int conversion step (FCTIWZ) used by convF2I_regF_ExEx.
// Helper node: no match rule, false predicate.
instruct convF2IRaw_regF(regF dst, regF src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "FCTIWZ $dst, $src \t// convF2I, $src != NaN" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fctiwz);
    __ fctiwz($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Float to Int conversion, NaN is mapped to 0.
instruct convF2I_regF_ExEx(iRegIdst dst, regF src) %{
  match(Set dst (ConvF2I src));
  ins_cost(DEFAULT_COST);

  expand %{
    regF tmpF;
    stackSlotL tmpS;
    flagsReg crx;
    cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convF2IRaw_regF(tmpF, src);                         // Convert float to int (speculated).
    moveF2L_reg_stack(tmpS, tmpF);                      // Store raw result to stack (speculated).
    cmovI_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
  %}
%}

// Convert to Long

// Int to long: sign-extend with EXTSW.
instruct convI2L_reg(iRegLdst dst, iRegIsrc src) %{
  match(Set dst (ConvI2L src));
  format %{ "EXTSW $dst, $src \t// int->long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_extsw);
    __ extsw($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Zero-extend: convert unsigned int to long (convUI2L).
instruct zeroExtendL_regI(iRegLdst dst, iRegIsrc src, immL_32bits mask) %{
  match(Set dst (AndL (ConvI2L src) mask));
  ins_cost(DEFAULT_COST);

  format %{ "CLRLDI $dst, $src, #32 \t// zero-extend int to long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ clrldi($dst$$Register, $src$$Register, 32);
  %}
  ins_pipe(pipe_class_default);
%}

// Zero-extend: convert unsigned int to long in long register.
instruct zeroExtendL_regL(iRegLdst dst, iRegLsrc src, immL_32bits mask) %{
  match(Set dst (AndL src mask));
  ins_cost(DEFAULT_COST);

  format %{ "CLRLDI $dst, $src, #32 \t// zero-extend int to long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_rldicl);
    __ clrldi($dst$$Register, $src$$Register, 32);
  %}
  ins_pipe(pipe_class_default);
%}

// Raw float->long conversion step (FCTIDZ) used by convF2L_reg_ExEx.
// Helper node: no match rule, false predicate.
instruct convF2LRaw_regF(regF dst, regF src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "FCTIDZ $dst, $src \t// convF2L, $src != NaN" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fctiwz);
    __ fctidz($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Conditional move of a long stack slot into a long register, keyed on crx.
// Helper node: no match rule, false predicate. Emission is delegated to
// enc_cmove_bso_stackSlotL.
instruct cmovL_bso_stackSlotL(iRegLdst dst, flagsReg crx, stackSlotL src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE crx, USE src);
  predicate(false);

  ins_variable_size_depending_on_alignment(true);

  format %{ "cmovL $crx, $dst, $src" %}
  // Worst case is branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORT Compile::current()->do_hb_scheduling()*/ ? 12 : 8);
  ins_encode( enc_cmove_bso_stackSlotL(dst, crx, src) );
  ins_pipe(pipe_class_default);
%}

// Postalloc-expanded form: first loads the constant 0 into dst, then runs
// cmovL_bso_stackSlotL on the same dst register.
instruct cmovL_bso_stackSlotL_conLvalue0_Ex(iRegLdst dst, flagsReg crx, stackSlotL mem) %{
  // no match-rule, false predicate
  effect(DEF dst, USE crx, USE mem);
  predicate(false);

  format %{ "CmovL $dst, $crx, $mem \t// postalloc expanded" %}
  postalloc_expand %{
    //
    // replaces
    //
    //   region  dst  crx  mem
    //    \       |    |   /
    //     dst=cmovL_bso_stackSlotL_conLvalue0
    //
    // with
    //
    //   region  dst
    //    \       /
    //     dst=loadConL16(0)
    //      |
    //      ^  region  dst  crx  mem
    //      |   \       |    |    /
    //      dst=cmovL_bso_stackSlotL
    //

    // Create new nodes.
    MachNode *zero_node = new (C) loadConL16Node();
    MachNode *cmov_node = new (C) cmovL_bso_stackSlotLNode();

    // inputs for new nodes
    zero_node->add_req(n_region);
    cmov_node->add_req(n_region, n_crx, n_mem);
    // the cmove is ordered after the zero load
    cmov_node->add_prec(zero_node);

    // operands for new nodes
    zero_node->_opnds[0] = op_dst;
    zero_node->_opnds[1] = new (C) immL16Oper(0);
    cmov_node->_opnds[0] = op_dst;
    cmov_node->_opnds[1] = op_crx;
    cmov_node->_opnds[2] = op_mem;

    // registers for new nodes: both reuse the register pair of this node
    ra_->set_pair(zero_node->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
    ra_->set_pair(cmov_node->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst

    // Insert new nodes.
    nodes->push(zero_node);
    nodes->push(cmov_node);
  %}
%}

// Float to Long conversion, NaN is mapped to 0.
instruct convF2L_reg_ExEx(iRegLdst dst, regF src) %{
  match(Set dst (ConvF2L src));
  ins_cost(DEFAULT_COST);

  expand %{
    regF tmpF;
    stackSlotL tmpS;
    flagsReg crx;
    cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convF2LRaw_regF(tmpF, src);                         // Convert float to long (speculated).
    moveF2L_reg_stack(tmpS, tmpF);                      // Store raw result to stack (speculated).
    cmovL_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
  %}
%}

// Raw double->long conversion step (FCTIDZ) used by convD2L_reg_ExEx.
// Helper node: no match rule, false predicate.
instruct convD2LRaw_regD(regD dst, regD src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "FCTIDZ $dst, $src \t// convD2L $src != NaN" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fctiwz);
    __ fctidz($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Double to Long conversion, NaN is mapped to 0.
instruct convD2L_reg_ExEx(iRegLdst dst, regD src) %{
  match(Set dst (ConvD2L src));
  ins_cost(DEFAULT_COST);

  expand %{
    regD tmpD;
    stackSlotL tmpS;
    flagsReg crx;
    cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convD2LRaw_regD(tmpD, src);                         // Convert double to long (speculated).
    moveD2L_reg_stack(tmpS, tmpD);                      // Store raw result to stack (speculated).
    cmovL_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
  %}
%}

// Convert to Float

// Placed here as needed in expand.
// Raw long->double conversion step (FCFID) on a value already held in a
// double register. Helper node: no match rule, false predicate.
instruct convL2DRaw_regD(regD dst, regD src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "FCFID $dst, $src \t// convL2D" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fcfid);
    __ fcfid($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Placed here as needed in expand.
// Double to float (FRSP).
instruct convD2F_reg(regF dst, regD src) %{
  match(Set dst (ConvD2F src));
  format %{ "FRSP $dst, $src \t// convD2F" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_frsp);
    __ frsp($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Integer to Float conversion.
// Selected when VM_Version::has_fcfids() is false: goes int -> long -> stack
// -> double register -> double -> float.
instruct convI2F_ireg_Ex(regF dst, iRegIsrc src) %{
  match(Set dst (ConvI2F src));
  predicate(!VM_Version::has_fcfids());
  ins_cost(DEFAULT_COST);

  expand %{
    iRegLdst tmpL;
    stackSlotL tmpS;
    regD tmpD;
    regD tmpD2;
    convI2L_reg(tmpL, src);        // Sign-extension int to long.
    regL_to_stkL(tmpS, tmpL);      // Store long to stack.
    moveL2D_stack_reg(tmpD, tmpS); // Load long into double register.
    convL2DRaw_regD(tmpD2, tmpD);  // Convert to double.
    convD2F_reg(dst, tmpD2);       // Convert double to float.
  %}
%}

// Raw long->float conversion step (FCFIDS) on a value already held in a
// double register. Helper node: no match rule, false predicate.
instruct convL2FRaw_regF(regF dst, regD src) %{
  // no match-rule, false predicate
  effect(DEF dst, USE src);
  predicate(false);

  format %{ "FCFIDS $dst, $src \t// convL2F" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fcfid);
    __ fcfids($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Integer to Float conversion. Special version for Power7.
instruct convI2F_ireg_fcfids_Ex(regF dst, iRegIsrc src) %{
  match(Set dst (ConvI2F src));
  predicate(VM_Version::has_fcfids());
  ins_cost(DEFAULT_COST);

  expand %{
    iRegLdst tmpL;
    stackSlotL tmpS;
    regD tmpD;
    convI2L_reg(tmpL, src);        // Sign-extension int to long.
    regL_to_stkL(tmpS, tmpL);      // Store long to stack.
    moveL2D_stack_reg(tmpD, tmpS); // Load long into double register.
    convL2FRaw_regF(dst, tmpD);    // Convert to float.
  %}
%}

// L2F to avoid runtime call.
instruct convL2F_ireg_fcfids_Ex(regF dst, iRegLsrc src) %{
  match(Set dst (ConvL2F src));
  predicate(VM_Version::has_fcfids());
  ins_cost(DEFAULT_COST);

  expand %{
    stackSlotL tmpS;
    regD tmpD;
    regL_to_stkL(tmpS, src);       // Store long to stack.
    moveL2D_stack_reg(tmpD, tmpS); // Load long into double register.
    convL2FRaw_regF(dst, tmpD);    // Convert to float.
  %}
%}

// Moved up as used in expand.
//instruct convD2F_reg(regF dst, regD src) %{%}

// Convert to Double

// Integer to Double conversion.
instruct convI2D_reg_Ex(regD dst, iRegIsrc src) %{
  match(Set dst (ConvI2D src));
  ins_cost(DEFAULT_COST);

  expand %{
    iRegLdst tmpL;
    stackSlotL tmpS;
    regD tmpD;
    convI2L_reg(tmpL, src);        // Sign-extension int to long.
    regL_to_stkL(tmpS, tmpL);      // Store long to stack.
    moveL2D_stack_reg(tmpD, tmpS); // Load long into double register.
    convL2DRaw_regD(dst, tmpD);    // Convert to double.
  %}
%}

// Long to Double conversion
// The source operand is a long stack slot: load it into a double register,
// then convert.
instruct convL2D_reg_Ex(regD dst, stackSlotL src) %{
  match(Set dst (ConvL2D src));
  ins_cost(DEFAULT_COST + MEMORY_REF_COST);

  expand %{
    regD tmpD;
    moveL2D_stack_reg(tmpD, src);
    convL2DRaw_regD(dst, tmpD);
  %}
%}

// Float to double: a float register move (fmr_if_needed).
instruct convF2D_reg(regD dst, regF src) %{
  match(Set dst (ConvF2D src));
  format %{ "FMR $dst, $src \t// float->double" %}
  // variable size, 0 or 4
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_fmr);
    __ fmr_if_needed($dst$$FloatRegister, $src$$FloatRegister);
  %}
  ins_pipe(pipe_class_default);
%}

//----------Control Flow Instructions------------------------------------------
// Compare Instructions

// Compare Integers
instruct cmpI_reg_reg(flagsReg crx, iRegIsrc src1, iRegIsrc src2) %{
  match(Set crx (CmpI src1 src2));
  format %{ "CMPW $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmp);
    __ cmpw($crx$$CondRegister, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

// Compare int register against an immI16 constant.
instruct cmpI_reg_imm16(flagsReg crx, iRegIsrc src1, immI16 src2) %{
  match(Set crx (CmpI src1 src2));
  format %{ "CMPWI $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmpi);
    __ cmpwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}

// (src1 & src2) == 0?
// The result operand is a flagsRegCR0; the AND result itself goes to R0.
instruct testI_reg_imm(flagsRegCR0 cr0, iRegIsrc src1, uimmI16 src2, immI_0 zero) %{
  match(Set cr0 (CmpI (AndI src1 src2) zero));
  // r0 is killed
  format %{ "ANDI R0, $src1, $src2 \t// BTST int" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_andi_);
    // FIXME: avoid andi_ ?
    __ andi_(R0, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}

// Compare Longs
instruct cmpL_reg_reg(flagsReg crx, iRegLsrc src1, iRegLsrc src2) %{
  match(Set crx (CmpL src1 src2));
  format %{ "CMPD $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmp);
    __ cmpd($crx$$CondRegister, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

// Compare long register against an immL16 constant.
instruct cmpL_reg_imm16(flagsReg crx, iRegLsrc src1, immL16 src2) %{
  match(Set crx (CmpL src1 src2));
  format %{ "CMPDI $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmpi);
    __ cmpdi($crx$$CondRegister, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}

// (src1 & src2) == 0? for longs, register/register form.
instruct testL_reg_reg(flagsRegCR0 cr0, iRegLsrc src1, iRegLsrc src2, immL_0 zero) %{
  match(Set cr0 (CmpL (AndL src1 src2) zero));
  // r0 is killed
  format %{ "AND R0, $src1, $src2 \t// BTST long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_and_);
    __ and_(R0, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

// (src1 & src2) == 0? for longs, register/uimmL16 form.
instruct testL_reg_imm(flagsRegCR0 cr0, iRegLsrc src1, uimmL16 src2, immL_0 zero) %{
  match(Set cr0 (CmpL (AndL src1 src2) zero));
  // r0 is killed
  format %{ "ANDI R0, $src1, $src2 \t// BTST long" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_andi_);
    // FIXME: avoid andi_ ?
    __ andi_(R0, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}

// Second half of the three-way result materialisation: leaves dst untouched on
// 'equal', writes +1 on 'greater' and -1 otherwise.
// Helper node: no match rule, false predicate.
instruct cmovI_conIvalueMinus1_conIvalue1(iRegIdst dst, flagsReg crx) %{
  // no match-rule, false predicate
  effect(DEF dst, USE crx);
  predicate(false);

  ins_variable_size_depending_on_alignment(true);

  format %{ "cmovI $crx, $dst, -1, 0, +1" %}
  // Worst case is branch + move + branch + move + stop, no stop without scheduler.
  size(false /* TODO: PPC PORTInsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 20 : 16);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
    Label done;
    // li(Rdst, 0);              // equal -> 0
    // (the zero is loaded by a separate node, see the _Ex instruct below)
    __ beq($crx$$CondRegister, done);
    __ li($dst$$Register, 1);    // greater -> +1
    __ bgt($crx$$CondRegister, done);
    __ li($dst$$Register, -1);   // unordered or less -> -1
    // TODO: PPC port__ endgroup_if_needed(_size == 20);
    __ bind(done);
  %}
  ins_pipe(pipe_class_compare);
%}

// Postalloc-expanded form: loads the constant 0 into dst, then runs
// cmovI_conIvalueMinus1_conIvalue1 on the same dst register.
instruct cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(iRegIdst dst, flagsReg crx) %{
  // no match-rule, false predicate
  effect(DEF dst, USE crx);
  predicate(false);

  format %{ "CmovI $crx, $dst, -1, 0, +1 \t// postalloc expanded" %}
  postalloc_expand %{
    //
    // replaces
    //
    //   region  crx
    //    \       |
    //     dst=cmovI_conIvalueMinus1_conIvalue0_conIvalue1
    //
    // with
    //
    //   region
    //    \
    //     dst=loadConI16(0)
    //      |
    //      ^  region  crx
    //      |   \       |
    //      dst=cmovI_conIvalueMinus1_conIvalue1
    //

    // Create new nodes.
    MachNode *zero_node = new (C) loadConI16Node();
    MachNode *cmov_node = new (C) cmovI_conIvalueMinus1_conIvalue1Node();

    // inputs for new nodes
    zero_node->add_req(n_region);
    cmov_node->add_req(n_region, n_crx);
    // the cmove is ordered after the zero load
    cmov_node->add_prec(zero_node);

    // operands for new nodes
    zero_node->_opnds[0] = op_dst;
    zero_node->_opnds[1] = new (C) immI16Oper(0);
    cmov_node->_opnds[0] = op_dst;
    cmov_node->_opnds[1] = op_crx;

    // registers for new nodes: both reuse the register pair of this node
    ra_->set_pair(zero_node->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
    ra_->set_pair(cmov_node->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst

    // Insert new nodes.
    nodes->push(zero_node);
    nodes->push(cmov_node);
  %}
%}

// Manifest a CmpL3 result in an integer register. Very painful.
// This is the test to avoid.
// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_reg_ExEx(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
  match(Set dst (CmpL3 src1 src2));
  ins_cost(DEFAULT_COST*5+BRANCH_COST);

  expand %{
    flagsReg tmp1;
    cmpL_reg_reg(tmp1, src1, src2);
    cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(dst, tmp1);
  %}
%}

// Implicit range checks.
// A range check in the ideal world has one of the following shapes:
//  - (If le (CmpU length index)), (IfTrue  throw exception)
//  - (If lt (CmpU index length)), (IfFalse throw exception)
//
// Match range check 'If le (CmpU length index)'.
instruct rangeCheck_iReg_uimm15(cmpOp cmp, iRegIsrc src_length, uimmI15 index, label labl) %{
  match(If cmp (CmpU src_length index));
  effect(USE labl);
  predicate(TrapBasedRangeChecks &&
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le &&
            PROB_UNLIKELY(_leaf->as_If()->_prob) >= PROB_ALWAYS &&
            (Matcher::branches_to_uncommon_trap(_leaf)));

  ins_is_TrapBasedCheckNode(true);

  format %{ "TWI $index $cmp $src_length \t// RangeCheck => trap $labl" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_twi);
    if ($cmp$$cmpcode == 0x1 /* less_equal */) {
      __ trap_range_check_le($src_length$$Register, $index$$constant);
    } else {
      // Both successors are uncommon traps, probability is 0.
      // Node got flipped during fixup flow.
      assert($cmp$$cmpcode == 0x9, "must be greater");
      __ trap_range_check_g($src_length$$Register, $index$$constant);
    }
  %}
  ins_pipe(pipe_class_trap);
%}

// Match range check 'If lt (CmpU index length)'.
instruct rangeCheck_iReg_iReg(cmpOp cmp, iRegIsrc src_index, iRegIsrc src_length, label labl) %{
  match(If cmp (CmpU src_index src_length));
  effect(USE labl);
  predicate(TrapBasedRangeChecks &&
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt &&
            _leaf->as_If()->_prob >= PROB_ALWAYS &&
            (Matcher::branches_to_uncommon_trap(_leaf)));

  ins_is_TrapBasedCheckNode(true);

  format %{ "TW $src_index $cmp $src_length \t// RangeCheck => trap $labl" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_tw);
    if ($cmp$$cmpcode == 0x0 /* greater_equal */) {
      __ trap_range_check_ge($src_index$$Register, $src_length$$Register);
    } else {
      // Both successors are uncommon traps, probability is 0.
      // Node got flipped during fixup flow.
      assert($cmp$$cmpcode == 0x8, "must be less");
      __ trap_range_check_l($src_index$$Register, $src_length$$Register);
    }
  %}
  ins_pipe(pipe_class_trap);
%}

// Match range check 'If lt (CmpU index length)'.
// Same as rangeCheck_iReg_iReg, but the length is a uimmI15 constant.
instruct rangeCheck_uimm15_iReg(cmpOp cmp, iRegIsrc src_index, uimmI15 length, label labl) %{
  match(If cmp (CmpU src_index length));
  effect(USE labl);
  predicate(TrapBasedRangeChecks &&
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt &&
            _leaf->as_If()->_prob >= PROB_ALWAYS &&
            (Matcher::branches_to_uncommon_trap(_leaf)));

  ins_is_TrapBasedCheckNode(true);

  format %{ "TWI $src_index $cmp $length \t// RangeCheck => trap $labl" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_twi);
    if ($cmp$$cmpcode == 0x0 /* greater_equal */) {
      __ trap_range_check_ge($src_index$$Register, $length$$constant);
    } else {
      // Both successors are uncommon traps, probability is 0.
      // Node got flipped during fixup flow.
      assert($cmp$$cmpcode == 0x8, "must be less");
      __ trap_range_check_l($src_index$$Register, $length$$constant);
    }
  %}
  ins_pipe(pipe_class_trap);
%}

// Unsigned int compare, register/register.
instruct compU_reg_reg(flagsReg crx, iRegIsrc src1, iRegIsrc src2) %{
  match(Set crx (CmpU src1 src2));
  format %{ "CMPLW $crx, $src1, $src2 \t// unsigned" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmpl);
    __ cmplw($crx$$CondRegister, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_class_compare);
%}

// Unsigned int compare, register/uimmI16.
instruct compU_reg_uimm16(flagsReg crx, iRegIsrc src1, uimmI16 src2) %{
  match(Set crx (CmpU src1 src2));
  format %{ "CMPLWI $crx, $src1, $src2" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_cmpli);
    __ cmplwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(pipe_class_compare);
%}

// Implicit zero checks (more implicit null checks).
// No constant pool entries required.
instruct zeroCheckN_iReg_imm0(cmpOp cmp, iRegNsrc value, immN_0 zero, label labl) %{
  match(If cmp (CmpN value zero));
  effect(USE labl);
  predicate(TrapBasedNullChecks &&
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne &&
            _leaf->as_If()->_prob >= PROB_LIKELY_MAG(4) &&
            Matcher::branches_to_uncommon_trap(_leaf));
  ins_cost(1);

  ins_is_TrapBasedCheckNode(true);

  format %{ "TDI $value $cmp $zero \t// ZeroCheckN => trap $labl" %}
  size(4);
  ins_encode %{
    // TODO: PPC port $archOpcode(ppc64Opcode_tdi);
    if ($cmp$$cmpcode == 0xA) {
      __ trap_null_check($value$$Register);
    } else {
      // Both successors are uncommon traps, probability is 0.
      // Node got flipped during fixup flow.
      assert($cmp$$cmpcode == 0x2 , "must be equal(0xA) or notEqual(0x2)");
      __ trap_null_check($value$$Register, Assembler::traptoGreaterThanUnsigned);
    }
  %}
  ins_pipe(pipe_class_trap);
%}

// Compare narrow oops.
+instruct cmpN_reg_reg(flagsReg crx, iRegNsrc src1, iRegNsrc src2) %{ + match(Set crx (CmpN src1 src2)); + + size(4); + ins_cost(DEFAULT_COST); + format %{ "CMPLW $crx, $src1, $src2 \t// compressed ptr" %} + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_cmpl); + __ cmplw($crx$$CondRegister, $src1$$Register, $src2$$Register); + %} + ins_pipe(pipe_class_compare); +%} + +instruct cmpN_reg_imm0(flagsReg crx, iRegNsrc src1, immN_0 src2) %{ + match(Set crx (CmpN src1 src2)); + // Make this more expensive than zeroCheckN_iReg_imm0. + ins_cost(DEFAULT_COST); + + format %{ "CMPLWI $crx, $src1, $src2 \t// compressed ptr" %} + size(4); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_cmpli); + __ cmplwi($crx$$CondRegister, $src1$$Register, $src2$$constant); + %} + ins_pipe(pipe_class_compare); +%} + +// Implicit zero checks (more implicit null checks). +// No constant pool entries required. +instruct zeroCheckP_reg_imm0(cmpOp cmp, iRegP_N2P value, immP_0 zero, label labl) %{ + match(If cmp (CmpP value zero)); + effect(USE labl); + predicate(TrapBasedNullChecks && + _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne && + _leaf->as_If()->_prob >= PROB_LIKELY_MAG(4) && + Matcher::branches_to_uncommon_trap(_leaf)); + + ins_is_TrapBasedCheckNode(true); + + format %{ "TDI $value $cmp $zero \t// ZeroCheckP => trap $labl" %} + size(4); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_tdi); + if ($cmp$$cmpcode == 0xA) { + __ trap_null_check($value$$Register); + } else { + // Both successors are uncommon traps, probability is 0. + // Node got flipped during fixup flow. 
+ assert($cmp$$cmpcode == 0x2 , "must be equal(0xA) or notEqual(0x2)"); + __ trap_null_check($value$$Register, Assembler::traptoGreaterThanUnsigned); + } + %} + ins_pipe(pipe_class_trap); +%} + +// Compare Pointers +instruct cmpP_reg_reg(flagsReg crx, iRegP_N2P src1, iRegP_N2P src2) %{ + match(Set crx (CmpP src1 src2)); + format %{ "CMPLD $crx, $src1, $src2 \t// ptr" %} + size(4); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_cmpl); + __ cmpld($crx$$CondRegister, $src1$$Register, $src2$$Register); + %} + ins_pipe(pipe_class_compare); +%} + +// Used in postalloc expand. +instruct cmpP_reg_imm16(flagsReg crx, iRegPsrc src1, immL16 src2) %{ + // This match rule prevents reordering of node before a safepoint. + // This only makes sense if this instructions is used exclusively + // for the expansion of EncodeP! + match(Set crx (CmpP src1 src2)); + predicate(false); + + format %{ "CMPDI $crx, $src1, $src2" %} + size(4); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_cmpi); + __ cmpdi($crx$$CondRegister, $src1$$Register, $src2$$constant); + %} + ins_pipe(pipe_class_compare); +%} + +//----------Float Compares---------------------------------------------------- + +instruct cmpFUnordered_reg_reg(flagsReg crx, regF src1, regF src2) %{ + // no match-rule, false predicate + effect(DEF crx, USE src1, USE src2); + predicate(false); + + format %{ "cmpFUrd $crx, $src1, $src2" %} + size(4); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_fcmpu); + __ fcmpu($crx$$CondRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe(pipe_class_default); +%} + +instruct cmov_bns_less(flagsReg crx) %{ + // no match-rule, false predicate + effect(DEF crx); + predicate(false); + + ins_variable_size_depending_on_alignment(true); + + format %{ "cmov $crx" %} + // Worst case is branch + move + stop, no stop without scheduler. + size(false /* TODO: PPC PORT(InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 
16 : 12); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_cmovecr); + Label done; + __ bns($crx$$CondRegister, done); // not unordered -> keep crx + __ li(R0, 0); + __ cmpwi($crx$$CondRegister, R0, 1); // unordered -> set crx to 'less' + // TODO PPC port __ endgroup_if_needed(_size == 16); + __ bind(done); + %} + ins_pipe(pipe_class_default); +%} + +// Compare floating, generate condition code. +instruct cmpF_reg_reg_Ex(flagsReg crx, regF src1, regF src2) %{ + // FIXME: should we match 'If cmp (CmpF src1 src2))' ?? + // + // The following code sequence occurs a lot in mpegaudio: + // + // block BXX: + // 0: instruct cmpFUnordered_reg_reg (cmpF_reg_reg-0): + // cmpFUrd CCR6, F11, F9 + // 4: instruct cmov_bns_less (cmpF_reg_reg-1): + // cmov CCR6 + // 8: instruct branchConSched: + // B_FARle CCR6, B56 P=0.500000 C=-1.000000 + match(Set crx (CmpF src1 src2)); + ins_cost(DEFAULT_COST+BRANCH_COST); + + format %{ "CmpF $crx, $src1, $src2 \t// postalloc expanded" %} + postalloc_expand %{ + // + // replaces + // + // region src1 src2 + // \ | | + // crx=cmpF_reg_reg + // + // with + // + // region src1 src2 + // \ | | + // crx=cmpFUnordered_reg_reg + // | + // ^ region + // | \ + // crx=cmov_bns_less + // + + // Create new nodes. + MachNode *m1 = new (C) cmpFUnordered_reg_regNode(); + MachNode *m2 = new (C) cmov_bns_lessNode(); + + // inputs for new nodes + m1->add_req(n_region, n_src1, n_src2); + m2->add_req(n_region); + m2->add_prec(m1); + + // operands for new nodes + m1->_opnds[0] = op_crx; + m1->_opnds[1] = op_src1; + m1->_opnds[2] = op_src2; + m2->_opnds[0] = op_crx; + + // registers for new nodes + ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx + ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx + + // Insert new nodes. 
+ nodes->push(m1); + nodes->push(m2); + %} +%} + +// Compare float, generate -1,0,1 +instruct cmpF3_reg_reg_ExEx(iRegIdst dst, regF src1, regF src2) %{ + match(Set dst (CmpF3 src1 src2)); + ins_cost(DEFAULT_COST*5+BRANCH_COST); + + expand %{ + flagsReg tmp1; + cmpFUnordered_reg_reg(tmp1, src1, src2); + cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(dst, tmp1); + %} +%} + +instruct cmpDUnordered_reg_reg(flagsReg crx, regD src1, regD src2) %{ + // no match-rule, false predicate + effect(DEF crx, USE src1, USE src2); + predicate(false); + + format %{ "cmpFUrd $crx, $src1, $src2" %} + size(4); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_fcmpu); + __ fcmpu($crx$$CondRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe(pipe_class_default); +%} + +instruct cmpD_reg_reg_Ex(flagsReg crx, regD src1, regD src2) %{ + match(Set crx (CmpD src1 src2)); + ins_cost(DEFAULT_COST+BRANCH_COST); + + format %{ "CmpD $crx, $src1, $src2 \t// postalloc expanded" %} + postalloc_expand %{ + // + // replaces + // + // region src1 src2 + // \ | | + // crx=cmpD_reg_reg + // + // with + // + // region src1 src2 + // \ | | + // crx=cmpDUnordered_reg_reg + // | + // ^ region + // | \ + // crx=cmov_bns_less + // + + // create new nodes + MachNode *m1 = new (C) cmpDUnordered_reg_regNode(); + MachNode *m2 = new (C) cmov_bns_lessNode(); + + // inputs for new nodes + m1->add_req(n_region, n_src1, n_src2); + m2->add_req(n_region); + m2->add_prec(m1); + + // operands for new nodes + m1->_opnds[0] = op_crx; + m1->_opnds[1] = op_src1; + m1->_opnds[2] = op_src2; + m2->_opnds[0] = op_crx; + + // registers for new nodes + ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx + ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx + + // Insert new nodes. 
+ nodes->push(m1); + nodes->push(m2); + %} +%} + +// Compare double, generate -1,0,1 +instruct cmpD3_reg_reg_ExEx(iRegIdst dst, regD src1, regD src2) %{ + match(Set dst (CmpD3 src1 src2)); + ins_cost(DEFAULT_COST*5+BRANCH_COST); + + expand %{ + flagsReg tmp1; + cmpDUnordered_reg_reg(tmp1, src1, src2); + cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(dst, tmp1); + %} +%} + +//----------Branches--------------------------------------------------------- +// Jump + +// Direct Branch. +instruct branch(label labl) %{ + match(Goto); + effect(USE labl); + ins_cost(BRANCH_COST); + + format %{ "B $labl" %} + size(4); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_b); + Label d; // dummy + __ bind(d); + Label* p = $labl$$label; + // `p' is `NULL' when this encoding class is used only to + // determine the size of the encoded instruction. + Label& l = (NULL == p)? d : *(p); + __ b(l); + %} + ins_pipe(pipe_class_default); +%} + +// Conditional Near Branch +instruct branchCon(cmpOp cmp, flagsReg crx, label lbl) %{ + // Same match rule as `branchConFar'. + match(If cmp crx); + effect(USE lbl); + ins_cost(BRANCH_COST); + + // If set to 1 this indicates that the current instruction is a + // short variant of a long branch. This avoids using this + // instruction in first-pass matching. It will then only be used in + // the `Shorten_branches' pass. + ins_short_branch(1); + + format %{ "B$cmp $crx, $lbl" %} + size(4); + ins_encode( enc_bc(crx, cmp, lbl) ); + ins_pipe(pipe_class_default); +%} + +// This is for cases when the ppc64 `bc' instruction does not +// reach far enough. So we emit a far branch here, which is more +// expensive. +// +// Conditional Far Branch +instruct branchConFar(cmpOp cmp, flagsReg crx, label lbl) %{ + // Same match rule as `branchCon'. + match(If cmp crx); + effect(USE crx, USE lbl); + predicate(!false /* TODO: PPC port HB_Schedule*/); + // Higher cost than `branchCon'. 
+ ins_cost(5*BRANCH_COST); + + // This is not a short variant of a branch, but the long variant. + ins_short_branch(0); + + format %{ "B_FAR$cmp $crx, $lbl" %} + size(8); + ins_encode( enc_bc_far(crx, cmp, lbl) ); + ins_pipe(pipe_class_default); +%} + +// Conditional Branch used with Power6 scheduler (can be far or short). +instruct branchConSched(cmpOp cmp, flagsReg crx, label lbl) %{ + // Same match rule as `branchCon'. + match(If cmp crx); + effect(USE crx, USE lbl); + predicate(false /* TODO: PPC port HB_Schedule*/); + // Higher cost than `branchCon'. + ins_cost(5*BRANCH_COST); + + // Actually size doesn't depend on alignment but on shortening. + ins_variable_size_depending_on_alignment(true); + // long variant. + ins_short_branch(0); + + format %{ "B_FAR$cmp $crx, $lbl" %} + size(8); // worst case + ins_encode( enc_bc_short_far(crx, cmp, lbl) ); + ins_pipe(pipe_class_default); +%} + +instruct branchLoopEnd(cmpOp cmp, flagsReg crx, label labl) %{ + match(CountedLoopEnd cmp crx); + effect(USE labl); + ins_cost(BRANCH_COST); + + // short variant. + ins_short_branch(1); + + format %{ "B$cmp $crx, $labl \t// counted loop end" %} + size(4); + ins_encode( enc_bc(crx, cmp, labl) ); + ins_pipe(pipe_class_default); +%} + +instruct branchLoopEndFar(cmpOp cmp, flagsReg crx, label labl) %{ + match(CountedLoopEnd cmp crx); + effect(USE labl); + predicate(!false /* TODO: PPC port HB_Schedule */); + ins_cost(BRANCH_COST); + + // Long variant. + ins_short_branch(0); + + format %{ "B_FAR$cmp $crx, $labl \t// counted loop end" %} + size(8); + ins_encode( enc_bc_far(crx, cmp, labl) ); + ins_pipe(pipe_class_default); +%} + +// Conditional Branch used with Power6 scheduler (can be far or short). +instruct branchLoopEndSched(cmpOp cmp, flagsReg crx, label labl) %{ + match(CountedLoopEnd cmp crx); + effect(USE labl); + predicate(false /* TODO: PPC port HB_Schedule */); + // Higher cost than `branchCon'. 
+ ins_cost(5*BRANCH_COST); + + // Actually size doesn't depend on alignment but on shortening. + ins_variable_size_depending_on_alignment(true); + // Long variant. + ins_short_branch(0); + + format %{ "B_FAR$cmp $crx, $labl \t// counted loop end" %} + size(8); // worst case + ins_encode( enc_bc_short_far(crx, cmp, labl) ); + ins_pipe(pipe_class_default); +%} + +// ============================================================================ +// Java runtime operations, intrinsics and other complex operations. + +// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass +// array for an instance of the superklass. Set a hidden internal cache on a +// hit (cache is checked with exposed code in gen_subtype_check()). Return +// not zero for a miss or zero for a hit. The encoding ALSO sets flags. +// +// GL TODO: Improve this. +// - result should not be a TEMP +// - Add match rule as on sparc avoiding additional Cmp. +instruct partialSubtypeCheck(iRegPdst result, iRegP_N2P subklass, iRegP_N2P superklass, + iRegPdst tmp_klass, iRegPdst tmp_arrayptr) %{ + match(Set result (PartialSubtypeCheck subklass superklass)); + effect(TEMP result, TEMP tmp_klass, TEMP tmp_arrayptr); + ins_cost(DEFAULT_COST*10); + + format %{ "PartialSubtypeCheck $result = ($subklass instanceOf $superklass) tmp: $tmp_klass, $tmp_arrayptr" %} + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + __ check_klass_subtype_slow_path($subklass$$Register, $superklass$$Register, $tmp_arrayptr$$Register, + $tmp_klass$$Register, NULL, $result$$Register); + %} + ins_pipe(pipe_class_default); +%} + +// inlined locking and unlocking + +instruct cmpFastLock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{ + match(Set crx (FastLock oop box)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3); + // TODO PPC port predicate(!UseNewFastLockPPC64 || UseBiasedLocking); + + format %{ "FASTLOCK $oop, $box, $tmp1, $tmp2, $tmp3" %} + ins_encode %{ + // 
TODO: PPC port $archOpcode(ppc64Opcode_compound); + __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register, + $tmp3$$Register, $tmp1$$Register, $tmp2$$Register); + // If locking was successfull, crx should indicate 'EQ'. + // The compiler generates a branch to the runtime call to + // _complete_monitor_locking_Java for the case where crx is 'NE'. + %} + ins_pipe(pipe_class_compare); +%} + +instruct cmpFastUnlock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{ + match(Set crx (FastUnlock oop box)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ "FASTUNLOCK $oop, $box, $tmp1, $tmp2" %} + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register, + $tmp3$$Register, $tmp1$$Register, $tmp2$$Register); + // If unlocking was successfull, crx should indicate 'EQ'. + // The compiler generates a branch to the runtime call to + // _complete_monitor_unlocking_Java for the case where crx is 'NE'. + %} + ins_pipe(pipe_class_compare); +%} + +// Align address. +instruct align_addr(iRegPdst dst, iRegPsrc src, immLnegpow2 mask) %{ + match(Set dst (CastX2P (AndL (CastP2X src) mask))); + + format %{ "ANDDI $dst, $src, $mask \t// next aligned address" %} + size(4); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_rldicr); + __ clrrdi($dst$$Register, $src$$Register, log2_long((jlong)-$mask$$constant)); + %} + ins_pipe(pipe_class_default); +%} + +// Array size computation. +instruct array_size(iRegLdst dst, iRegPsrc end, iRegPsrc start) %{ + match(Set dst (SubL (CastP2X end) (CastP2X start))); + + format %{ "SUB $dst, $end, $start \t// array size in bytes" %} + size(4); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_subf); + __ subf($dst$$Register, $start$$Register, $end$$Register); + %} + ins_pipe(pipe_class_default); +%} + +// Clear-array with dynamic array-size. 
+instruct inlineCallClearArray(rarg1RegL cnt, rarg2RegP base, Universe dummy, regCTR ctr) %{
+  match(Set dummy (ClearArray cnt base));
+  // Both inputs are consumed by the clearing code; CTR is clobbered as well.
+  effect(USE_KILL cnt, USE_KILL base, KILL ctr);
+
+  // 'compute_padding()' is invoked for this node and may insert up to 7 nops.
+  ins_alignment(8);
+  ins_cost(MEMORY_REF_COST);
+  ins_pipe(pipe_class_default);
+
+  format %{ "ClearArray $cnt, $base" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // The helper kills cnt, base and R0.
+    __ clear_memory_doubleword($base$$Register, $cnt$$Register);
+  %}
+%}
+
+// String_IndexOf for needle of length 1.
+//
+// Match needle into immediate operands: no loadConP node needed. Saves one
+// register and two instructions over string_indexOf_imm1Node.
+//
+// Assumes register result differs from all input registers.
+//
+// Preserves registers haystack, haycnt
+// Kills registers tmp1, tmp2
+// Defines registers result
+//
+// Use dst register classes if register gets killed, as it is the case for tmp registers!
+//
+// Unfortunately this does not match too often. In many situations the AddP is used
+// by several nodes, even several StrIndexOf nodes, breaking the match tree.
+instruct string_indexOf_imm1_char(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
+                                  immP needleImm, immL offsetImm, immI_1 needlecntImm,
+                                  iRegIdst tmp1, iRegIdst tmp2,
+                                  flagsRegCR0 cr0, flagsRegCR1 cr1) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm)));
+  // The type check is implicit in the operand types, see Matcher::match_rule_supported.
+  predicate(SpecialStringIndexOf);
+  effect(TEMP result, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1);
+
+  ins_cost(150);
+  // 'compute_padding()' is invoked for this node and may insert up to 7 nops.
+  ins_alignment(8);
+  ins_pipe(pipe_class_compare);
+
+  format %{ "String IndexOf CSCL1 $haystack[0..$haycnt], $needleImm+$offsetImm[0..$needlecntImm]"
+            "-> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // The needle is a constant oop: look up its array at compile time and hand
+    // the first character to the assembler helper as a plain value.
+    const TypeOopPtr *needle_type = ((immPOper *)$needleImm)->type()->isa_oopptr();
+    ciTypeArray *needle_chars = needle_type->const_oop()->as_type_array();  // Pointer to live char *
+
+    __ string_indexof_1($result$$Register,
+                        $haystack$$Register, $haycnt$$Register,
+                        R0, needle_chars->char_at(0),
+                        $tmp1$$Register, $tmp2$$Register);
+  %}
+%}
+
+// String_IndexOf for needle of length 1.
+//
+// Special case requires less registers and emits less instructions.
+//
+// Assumes register result differs from all input registers.
+//
+// Preserves registers haystack, haycnt
+// Kills registers tmp1, tmp2, needle
+// Defines registers result
+//
+// Use dst register classes if register gets killed, as it is the case for tmp registers!
+instruct string_indexOf_imm1(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
+                             rscratch2RegP needle, immI_1 needlecntImm,
+                             iRegIdst tmp1, iRegIdst tmp2,
+                             flagsRegCR0 cr0, flagsRegCR1 cr1) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
+  // cr0 and cr1 must be declared as killed: the same string_indexof_1 helper is
+  // used by string_indexOf_imm1_char, which declares both condition registers
+  // as clobbered, and the format below already reports them as killed.
+  effect(USE_KILL needle, /* TDEF needle, */ TEMP result,
+         TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1);
+  // Required for EA: check if it is still a type_array.
+  predicate(SpecialStringIndexOf && n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
+            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
+  ins_cost(180);
+
+  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
+
+  format %{ "String IndexOf SCL1 $haystack[0..$haycnt], $needle[0..$needlecntImm]"
+            " -> $result \t// KILL $haycnt, $needle, $tmp1, $tmp2, $cr0, $cr1" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    Node *ndl = in(operand_index($needle));  // The node that defines needle.
+    ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
+    // The predicate only admits constant type arrays, so a missing array is a
+    // VM error. guarantee() does not return on failure, hence no fallback path
+    // that would read the needle register is needed afterwards.
+    guarantee(needle_values, "sanity");
+    __ string_indexof_1($result$$Register,
+                        $haystack$$Register, $haycnt$$Register,
+                        R0, needle_values->char_at(0),
+                        $tmp1$$Register, $tmp2$$Register);
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
+// String_IndexOf.
+//
+// Length of needle as immediate. This saves instruction loading constant needle
+// length.
+// @@@ TODO Specify rules for length < 8 or so, and roll out comparison of needle
+// completely or do it in vector instruction. This should save registers for
+// needlecnt and needle.
+//
+// Assumes register result differs from all input registers.
+// Overwrites haycnt.
+// Use dst register classes if register gets killed, as it is the case for tmp registers!
+instruct string_indexOf_imm(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
+                            iRegPsrc needle, uimmI15 needlecntImm,
+                            iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
+                            flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
+  // haycnt is an input that gets destroyed; result and tmp1..tmp5 are scratch,
+  // and three condition registers are clobbered.
+  effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP result,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6);
+  // Required for EA: check if it is still a type_array.
+  predicate(SpecialStringIndexOf && n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
+            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
+  ins_cost(250);
+
+  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
+
+  // The KILL list in the format mirrors the effect() above, including cr6.
+  format %{ "String IndexOf SCL $haystack[0..$haycnt], $needle[0..$needlecntImm]"
+            " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $cr0, $cr1, $cr6" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    Node *ndl = in(operand_index($needle));  // The node that defines needle.
+    // The constant needle array is passed along with the needle register and
+    // the immediate needle length.
+    ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
+
+    __ string_indexof($result$$Register,
+                      $haystack$$Register, $haycnt$$Register,
+                      $needle$$Register, needle_values, $tmp5$$Register, $needlecntImm$$constant,
+                      $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register);
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
+// StrIndexOf node.
+//
+// Assumes register result differs from all input registers.
+// Overwrites haycnt, needlecnt.
+// Use dst register classes if register gets killed, as it is the case for tmp registers!
+instruct string_indexOf(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, iRegPsrc needle, rscratch2RegI needlecnt,
+                        iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4,
+                        flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
+  // Both counts are inputs that get destroyed; result and tmp1..tmp4 are
+  // scratch, and three condition registers are clobbered.
+  effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/
+         TEMP result,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6);
+  predicate(SpecialStringIndexOf);  // See Matcher::match_rule_supported.
+  ins_cost(300);
+
+  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
+
+  // The KILL list in the format mirrors the effect() above, including cr6.
+  format %{ "String IndexOf $haystack[0..$haycnt], $needle[0..$needlecnt]"
+            " -> $result \t// KILL $haycnt, $needlecnt, $tmp1, $tmp2, $tmp3, $tmp4, $cr0, $cr1, $cr6" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ string_indexof($result$$Register,
+                      $haystack$$Register, $haycnt$$Register,
+                      $needle$$Register, NULL, $needlecnt$$Register, 0,  // needlecnt not constant.
+                      $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register);
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
+// String equals with immediate.
+instruct string_equals_imm(iRegPsrc str1, iRegPsrc str2, uimmI15 cntImm, iRegIdst result,
+                           iRegPdst tmp1, iRegPdst tmp2,
+                           flagsRegCR0 cr0, flagsRegCR6 cr6, regCTR ctr) %{
+  match(Set result (StrEquals (Binary str1 str2) cntImm));
+  effect(TEMP result, TEMP tmp1, TEMP tmp2,
+         KILL cr0, KILL cr6, KILL ctr);
+  predicate(SpecialStringEquals);  // See Matcher::match_rule_supported.
+  ins_cost(250);
+
+  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
+
+  format %{ "String Equals SCL [0..$cntImm]($str1),[0..$cntImm]($str2)"
+            " -> $result \t// KILL $cr0, $cr6, $ctr, TEMP $result, $tmp1, $tmp2" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ char_arrays_equalsImm($str1$$Register, $str2$$Register, $cntImm$$constant,
+                             $result$$Register, $tmp1$$Register, $tmp2$$Register);
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
+// String equals.
+// Use dst register classes if register gets killed, as it is the case for TEMP operands!
+instruct string_equals(iRegPsrc str1, iRegPsrc str2, iRegIsrc cnt, iRegIdst result,
+                       iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3, iRegPdst tmp4, iRegPdst tmp5,
+                       flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
+  match(Set result (StrEquals (Binary str1 str2) cnt));
+  effect(TEMP result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
+         KILL cr0, KILL cr1, KILL cr6, KILL ctr);
+  predicate(SpecialStringEquals);  // See Matcher::match_rule_supported.
+  ins_cost(300);
+
+  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
+
+  format %{ "String Equals [0..$cnt]($str1),[0..$cnt]($str2) -> $result"
+            " \t// KILL $cr0, $cr1, $cr6, $ctr, TEMP $result, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ char_arrays_equals($str1$$Register, $str2$$Register, $cnt$$Register, $result$$Register,
+                          $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, $tmp5$$Register);
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
+// String compare.
+// Char[] pointers are passed in.
+// Use dst register classes if register gets killed, as it is the case for TEMP operands!
+instruct string_compare(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result, + iRegPdst tmp, flagsRegCR0 cr0, regCTR ctr) %{ + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL cnt1, USE_KILL cnt2, USE_KILL str1, USE_KILL str2, TEMP result, TEMP tmp, KILL cr0, KILL ctr); + ins_cost(300); + + ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted. + + format %{ "String Compare $str1[0..$cnt1], $str2[0..$cnt2] -> $result" + " \t// TEMP $tmp, $result KILLs $str1, $cnt1, $str2, $cnt2, $cr0, $ctr" %} + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + __ string_compare($str1$$Register, $str2$$Register, $cnt1$$Register, $cnt2$$Register, + $result$$Register, $tmp$$Register); + %} + ins_pipe(pipe_class_compare); +%} + +//---------- Min/Max Instructions --------------------------------------------- + +instruct minI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{ + match(Set dst (MinI src1 src2)); + ins_cost(DEFAULT_COST*6); + + expand %{ + iRegIdst src1s; + iRegIdst src2s; + iRegIdst diff; + iRegIdst sm; + iRegIdst doz; // difference or zero + sxtI_reg(src1s, src1); // Ensure proper sign extention. + sxtI_reg(src2s, src2); // Ensure proper sign extention. + subI_reg_reg(diff, src2s, src1s); + // Need to consider >=33 bit result, therefore we need signmaskL. + signmask64I_regI(sm, diff); + andI_reg_reg(doz, diff, sm); // <=0 + addI_reg_reg(dst, doz, src1s); + %} +%} + +instruct maxI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{ + match(Set dst (MaxI src1 src2)); + ins_cost(DEFAULT_COST*6); + + expand %{ + immI_minus1 m1 %{ -1 %} + iRegIdst src1s; + iRegIdst src2s; + iRegIdst diff; + iRegIdst sm; + iRegIdst doz; // difference or zero + sxtI_reg(src1s, src1); // Ensure proper sign extention. + sxtI_reg(src2s, src2); // Ensure proper sign extention. 
+ subI_reg_reg(diff, src2s, src1s); + // Need to consider >=33 bit result, therefore we need signmaskL. + signmask64I_regI(sm, diff); + andcI_reg_reg(doz, sm, m1, diff); // >=0 + addI_reg_reg(dst, doz, src1s); + %} +%} + +//---------- Population Count Instructions ------------------------------------ + +// Popcnt for Power7. +instruct popCountI(iRegIdst dst, iRegIsrc src) %{ + match(Set dst (PopCountI src)); + predicate(UsePopCountInstruction && VM_Version::has_popcntw()); + ins_cost(DEFAULT_COST); + + format %{ "POPCNTW $dst, $src" %} + size(4); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_popcntb); + __ popcntw($dst$$Register, $src$$Register); + %} + ins_pipe(pipe_class_default); +%} + +// Popcnt for Power7. +instruct popCountL(iRegIdst dst, iRegLsrc src) %{ + predicate(UsePopCountInstruction && VM_Version::has_popcntw()); + match(Set dst (PopCountL src)); + ins_cost(DEFAULT_COST); + + format %{ "POPCNTD $dst, $src" %} + size(4); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_popcntb); + __ popcntd($dst$$Register, $src$$Register); + %} + ins_pipe(pipe_class_default); +%} + +instruct countLeadingZerosI(iRegIdst dst, iRegIsrc src) %{ + match(Set dst (CountLeadingZerosI src)); + predicate(UseCountLeadingZerosInstructionsPPC64); // See Matcher::match_rule_supported. + ins_cost(DEFAULT_COST); + + format %{ "CNTLZW $dst, $src" %} + size(4); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_cntlzw); + __ cntlzw($dst$$Register, $src$$Register); + %} + ins_pipe(pipe_class_default); +%} + +instruct countLeadingZerosL(iRegIdst dst, iRegLsrc src) %{ + match(Set dst (CountLeadingZerosL src)); + predicate(UseCountLeadingZerosInstructionsPPC64); // See Matcher::match_rule_supported. 
+ ins_cost(DEFAULT_COST); + + format %{ "CNTLZD $dst, $src" %} + size(4); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_cntlzd); + __ cntlzd($dst$$Register, $src$$Register); + %} + ins_pipe(pipe_class_default); +%} + +instruct countLeadingZerosP(iRegIdst dst, iRegPsrc src) %{ + // no match-rule, false predicate + effect(DEF dst, USE src); + predicate(false); + + format %{ "CNTLZD $dst, $src" %} + size(4); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_cntlzd); + __ cntlzd($dst$$Register, $src$$Register); + %} + ins_pipe(pipe_class_default); +%} + +instruct countTrailingZerosI_Ex(iRegIdst dst, iRegIsrc src) %{ + match(Set dst (CountTrailingZerosI src)); + predicate(UseCountLeadingZerosInstructionsPPC64); + ins_cost(DEFAULT_COST); + + expand %{ + immI16 imm1 %{ (int)-1 %} + immI16 imm2 %{ (int)32 %} + immI_minus1 m1 %{ -1 %} + iRegIdst tmpI1; + iRegIdst tmpI2; + iRegIdst tmpI3; + addI_reg_imm16(tmpI1, src, imm1); + andcI_reg_reg(tmpI2, src, m1, tmpI1); + countLeadingZerosI(tmpI3, tmpI2); + subI_imm16_reg(dst, imm2, tmpI3); + %} +%} + +instruct countTrailingZerosL_Ex(iRegIdst dst, iRegLsrc src) %{ + match(Set dst (CountTrailingZerosL src)); + predicate(UseCountLeadingZerosInstructionsPPC64); + ins_cost(DEFAULT_COST); + + expand %{ + immL16 imm1 %{ (long)-1 %} + immI16 imm2 %{ (int)64 %} + iRegLdst tmpL1; + iRegLdst tmpL2; + iRegIdst tmpL3; + addL_reg_imm16(tmpL1, src, imm1); + andcL_reg_reg(tmpL2, tmpL1, src); + countLeadingZerosL(tmpL3, tmpL2); + subI_imm16_reg(dst, imm2, tmpL3); + %} +%} + +// Expand nodes for byte_reverse_int. 
+instruct insrwi_a(iRegIdst dst, iRegIsrc src, immI16 pos, immI16 shift) %{
+  // No match rule and a false predicate: only reachable from expand blocks.
+  // dst is declared as a pure definition here (compare insrwi below).
+  effect(DEF dst, USE src, USE pos, USE shift);
+  predicate(false);
+
+  format %{ "INSRWI $dst, $src, $pos, $shift" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_rlwimi);
+    // Note the argument order: the assembler receives shift before pos.
+    __ insrwi($dst$$Register, $src$$Register, $shift$$constant, $pos$$constant);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// As insrwi_a, but with USE_DEF.
+instruct insrwi(iRegIdst dst, iRegIsrc src, immI16 pos, immI16 shift) %{
+  // No match rule and a false predicate: only reachable from expand blocks.
+  effect(USE_DEF dst, USE src, USE pos, USE shift);
+  predicate(false);
+
+  format %{ "INSRWI $dst, $src, $pos, $shift" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_rlwimi);
+    // Note the argument order: the assembler receives shift before pos.
+    __ insrwi($dst$$Register, $src$$Register, $shift$$constant, $pos$$constant);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Just slightly faster than java implementation.
+instruct bytes_reverse_int_Ex(iRegIdst dst, iRegIsrc src) %{
+  match(Set dst (ReverseBytesI src));
+  predicate(UseCountLeadingZerosInstructionsPPC64);
+  ins_cost(DEFAULT_COST);
+
+  expand %{
+    // Only the constants that the sequence below actually references.
+    immI16 imm24 %{ (int) 24 %}
+    immI16 imm16 %{ (int) 16 %}
+    immI16 imm8 %{ (int) 8 %}
+    immI16 imm0 %{ (int) 0 %}
+    iRegLdst tmpI1;
+    iRegLdst tmpI2;
+    iRegLdst tmpI3;
+
+    // Three shifted copies of src plus src itself are inserted into dst one
+    // after another. The first insert uses the DEF variant so that dst needs
+    // no prior value; the remaining ones update dst in place.
+    urShiftI_reg_imm(tmpI1, src, imm24);
+    insrwi_a(dst, tmpI1, imm24, imm8);
+    urShiftI_reg_imm(tmpI2, src, imm16);
+    insrwi(dst, tmpI2, imm8, imm16);
+    urShiftI_reg_imm(tmpI3, src, imm8);
+    insrwi(dst, tmpI3, imm8, imm8);
+    insrwi(dst, src, imm0, imm8);
+  %}
+%}
+
+//---------- Replicate Vector Instructions ------------------------------------
+
+// Insrdi does replicate if src == dst.
+// Replicate helper: a single INSRDI with source == destination and the
+// constants (32, 0). Never matched (predicate(false)); it is the last step of
+// every repl*_reg_Ex expansion below. dst is USE_DEF because the instruction
+// reads the register it overwrites.
+instruct repl32(iRegLdst dst) %{
+  predicate(false);
+  effect(USE_DEF dst);
+
+  format %{ "INSRDI $dst, #0, $dst, #32 \t// replicate" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_rldimi);
+    __ insrdi($dst$$Register, $dst$$Register, 32, 0);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Insrdi does replicate if src == dst.
+// Same helper with the constants (48, 0); used by the 8B and 4S expansions.
+instruct repl48(iRegLdst dst) %{
+  predicate(false);
+  effect(USE_DEF dst);
+
+  format %{ "INSRDI $dst, #0, $dst, #48 \t// replicate" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_rldimi);
+    __ insrdi($dst$$Register, $dst$$Register, 48, 0);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Insrdi does replicate if src == dst.
+// Same helper with the constants (56, 0); used by the 8B expansion only.
+instruct repl56(iRegLdst dst) %{
+  predicate(false);
+  effect(USE_DEF dst);
+
+  format %{ "INSRDI $dst, #0, $dst, #56 \t// replicate" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_rldimi);
+    __ insrdi($dst$$Register, $dst$$Register, 56, 0);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// ReplicateB of a register for vectors of length 8: copy src into the long
+// register, then apply repl56, repl48 and repl32 in that order.
+// NOTE(review): presumably each step doubles the replicated width
+// (8 -> 16 -> 32 -> 64 bits) -- confirm against the insrdi definition.
+instruct repl8B_reg_Ex(iRegLdst dst, iRegIsrc src) %{
+  match(Set dst (ReplicateB src));
+  predicate(n->as_Vector()->length() == 8);
+  expand %{
+    moveReg(dst, src);
+    repl56(dst);
+    repl48(dst);
+    repl32(dst);
+  %}
+%}
+
+// ReplicateB of the constant 0 (length 8): a single LI. The constant's low
+// 16 bits are sign-extended through the (short) cast before being passed to li.
+instruct repl8B_immI0(iRegLdst dst, immI_0 zero) %{
+  match(Set dst (ReplicateB zero));
+  predicate(n->as_Vector()->length() == 8);
+  format %{ "LI $dst, #0 \t// replicate8B" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
+    __ li($dst$$Register, (int)((short)($zero$$constant & 0xFFFF)));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// ReplicateB of the constant -1 (length 8): a single LI of the sign-extended
+// 16-bit constant.
+instruct repl8B_immIminus1(iRegLdst dst, immI_minus1 src) %{
+  match(Set dst (ReplicateB src));
+  predicate(n->as_Vector()->length() == 8);
+  format %{ "LI $dst, #-1 \t// replicate8B" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
+    __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// ReplicateS of a register for vectors of length 4: copy src, then apply
+// repl48 and repl32.
+instruct repl4S_reg_Ex(iRegLdst dst, iRegIsrc src) %{
+  match(Set dst (ReplicateS src));
+  predicate(n->as_Vector()->length() == 4);
+  expand %{
+    moveReg(dst, src);
+    repl48(dst);
+    repl32(dst);
+  %}
+%}
+
+// ReplicateS of the constant 0 (length 4): a single LI.
+// NOTE(review): the format annotation reads "replicate4C" although the rule is
+// named 4S -- presumably "C" stands for char; confirm before renaming.
+instruct repl4S_immI0(iRegLdst dst, immI_0 zero) %{
+  match(Set dst (ReplicateS zero));
+  predicate(n->as_Vector()->length() == 4);
+  format %{ "LI $dst, #0 \t// replicate4C" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
+    __ li($dst$$Register, (int)((short)($zero$$constant & 0xFFFF)));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// ReplicateS of the constant -1 (length 4): a single LI.
+instruct repl4S_immIminus1(iRegLdst dst, immI_minus1 src) %{
+  match(Set dst (ReplicateS src));
+  predicate(n->as_Vector()->length() == 4);
+  format %{ "LI $dst, -1 \t// replicate4C" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
+    __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// ReplicateI of a register for vectors of length 2: copy src, then apply
+// repl32. Costed as two default-cost instructions.
+instruct repl2I_reg_Ex(iRegLdst dst, iRegIsrc src) %{
+  match(Set dst (ReplicateI src));
+  predicate(n->as_Vector()->length() == 2);
+  ins_cost(2 * DEFAULT_COST);
+  expand %{
+    moveReg(dst, src);
+    repl32(dst);
+  %}
+%}
+
+// ReplicateI of the constant 0 (length 2): a single LI. The format annotation
+// names the vector shape this rule matches (2I).
+instruct repl2I_immI0(iRegLdst dst, immI_0 zero) %{
+  match(Set dst (ReplicateI zero));
+  predicate(n->as_Vector()->length() == 2);
+  format %{ "LI $dst, #0 \t// replicate2I" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
+    __ li($dst$$Register, (int)((short)($zero$$constant & 0xFFFF)));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// ReplicateI of the constant -1 (length 2): a single LI. The format annotation
+// names the vector shape this rule matches (2I).
+instruct repl2I_immIminus1(iRegLdst dst, immI_minus1 src) %{
+  match(Set dst (ReplicateI src));
+  predicate(n->as_Vector()->length() == 2);
+  format %{ "LI $dst, -1 \t// replicate2I" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
+    __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Move float to int register via stack, replicate.
+// ReplicateF of a float register for vectors of length 2. The float's bit
+// pattern goes through a stack slot into an int register, is copied into the
+// long destination and is then replicated with repl32. The cost accounts for
+// the two memory accesses plus one default-cost instruction.
+instruct repl2F_reg_Ex(iRegLdst dst, regF src) %{
+  match(Set dst (ReplicateF src));
+  predicate(n->as_Vector()->length() == 2);
+  ins_cost(2 * MEMORY_REF_COST + DEFAULT_COST);
+  expand %{
+    stackSlotL tmpS;
+    iRegIdst tmpI;
+    moveF2I_reg_stack(tmpS, src); // Move float to stack.
+    moveF2I_stack_reg(tmpI, tmpS); // Move stack to int reg.
+    moveReg(dst, tmpI); // Move int to long reg.
+    repl32(dst); // Replicate bitpattern.
+  %}
+%}
+
+// Replicate scalar constant to packed float values in Double register
+// Handled by a postalloc expansion that is given the constant table base; per
+// the format string, the replicated value is loaded from the constant table.
+instruct repl2F_immF_Ex(iRegLdst dst, immF src) %{
+  match(Set dst (ReplicateF src));
+  predicate(n->as_Vector()->length() == 2);
+  ins_cost(5 * DEFAULT_COST);
+
+  format %{ "LD $dst, offset, $constanttablebase\t// load replicated float $src $src from table, postalloc expanded" %}
+  postalloc_expand( postalloc_expand_load_replF_constant(dst, src, constanttablebase) );
+%}
+
+// Replicate scalar zero constant to packed float values in Double register
+// Emits a single LI of 0x0. Unlike the other single-LI replicate rules, no
+// size() is declared here.
+instruct repl2F_immF0(iRegLdst dst, immF_0 zero) %{
+  match(Set dst (ReplicateF zero));
+  predicate(n->as_Vector()->length() == 2);
+
+  format %{ "LI $dst, #0 \t// replicate2F" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
+    __ li($dst$$Register, 0x0);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// ============================================================================
+// Safepoint Instruction
+
+// Safepoint poll selected when LoadPollAddressFromThread is set. The polling
+// address arrives in the poll operand; the load itself is emitted by
+// enc_poll(0x0, poll) and is declared as one 4-byte instruction.
+instruct safePoint_poll(iRegPdst poll) %{
+  match(SafePoint poll);
+  predicate(LoadPollAddressFromThread);
+
+  // It caused problems to add the effect that r0 is killed, but this
+  // effect no longer needs to be mentioned, since r0 is not contained
+  // in a reg_class.
+
+  format %{ "LD R0, #0, $poll \t// Safepoint poll for GC" %}
+  size(4);
+  ins_encode( enc_poll(0x0, poll) );
+  ins_pipe(pipe_class_default);
+%}
+
+// Safepoint without per-thread support. Load address of page to poll
+// as constant.
+// Rscratch2RegP is R12.
+// LoadConPollAddr node is added in pd_post_matching_hook(). It must be
+// a separate node so that the oop map is at the right location.
+// Uses the same enc_poll encoding as safePoint_poll, but declares no size().
+instruct safePoint_poll_conPollAddr(rscratch2RegP poll) %{
+  match(SafePoint poll);
+  predicate(!LoadPollAddressFromThread);
+
+  // It caused problems to add the effect that r0 is killed, but this
+  // effect no longer needs to be mentioned, since r0 is not contained
+  // in a reg_class.
+
+  format %{ "LD R12, addr of polling page\n\t"
+            "LD R0, #0, R12 \t// Safepoint poll for GC" %}
+  ins_encode( enc_poll(0x0, poll) );
+  ins_pipe(pipe_class_default);
+%}
+
+// ============================================================================
+// Call Instructions
+
+// Call Java Static Instruction
+
+// Schedulable version of call static node.
+// Selected for static calls that are not method handle invokes; those are
+// covered by CallStaticJavaDirectHandle below. Declared as one 4-byte
+// instruction with up to three patchable constants.
+instruct CallStaticJavaDirect(method meth) %{
+  match(CallStaticJava);
+  effect(USE meth);
+  predicate(!((CallStaticJavaNode*)n)->is_method_handle_invoke());
+  ins_cost(CALL_COST);
+
+  ins_num_consts(3 /* up to 3 patchable constants: inline cache, 2 call targets. */);
+
+  format %{ "CALL,static $meth \t// ==> " %}
+  size(4);
+  ins_encode( enc_java_static_call(meth) );
+  ins_pipe(pipe_class_call);
+%}
+
+// Schedulable version of call static node.
+// Method handle invoke variant: same match rule with the complementary
+// predicate, encoded by enc_java_handle_call. No size() is declared for it.
+instruct CallStaticJavaDirectHandle(method meth) %{
+  match(CallStaticJava);
+  effect(USE meth);
+  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
+  ins_cost(CALL_COST);
+
+  ins_num_consts(3 /* up to 3 patchable constants: inline cache, 2 call targets. */);
+
+  format %{ "CALL,static $meth \t// ==> " %}
+  ins_encode( enc_java_handle_call(meth) );
+  ins_pipe(pipe_class_call);
+%}
+
+// Call Java Dynamic Instruction
+
+// Used by postalloc expand of CallDynamicJavaDirectSched_Ex (actual call).
+// Loading of IC was postalloc expanded. The nodes loading the IC are reachable
+// via fields ins_field_load_ic_hi_node and ins_field_load_ic_node.
+// The call destination must still be placed in the constant pool.
+// The branch-and-link part of a postalloc expanded dynamic call. It carries a
+// CallDynamicJava match rule only to obtain the node's data fields; the false
+// predicate keeps the matcher from ever selecting it. It adds two node fields
+// for the inline cache loads and declares one patchable constant.
+instruct CallDynamicJavaDirectSched(method meth) %{
+  match(CallDynamicJava); // To get all the data fields we need ...
+  effect(USE meth);
+  predicate(false); // ... but never match.
+
+  ins_field_load_ic_hi_node(loadConL_hiNode*);
+  ins_field_load_ic_node(loadConLNode*);
+  ins_num_consts(1 /* 1 patchable constant: call destination */);
+
+  format %{ "BL \t// dynamic $meth ==> " %}
+  size(4);
+  ins_encode( enc_java_dynamic_call_sched(meth) );
+  ins_pipe(pipe_class_call);
+%}
+
+// Schedulable (i.e. postalloc expanded) version of call dynamic java.
+// We use postalloc expanded calls if we use inline caches
+// and do not update method data.
+//
+// This instruction has two constants: inline cache (IC) and call destination.
+// Loading the inline cache will be postalloc expanded, thus leaving a call with
+// one constant.
+instruct CallDynamicJavaDirectSched_Ex(method meth) %{
+  match(CallDynamicJava);
+  effect(USE meth);
+  predicate(UseInlineCaches);
+  ins_cost(CALL_COST);
+
+  ins_num_consts(2 /* 2 patchable constants: inline cache, call destination. */);
+
+  format %{ "CALL,dynamic $meth \t// postalloc expanded" %}
+  postalloc_expand( postalloc_expand_java_dynamic_call_sched(meth, constanttablebase) );
+%}
+
+// Compound version of call dynamic java
+// We use postalloc expanded calls if we use inline caches
+// and do not update method data.
+// This compound form is therefore the one selected when UseInlineCaches is off
+// (its predicate is the complement of the rule above).
+instruct CallDynamicJavaDirect(method meth) %{
+  match(CallDynamicJava);
+  effect(USE meth);
+  predicate(!UseInlineCaches);
+  ins_cost(CALL_COST);
+
+  // Enc_java_to_runtime_call needs up to 4 constants (method data oop).
+  // NOTE(review): the encoding used below is enc_java_dynamic_call, not
+  // enc_java_to_runtime_call -- confirm which one the constant count refers to.
+  ins_num_consts(4);
+
+  format %{ "CALL,dynamic $meth \t// ==> " %}
+  ins_encode( enc_java_dynamic_call(meth, constanttablebase) );
+  ins_pipe(pipe_class_call);
+%}
+
+// Call Runtime Instruction
+
+// Call into the runtime: matched without a predicate and encoded as a whole by
+// enc_java_to_runtime_call. No size() is declared.
+instruct CallRuntimeDirect(method meth) %{
+  match(CallRuntime);
+  effect(USE meth);
+  ins_cost(CALL_COST);
+
+  // Enc_java_to_runtime_call needs up to 3 constants: call target,
+  // env for callee, C-toc.
+  ins_num_consts(3);
+
+  format %{ "CALL,runtime" %}
+  ins_encode( enc_java_to_runtime_call(meth) );
+  ins_pipe(pipe_class_call);
+%}
+
+// Call Leaf
+
+// Used by postalloc expand of CallLeafDirect_Ex (mtctr).
+// Has no match rule; declared as one 4-byte instruction with one constant.
+instruct CallLeafDirect_mtctr(iRegLdst dst, iRegLsrc src) %{
+  effect(DEF dst, USE src);
+
+  ins_num_consts(1);
+
+  format %{ "MTCTR $src" %}
+  size(4);
+  ins_encode( enc_leaf_call_mtctr(src) );
+  ins_pipe(pipe_class_default);
+%}
+
+// Used by postalloc expand of CallLeafDirect_Ex (actual call).
+// Emits the single BCTRL; the false predicate keeps it from being matched.
+instruct CallLeafDirect(method meth) %{
+  match(CallLeaf); // To get all the data fields we need ...
+  effect(USE meth);
+  predicate(false); // but never match.
+
+  format %{ "BCTRL \t// leaf call $meth ==> " %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_bctrl);
+    __ bctrl();
+  %}
+  ins_pipe(pipe_class_call);
+%}
+
+// postalloc expand of CallLeafDirect.
+// Load address to call from TOC, then branch to it (the MTCTR and BCTRL parts
+// are the two helper nodes above).
+instruct CallLeafDirect_Ex(method meth) %{
+  match(CallLeaf);
+  effect(USE meth);
+  ins_cost(CALL_COST);
+
+  // Postalloc_expand_java_to_runtime_call needs up to 3 constants: call target,
+  // env for callee, C-toc.
+  ins_num_consts(3);
+
+  format %{ "CALL,runtime leaf $meth \t// postalloc expanded" %}
+  postalloc_expand( postalloc_expand_java_to_runtime_call(meth, constanttablebase) );
+%}
+
+// Call runtime without safepoint - same as CallLeaf.
+// postalloc expand of CallLeafNoFPDirect.
+// Load address to call from TOC, then branch to it. Uses the same postalloc
+// expansion as CallLeafDirect_Ex.
+instruct CallLeafNoFPDirect_Ex(method meth) %{
+  match(CallLeafNoFP);
+  effect(USE meth);
+  ins_cost(CALL_COST);
+
+  // Postalloc_expand_java_to_runtime_call needs up to 3 constants: call target,
+  // env for callee, C-toc.
+  ins_num_consts(3);
+
+  format %{ "CALL,runtime leaf nofp $meth \t// postalloc expanded" %}
+  postalloc_expand( postalloc_expand_java_to_runtime_call(meth, constanttablebase) );
+%}
+
+// Tail Call; Jump from runtime stub to Java code.
+// Also known as an 'interprocedural jump'.
+// Target of jump will eventually return to caller.
+// TailJump below removes the return address.
+// Emits two instructions (8 bytes): move jump_target to CTR, then branch via
+// CTR. method_oop only appears in the match rule and the format string; the
+// encoding does not touch it.
+instruct TailCalljmpInd(iRegPdstNoScratch jump_target, inline_cache_regP method_oop) %{
+  match(TailCall jump_target method_oop);
+  ins_cost(CALL_COST);
+
+  format %{ "MTCTR $jump_target \t// $method_oop holds method oop\n\t"
+            "BCTR \t// tail call" %}
+  size(8);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ mtctr($jump_target$$Register);
+    __ bctr();
+  %}
+  ins_pipe(pipe_class_call);
+%}
+
+// Return Instruction
+// A single BLR; restoring LR is left to the epilog node (see the comment in
+// the encoding).
+instruct Ret() %{
+  match(Return);
+  format %{ "BLR \t// branch to link register" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_blr);
+    // LR is restored in MachEpilogNode. Just do the RET here.
+    __ blr();
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Tail Jump; remove the return address; jump to target.
+// TailCall above leaves the return address around.
+// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
+// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a
+// "restore" before this instruction (in Epilogue), we need to materialize it
+// in %i0.
+// NOTE(review): the %o0/%i0 wording does not match anything in this rule; here
+// ex_oop is constrained to the rarg1RegP operand class. Presumably the text was
+// carried over from another platform's description -- confirm and reword.
+// Emits three instructions (12 bytes): load the issuing pc from the lr slot of
+// the frame at R1_SP into R4_ARG2, move jump_target to CTR, branch via CTR.
+instruct tailjmpInd(iRegPdstNoScratch jump_target, rarg1RegP ex_oop) %{
+  match(TailJump jump_target ex_oop);
+  ins_cost(CALL_COST);
+
+  format %{ "LD R4_ARG2 = LR\n\t"
+            "MTCTR $jump_target\n\t"
+            "BCTR \t// TailJump, exception oop: $ex_oop" %}
+  size(12);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ ld(R4_ARG2/* issuing pc */, _abi(lr), R1_SP);
+    __ mtctr($jump_target$$Register);
+    __ bctr();
+  %}
+  ins_pipe(pipe_class_call);
+%}
+
+// Create exception oop: created by stack-crawling runtime code.
+// Created exception is now available to this handler, and is setup
+// just prior to jumping to this handler. No code emitted.
+// Zero-size, zero-cost placeholder: it only tells the allocator that the
+// exception oop is defined in the rarg1RegP operand; the encoding is empty.
+instruct CreateException(rarg1RegP ex_oop) %{
+  match(Set ex_oop (CreateEx));
+  ins_cost(0);
+
+  format %{ " -- \t// exception oop; no code emitted" %}
+  size(0);
+  ins_encode( /*empty*/ );
+  ins_pipe(pipe_class_default);
+%}
+
+// Rethrow exception: The exception oop will come in the first
+// argument position. Then JUMP (not call) to the rethrow stub code.
+// The encoding sets the instruction mark on the code buffer and then emits a
+// patchable 64-bit branch to OptoRuntime::rethrow_stub() with a runtime-call
+// relocation. No size() is declared for this rule.
+instruct RethrowException() %{
+  match(Rethrow);
+  ins_cost(CALL_COST);
+
+  format %{ "Jmp rethrow_stub" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    cbuf.set_insts_mark();
+    __ b64_patchable((address)OptoRuntime::rethrow_stub(), relocInfo::runtime_call_type);
+  %}
+  ins_pipe(pipe_class_call);
+%}
+
+// Die now.
+// Matches Halt and emits the assembler's should-not-reach-here trap, declared
+// as one 4-byte instruction.
+instruct ShouldNotReachHere() %{
+  match(Halt);
+  ins_cost(CALL_COST);
+
+  format %{ "ShouldNotReachHere" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_tdi);
+    __ trap_should_not_reach_here();
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// This name is KNOWN by the ADLC and cannot be changed. The ADLC
+// forces a 'TypeRawPtr::BOTTOM' output type for this guy.
+// Get a DEF on threadRegP, no costs, no encoding, use
+// 'ins_should_rematerialize(true)' to avoid spilling.
+// Zero size, empty encoding, scheduled on the empty pipe class.
+instruct tlsLoadP(threadRegP dst) %{
+  match(Set dst (ThreadLocal));
+  ins_cost(0);
+
+  ins_should_rematerialize(true);
+
+  format %{ " -- \t// $dst=Thread::current(), empty" %}
+  size(0);
+  ins_encode( /*empty*/ );
+  ins_pipe(pipe_class_empty);
+%}
+
+//---Some PPC specific nodes---------------------------------------------------
+
+// Stop a group.
+// Zero-cost, 4-byte node flagged as a nop (ins_is_nop) that emits the
+// assembler's endgroup instruction. It has no match rule or operands.
+instruct endGroup() %{
+  ins_cost(0);
+
+  ins_is_nop(true);
+
+  format %{ "End Bundle (ori r1, r1, 0)" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_endgroup);
+    __ endgroup();
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Nop instructions
+// All rules below are operand-less, zero-cost, 4-byte nodes flagged with
+// ins_is_nop(true); they differ only in the assembler routine they call.
+
+// Emits the assembler's plain nop().
+instruct fxNop() %{
+  ins_cost(0);
+
+  ins_is_nop(true);
+
+  format %{ "fxNop" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_fmr);
+    __ nop();
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Emits fpnop0().
+instruct fpNop0() %{
+  ins_cost(0);
+
+  ins_is_nop(true);
+
+  format %{ "fpNop0" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_fmr);
+    __ fpnop0();
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Emits fpnop1().
+instruct fpNop1() %{
+  ins_cost(0);
+
+  ins_is_nop(true);
+
+  format %{ "fpNop1" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_fmr);
+    __ fpnop1();
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Emits brnop0(). Same attributes as its siblings, listed in a different order.
+instruct brNop0() %{
+  ins_cost(0);
+  size(4);
+  format %{ "brNop0" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_mcrf);
+    __ brnop0();
+  %}
+  ins_is_nop(true);
+  ins_pipe(pipe_class_default);
+%}
+
+// Emits brnop1().
+instruct brNop1() %{
+  ins_cost(0);
+
+  ins_is_nop(true);
+
+  format %{ "brNop1" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_mcrf);
+    __ brnop1();
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Emits brnop2().
+instruct brNop2() %{
+  ins_cost(0);
+
+  ins_is_nop(true);
+
+  format %{ "brNop2" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_mcrf);
+    __ brnop2();
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+//----------PEEPHOLE RULES-----------------------------------------------------
+// These must follow all instruction definitions as they use the names
+// defined in the instructions definitions.
+//
+// peepmatch ( root_instr_name [preceding_instruction]* );
+//
+// peepconstraint %{
+// (instruction_number.operand_name relational_op instruction_number.operand_name
+//  [, ...] );
+// // instruction numbers are zero-based using left to right order in peepmatch
+//
+// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
+// // provide an instruction_number.operand_name for each operand that appears
+// // in the replacement instruction's match rule
+//
+// ---------VM FLAGS---------------------------------------------------------
+//
+// All peephole optimizations can be turned off using -XX:-OptoPeephole
+//
+// Each peephole rule is given an identifying number starting with zero and
+// increasing by one in the order seen by the parser. An individual peephole
+// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
+// on the command-line.
+//
+// ---------CURRENT LIMITATIONS----------------------------------------------
+//
+// Only match adjacent instructions in same basic block
+// Only equality constraints
+// Only constraints between operands, not (0.dest_reg == EAX_enc)
+// Only one replacement instruction
+//
+// ---------EXAMPLE----------------------------------------------------------
+//
+// // pertinent parts of existing instructions in architecture description
+// instruct movI(eRegI dst, eRegI src) %{
+//   match(Set dst (CopyI src));
+// %}
+//
+// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
+//   match(Set dst (AddI dst src));
+//   effect(KILL cr);
+// %}
+//
+// // Change (inc mov) to lea
+// peephole %{
+//   // increment preceded by register-register move
+//   peepmatch ( incI_eReg movI );
+//   // require that the destination register of the increment
+//   // match the destination register of the move
+//   peepconstraint ( 0.dst == 1.dst );
+//   // construct a replacement instruction that sets
+//   // the destination to ( move's source register + one )
+//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
+// %}
+//
+// Implementation no longer uses movX instructions since
+// machine-independent system no longer uses CopyX nodes.
+//
+// peephole %{
+//   peepmatch ( incI_eReg movI );
+//   peepconstraint ( 0.dst == 1.dst );
+//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
+// %}
+//
+// peephole %{
+//   peepmatch ( decI_eReg movI );
+//   peepconstraint ( 0.dst == 1.dst );
+//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
+// %}
+//
+// peephole %{
+//   peepmatch ( addI_eReg_imm movI );
+//   peepconstraint ( 0.dst == 1.dst );
+//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
+// %}
+//
+// peephole %{
+//   peepmatch ( addP_eReg_imm movP );
+//   peepconstraint ( 0.dst == 1.dst );
+//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
+// %}
+
+// // Change load of spilled value to only a spill
+// instruct storeI(memory mem, eRegI src) %{
+//   match(Set mem (StoreI mem src));
+// %}
+//
+// instruct loadI(eRegI dst, memory mem) %{
+//   match(Set dst (LoadI mem));
+// %}
+//
+// The three live rules below share one shape. In each peepmatch the first name
+// is the load (instruction 0) and the second is the store (instruction 1).
+// When the store's source operand equals the load's destination and both name
+// the same memory operand, the pair is replaced by the store alone.
+
+// int: loadI / storeI.
+peephole %{
+  peepmatch ( loadI storeI );
+  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
+  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
+%}
+
+// long: loadL / storeL.
+peephole %{
+  peepmatch ( loadL storeL );
+  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
+  peepreplace ( storeL( 1.mem 1.mem 1.src ) );
+%}
+
+// pointer: loadP / storeP.
+// NOTE(review): this rule refers to the store's memory operand as 1.dst where
+// the two rules above use 1.mem -- presumably storeP names that operand dst;
+// confirm against its definition.
+peephole %{
+  peepmatch ( loadP storeP );
+  peepconstraint ( 1.src == 0.dst, 1.dst == 0.mem );
+  peepreplace ( storeP( 1.dst 1.dst 1.src ) );
+%}
+
+//----------SMARTSPILL RULES---------------------------------------------------
+// These must follow all instruction definitions as they use the names
+// defined in the instructions definitions.