# HG changeset patch # User S.Bharadwaj Yadavalli # Date 1379475342 14400 # Node ID 03fe11f5f18694f13be20e67f1912f4f3da1ff37 # Parent 4eec2ac671c22b67410da1fac3ae439c44599d57 PTX Codegen: predicate register materialization and declaration; conditional branch generation; fix register declaration. diff -r 4eec2ac671c2 -r 03fe11f5f186 graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java --- a/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java Tue Sep 17 18:36:54 2013 -0700 +++ b/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java Tue Sep 17 23:35:42 2013 -0400 @@ -39,14 +39,6 @@ super(target); } - public final void at() { - emitString("@%p" + " " + ""); - } - - public final void atq() { - emitString("@%q" + " " + ""); - } - public static class StandardFormat { protected Kind valueKind; @@ -287,8 +279,8 @@ } // Checkstyle: stop method name check - public final void bra(String tgt) { - emitString("bra" + " " + tgt + ";" + ""); + public final void bra(String tgt, int pred) { + emitString(/* guard the branch with @%pN only when pred >= 0; otherwise emit a plain bra */ ((pred >= 0) ? 
("@%p" + pred + " ") : "") + "bra" + " " + tgt + ";" + ""); } public final void bra_uni(String tgt) { @@ -573,228 +565,228 @@ emitString("ret.uni;" + " " + ""); } - public final void setp_eq_f32(Register a, Register b) { - emitString("setp.eq.f32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void setp_eq_f32(Register a, Register b, int p) { + emitString("setp.eq.f32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void setp_ne_f32(Register a, Register b) { - emitString("setp.ne.f32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void setp_ne_f32(Register a, Register b, int p) { + emitString("setp.ne.f32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void setp_lt_f32(Register a, Register b) { - emitString("setp.lt.f32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void setp_lt_f32(Register a, Register b, int p) { + emitString("setp.lt.f32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void setp_le_f32(Register a, Register b) { - emitString("setp.le.f32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void setp_le_f32(Register a, Register b, int p) { + emitString("setp.le.f32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void setp_gt_f32(Register a, Register b) { - emitString("setp.gt.f32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void setp_gt_f32(Register a, Register b, int p) { + emitString("setp.gt.f32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void setp_ge_f32(Register a, Register b) { - emitString("setp.ge.f32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void 
setp_ge_f32(Register a, Register b, int p) { + emitString("setp.ge.f32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void setp_eq_f32(float f32, Register b) { - emitString("setp.eq.f32" + " " + "%p" + ", " + f32 + ", %r" + b.encoding() + ";" + ""); + public final void setp_eq_f32(float f32, Register b, int p) { + emitString("setp.eq.f32" + " " + "%p" + p + ", " + f32 + ", %r" + b.encoding() + ";" + ""); } - public final void setp_ne_f32(float f32, Register b) { - emitString("setp.ne.f32" + " " + "%p" + ", " + f32 + ", %r" + b.encoding() + ";" + ""); + public final void setp_ne_f32(float f32, Register b, int p) { + emitString("setp.ne.f32" + " " + "%p" + p + ", " + f32 + ", %r" + b.encoding() + ";" + ""); } - public final void setp_lt_f32(float f32, Register b) { - emitString("setp.lt.f32" + " " + "%p" + ", " + f32 + ", %r" + b.encoding() + ";" + ""); + public final void setp_lt_f32(float f32, Register b, int p) { + emitString("setp.lt.f32" + " " + "%p" + p + ", " + f32 + ", %r" + b.encoding() + ";" + ""); } - public final void setp_le_f32(float f32, Register b) { - emitString("setp.le.f32" + " " + "%p" + ", " + f32 + ", %r" + b.encoding() + ";" + ""); + public final void setp_le_f32(float f32, Register b, int p) { + emitString("setp.le.f32" + " " + "%p" + p + ", " + f32 + ", %r" + b.encoding() + ";" + ""); } - public final void setp_gt_f32(float f32, Register b) { - emitString("setp.gt.f32" + " " + "%p" + ", " + f32 + ", %r" + b.encoding() + ";" + ""); + public final void setp_gt_f32(float f32, Register b, int p) { + emitString("setp.gt.f32" + " " + "%p" + p + ", " + f32 + ", %r" + b.encoding() + ";" + ""); } - public final void setp_ge_f32(float f32, Register b) { - emitString("setp.ge.f32" + " " + "%p" + ", " + f32 + ", %r" + b.encoding() + ";" + ""); + public final void setp_ge_f32(float f32, Register b, int p) { + emitString("setp.ge.f32" + " " + "%p" + p + ", " + f32 + ", %r" + b.encoding() + ";" + ""); } - 
public final void setp_eq_f64(double f64, Register b) { - emitString("setp.eq.f64" + " " + "%p" + ", " + f64 + ", %r" + b.encoding() + ";" + ""); + public final void setp_eq_f64(double f64, Register b, int p) { + emitString("setp.eq.f64" + " " + "%p" + p + ", " + f64 + ", %r" + b.encoding() + ";" + ""); } - public final void setp_ne_f64(double f64, Register b) { - emitString("setp.ne.f64" + " " + "%p" + ", " + f64 + ", %r" + b.encoding() + ";" + ""); + public final void setp_ne_f64(double f64, Register b, int p) { + emitString("setp.ne.f64" + " " + "%p" + p + ", " + f64 + ", %r" + b.encoding() + ";" + ""); } - public final void setp_lt_f64(double f64, Register b) { - emitString("setp.lt.f64" + " " + "%p" + ", " + f64 + ", %r" + b.encoding() + ";" + ""); + public final void setp_lt_f64(double f64, Register b, int p) { + emitString("setp.lt.f64" + " " + "%p" + p + ", " + f64 + ", %r" + b.encoding() + ";" + ""); } - public final void setp_le_f64(double f64, Register b) { - emitString("setp.le.f64" + " " + "%p" + ", " + f64 + ", %r" + b.encoding() + ";" + ""); + public final void setp_le_f64(double f64, Register b, int p) { + emitString("setp.le.f64" + " " + "%p" + p + ", " + f64 + ", %r" + b.encoding() + ";" + ""); } - public final void setp_gt_f64(double f64, Register b) { - emitString("setp.gt.f64" + " " + "%p" + ", " + f64 + ", %r" + b.encoding() + ";" + ""); + public final void setp_gt_f64(double f64, Register b, int p) { + emitString("setp.gt.f64" + " " + "%p" + p + ", " + f64 + ", %r" + b.encoding() + ";" + ""); } - public final void setp_ge_f64(double f64, Register b) { - emitString("setp.ge.f64" + " " + "%p" + ", " + f64 + ", %r" + b.encoding() + ";" + ""); + public final void setp_ge_f64(double f64, Register b, int p) { + emitString("setp.ge.f64" + " " + "%p" + p + ", " + f64 + ", %r" + b.encoding() + ";" + ""); } - public final void setp_eq_s64(Register a, Register b) { - emitString("setp.eq.s64" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + 
";" + ""); + public final void setp_eq_s64(Register a, Register b, int p) { + emitString("setp.eq.s64" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void setp_eq_s64(long s64, Register b) { - emitString("setp.eq.s64" + " " + "%p" + ", " + s64 + ", %r" + b.encoding() + ";" + ""); + public final void setp_eq_s64(long s64, Register b, int p) { + emitString("setp.eq.s64" + " " + "%p" + p + ", " + s64 + ", %r" + b.encoding() + ";" + ""); } - public final void setp_eq_s32(Register a, Register b) { - emitString("setp.eq.s32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void setp_eq_s32(Register a, Register b, int p) { + emitString("setp.eq.s32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void setp_ne_s32(Register a, Register b) { - emitString("setp.ne.s32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void setp_ne_s32(Register a, Register b, int p) { + emitString("setp.ne.s32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void setp_lt_s32(Register a, Register b) { - emitString("setp.lt.s32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void setp_lt_s32(Register a, Register b, int p) { + emitString("setp.lt.s32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void setp_le_s32(Register a, Register b) { - emitString("setp.le.s32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void setp_le_s32(Register a, Register b, int p) { + emitString("setp.le.s32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void setp_gt_s32(Register a, Register b) { - emitString("setp.gt.s32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void 
setp_gt_s32(Register a, Register b, int p) { + emitString("setp.gt.s32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void setp_ge_s32(Register a, Register b) { - emitString("setp.ge.s32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void setp_ge_s32(Register a, Register b, int p) { + emitString("setp.ge.s32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void setp_eq_s32(Register a, int s32) { - emitString("setp.eq.s32" + " " + "%p" + ", %r" + a.encoding() + ", " + s32 + ";" + ""); + public final void setp_eq_s32(Register a, int s32, int p) { + emitString("setp.eq.s32" + " " + "%p" + p + ", %r" + a.encoding() + ", " + s32 + ";" + ""); } - public final void setp_ne_s32(Register a, int s32) { - emitString("setp.ne.s32" + " " + "%p" + ", %r" + a.encoding() + ", " + s32 + ";" + ""); + public final void setp_ne_s32(Register a, int s32, int p) { + emitString("setp.ne.s32" + " " + "%p" + p + ", %r" + a.encoding() + ", " + s32 + ";" + ""); } - public final void setp_lt_s32(Register a, int s32) { - emitString("setp.lt.s32" + " " + "%p" + ", %r" + a.encoding() + ", " + s32 + ";" + ""); + public final void setp_lt_s32(Register a, int s32, int p) { + emitString("setp.lt.s32" + " " + "%p" + p + ", %r" + a.encoding() + ", " + s32 + ";" + ""); } - public final void setp_le_s32(Register a, int s32) { - emitString("setp.le.s32" + " " + "%p" + ", %r" + a.encoding() + ", " + s32 + ";" + ""); + public final void setp_le_s32(Register a, int s32, int p) { + emitString("setp.le.s32" + " " + "%p" + p + ", %r" + a.encoding() + ", " + s32 + ";" + ""); } - public final void setp_gt_s32(Register a, int s32) { - emitString("setp.gt.s32" + " " + "%p" + ", %r" + a.encoding() + ", " + s32 + ";" + ""); + public final void setp_gt_s32(Register a, int s32, int p) { + emitString("setp.gt.s32" + " " + "%p" + p + ", %r" + a.encoding() + ", " + s32 + ";" + ""); } - 
public final void setp_ge_s32(Register a, int s32) { - emitString("setp.ge.s32" + " " + "%p" + ", %r" + a.encoding() + ", " + s32 + ";" + ""); + public final void setp_ge_s32(Register a, int s32, int p) { + emitString("setp.ge.s32" + " " + "%p" + p + ", %r" + a.encoding() + ", " + s32 + ";" + ""); } - public final void setp_eq_s32(int s32, Register b) { - emitString("setp.eq.s32" + " " + "%p" + ", " + s32 + ", %r" + b.encoding() + ";" + ""); + public final void setp_eq_s32(int s32, Register b, int p) { + emitString("setp.eq.s32" + " " + "%p" + p + ", " + s32 + ", %r" + b.encoding() + ";" + ""); } - public final void setp_ne_s32(int s32, Register b) { - emitString("setp.ne.s32" + " " + "%p" + ", " + s32 + ", %r" + b.encoding() + ";" + ""); + public final void setp_ne_s32(int s32, Register b, int p) { + emitString("setp.ne.s32" + " " + "%p" + p + ", " + s32 + ", %r" + b.encoding() + ";" + ""); } - public final void setp_lt_s32(int s32, Register b) { - emitString("setp.lt.s32" + " " + "%p" + ", " + s32 + ", %r" + b.encoding() + ";" + ""); + public final void setp_lt_s32(int s32, Register b, int p) { + emitString("setp.lt.s32" + " " + "%p" + p + ", " + s32 + ", %r" + b.encoding() + ";" + ""); } - public final void setp_le_s32(int s32, Register b) { - emitString("setp.le.s32" + " " + "%p" + ", " + s32 + ", %r" + b.encoding() + ";" + ""); + public final void setp_le_s32(int s32, Register b, int p) { + emitString("setp.le.s32" + " " + "%p" + p + ", " + s32 + ", %r" + b.encoding() + ";" + ""); } - public final void setp_gt_s32(int s32, Register b) { - emitString("setp.gt.s32" + " " + "%p" + ", " + s32 + ", %r" + b.encoding() + ";" + ""); + public final void setp_gt_s32(int s32, Register b, int p) { + emitString("setp.gt.s32" + " " + "%p" + p + ", " + s32 + ", %r" + b.encoding() + ";" + ""); } - public final void setp_ge_s32(int s32, Register b) { - emitString("setp.ge.s32" + " " + "%p" + ", " + s32 + ", %r" + b.encoding() + ";" + ""); + public final void setp_ge_s32(int 
s32, Register b, int p) { + emitString("setp.ge.s32" + " " + "%p" + p + ", " + s32 + ", %r" + b.encoding() + ";" + ""); } - public final void setp_eq_u32(Register a, Register b) { - emitString("setp.eq.u32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void setp_eq_u32(Register a, Register b, int p) { + emitString("setp.eq.u32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void setp_ne_u32(Register a, Register b) { - emitString("setp.ne.u32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void setp_ne_u32(Register a, Register b, int p) { + emitString("setp.ne.u32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void setp_lt_u32(Register a, Register b) { - emitString("setp.lt.u32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void setp_lt_u32(Register a, Register b, int p) { + emitString("setp.lt.u32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void setp_le_u32(Register a, Register b) { - emitString("setp.le.u32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void setp_le_u32(Register a, Register b, int p) { + emitString("setp.le.u32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void setp_gt_u32(Register a, Register b) { - emitString("setp.gt.u32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void setp_gt_u32(Register a, Register b, int p) { + emitString("setp.gt.u32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void setp_ge_u32(Register a, Register b) { - emitString("setp.ge.u32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void setp_ge_u32(Register a, Register b, int p) 
{ + emitString("setp.ge.u32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void setp_eq_u32(Register a, int u32) { - emitString("setp.eq.u32" + " " + "%p" + ", %r" + a.encoding() + ", " + u32 + ";" + ""); + public final void setp_eq_u32(Register a, int u32, int p) { + emitString("setp.eq.u32" + " " + "%p" + p + ", %r" + a.encoding() + ", " + u32 + ";" + ""); } - public final void setp_ne_u32(Register a, int u32) { - emitString("setp.ne.u32" + " " + "%p" + ", %r" + a.encoding() + ", " + u32 + ";" + ""); + public final void setp_ne_u32(Register a, int u32, int p) { + emitString("setp.ne.u32" + " " + "%p" + p + ", %r" + a.encoding() + ", " + u32 + ";" + ""); } - public final void setp_lt_u32(Register a, int u32) { - emitString("setp.lt.u32" + " " + "%p" + ", %r" + a.encoding() + ", " + u32 + ";" + ""); + public final void setp_lt_u32(Register a, int u32, int p) { + emitString("setp.lt.u32" + " " + "%p" + p + ", %r" + a.encoding() + ", " + u32 + ";" + ""); } - public final void setp_le_u32(Register a, int u32) { - emitString("setp.le.u32" + " " + "%p" + ", %r" + a.encoding() + ", " + u32 + ";" + ""); + public final void setp_le_u32(Register a, int u32, int p) { + emitString("setp.le.u32" + " " + "%p" + p + ", %r" + a.encoding() + ", " + u32 + ";" + ""); } - public final void setp_gt_u32(Register a, int u32) { - emitString("setp.gt.u32" + " " + "%p" + ", %r" + a.encoding() + ", " + u32 + ";" + ""); + public final void setp_gt_u32(Register a, int u32, int p) { + emitString("setp.gt.u32" + " " + "%p" + p + ", %r" + a.encoding() + ", " + u32 + ";" + ""); } - public final void setp_ge_u32(Register a, int u32) { - emitString("setp.ge.u32" + " " + "%p" + ", %r" + a.encoding() + ", " + u32 + ";" + ""); + public final void setp_ge_u32(Register a, int u32, int p) { + emitString("setp.ge.u32" + " " + "%p" + p + ", %r" + a.encoding() + ", " + u32 + ";" + ""); } - public final void setp_eq_u32(int u32, Register b) { - 
emitString("setp.eq.u32" + " " + "%p" + ", " + u32 + ", %r" + b.encoding() + ";" + ""); + public final void setp_eq_u32(int u32, Register b, int p) { + emitString("setp.eq.u32" + " " + "%p" + p + ", " + u32 + ", %r" + b.encoding() + ";" + ""); } - public final void setp_ne_u32(int u32, Register b) { - emitString("setp.ne.u32" + " " + "%p" + ", " + u32 + ", %r" + b.encoding() + ";" + ""); + public final void setp_ne_u32(int u32, Register b, int p) { + emitString("setp.ne.u32" + " " + "%p" + p + ", " + u32 + ", %r" + b.encoding() + ";" + ""); } - public final void setp_lt_u32(int u32, Register b) { - emitString("setp.lt.u32" + " " + "%p" + ", " + u32 + ", %r" + b.encoding() + ";" + ""); + public final void setp_lt_u32(int u32, Register b, int p) { + emitString("setp.lt.u32" + " " + "%p" + p + ", " + u32 + ", %r" + b.encoding() + ";" + ""); } - public final void setp_le_u32(int u32, Register b) { - emitString("setp.le.u32" + " " + "%p" + ", " + u32 + ", %r" + b.encoding() + ";" + ""); + public final void setp_le_u32(int u32, Register b, int p) { + emitString("setp.le.u32" + " " + "%p" + p + ", " + u32 + ", %r" + b.encoding() + ";" + ""); } - public final void setp_gt_u32(int u32, Register b) { - emitString("setp.gt.u32" + " " + "%p" + ", " + u32 + ", %r" + b.encoding() + ";" + ""); + public final void setp_gt_u32(int u32, Register b, int p) { + emitString("setp.gt.u32" + " " + "%p" + p + ", " + u32 + ", %r" + b.encoding() + ";" + ""); } - public final void setp_ge_u32(int u32, Register b) { - emitString("setp.ge.u32" + " " + "%p" + ", " + u32 + ", %r" + b.encoding() + ";" + ""); + public final void setp_ge_u32(int u32, Register b, int p) { + emitString("setp.ge.u32" + " " + "%p" + p + ", " + u32 + ", %r" + b.encoding() + ";" + ""); } // Store in global state space diff -r 4eec2ac671c2 -r 03fe11f5f186 graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java --- 
a/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java Tue Sep 17 18:36:54 2013 -0700 +++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java Tue Sep 17 23:35:42 2013 -0400 @@ -81,7 +81,7 @@ @Override public TargetMethodAssembler newAssembler(LIRGenerator lirGen, CompilationResult compilationResult) { - // Omit the frame if the method: + // Omit the frame of the method: // - has no spill slots or other slots allocated during register allocation // - has no callee-saved registers // - has no incoming arguments passed on the stack @@ -90,14 +90,13 @@ AbstractAssembler masm = createAssembler(frameMap); HotSpotFrameContext frameContext = new HotSpotFrameContext(); TargetMethodAssembler tasm = new PTXTargetMethodAssembler(target, runtime(), frameMap, masm, frameContext, compilationResult); - tasm.setFrameSize(frameMap.frameSize()); + tasm.setFrameSize(0); return tasm; } - private static void emitKernelEntry(TargetMethodAssembler tasm, LIRGenerator lirGen, - ResolvedJavaMethod codeCacheOwner) { + private static void emitKernelEntry(TargetMethodAssembler tasm, LIRGenerator lirGen, ResolvedJavaMethod codeCacheOwner) { // Emit PTX kernel entry text based on PTXParameterOp - // instructions in the start block. Remove the instructions + // instructions in the start block. Remove the instructions // once kernel entry text and directives are emitted to // facilitate seemless PTX code generation subsequently. 
assert codeCacheOwner != null : lirGen.getGraph() + " is not associated with a method"; @@ -154,23 +153,39 @@ RegisterValue regVal = (RegisterValue) value; Kind regKind = regVal.getKind(); switch (regKind) { - case Int: - signed32.add(regVal.getRegister().encoding()); - break; - case Long: - signed64.add(regVal.getRegister().encoding()); - break; - case Float: - float32.add(regVal.getRegister().encoding()); - break; - case Double: - float64.add(regVal.getRegister().encoding()); - break; - case Object: - signed64.add(regVal.getRegister().encoding()); - break; - default : - throw GraalInternalError.shouldNotReachHere("unhandled register type " + value.toString()); + case Int: + // If the register was used as a wider signed type + // do not add it here + if (!signed64.contains(regVal.getRegister().encoding())) { + signed32.add(regVal.getRegister().encoding()); + } + break; + case Long: + case Object: + // If the register was used as a narrower signed type + // remove it from there and add it to wider type. + if (signed32.contains(regVal.getRegister().encoding())) { + signed32.remove(regVal.getRegister().encoding()); + } + signed64.add(regVal.getRegister().encoding()); + break; + case Float: + // If the register was used as a wider floating-point type + // do not add it here + if (!float64.contains(regVal.getRegister().encoding())) { + float32.add(regVal.getRegister().encoding()); + } + break; + case Double: + // If the register was used as a narrower floating-point type + // remove it from there and add it to wider type. 
+ if (float32.contains(regVal.getRegister().encoding())) { + float32.remove(regVal.getRegister().encoding()); + } + float64.add(regVal.getRegister().encoding()); + break; + default: + throw GraalInternalError.shouldNotReachHere("unhandled register type " + value.toString()); } } return value; @@ -200,6 +215,11 @@ for (Integer i : float64) { codeBuffer.emitString(".reg .f64 %r" + i.intValue() + ";"); } + // emit predicate register declaration + int maxPredRegNum = ((PTXLIRGenerator) lirGen).getNextPredRegNumber(); + if (maxPredRegNum > 0) { + codeBuffer.emitString(".reg .pred %p<" + maxPredRegNum + ">;"); + } } @Override diff -r 4eec2ac671c2 -r 03fe11f5f186 graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXLIRGenerator.java --- a/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXLIRGenerator.java Tue Sep 17 18:36:54 2013 -0700 +++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXLIRGenerator.java Tue Sep 17 23:35:42 2013 -0400 @@ -45,8 +45,6 @@ import com.oracle.graal.lir.ptx.PTXArithmetic.Unary2Op; import com.oracle.graal.lir.ptx.PTXCompare.CompareOp; import com.oracle.graal.lir.ptx.PTXControlFlow.BranchOp; -import com.oracle.graal.lir.ptx.PTXControlFlow.CondMoveOp; -import com.oracle.graal.lir.ptx.PTXControlFlow.FloatCondMoveOp; import com.oracle.graal.lir.ptx.PTXControlFlow.ReturnOp; import com.oracle.graal.lir.ptx.PTXControlFlow.ReturnNoValOp; import com.oracle.graal.lir.ptx.PTXControlFlow.SequentialSwitchOp; @@ -68,6 +66,11 @@ */ public class PTXLIRGenerator extends LIRGenerator { + // Number of the predicate register that can be used when needed. + // This value will be recorded and incremented in the LIR instruction + // that sets a predicate register. 
(e.g., CompareOp) + private int nextPredRegNum; + public static final ForeignCallDescriptor ARITHMETIC_FREM = new ForeignCallDescriptor("arithmeticFrem", float.class, float.class, float.class); public static final ForeignCallDescriptor ARITHMETIC_DREM = new ForeignCallDescriptor("arithmeticDrem", double.class, double.class, double.class); @@ -82,6 +85,11 @@ public PTXLIRGenerator(StructuredGraph graph, CodeCacheProvider runtime, TargetDescription target, FrameMap frameMap, CallingConvention cc, LIR lir) { super(graph, runtime, target, frameMap, cc, lir); lir.spillMoveFactory = new PTXSpillMoveFactory(); + nextPredRegNum = 0; + } + + public int getNextPredRegNumber() { + return nextPredRegNum; } @Override @@ -231,24 +239,24 @@ public void emitCompareBranch(Value left, Value right, Condition cond, boolean unorderedIsTrue, LabelRef label) { switch (left.getKind().getStackKind()) { case Int: - append(new CompareOp(ICMP, cond, left, right)); - append(new BranchOp(cond, label)); + append(new CompareOp(ICMP, cond, left, right, nextPredRegNum)); + append(new BranchOp(cond, label, nextPredRegNum++)); break; case Long: - append(new CompareOp(LCMP, cond, left, right)); - append(new BranchOp(cond, label)); + append(new CompareOp(LCMP, cond, left, right, nextPredRegNum)); + append(new BranchOp(cond, label, nextPredRegNum++)); break; case Float: - append(new CompareOp(FCMP, cond, left, right)); - append(new BranchOp(cond, label)); + append(new CompareOp(FCMP, cond, left, right, nextPredRegNum)); + append(new BranchOp(cond, label, nextPredRegNum++)); break; case Double: - append(new CompareOp(DCMP, cond, left, right)); - append(new BranchOp(cond, label)); + append(new CompareOp(DCMP, cond, left, right, nextPredRegNum)); + append(new BranchOp(cond, label, nextPredRegNum++)); break; case Object: - append(new CompareOp(ACMP, cond, left, right)); - append(new BranchOp(cond, label)); + append(new CompareOp(ACMP, cond, left, right, nextPredRegNum)); + append(new BranchOp(cond, label, 
nextPredRegNum++)); break; default: throw GraalInternalError.shouldNotReachHere("" + left.getKind()); @@ -267,69 +275,12 @@ @Override public Variable emitConditionalMove(Value left, Value right, Condition cond, boolean unorderedIsTrue, Value trueValue, Value falseValue) { - boolean mirrored = emitCompare(cond, left, right); - Condition finalCondition = mirrored ? cond.mirror() : cond; - - Variable result = newVariable(trueValue.getKind()); - switch (left.getKind().getStackKind()) { - case Int: - case Long: - case Object: - append(new CondMoveOp(result, finalCondition, load(trueValue), loadNonConst(falseValue))); - break; - case Float: - case Double: - append(new FloatCondMoveOp(result, finalCondition, unorderedIsTrue, load(trueValue), load(falseValue))); - break; - default: - throw GraalInternalError.shouldNotReachHere("missing: " + left.getKind()); - } - return result; + // TODO: There is no conventional conditional move instruction in PTX. + // So, this method is changed to throw NYI exception. + // To be revisited if this needs to be really implemented. + throw new InternalError("NYI"); } - /** - * This method emits the compare instruction, and may reorder the operands. It returns true if - * it did so. 
- * - * - * @param a the left operand of the comparison - * @param b the right operand of the comparison - * @return true if the left and right operands were switched, false otherwise - */ - private boolean emitCompare(Condition cond, Value a, Value b) { - Variable left; - Value right; - boolean mirrored; - if (LIRValueUtil.isVariable(b)) { - left = load(b); - right = loadNonConst(a); - mirrored = true; - } else { - left = load(a); - right = loadNonConst(b); - mirrored = false; - } - switch (left.getKind().getStackKind()) { - case Int: - append(new CompareOp(ICMP, cond, left, right)); - break; - case Long: - append(new CompareOp(LCMP, cond, left, right)); - break; - case Object: - append(new CompareOp(ACMP, cond, left, right)); - break; - case Float: - append(new CompareOp(FCMP, cond, left, right)); - break; - case Double: - append(new CompareOp(DCMP, cond, left, right)); - break; - default: - throw GraalInternalError.shouldNotReachHere(); - } - return mirrored; - } @Override public Variable emitIntegerTestMove(Value left, Value right, Value trueValue, Value falseValue) { @@ -755,10 +706,10 @@ // Making a copy of the switch value is necessary because jump table destroys the input // value if (key.getKind() == Kind.Int || key.getKind() == Kind.Long) { - append(new SequentialSwitchOp(keyConstants, keyTargets, defaultTarget, key, Value.ILLEGAL)); + append(new SequentialSwitchOp(keyConstants, keyTargets, defaultTarget, key, Value.ILLEGAL, nextPredRegNum++)); } else { assert key.getKind() == Kind.Object : key.getKind(); - append(new SequentialSwitchOp(keyConstants, keyTargets, defaultTarget, key, newVariable(Kind.Object))); + append(new SequentialSwitchOp(keyConstants, keyTargets, defaultTarget, key, newVariable(Kind.Object), nextPredRegNum++)); } } @@ -772,7 +723,7 @@ // Making a copy of the switch value is necessary because jump table destroys the input // value Variable tmp = emitMove(key); - append(new TableSwitchOp(lowKey, defaultTarget, targets, tmp, 
newVariable(target.wordKind))); + append(new TableSwitchOp(lowKey, defaultTarget, targets, tmp, newVariable(target.wordKind), nextPredRegNum++)); } @Override diff -r 4eec2ac671c2 -r 03fe11f5f186 graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotRegisterConfig.java --- a/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotRegisterConfig.java Tue Sep 17 18:36:54 2013 -0700 +++ b/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotRegisterConfig.java Tue Sep 17 23:35:42 2013 -0400 @@ -74,11 +74,8 @@ private static Register[] initAllocatable() { Register[] registers = new Register[] { - param0, param1, param2, param3, - param4, param5, param6, param7, r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15, - // retReg, }; return registers; diff -r 4eec2ac671c2 -r 03fe11f5f186 graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotRuntime.java --- a/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotRuntime.java Tue Sep 17 18:36:54 2013 -0700 +++ b/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotRuntime.java Tue Sep 17 23:35:42 2013 -0400 @@ -28,6 +28,8 @@ import com.oracle.graal.hotspot.*; import com.oracle.graal.hotspot.meta.*; import com.oracle.graal.nodes.spi.*; +import com.oracle.graal.graph.Node; +import com.oracle.graal.nodes.calc.ConvertNode; public class PTXHotSpotRuntime extends HotSpotRuntime { @@ -37,6 +39,20 @@ } @Override + public void lower(Node n, LoweringTool tool) { + if (n instanceof ConvertNode) { + // PTX has a cvt instruction that "takes a variety of + // operand types and sizes, as its job is to convert from + // nearly any data type to any other data type (and + // size)." [Section 6.2 of PTX ISA manual] + // So, there is no need to lower the operation. 
+ return; + } else { + super.lower(n, tool); + } + } + + @Override public void registerReplacements(Replacements replacements) { //TODO: Do we need to implement this functionality for PTX? } diff -r 4eec2ac671c2 -r 03fe11f5f186 graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXCompare.java --- a/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXCompare.java Tue Sep 17 18:36:54 2013 -0700 +++ b/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXCompare.java Tue Sep 17 23:35:42 2013 -0400 @@ -41,18 +41,21 @@ @Opcode private final PTXCompare opcode; @Use({REG, STACK, CONST}) protected Value x; @Use({REG, STACK, CONST}) protected Value y; + // Number of predicate register that would be set by this instruction. + protected int predRegNum; private final Condition condition; - public CompareOp(PTXCompare opcode, Condition condition, Value x, Value y) { + public CompareOp(PTXCompare opcode, Condition condition, Value x, Value y, int predReg) { this.opcode = opcode; this.condition = condition; this.x = x; this.y = y; + predRegNum = predReg; } @Override public void emitCode(TargetMethodAssembler tasm, PTXAssembler masm) { - emit(tasm, masm, opcode, condition, x, y); + emit(tasm, masm, opcode, condition, x, y, predRegNum); } @Override @@ -64,17 +67,19 @@ } } - public static void emit(TargetMethodAssembler tasm, PTXAssembler masm, PTXCompare opcode, Condition condition, Value x, Value y) { + public static void emit(TargetMethodAssembler tasm, PTXAssembler masm, + PTXCompare opcode, Condition condition, + Value x, Value y, int p) { if (isConstant(x)) { switch (opcode) { case ICMP: - emitCompareConstReg(masm, condition, tasm.asIntConst(x), asIntReg(y)); + emitCompareConstReg(masm, condition, tasm.asIntConst(x), asIntReg(y), p); break; case FCMP: - emitCompareConstReg(masm, condition, tasm.asFloatConst(x), asFloatReg(y)); + emitCompareConstReg(masm, condition, tasm.asFloatConst(x), asFloatReg(y), p); break; case DCMP: - 
emitCompareConstReg(masm, condition, tasm.asDoubleConst(x), asDoubleReg(y)); + emitCompareConstReg(masm, condition, tasm.asDoubleConst(x), asDoubleReg(y), p); break; default: throw GraalInternalError.shouldNotReachHere(); @@ -84,16 +89,16 @@ int b = tasm.asIntConst(y); switch (opcode) { case ICMP: - emitCompareRegConst(masm, condition, a, b); + emitCompareRegConst(masm, condition, a, b, p); break; case ACMP: if (((Constant) y).isNull()) { switch (condition) { case EQ: - masm.setp_eq_s32(a, b); + masm.setp_eq_s32(a, b, p); break; case NE: - masm.setp_ne_s32(a, b); + masm.setp_ne_s32(a, b, p); break; default: throw GraalInternalError.shouldNotReachHere(); @@ -108,16 +113,16 @@ } else { switch (opcode) { case ICMP: - emitCompareRegReg(masm, condition, asIntReg(x), asIntReg(y)); + emitCompareRegReg(masm, condition, asIntReg(x), asIntReg(y), p); break; case LCMP: - emitCompareRegReg(masm, condition, asLongReg(x), asLongReg(y)); + emitCompareRegReg(masm, condition, asLongReg(x), asLongReg(y), p); break; case FCMP: - emitCompareRegReg(masm, condition, asFloatReg(x), asFloatReg(y)); + emitCompareRegReg(masm, condition, asFloatReg(x), asFloatReg(y), p); break; case DCMP: - emitCompareRegReg(masm, condition, asDoubleReg(x), asDoubleReg(y)); + emitCompareRegReg(masm, condition, asDoubleReg(x), asDoubleReg(y), p); break; default: throw GraalInternalError.shouldNotReachHere("missing: " + opcode); @@ -125,161 +130,161 @@ } } - private static void emitCompareConstReg(PTXAssembler masm, Condition condition, float a, Register b) { + private static void emitCompareConstReg(PTXAssembler masm, Condition condition, float a, Register b, int p) { switch (condition) { case EQ: - masm.setp_eq_f32(a, b); + masm.setp_eq_f32(a, b, p); break; case NE: - masm.setp_ne_f32(a, b); + masm.setp_ne_f32(a, b, p); break; case LT: - masm.setp_lt_f32(a, b); + masm.setp_lt_f32(a, b, p); break; case LE: - masm.setp_le_f32(a, b); + masm.setp_le_f32(a, b, p); break; case GT: - masm.setp_gt_f32(a, b); + 
masm.setp_gt_f32(a, b, p); break; case GE: - masm.setp_ge_f32(a, b); + masm.setp_ge_f32(a, b, p); break; default: throw GraalInternalError.shouldNotReachHere(); } } - private static void emitCompareConstReg(PTXAssembler masm, Condition condition, double a, Register b) { + private static void emitCompareConstReg(PTXAssembler masm, Condition condition, double a, Register b, int p) { switch (condition) { case EQ: - masm.setp_eq_f64(a, b); + masm.setp_eq_f64(a, b, p); break; case NE: - masm.setp_ne_f64(a, b); + masm.setp_ne_f64(a, b, p); break; case LT: - masm.setp_lt_f64(a, b); + masm.setp_lt_f64(a, b, p); break; case LE: - masm.setp_le_f64(a, b); + masm.setp_le_f64(a, b, p); break; case GT: - masm.setp_gt_f64(a, b); + masm.setp_gt_f64(a, b, p); break; case GE: - masm.setp_ge_f64(a, b); + masm.setp_ge_f64(a, b, p); break; default: throw GraalInternalError.shouldNotReachHere(); } } - private static void emitCompareConstReg(PTXAssembler masm, Condition condition, int a, Register b) { + private static void emitCompareConstReg(PTXAssembler masm, Condition condition, int a, Register b, int p) { switch (condition) { case EQ: - masm.setp_eq_s32(a, b); + masm.setp_eq_s32(a, b, p); break; case NE: - masm.setp_ne_s32(a, b); + masm.setp_ne_s32(a, b, p); break; case LT: - masm.setp_lt_s32(a, b); + masm.setp_lt_s32(a, b, p); break; case LE: - masm.setp_le_s32(a, b); + masm.setp_le_s32(a, b, p); break; case GT: - masm.setp_gt_s32(a, b); + masm.setp_gt_s32(a, b, p); break; case GE: - masm.setp_ge_s32(a, b); + masm.setp_ge_s32(a, b, p); break; case AT: - masm.setp_gt_u32(a, b); + masm.setp_gt_u32(a, b, p); break; case AE: - masm.setp_ge_u32(a, b); + masm.setp_ge_u32(a, b, p); break; case BT: - masm.setp_lt_u32(a, b); + masm.setp_lt_u32(a, b, p); break; case BE: - masm.setp_le_u32(a, b); + masm.setp_le_u32(a, b, p); break; default: throw GraalInternalError.shouldNotReachHere(); } } - private static void emitCompareRegConst(PTXAssembler masm, Condition condition, Register a, int b) { + 
private static void emitCompareRegConst(PTXAssembler masm, Condition condition, Register a, int b, int p) { switch (condition) { case EQ: - masm.setp_eq_s32(a, b); + masm.setp_eq_s32(a, b, p); break; case NE: - masm.setp_ne_s32(a, b); + masm.setp_ne_s32(a, b, p); break; case LT: - masm.setp_lt_s32(a, b); + masm.setp_lt_s32(a, b, p); break; case LE: - masm.setp_le_s32(a, b); + masm.setp_le_s32(a, b, p); break; case GT: - masm.setp_gt_s32(a, b); + masm.setp_gt_s32(a, b, p); break; case GE: - masm.setp_ge_s32(a, b); + masm.setp_ge_s32(a, b, p); break; case AT: - masm.setp_gt_u32(a, b); + masm.setp_gt_u32(a, b, p); break; case AE: - masm.setp_ge_u32(a, b); + masm.setp_ge_u32(a, b, p); break; case BT: - masm.setp_lt_u32(a, b); + masm.setp_lt_u32(a, b, p); break; case BE: - masm.setp_le_u32(a, b); + masm.setp_le_u32(a, b, p); break; default: throw GraalInternalError.shouldNotReachHere(); } } - private static void emitCompareRegReg(PTXAssembler masm, Condition condition, Register a, Register b) { + private static void emitCompareRegReg(PTXAssembler masm, Condition condition, Register a, Register b, int p) { switch (condition) { case EQ: - masm.setp_eq_s32(a, b); + masm.setp_eq_s32(a, b, p); break; case NE: - masm.setp_ne_s32(a, b); + masm.setp_ne_s32(a, b, p); break; case LT: - masm.setp_lt_s32(a, b); + masm.setp_lt_s32(a, b, p); break; case LE: - masm.setp_le_s32(a, b); + masm.setp_le_s32(a, b, p); break; case GT: - masm.setp_gt_s32(a, b); + masm.setp_gt_s32(a, b, p); break; case GE: - masm.setp_ge_s32(a, b); + masm.setp_ge_s32(a, b, p); break; case AT: - masm.setp_gt_u32(a, b); + masm.setp_gt_u32(a, b, p); break; case AE: - masm.setp_ge_u32(a, b); + masm.setp_ge_u32(a, b, p); break; case BT: - masm.setp_lt_u32(a, b); + masm.setp_lt_u32(a, b, p); break; case BE: - masm.setp_le_u32(a, b); + masm.setp_le_u32(a, b, p); break; default: throw GraalInternalError.shouldNotReachHere(); diff -r 4eec2ac671c2 -r 03fe11f5f186 
graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXControlFlow.java --- a/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXControlFlow.java Tue Sep 17 18:36:54 2013 -0700 +++ b/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXControlFlow.java Tue Sep 17 23:35:42 2013 -0400 @@ -72,16 +72,17 @@ protected Condition condition; protected LabelRef destination; + protected int predRegNum; - public BranchOp(Condition condition, LabelRef destination) { + public BranchOp(Condition condition, LabelRef destination, int predReg) { this.condition = condition; this.destination = destination; + this.predRegNum = predReg; } @Override public void emitCode(TargetMethodAssembler tasm, PTXAssembler masm) { - masm.at(); - masm.bra(masm.nameOf(destination.label())); + masm.bra(masm.nameOf(destination.label()), predRegNum); } @Override @@ -151,14 +152,17 @@ private LabelRef defaultTarget; @Alive({REG}) protected Value key; @Temp({REG, ILLEGAL}) protected Value scratch; + // Number of predicate register that would be set by this instruction. 
+ protected int predRegNum; - public SequentialSwitchOp(Constant[] keyConstants, LabelRef[] keyTargets, LabelRef defaultTarget, Value key, Value scratch) { + public SequentialSwitchOp(Constant[] keyConstants, LabelRef[] keyTargets, LabelRef defaultTarget, Value key, Value scratch, int predReg) { assert keyConstants.length == keyTargets.length; this.keyConstants = keyConstants; this.keyTargets = keyTargets; this.defaultTarget = defaultTarget; this.key = key; this.scratch = scratch; + predRegNum = predReg; } @Override @@ -171,25 +175,22 @@ } long lc = keyConstants[i].asLong(); assert NumUtil.isInt(lc); - masm.setp_eq_s32((int) lc, intKey); - masm.at(); - masm.bra(masm.nameOf(keyTargets[i].label())); + masm.setp_eq_s32((int) lc, intKey, predRegNum); + masm.bra(masm.nameOf(keyTargets[i].label()), predRegNum); } } else if (key.getKind() == Kind.Long) { Register longKey = asLongReg(key); for (int i = 0; i < keyConstants.length; i++) { - masm.setp_eq_s64(tasm.asLongConst(keyConstants[i]), longKey); - masm.at(); - masm.bra(masm.nameOf(keyTargets[i].label())); + masm.setp_eq_s64(tasm.asLongConst(keyConstants[i]), longKey, predRegNum); + masm.bra(masm.nameOf(keyTargets[i].label()), predRegNum); } } else if (key.getKind() == Kind.Object) { Register intKey = asObjectReg(key); Register temp = asObjectReg(scratch); for (int i = 0; i < keyConstants.length; i++) { PTXMove.move(tasm, masm, temp.asValue(Kind.Object), keyConstants[i]); - masm.setp_eq_u32(intKey, temp); - masm.at(); - masm.bra(keyTargets[i].label().toString()); + masm.setp_eq_u32(intKey, temp, predRegNum); + masm.bra(keyTargets[i].label().toString(), predRegNum); } } else { throw new GraalInternalError("sequential switch only supported for int, long and object"); @@ -219,38 +220,40 @@ private final LabelRef[] targets; @Alive protected Value index; @Temp protected Value scratch; + // Number of predicate register that would be set by this instruction. 
+ protected int predRegNum; - public TableSwitchOp(final int lowKey, final LabelRef defaultTarget, final LabelRef[] targets, Variable index, Variable scratch) { + public TableSwitchOp(final int lowKey, final LabelRef defaultTarget, final LabelRef[] targets, Variable index, Variable scratch, int predReg) { this.lowKey = lowKey; this.defaultTarget = defaultTarget; this.targets = targets; this.index = index; this.scratch = scratch; + predRegNum = predReg; } @Override public void emitCode(TargetMethodAssembler tasm, PTXAssembler masm) { - tableswitch(tasm, masm, lowKey, defaultTarget, targets, asIntReg(index), asLongReg(scratch)); + tableswitch(tasm, masm, lowKey, defaultTarget, targets, asIntReg(index), asLongReg(scratch), predRegNum); } } @SuppressWarnings("unused") - private static void tableswitch(TargetMethodAssembler tasm, PTXAssembler masm, int lowKey, LabelRef defaultTarget, LabelRef[] targets, Register value, Register scratch) { + private static void tableswitch(TargetMethodAssembler tasm, PTXAssembler masm, int lowKey, LabelRef defaultTarget, LabelRef[] targets, Register value, Register scratch, int predNum) { Buffer buf = masm.codeBuffer; // Compare index against jump table bounds int highKey = lowKey + targets.length - 1; if (lowKey != 0) { // subtract the low value from the switch value // new Sub(value, value, lowKey).emit(masm); - masm.setp_gt_s32(value, highKey - lowKey); + masm.setp_gt_s32(value, highKey - lowKey, predNum); } else { - masm.setp_gt_s32(value, highKey); + masm.setp_gt_s32(value, highKey, predNum); } // Jump to default target if index is not within the jump table if (defaultTarget != null) { - masm.at(); - masm.bra(defaultTarget.label().toString()); + masm.bra(defaultTarget.label().toString(), predNum); } // address of jump table diff -r 4eec2ac671c2 -r 03fe11f5f186 graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMemOp.java --- a/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMemOp.java Tue Sep 17 18:36:54 
2013 -0700 +++ b/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMemOp.java Tue Sep 17 23:35:42 2013 -0400 @@ -34,7 +34,7 @@ public class PTXMemOp { // Load operation from .global state space - @Opcode("LOAD") + @Opcode("LOAD_REGBASE_DISP") public static class LoadOp extends PTXLIRInstruction { private final Kind kind; @@ -132,7 +132,7 @@ } // Load operation from .param state space - @Opcode("LOAD") + @Opcode("LOAD_PARAM") public static class LoadParamOp extends PTXLIRInstruction { private final Kind kind; @@ -240,7 +240,6 @@ public void emitCode(TargetMethodAssembler tasm, PTXAssembler masm) { assert isRegister(input); PTXAddress addr = address.toAddress(); - // masm.st_global_return_value_s64(addr.getBase(), addr.getDisplacement(), asRegister(input)); switch (kind) { case Byte: diff -r 4eec2ac671c2 -r 03fe11f5f186 graal/com.oracle.graal.ptx/src/com/oracle/graal/ptx/PTX.java --- a/graal/com.oracle.graal.ptx/src/com/oracle/graal/ptx/PTX.java Tue Sep 17 18:36:54 2013 -0700 +++ b/graal/com.oracle.graal.ptx/src/com/oracle/graal/ptx/PTX.java Tue Sep 17 23:35:42 2013 -0400 @@ -42,30 +42,6 @@ // @formatter:off - /* Parameter State Space - * - * The parameter (.param) state space is used (1) to pass input - * arguments from the host to the kernel, (2a) to declare formal - * input and return parameters for device functions called from - * within kernel execution, and (2b) to declare locally-scoped - * byte array variables that serve as function call arguments, - * typically for passing large structures by value to a function. - * - * TODO: XXX - * The parameters are virtual symbols - just like registers. Bit, - * Till we figure out how to model a virtual register set in Graal, - * we will pretend that we can use only 8 parameters. 
- */ - - public static final Register param0 = new Register(0, 0, "param0", PARAM); - public static final Register param1 = new Register(1, 1, "param1", PARAM); - public static final Register param2 = new Register(2, 2, "param2", PARAM); - public static final Register param3 = new Register(3, 3, "param3", PARAM); - public static final Register param4 = new Register(4, 4, "param4", PARAM); - public static final Register param5 = new Register(5, 5, "param5", PARAM); - public static final Register param6 = new Register(6, 6, "param6", PARAM); - public static final Register param7 = new Register(7, 7, "param7", PARAM); - /* * Register State Space * @@ -85,36 +61,60 @@ * will pretend that we can use only 16 registers. */ - public static final Register r0 = new Register(8, 8, "r0", REG); - public static final Register r1 = new Register(9, 9, "r1", REG); - public static final Register r2 = new Register(10, 10, "r2", REG); - public static final Register r3 = new Register(11, 11, "r3", REG); - public static final Register r4 = new Register(12, 12, "r4", REG); - public static final Register r5 = new Register(13, 13, "r5", REG); - public static final Register r6 = new Register(14, 14, "r6", REG); - public static final Register r7 = new Register(15, 15, "r7", REG); + public static final Register r0 = new Register(0, 0, "r0", REG); + public static final Register r1 = new Register(1, 1, "r1", REG); + public static final Register r2 = new Register(2, 2, "r2", REG); + public static final Register r3 = new Register(3, 3, "r3", REG); + public static final Register r4 = new Register(4, 4, "r4", REG); + public static final Register r5 = new Register(5, 5, "r5", REG); + public static final Register r6 = new Register(6, 6, "r6", REG); + public static final Register r7 = new Register(7, 7, "r7", REG); - public static final Register r8 = new Register(16, 16, "r8", REG); - public static final Register r9 = new Register(17, 17, "r9", REG); - public static final Register r10 = new 
Register(18, 18, "r10", REG); - public static final Register r11 = new Register(19, 19, "r11", REG); - public static final Register r12 = new Register(20, 20, "r12", REG); - public static final Register r13 = new Register(21, 21, "r13", REG); - public static final Register r14 = new Register(22, 22, "r14", REG); - public static final Register r15 = new Register(23, 23, "r15", REG); - - // Define a virtual register that holds return value - public static final Register retReg = new Register(24, 24, "retReg", REG); + public static final Register r8 = new Register(8, 8, "r8", REG); + public static final Register r9 = new Register(9, 9, "r9", REG); + public static final Register r10 = new Register(10, 10, "r10", REG); + public static final Register r11 = new Register(11, 11, "r11", REG); + public static final Register r12 = new Register(12, 12, "r12", REG); + public static final Register r13 = new Register(13, 13, "r13", REG); + public static final Register r14 = new Register(14, 14, "r14", REG); + public static final Register r15 = new Register(15, 15, "r15", REG); public static final Register[] gprRegisters = { r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15 }; + /* Parameter State Space + * + * The parameter (.param) state space is used (1) to pass input + * arguments from the host to the kernel, (2a) to declare formal + * input and return parameters for device functions called from + * within kernel execution, and (2b) to declare locally-scoped + * byte array variables that serve as function call arguments, + * typically for passing large structures by value to a function. + * + * TODO: XXX + * The parameters are virtual symbols - just like registers. But, + * till we figure out how to model a virtual register set in Graal, + * we will pretend that we can use only 8 parameters.
+ */ + + public static final Register param0 = new Register(16, 16, "param0", PARAM); + public static final Register param1 = new Register(17, 17, "param1", PARAM); + public static final Register param2 = new Register(18, 18, "param2", PARAM); + public static final Register param3 = new Register(19, 19, "param3", PARAM); + public static final Register param4 = new Register(20, 20, "param4", PARAM); + public static final Register param5 = new Register(21, 21, "param5", PARAM); + public static final Register param6 = new Register(22, 22, "param6", PARAM); + public static final Register param7 = new Register(23, 23, "param7", PARAM); + public static final Register[] paramRegisters = { param0, param1, param2, param3, param4, param5, param6, param7 }; + // Define a virtual register that holds return value + public static final Register retReg = new Register(24, 24, "retReg", REG); + // PTX ISA Manual: Section 9:. Special Registers // PTX includes a number of predefined, read-only variables, which diff -r 4eec2ac671c2 -r 03fe11f5f186 src/share/vm/graal/graalVMToCompiler.cpp --- a/src/share/vm/graal/graalVMToCompiler.cpp Tue Sep 17 18:36:54 2013 -0700 +++ b/src/share/vm/graal/graalVMToCompiler.cpp Tue Sep 17 23:35:42 2013 -0400 @@ -62,15 +62,13 @@ if (JNIHandles::resolve(_graalRuntimePermObject) == NULL) { #ifdef AMD64 Symbol* name = NULL; - if (UseGPU) { - // Set name to PTXHotSpotRuntime if nVidia GPU was detected. - if ((gpu::get_target_il_type() == gpu::PTX) && - gpu::is_available() && gpu::has_gpu_linkage()) { - name = vmSymbols::com_oracle_graal_hotspot_ptx_PTXHotSpotGraalRuntime(); - } - // Set name to corresponding runtime classname for other - // supported GPU runtimes, here. - } else { + // Set name to PTXHotSpotGraalRuntime if an NVIDIA GPU was detected.
+ if (UseGPU && (gpu::get_target_il_type() == gpu::PTX) && + gpu::is_available() && gpu::has_gpu_linkage()) { + name = vmSymbols::com_oracle_graal_hotspot_ptx_PTXHotSpotGraalRuntime(); + } + + if (name == NULL) { name = vmSymbols::com_oracle_graal_hotspot_amd64_AMD64HotSpotGraalRuntime(); } #endif