changeset 11699:03fe11f5f186

PTX Codegen: predicate register materialization and declaration; conditional branch generation; fix register declaration.
author S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
date Tue, 17 Sep 2013 23:35:42 -0400
parents 4eec2ac671c2
children 528ab536b403
files graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXLIRGenerator.java graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotRegisterConfig.java graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotRuntime.java graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXCompare.java graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXControlFlow.java graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMemOp.java graal/com.oracle.graal.ptx/src/com/oracle/graal/ptx/PTX.java src/share/vm/graal/graalVMToCompiler.cpp
diffstat 10 files changed, 339 insertions(+), 358 deletions(-) [+]
line wrap: on
line diff
--- a/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java	Tue Sep 17 18:36:54 2013 -0700
+++ b/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java	Tue Sep 17 23:35:42 2013 -0400
@@ -39,14 +39,6 @@
         super(target);
     }
 
-    public final void at() {
-        emitString("@%p" + " " + "");
-    }
-
-    public final void atq() {
-        emitString("@%q" + " " + "");
-    }
-
     public static class StandardFormat {
 
         protected Kind valueKind;
@@ -287,8 +279,8 @@
     }
 
     // Checkstyle: stop method name check
-    public final void bra(String tgt) {
-        emitString("bra" + " " + tgt + ";" + "");
+    /**
+     * Emits a branch to {@code tgt}. A non-negative {@code pred} guards the branch with predicate
+     * register {@code %p<pred>}; a negative {@code pred} emits the branch without a guard.
+     */
+    public final void bra(String tgt, int pred) {
+        // The conditional must be parenthesized: '+' binds tighter than '?:', so without the
+        // parentheses the "bra ..." text becomes part of the else-arm and nothing at all is
+        // emitted when pred >= 0.
+        emitString(((pred >= 0) ? ("@%p" + pred + "  ") : "") + "bra" + " " + tgt + ";" + "");
     }
 
     public final void bra_uni(String tgt) {
@@ -573,228 +565,228 @@
         emitString("ret.uni;" + " " + "");
     }
 
-    public final void setp_eq_f32(Register a, Register b) {
-        emitString("setp.eq.f32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void setp_eq_f32(Register a, Register b, int p) {
+        emitString("setp.eq.f32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_ne_f32(Register a, Register b) {
-        emitString("setp.ne.f32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void setp_ne_f32(Register a, Register b, int p) {
+        emitString("setp.ne.f32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_lt_f32(Register a, Register b) {
-        emitString("setp.lt.f32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void setp_lt_f32(Register a, Register b, int p) {
+        emitString("setp.lt.f32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_le_f32(Register a, Register b) {
-        emitString("setp.le.f32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void setp_le_f32(Register a, Register b, int p) {
+        emitString("setp.le.f32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_gt_f32(Register a, Register b) {
-        emitString("setp.gt.f32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void setp_gt_f32(Register a, Register b, int p) {
+        emitString("setp.gt.f32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_ge_f32(Register a, Register b) {
-        emitString("setp.ge.f32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void setp_ge_f32(Register a, Register b, int p) {
+        emitString("setp.ge.f32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_eq_f32(float f32, Register b) {
-        emitString("setp.eq.f32" + " " + "%p" + ", " + f32 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_eq_f32(float f32, Register b, int p) {
+        emitString("setp.eq.f32" + " " + "%p" + p + ", " + f32 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_ne_f32(float f32, Register b) {
-        emitString("setp.ne.f32" + " " + "%p" + ", " + f32 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_ne_f32(float f32, Register b, int p) {
+        emitString("setp.ne.f32" + " " + "%p" + p + ", " + f32 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_lt_f32(float f32, Register b) {
-        emitString("setp.lt.f32" + " " + "%p" + ", " + f32 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_lt_f32(float f32, Register b, int p) {
+        emitString("setp.lt.f32" + " " + "%p" + p + ", " + f32 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_le_f32(float f32, Register b) {
-        emitString("setp.le.f32" + " " + "%p" + ", " + f32 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_le_f32(float f32, Register b, int p) {
+        emitString("setp.le.f32" + " " + "%p" + p + ", " + f32 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_gt_f32(float f32, Register b) {
-        emitString("setp.gt.f32" + " " + "%p" + ", " + f32 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_gt_f32(float f32, Register b, int p) {
+        emitString("setp.gt.f32" + " " + "%p" + p + ", " + f32 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_ge_f32(float f32, Register b) {
-        emitString("setp.ge.f32" + " " + "%p" + ", " + f32 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_ge_f32(float f32, Register b, int p) {
+        emitString("setp.ge.f32" + " " + "%p" + p + ", " + f32 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_eq_f64(double f64, Register b) {
-        emitString("setp.eq.f64" + " " + "%p" + ", " + f64 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_eq_f64(double f64, Register b, int p) {
+        emitString("setp.eq.f64" + " " + "%p" + p + ", " + f64 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_ne_f64(double f64, Register b) {
-        emitString("setp.ne.f64" + " " + "%p" + ", " + f64 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_ne_f64(double f64, Register b, int p) {
+        emitString("setp.ne.f64" + " " + "%p" + p + ", " + f64 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_lt_f64(double f64, Register b) {
-        emitString("setp.lt.f64" + " " + "%p" + ", " + f64 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_lt_f64(double f64, Register b, int p) {
+        emitString("setp.lt.f64" + " " + "%p" + p + ", " + f64 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_le_f64(double f64, Register b) {
-        emitString("setp.le.f64" + " " + "%p" + ", " + f64 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_le_f64(double f64, Register b, int p) {
+        emitString("setp.le.f64" + " " + "%p" + p + ", " + f64 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_gt_f64(double f64, Register b) {
-        emitString("setp.gt.f64" + " " + "%p" + ", " + f64 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_gt_f64(double f64, Register b, int p) {
+        emitString("setp.gt.f64" + " " + "%p" + p + ", " + f64 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_ge_f64(double f64, Register b) {
-        emitString("setp.ge.f64" + " " + "%p" + ", " + f64 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_ge_f64(double f64, Register b, int p) {
+        emitString("setp.ge.f64" + " " + "%p" + p + ", " + f64 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_eq_s64(Register a, Register b) {
-        emitString("setp.eq.s64" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void setp_eq_s64(Register a, Register b, int p) {
+        emitString("setp.eq.s64" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_eq_s64(long s64, Register b) {
-        emitString("setp.eq.s64" + " " + "%p" + ", " + s64 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_eq_s64(long s64, Register b, int p) {
+        emitString("setp.eq.s64" + " " + "%p" + p + ", " + s64 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_eq_s32(Register a, Register b) {
-        emitString("setp.eq.s32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void setp_eq_s32(Register a, Register b, int p) {
+        emitString("setp.eq.s32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_ne_s32(Register a, Register b) {
-        emitString("setp.ne.s32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void setp_ne_s32(Register a, Register b, int p) {
+        emitString("setp.ne.s32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_lt_s32(Register a, Register b) {
-        emitString("setp.lt.s32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void setp_lt_s32(Register a, Register b, int p) {
+        emitString("setp.lt.s32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_le_s32(Register a, Register b) {
-        emitString("setp.le.s32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void setp_le_s32(Register a, Register b, int p) {
+        emitString("setp.le.s32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_gt_s32(Register a, Register b) {
-        emitString("setp.gt.s32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void setp_gt_s32(Register a, Register b, int p) {
+        emitString("setp.gt.s32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_ge_s32(Register a, Register b) {
-        emitString("setp.ge.s32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void setp_ge_s32(Register a, Register b, int p) {
+        emitString("setp.ge.s32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_eq_s32(Register a, int s32) {
-        emitString("setp.eq.s32" + " " + "%p" + ", %r" + a.encoding() + ", " + s32 + ";" + "");
+    public final void setp_eq_s32(Register a, int s32, int p) {
+        emitString("setp.eq.s32" + " " + "%p" + p + ", %r" + a.encoding() + ", " + s32 + ";" + "");
     }
 
-    public final void setp_ne_s32(Register a, int s32) {
-        emitString("setp.ne.s32" + " " + "%p" + ", %r" + a.encoding() + ", " + s32 + ";" + "");
+    public final void setp_ne_s32(Register a, int s32, int p) {
+        emitString("setp.ne.s32" + " " + "%p" + p + ", %r" + a.encoding() + ", " + s32 + ";" + "");
     }
 
-    public final void setp_lt_s32(Register a, int s32) {
-        emitString("setp.lt.s32" + " " + "%p" + ", %r" + a.encoding() + ", " + s32 + ";" + "");
+    public final void setp_lt_s32(Register a, int s32, int p) {
+        emitString("setp.lt.s32" + " " + "%p" + p + ", %r" + a.encoding() + ", " + s32 + ";" + "");
     }
 
-    public final void setp_le_s32(Register a, int s32) {
-        emitString("setp.le.s32" + " " + "%p" + ", %r" + a.encoding() + ", " + s32 + ";" + "");
+    public final void setp_le_s32(Register a, int s32, int p) {
+        emitString("setp.le.s32" + " " + "%p" + p + ", %r" + a.encoding() + ", " + s32 + ";" + "");
     }
 
-    public final void setp_gt_s32(Register a, int s32) {
-        emitString("setp.gt.s32" + " " + "%p" + ", %r" + a.encoding() + ", " + s32 + ";" + "");
+    public final void setp_gt_s32(Register a, int s32, int p) {
+        emitString("setp.gt.s32" + " " + "%p" + p + ", %r" + a.encoding() + ", " + s32 + ";" + "");
     }
 
-    public final void setp_ge_s32(Register a, int s32) {
-        emitString("setp.ge.s32" + " " + "%p" + ", %r" + a.encoding() + ", " + s32 + ";" + "");
+    public final void setp_ge_s32(Register a, int s32, int p) {
+        emitString("setp.ge.s32" + " " + "%p" + p + ", %r" + a.encoding() + ", " + s32 + ";" + "");
     }
 
-    public final void setp_eq_s32(int s32, Register b) {
-        emitString("setp.eq.s32" + " " + "%p" + ", " + s32 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_eq_s32(int s32, Register b, int p) {
+        emitString("setp.eq.s32" + " " + "%p" + p + ", " + s32 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_ne_s32(int s32, Register b) {
-        emitString("setp.ne.s32" + " " + "%p" + ", " + s32 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_ne_s32(int s32, Register b, int p) {
+        emitString("setp.ne.s32" + " " + "%p" + p + ", " + s32 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_lt_s32(int s32, Register b) {
-        emitString("setp.lt.s32" + " " + "%p" + ", " + s32 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_lt_s32(int s32, Register b, int p) {
+        emitString("setp.lt.s32" + " " + "%p" + p + ", " + s32 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_le_s32(int s32, Register b) {
-        emitString("setp.le.s32" + " " + "%p" + ", " + s32 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_le_s32(int s32, Register b, int p) {
+        emitString("setp.le.s32" + " " + "%p" + p + ", " + s32 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_gt_s32(int s32, Register b) {
-        emitString("setp.gt.s32" + " " + "%p" + ", " + s32 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_gt_s32(int s32, Register b, int p) {
+        emitString("setp.gt.s32" + " " + "%p" + p + ", " + s32 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_ge_s32(int s32, Register b) {
-        emitString("setp.ge.s32" + " " + "%p" + ", " + s32 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_ge_s32(int s32, Register b, int p) {
+        emitString("setp.ge.s32" + " " + "%p" + p + ", " + s32 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_eq_u32(Register a, Register b) {
-        emitString("setp.eq.u32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void setp_eq_u32(Register a, Register b, int p) {
+        emitString("setp.eq.u32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_ne_u32(Register a, Register b) {
-        emitString("setp.ne.u32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void setp_ne_u32(Register a, Register b, int p) {
+        emitString("setp.ne.u32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_lt_u32(Register a, Register b) {
-        emitString("setp.lt.u32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void setp_lt_u32(Register a, Register b, int p) {
+        emitString("setp.lt.u32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_le_u32(Register a, Register b) {
-        emitString("setp.le.u32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void setp_le_u32(Register a, Register b, int p) {
+        emitString("setp.le.u32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_gt_u32(Register a, Register b) {
-        emitString("setp.gt.u32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void setp_gt_u32(Register a, Register b, int p) {
+        emitString("setp.gt.u32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_ge_u32(Register a, Register b) {
-        emitString("setp.ge.u32" + " " + "%p" + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void setp_ge_u32(Register a, Register b, int p) {
+        emitString("setp.ge.u32" + " " + "%p" + p + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_eq_u32(Register a, int u32) {
-        emitString("setp.eq.u32" + " " + "%p" + ", %r" + a.encoding() + ", " + u32 + ";" + "");
+    public final void setp_eq_u32(Register a, int u32, int p) {
+        emitString("setp.eq.u32" + " " + "%p" + p + ", %r" + a.encoding() + ", " + u32 + ";" + "");
     }
 
-    public final void setp_ne_u32(Register a, int u32) {
-        emitString("setp.ne.u32" + " " + "%p" + ", %r" + a.encoding() + ", " + u32 + ";" + "");
+    public final void setp_ne_u32(Register a, int u32, int p) {
+        emitString("setp.ne.u32" + " " + "%p" + p + ", %r" + a.encoding() + ", " + u32 + ";" + "");
     }
 
-    public final void setp_lt_u32(Register a, int u32) {
-        emitString("setp.lt.u32" + " " + "%p" + ", %r" + a.encoding() + ", " + u32 + ";" + "");
+    public final void setp_lt_u32(Register a, int u32, int p) {
+        emitString("setp.lt.u32" + " " + "%p" + p + ", %r" + a.encoding() + ", " + u32 + ";" + "");
     }
 
-    public final void setp_le_u32(Register a, int u32) {
-        emitString("setp.le.u32" + " " + "%p" + ", %r" + a.encoding() + ", " + u32 + ";" + "");
+    public final void setp_le_u32(Register a, int u32, int p) {
+        emitString("setp.le.u32" + " " + "%p" + p + ", %r" + a.encoding() + ", " + u32 + ";" + "");
     }
 
-    public final void setp_gt_u32(Register a, int u32) {
-        emitString("setp.gt.u32" + " " + "%p" + ", %r" + a.encoding() + ", " + u32 + ";" + "");
+    public final void setp_gt_u32(Register a, int u32, int p) {
+        emitString("setp.gt.u32" + " " + "%p" + p + ", %r" + a.encoding() + ", " + u32 + ";" + "");
     }
 
-    public final void setp_ge_u32(Register a, int u32) {
-        emitString("setp.ge.u32" + " " + "%p" + ", %r" + a.encoding() + ", " + u32 + ";" + "");
+    public final void setp_ge_u32(Register a, int u32, int p) {
+        emitString("setp.ge.u32" + " " + "%p" + p + ", %r" + a.encoding() + ", " + u32 + ";" + "");
     }
 
-    public final void setp_eq_u32(int u32, Register b) {
-        emitString("setp.eq.u32" + " " + "%p" + ", " + u32 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_eq_u32(int u32, Register b, int p) {
+        emitString("setp.eq.u32" + " " + "%p" + p + ", " + u32 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_ne_u32(int u32, Register b) {
-        emitString("setp.ne.u32" + " " + "%p" + ", " + u32 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_ne_u32(int u32, Register b, int p) {
+        emitString("setp.ne.u32" + " " + "%p" + p + ", " + u32 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_lt_u32(int u32, Register b) {
-        emitString("setp.lt.u32" + " " + "%p" + ", " + u32 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_lt_u32(int u32, Register b, int p) {
+        emitString("setp.lt.u32" + " " + "%p" + p + ", " + u32 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_le_u32(int u32, Register b) {
-        emitString("setp.le.u32" + " " + "%p" + ", " + u32 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_le_u32(int u32, Register b, int p) {
+        emitString("setp.le.u32" + " " + "%p" + p + ", " + u32 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_gt_u32(int u32, Register b) {
-        emitString("setp.gt.u32" + " " + "%p" + ", " + u32 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_gt_u32(int u32, Register b, int p) {
+        emitString("setp.gt.u32" + " " + "%p" + p + ", " + u32 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void setp_ge_u32(int u32, Register b) {
-        emitString("setp.ge.u32" + " " + "%p" + ", " + u32 + ", %r" + b.encoding() + ";" + "");
+    public final void setp_ge_u32(int u32, Register b, int p) {
+        emitString("setp.ge.u32" + " " + "%p" + p + ", " + u32 + ", %r" + b.encoding() + ";" + "");
     }
 
     // Store in global state space
--- a/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java	Tue Sep 17 18:36:54 2013 -0700
+++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java	Tue Sep 17 23:35:42 2013 -0400
@@ -81,7 +81,7 @@
 
     @Override
     public TargetMethodAssembler newAssembler(LIRGenerator lirGen, CompilationResult compilationResult) {
-        // Omit the frame if the method:
+        // Omit the frame of the method:
         // - has no spill slots or other slots allocated during register allocation
         // - has no callee-saved registers
         // - has no incoming arguments passed on the stack
@@ -90,14 +90,13 @@
         AbstractAssembler masm = createAssembler(frameMap);
         HotSpotFrameContext frameContext = new HotSpotFrameContext();
         TargetMethodAssembler tasm = new PTXTargetMethodAssembler(target, runtime(), frameMap, masm, frameContext, compilationResult);
-        tasm.setFrameSize(frameMap.frameSize());
+        tasm.setFrameSize(0);
         return tasm;
     }
 
-    private static void emitKernelEntry(TargetMethodAssembler tasm, LIRGenerator lirGen,
-                                        ResolvedJavaMethod codeCacheOwner) {
+    private static void emitKernelEntry(TargetMethodAssembler tasm, LIRGenerator lirGen, ResolvedJavaMethod codeCacheOwner) {
         // Emit PTX kernel entry text based on PTXParameterOp
-        // instructions in the start block.  Remove the instructions
+        // instructions in the start block. Remove the instructions
         // once kernel entry text and directives are emitted to
         // facilitate seemless PTX code generation subsequently.
         assert codeCacheOwner != null : lirGen.getGraph() + " is not associated with a method";
@@ -154,23 +153,39 @@
                     RegisterValue regVal = (RegisterValue) value;
                     Kind regKind = regVal.getKind();
                     switch (regKind) {
-                       case Int:
-                           signed32.add(regVal.getRegister().encoding());
-                           break;
-                       case Long:
-                           signed64.add(regVal.getRegister().encoding());
-                           break;
-                       case Float:
-                           float32.add(regVal.getRegister().encoding());
-                           break;
-                       case Double:
-                           float64.add(regVal.getRegister().encoding());
-                           break;
-                       case Object:
-                           signed64.add(regVal.getRegister().encoding());
-                           break;
-                       default :
-                           throw GraalInternalError.shouldNotReachHere("unhandled register type "  + value.toString());
+                        case Int:
+                            // If the register was used as a wider signed type
+                            // do not add it here
+                            if (!signed64.contains(regVal.getRegister().encoding())) {
+                                signed32.add(regVal.getRegister().encoding());
+                            }
+                            break;
+                        case Long:
+                        case Object:
+                            // If the register was used as a narrower signed type
+                            // remove it from there and add it to wider type.
+                            if (signed32.contains(regVal.getRegister().encoding())) {
+                                signed32.remove(regVal.getRegister().encoding());
+                            }
+                            signed64.add(regVal.getRegister().encoding());
+                            break;
+                        case Float:
+                            // If the register was already used as a wider floating-point
+                            // type (f64), do not add it here
+                            if (!float64.contains(regVal.getRegister().encoding())) {
+                                float32.add(regVal.getRegister().encoding());
+                            }
+                            break;
+                        case Double:
+                            // If the register was used as a narrower floating-point type
+                            // (f32), remove it from there and add it to the wider type.
+                            if (float32.contains(regVal.getRegister().encoding())) {
+                                float32.remove(regVal.getRegister().encoding());
+                            }
+                            float64.add(regVal.getRegister().encoding());
+                            break;
+                        default:
+                            throw GraalInternalError.shouldNotReachHere("unhandled register type " + value.toString());
                     }
                 }
                 return value;
@@ -200,6 +215,11 @@
         for (Integer i : float64) {
             codeBuffer.emitString(".reg .f64 %r" + i.intValue() + ";");
         }
+        // emit predicate register declaration
+        int maxPredRegNum = ((PTXLIRGenerator) lirGen).getNextPredRegNumber();
+        if (maxPredRegNum > 0) {
+            codeBuffer.emitString(".reg .pred %p<" + maxPredRegNum + ">;");
+        }
     }
 
     @Override
--- a/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXLIRGenerator.java	Tue Sep 17 18:36:54 2013 -0700
+++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXLIRGenerator.java	Tue Sep 17 23:35:42 2013 -0400
@@ -45,8 +45,6 @@
 import com.oracle.graal.lir.ptx.PTXArithmetic.Unary2Op;
 import com.oracle.graal.lir.ptx.PTXCompare.CompareOp;
 import com.oracle.graal.lir.ptx.PTXControlFlow.BranchOp;
-import com.oracle.graal.lir.ptx.PTXControlFlow.CondMoveOp;
-import com.oracle.graal.lir.ptx.PTXControlFlow.FloatCondMoveOp;
 import com.oracle.graal.lir.ptx.PTXControlFlow.ReturnOp;
 import com.oracle.graal.lir.ptx.PTXControlFlow.ReturnNoValOp;
 import com.oracle.graal.lir.ptx.PTXControlFlow.SequentialSwitchOp;
@@ -68,6 +66,11 @@
  */
 public class PTXLIRGenerator extends LIRGenerator {
 
+    // Number of the next unused predicate register. The generator passes the current
+    // value to each LIR op that sets or tests a predicate (e.g., CompareOp and its
+    // BranchOp) and then increments it; the final value sizes the %p declaration.
+    private int nextPredRegNum;
+
     public static final ForeignCallDescriptor ARITHMETIC_FREM = new ForeignCallDescriptor("arithmeticFrem", float.class, float.class, float.class);
     public static final ForeignCallDescriptor ARITHMETIC_DREM = new ForeignCallDescriptor("arithmeticDrem", double.class, double.class, double.class);
 
@@ -82,6 +85,11 @@
     public PTXLIRGenerator(StructuredGraph graph, CodeCacheProvider runtime, TargetDescription target, FrameMap frameMap, CallingConvention cc, LIR lir) {
         super(graph, runtime, target, frameMap, cc, lir);
         lir.spillMoveFactory = new PTXSpillMoveFactory();
+        nextPredRegNum = 0;
+    }
+
+    public int getNextPredRegNumber() {
+        return nextPredRegNum;
     }
 
     @Override
@@ -231,24 +239,24 @@
     public void emitCompareBranch(Value left, Value right, Condition cond, boolean unorderedIsTrue, LabelRef label) {
         switch (left.getKind().getStackKind()) {
             case Int:
-                append(new CompareOp(ICMP, cond, left, right));
-                append(new BranchOp(cond, label));
+                append(new CompareOp(ICMP, cond, left, right, nextPredRegNum));
+                append(new BranchOp(cond, label, nextPredRegNum++));
                 break;
             case Long:
-                append(new CompareOp(LCMP, cond, left, right));
-                append(new BranchOp(cond, label));
+                append(new CompareOp(LCMP, cond, left, right, nextPredRegNum));
+                append(new BranchOp(cond, label, nextPredRegNum++));
                 break;
             case Float:
-                append(new CompareOp(FCMP, cond, left, right));
-                append(new BranchOp(cond, label));
+                append(new CompareOp(FCMP, cond, left, right, nextPredRegNum));
+                append(new BranchOp(cond, label, nextPredRegNum++));
                 break;
             case Double:
-                append(new CompareOp(DCMP, cond, left, right));
-                append(new BranchOp(cond, label));
+                append(new CompareOp(DCMP, cond, left, right, nextPredRegNum));
+                append(new BranchOp(cond, label, nextPredRegNum++));
                 break;
             case Object:
-                append(new CompareOp(ACMP, cond, left, right));
-                append(new BranchOp(cond, label));
+                append(new CompareOp(ACMP, cond, left, right, nextPredRegNum));
+                append(new BranchOp(cond, label, nextPredRegNum++));
                 break;
             default:
                 throw GraalInternalError.shouldNotReachHere("" + left.getKind());
@@ -267,69 +275,12 @@
 
     @Override
     public Variable emitConditionalMove(Value left, Value right, Condition cond, boolean unorderedIsTrue, Value trueValue, Value falseValue) {
-        boolean mirrored = emitCompare(cond, left, right);
-        Condition finalCondition = mirrored ? cond.mirror() : cond;
-
-        Variable result = newVariable(trueValue.getKind());
-        switch (left.getKind().getStackKind()) {
-            case Int:
-            case Long:
-            case Object:
-                append(new CondMoveOp(result, finalCondition, load(trueValue), loadNonConst(falseValue)));
-                break;
-            case Float:
-            case Double:
-                append(new FloatCondMoveOp(result, finalCondition, unorderedIsTrue, load(trueValue), load(falseValue)));
-                break;
-            default:
-                throw GraalInternalError.shouldNotReachHere("missing: " + left.getKind());
-        }
-        return result;
+        // TODO: There is no conventional conditional move instruction in PTX.
+        // So, this method is changed to throw NYI exception.
+        // To be revisited if this needs to be really implemented.
+        throw new InternalError("NYI");
     }
 
-    /**
-     * This method emits the compare instruction, and may reorder the operands. It returns true if
-     * it did so.
-     *
-     *
-     * @param a the left operand of the comparison
-     * @param b the right operand of the comparison
-     * @return true if the left and right operands were switched, false otherwise
-     */
-    private boolean emitCompare(Condition cond, Value a, Value b) {
-        Variable left;
-        Value right;
-        boolean mirrored;
-        if (LIRValueUtil.isVariable(b)) {
-            left = load(b);
-            right = loadNonConst(a);
-            mirrored = true;
-        } else {
-            left = load(a);
-            right = loadNonConst(b);
-            mirrored = false;
-        }
-        switch (left.getKind().getStackKind()) {
-            case Int:
-                append(new CompareOp(ICMP, cond, left, right));
-                break;
-            case Long:
-                append(new CompareOp(LCMP, cond, left, right));
-                break;
-            case Object:
-                append(new CompareOp(ACMP, cond, left, right));
-                break;
-            case Float:
-                append(new CompareOp(FCMP, cond, left, right));
-                break;
-            case Double:
-                append(new CompareOp(DCMP, cond, left, right));
-                break;
-            default:
-                throw GraalInternalError.shouldNotReachHere();
-        }
-        return mirrored;
-    }
 
     @Override
     public Variable emitIntegerTestMove(Value left, Value right, Value trueValue, Value falseValue) {
@@ -755,10 +706,10 @@
         // Making a copy of the switch value is necessary because jump table destroys the input
         // value
         if (key.getKind() == Kind.Int || key.getKind() == Kind.Long) {
-            append(new SequentialSwitchOp(keyConstants, keyTargets, defaultTarget, key, Value.ILLEGAL));
+            append(new SequentialSwitchOp(keyConstants, keyTargets, defaultTarget, key, Value.ILLEGAL, nextPredRegNum));
         } else {
             assert key.getKind() == Kind.Object : key.getKind();
-            append(new SequentialSwitchOp(keyConstants, keyTargets, defaultTarget, key, newVariable(Kind.Object)));
+            append(new SequentialSwitchOp(keyConstants, keyTargets, defaultTarget, key, newVariable(Kind.Object), nextPredRegNum));
         }
     }
 
@@ -772,7 +723,7 @@
         // Making a copy of the switch value is necessary because jump table destroys the input
         // value
         Variable tmp = emitMove(key);
-        append(new TableSwitchOp(lowKey, defaultTarget, targets, tmp, newVariable(target.wordKind)));
+        append(new TableSwitchOp(lowKey, defaultTarget, targets, tmp, newVariable(target.wordKind), nextPredRegNum++));
     }
 
     @Override
--- a/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotRegisterConfig.java	Tue Sep 17 18:36:54 2013 -0700
+++ b/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotRegisterConfig.java	Tue Sep 17 23:35:42 2013 -0400
@@ -74,11 +74,8 @@
 
     private static Register[] initAllocatable() {
         Register[] registers = new Register[] {
-            param0, param1, param2, param3,
-            param4, param5, param6, param7,
             r0,  r1,  r2,  r3,  r4,  r5,  r6,  r7,
             r8,  r9,  r10, r11, r12, r13, r14, r15,
-            // retReg,
         };
 
         return registers;
--- a/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotRuntime.java	Tue Sep 17 18:36:54 2013 -0700
+++ b/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotRuntime.java	Tue Sep 17 23:35:42 2013 -0400
@@ -28,6 +28,8 @@
 import com.oracle.graal.hotspot.*;
 import com.oracle.graal.hotspot.meta.*;
 import com.oracle.graal.nodes.spi.*;
+import com.oracle.graal.graph.Node;
+import com.oracle.graal.nodes.calc.ConvertNode;
 
 public class PTXHotSpotRuntime extends HotSpotRuntime {
 
@@ -37,6 +39,20 @@
     }
 
     @Override
+    public void lower(Node n, LoweringTool tool) {
+        if (n instanceof ConvertNode) {
+            // PTX has a cvt instruction that "takes a variety of
+            // operand types and sizes, as its job is to convert from
+            // nearly any data type to any other data type (and
+            // size)." [Section 6.2 of PTX ISA manual]
+            // So, there is no need to lower the operation.
+            return;
+        } else {
+            super.lower(n, tool);
+        }
+    }
+
+    @Override
     public void registerReplacements(Replacements replacements) {
         //TODO: Do we need to implement this functionality for PTX?
     }
--- a/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXCompare.java	Tue Sep 17 18:36:54 2013 -0700
+++ b/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXCompare.java	Tue Sep 17 23:35:42 2013 -0400
@@ -41,18 +41,21 @@
         @Opcode private final PTXCompare opcode;
         @Use({REG, STACK, CONST}) protected Value x;
         @Use({REG, STACK, CONST}) protected Value y;
+        // Number of the predicate register that this instruction sets.
+        protected int predRegNum;
         private final Condition condition;
 
-        public CompareOp(PTXCompare opcode, Condition condition, Value x, Value y) {
+        public CompareOp(PTXCompare opcode, Condition condition, Value x, Value y, int predReg) {
             this.opcode = opcode;
             this.condition = condition;
             this.x = x;
             this.y = y;
+            predRegNum = predReg;
         }
 
         @Override
         public void emitCode(TargetMethodAssembler tasm, PTXAssembler masm) {
-            emit(tasm, masm, opcode, condition, x, y);
+            emit(tasm, masm, opcode, condition, x, y, predRegNum);
         }
 
         @Override
@@ -64,17 +67,19 @@
         }
     }
 
-    public static void emit(TargetMethodAssembler tasm, PTXAssembler masm, PTXCompare opcode, Condition condition, Value x, Value y) {
+    public static void emit(TargetMethodAssembler tasm, PTXAssembler masm,
+                            PTXCompare opcode, Condition condition,
+                            Value x, Value y, int p) {
         if (isConstant(x)) {
             switch (opcode) {
                 case ICMP:
-                    emitCompareConstReg(masm, condition, tasm.asIntConst(x), asIntReg(y));
+                    emitCompareConstReg(masm, condition, tasm.asIntConst(x), asIntReg(y), p);
                     break;
                 case FCMP:
-                    emitCompareConstReg(masm, condition, tasm.asFloatConst(x), asFloatReg(y));
+                    emitCompareConstReg(masm, condition, tasm.asFloatConst(x), asFloatReg(y), p);
                     break;
                 case DCMP:
-                    emitCompareConstReg(masm, condition, tasm.asDoubleConst(x), asDoubleReg(y));
+                    emitCompareConstReg(masm, condition, tasm.asDoubleConst(x), asDoubleReg(y), p);
                     break;
                 default:
                     throw GraalInternalError.shouldNotReachHere();
@@ -84,16 +89,16 @@
             int b = tasm.asIntConst(y);
             switch (opcode) {
                 case ICMP:
-                    emitCompareRegConst(masm, condition, a, b);
+                    emitCompareRegConst(masm, condition, a, b, p);
                     break;
                 case ACMP:
                     if (((Constant) y).isNull()) {
                         switch (condition) {
                             case EQ:
-                                masm.setp_eq_s32(a, b);
+                                masm.setp_eq_s32(a, b, p);
                                 break;
                             case NE:
-                                masm.setp_ne_s32(a, b);
+                                masm.setp_ne_s32(a, b, p);
                                 break;
                             default:
                                 throw GraalInternalError.shouldNotReachHere();
@@ -108,16 +113,16 @@
         } else {
             switch (opcode) {
                 case ICMP:
-                    emitCompareRegReg(masm, condition, asIntReg(x), asIntReg(y));
+                    emitCompareRegReg(masm, condition, asIntReg(x), asIntReg(y), p);
                     break;
                 case LCMP:
-                    emitCompareRegReg(masm, condition, asLongReg(x), asLongReg(y));
+                    emitCompareRegReg(masm, condition, asLongReg(x), asLongReg(y), p);
                     break;
                 case FCMP:
-                    emitCompareRegReg(masm, condition, asFloatReg(x), asFloatReg(y));
+                    emitCompareRegReg(masm, condition, asFloatReg(x), asFloatReg(y), p);
                     break;
                 case DCMP:
-                    emitCompareRegReg(masm, condition, asDoubleReg(x), asDoubleReg(y));
+                    emitCompareRegReg(masm, condition, asDoubleReg(x), asDoubleReg(y), p);
                     break;
                 default:
                     throw GraalInternalError.shouldNotReachHere("missing: "  + opcode);
@@ -125,161 +130,161 @@
         }
     }
 
-    private static void emitCompareConstReg(PTXAssembler masm, Condition condition, float a, Register b) {
+    private static void emitCompareConstReg(PTXAssembler masm, Condition condition, float a, Register b, int p) {
         switch (condition) {
         case EQ:
-            masm.setp_eq_f32(a, b);
+            masm.setp_eq_f32(a, b, p);
             break;
         case NE:
-            masm.setp_ne_f32(a, b);
+            masm.setp_ne_f32(a, b, p);
             break;
         case LT:
-            masm.setp_lt_f32(a, b);
+            masm.setp_lt_f32(a, b, p);
             break;
         case LE:
-            masm.setp_le_f32(a, b);
+            masm.setp_le_f32(a, b, p);
             break;
         case GT:
-            masm.setp_gt_f32(a, b);
+            masm.setp_gt_f32(a, b, p);
             break;
         case GE:
-            masm.setp_ge_f32(a, b);
+            masm.setp_ge_f32(a, b, p);
             break;
         default:
             throw GraalInternalError.shouldNotReachHere();
         }
     }
 
-    private static void emitCompareConstReg(PTXAssembler masm, Condition condition, double a, Register b) {
+    private static void emitCompareConstReg(PTXAssembler masm, Condition condition, double a, Register b, int p) {
         switch (condition) {
         case EQ:
-            masm.setp_eq_f64(a, b);
+            masm.setp_eq_f64(a, b, p);
             break;
         case NE:
-            masm.setp_ne_f64(a, b);
+            masm.setp_ne_f64(a, b, p);
             break;
         case LT:
-            masm.setp_lt_f64(a, b);
+            masm.setp_lt_f64(a, b, p);
             break;
         case LE:
-            masm.setp_le_f64(a, b);
+            masm.setp_le_f64(a, b, p);
             break;
         case GT:
-            masm.setp_gt_f64(a, b);
+            masm.setp_gt_f64(a, b, p);
             break;
         case GE:
-            masm.setp_ge_f64(a, b);
+            masm.setp_ge_f64(a, b, p);
             break;
         default:
             throw GraalInternalError.shouldNotReachHere();
         }
     }
 
-    private static void emitCompareConstReg(PTXAssembler masm, Condition condition, int a, Register b) {
+    private static void emitCompareConstReg(PTXAssembler masm, Condition condition, int a, Register b, int p) {
         switch (condition) {
             case EQ:
-                masm.setp_eq_s32(a, b);
+                masm.setp_eq_s32(a, b, p);
                 break;
             case NE:
-                masm.setp_ne_s32(a, b);
+                masm.setp_ne_s32(a, b, p);
                 break;
             case LT:
-                masm.setp_lt_s32(a, b);
+                masm.setp_lt_s32(a, b, p);
                 break;
             case LE:
-                masm.setp_le_s32(a, b);
+                masm.setp_le_s32(a, b, p);
                 break;
             case GT:
-                masm.setp_gt_s32(a, b);
+                masm.setp_gt_s32(a, b, p);
                 break;
             case GE:
-                masm.setp_ge_s32(a, b);
+                masm.setp_ge_s32(a, b, p);
                 break;
             case AT:
-                masm.setp_gt_u32(a, b);
+                masm.setp_gt_u32(a, b, p);
                 break;
             case AE:
-                masm.setp_ge_u32(a, b);
+                masm.setp_ge_u32(a, b, p);
                 break;
             case BT:
-                masm.setp_lt_u32(a, b);
+                masm.setp_lt_u32(a, b, p);
                 break;
             case BE:
-                masm.setp_le_u32(a, b);
+                masm.setp_le_u32(a, b, p);
                 break;
             default:
                 throw GraalInternalError.shouldNotReachHere();
         }
     }
 
-    private static void emitCompareRegConst(PTXAssembler masm, Condition condition, Register a, int b) {
+    private static void emitCompareRegConst(PTXAssembler masm, Condition condition, Register a, int b, int p) {
         switch (condition) {
             case EQ:
-                masm.setp_eq_s32(a, b);
+                masm.setp_eq_s32(a, b, p);
                 break;
             case NE:
-                masm.setp_ne_s32(a, b);
+                masm.setp_ne_s32(a, b, p);
                 break;
             case LT:
-                masm.setp_lt_s32(a, b);
+                masm.setp_lt_s32(a, b, p);
                 break;
             case LE:
-                masm.setp_le_s32(a, b);
+                masm.setp_le_s32(a, b, p);
                 break;
             case GT:
-                masm.setp_gt_s32(a, b);
+                masm.setp_gt_s32(a, b, p);
                 break;
             case GE:
-                masm.setp_ge_s32(a, b);
+                masm.setp_ge_s32(a, b, p);
                 break;
             case AT:
-                masm.setp_gt_u32(a, b);
+                masm.setp_gt_u32(a, b, p);
                 break;
             case AE:
-                masm.setp_ge_u32(a, b);
+                masm.setp_ge_u32(a, b, p);
                 break;
             case BT:
-                masm.setp_lt_u32(a, b);
+                masm.setp_lt_u32(a, b, p);
                 break;
             case BE:
-                masm.setp_le_u32(a, b);
+                masm.setp_le_u32(a, b, p);
                 break;
             default:
                 throw GraalInternalError.shouldNotReachHere();
         }
     }
 
-    private static void emitCompareRegReg(PTXAssembler masm, Condition condition, Register a, Register b) {
+    private static void emitCompareRegReg(PTXAssembler masm, Condition condition, Register a, Register b, int p) {
         switch (condition) {
             case EQ:
-                masm.setp_eq_s32(a, b);
+                masm.setp_eq_s32(a, b, p);
                 break;
             case NE:
-                masm.setp_ne_s32(a, b);
+                masm.setp_ne_s32(a, b, p);
                 break;
             case LT:
-                masm.setp_lt_s32(a, b);
+                masm.setp_lt_s32(a, b, p);
                 break;
             case LE:
-                masm.setp_le_s32(a, b);
+                masm.setp_le_s32(a, b, p);
                 break;
             case GT:
-                masm.setp_gt_s32(a, b);
+                masm.setp_gt_s32(a, b, p);
                 break;
             case GE:
-                masm.setp_ge_s32(a, b);
+                masm.setp_ge_s32(a, b, p);
                 break;
             case AT:
-                masm.setp_gt_u32(a, b);
+                masm.setp_gt_u32(a, b, p);
                 break;
             case AE:
-                masm.setp_ge_u32(a, b);
+                masm.setp_ge_u32(a, b, p);
                 break;
             case BT:
-                masm.setp_lt_u32(a, b);
+                masm.setp_lt_u32(a, b, p);
                 break;
             case BE:
-                masm.setp_le_u32(a, b);
+                masm.setp_le_u32(a, b, p);
                 break;
             default:
                 throw GraalInternalError.shouldNotReachHere();
--- a/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXControlFlow.java	Tue Sep 17 18:36:54 2013 -0700
+++ b/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXControlFlow.java	Tue Sep 17 23:35:42 2013 -0400
@@ -72,16 +72,17 @@
 
         protected Condition condition;
         protected LabelRef destination;
+        protected int predRegNum;
 
-        public BranchOp(Condition condition, LabelRef destination) {
+        public BranchOp(Condition condition, LabelRef destination, int predReg) {
             this.condition = condition;
             this.destination = destination;
+            this.predRegNum = predReg;
         }
 
         @Override
         public void emitCode(TargetMethodAssembler tasm, PTXAssembler masm) {
-            masm.at();
-            masm.bra(masm.nameOf(destination.label()));
+            masm.bra(masm.nameOf(destination.label()), predRegNum);
         }
 
         @Override
@@ -151,14 +152,17 @@
         private LabelRef defaultTarget;
         @Alive({REG}) protected Value key;
         @Temp({REG, ILLEGAL}) protected Value scratch;
+        // Number of the predicate register that this instruction sets.
+        protected int predRegNum;
 
-        public SequentialSwitchOp(Constant[] keyConstants, LabelRef[] keyTargets, LabelRef defaultTarget, Value key, Value scratch) {
+        public SequentialSwitchOp(Constant[] keyConstants, LabelRef[] keyTargets, LabelRef defaultTarget, Value key, Value scratch, int predReg) {
             assert keyConstants.length == keyTargets.length;
             this.keyConstants = keyConstants;
             this.keyTargets = keyTargets;
             this.defaultTarget = defaultTarget;
             this.key = key;
             this.scratch = scratch;
+            predRegNum = predReg;
         }
 
         @Override
@@ -171,25 +175,22 @@
                     }
                     long lc = keyConstants[i].asLong();
                     assert NumUtil.isInt(lc);
-                    masm.setp_eq_s32((int) lc, intKey);
-                    masm.at();
-                    masm.bra(masm.nameOf(keyTargets[i].label()));
+                    masm.setp_eq_s32((int) lc, intKey, predRegNum);
+                    masm.bra(masm.nameOf(keyTargets[i].label()), predRegNum);
                 }
             } else if (key.getKind() == Kind.Long) {
                 Register longKey = asLongReg(key);
                 for (int i = 0; i < keyConstants.length; i++) {
-                    masm.setp_eq_s64(tasm.asLongConst(keyConstants[i]), longKey);
-                    masm.at();
-                    masm.bra(masm.nameOf(keyTargets[i].label()));
+                    masm.setp_eq_s64(tasm.asLongConst(keyConstants[i]), longKey, predRegNum);
+                    masm.bra(masm.nameOf(keyTargets[i].label()), predRegNum);
                 }
             } else if (key.getKind() == Kind.Object) {
                 Register intKey = asObjectReg(key);
                 Register temp = asObjectReg(scratch);
                 for (int i = 0; i < keyConstants.length; i++) {
                     PTXMove.move(tasm, masm, temp.asValue(Kind.Object), keyConstants[i]);
-                    masm.setp_eq_u32(intKey, temp);
-                    masm.at();
-                    masm.bra(keyTargets[i].label().toString());
+                    masm.setp_eq_u32(intKey, temp, predRegNum);
+                    masm.bra(keyTargets[i].label().toString(), predRegNum);
                 }
             } else {
                 throw new GraalInternalError("sequential switch only supported for int, long and object");
@@ -219,38 +220,40 @@
         private final LabelRef[] targets;
         @Alive protected Value index;
         @Temp protected Value scratch;
+        // Number of the predicate register that this instruction sets.
+        protected int predRegNum;
 
-        public TableSwitchOp(final int lowKey, final LabelRef defaultTarget, final LabelRef[] targets, Variable index, Variable scratch) {
+        public TableSwitchOp(final int lowKey, final LabelRef defaultTarget, final LabelRef[] targets, Variable index, Variable scratch, int predReg) {
             this.lowKey = lowKey;
             this.defaultTarget = defaultTarget;
             this.targets = targets;
             this.index = index;
             this.scratch = scratch;
+            predRegNum = predReg;
         }
 
         @Override
         public void emitCode(TargetMethodAssembler tasm, PTXAssembler masm) {
-            tableswitch(tasm, masm, lowKey, defaultTarget, targets, asIntReg(index), asLongReg(scratch));
+            tableswitch(tasm, masm, lowKey, defaultTarget, targets, asIntReg(index), asLongReg(scratch), predRegNum);
         }
     }
 
     @SuppressWarnings("unused")
-    private static void tableswitch(TargetMethodAssembler tasm, PTXAssembler masm, int lowKey, LabelRef defaultTarget, LabelRef[] targets, Register value, Register scratch) {
+    private static void tableswitch(TargetMethodAssembler tasm, PTXAssembler masm, int lowKey, LabelRef defaultTarget, LabelRef[] targets, Register value, Register scratch, int predNum) {
         Buffer buf = masm.codeBuffer;
         // Compare index against jump table bounds
         int highKey = lowKey + targets.length - 1;
         if (lowKey != 0) {
             // subtract the low value from the switch value
             // new Sub(value, value, lowKey).emit(masm);
-            masm.setp_gt_s32(value, highKey - lowKey);
+            masm.setp_gt_s32(value, highKey - lowKey, predNum);
         } else {
-            masm.setp_gt_s32(value, highKey);
+            masm.setp_gt_s32(value, highKey, predNum);
         }
 
         // Jump to default target if index is not within the jump table
         if (defaultTarget != null) {
-            masm.at();
-            masm.bra(defaultTarget.label().toString());
+            masm.bra(defaultTarget.label().toString(), predNum);
         }
 
         // address of jump table
--- a/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMemOp.java	Tue Sep 17 18:36:54 2013 -0700
+++ b/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMemOp.java	Tue Sep 17 23:35:42 2013 -0400
@@ -34,7 +34,7 @@
 public class PTXMemOp {
 
     // Load operation from .global state space
-    @Opcode("LOAD")
+    @Opcode("LOAD_REGBASE_DISP")
     public static class LoadOp extends PTXLIRInstruction {
 
         private final Kind kind;
@@ -132,7 +132,7 @@
     }
 
     // Load operation from .param state space
-    @Opcode("LOAD")
+    @Opcode("LOAD_PARAM")
     public static class LoadParamOp extends PTXLIRInstruction {
 
         private final Kind kind;
@@ -240,7 +240,6 @@
         public void emitCode(TargetMethodAssembler tasm, PTXAssembler masm) {
             assert isRegister(input);
             PTXAddress addr = address.toAddress();
-            // masm.st_global_return_value_s64(addr.getBase(), addr.getDisplacement(), asRegister(input));
 
             switch (kind) {
                 case Byte:
--- a/graal/com.oracle.graal.ptx/src/com/oracle/graal/ptx/PTX.java	Tue Sep 17 18:36:54 2013 -0700
+++ b/graal/com.oracle.graal.ptx/src/com/oracle/graal/ptx/PTX.java	Tue Sep 17 23:35:42 2013 -0400
@@ -42,30 +42,6 @@
 
     // @formatter:off
 
-    /* Parameter State Space
-     *
-     * The parameter (.param) state space is used (1) to pass input
-     * arguments from the host to the kernel, (2a) to declare formal
-     * input and return parameters for device functions called from
-     * within kernel execution, and (2b) to declare locally-scoped
-     * byte array variables that serve as function call arguments,
-     * typically for passing large structures by value to a function.
-     *
-     * TODO: XXX
-     * The parameters are virtual symbols - just like registers. Bit,
-     * Till we figure out how to model a virtual register set in Graal,
-     * we will pretend that we can use only 8 parameters.
-    */
-
-    public static final Register param0  = new Register(0,  0,  "param0",  PARAM);
-    public static final Register param1  = new Register(1,  1,  "param1",  PARAM);
-    public static final Register param2  = new Register(2,  2,  "param2",  PARAM);
-    public static final Register param3  = new Register(3,  3,  "param3",  PARAM);
-    public static final Register param4  = new Register(4,  4,  "param4",  PARAM);
-    public static final Register param5  = new Register(5,  5,  "param5",  PARAM);
-    public static final Register param6  = new Register(6,  6,  "param6",  PARAM);
-    public static final Register param7  = new Register(7,  7,  "param7",  PARAM);
-
     /*
      * Register State Space
      *
@@ -85,36 +61,60 @@
      * will pretend that we can use only 16 registers.
      */
 
-    public static final Register r0  = new Register(8,   8,  "r0",  REG);
-    public static final Register r1  = new Register(9,   9,  "r1",  REG);
-    public static final Register r2  = new Register(10, 10,  "r2",  REG);
-    public static final Register r3  = new Register(11,  11,  "r3",  REG);
-    public static final Register r4  = new Register(12, 12,  "r4",  REG);
-    public static final Register r5  = new Register(13, 13,  "r5",  REG);
-    public static final Register r6  = new Register(14, 14,  "r6",  REG);
-    public static final Register r7  = new Register(15, 15,  "r7",  REG);
+    public static final Register r0  = new Register(0,   0,  "r0",  REG);
+    public static final Register r1  = new Register(1,   1,  "r1",  REG);
+    public static final Register r2  = new Register(2,   2,  "r2",  REG);
+    public static final Register r3  = new Register(3,   3,  "r3",  REG);
+    public static final Register r4  = new Register(4,   4,  "r4",  REG);
+    public static final Register r5  = new Register(5,   5,  "r5",  REG);
+    public static final Register r6  = new Register(6,   6,  "r6",  REG);
+    public static final Register r7  = new Register(7,   7,  "r7",  REG);
 
-    public static final Register r8  = new Register(16, 16,  "r8",  REG);
-    public static final Register r9  = new Register(17, 17,  "r9",  REG);
-    public static final Register r10 = new Register(18, 18, "r10", REG);
-    public static final Register r11 = new Register(19, 19, "r11", REG);
-    public static final Register r12 = new Register(20, 20, "r12", REG);
-    public static final Register r13 = new Register(21, 21, "r13", REG);
-    public static final Register r14 = new Register(22, 22, "r14", REG);
-    public static final Register r15 = new Register(23, 23, "r15", REG);
-
-    // Define a virtual register that holds return value
-    public static final Register retReg = new Register(24, 24, "retReg", REG);
+    public static final Register r8  = new Register(8,   8,  "r8",  REG);
+    public static final Register r9  = new Register(9,   9,  "r9",  REG);
+    public static final Register r10 = new Register(10, 10, "r10", REG);
+    public static final Register r11 = new Register(11, 11, "r11", REG);
+    public static final Register r12 = new Register(12, 12, "r12", REG);
+    public static final Register r13 = new Register(13, 13, "r13", REG);
+    public static final Register r14 = new Register(14, 14, "r14", REG);
+    public static final Register r15 = new Register(15, 15, "r15", REG);
 
     public static final Register[] gprRegisters = {
         r0,  r1,  r2,  r3,  r4,  r5,  r6,  r7,
         r8,  r9,  r10, r11, r12, r13, r14, r15
     };
 
+    /* Parameter State Space
+     *
+     * The parameter (.param) state space is used (1) to pass input
+     * arguments from the host to the kernel, (2a) to declare formal
+     * input and return parameters for device functions called from
+     * within kernel execution, and (2b) to declare locally-scoped
+     * byte array variables that serve as function call arguments,
+     * typically for passing large structures by value to a function.
+     *
+     * TODO: XXX
+     * The parameters are virtual symbols - just like registers. But
+     * until we figure out how to model a virtual register set in Graal,
+     * we will pretend that we can use only 8 parameters.
+    */
+
+    public static final Register param0  = new Register(16, 16,  "param0",  PARAM);
+    public static final Register param1  = new Register(17, 17,  "param1",  PARAM);
+    public static final Register param2  = new Register(18, 18,  "param2",  PARAM);
+    public static final Register param3  = new Register(19, 19,  "param3",  PARAM);
+    public static final Register param4  = new Register(20, 20,  "param4",  PARAM);
+    public static final Register param5  = new Register(21, 21,  "param5",  PARAM);
+    public static final Register param6  = new Register(22, 22,  "param6",  PARAM);
+    public static final Register param7  = new Register(23, 23,  "param7",  PARAM);
+
     public static final Register[] paramRegisters = {
         param0,  param1,  param2,  param3,  param4,  param5,  param6,  param7
     };
 
+    // Define a virtual register that holds return value
+    public static final Register retReg = new Register(24, 24, "retReg", REG);
+
     // PTX ISA Manual: Section 9:. Special Registers
 
     // PTX includes a number of predefined, read-only variables, which
--- a/src/share/vm/graal/graalVMToCompiler.cpp	Tue Sep 17 18:36:54 2013 -0700
+++ b/src/share/vm/graal/graalVMToCompiler.cpp	Tue Sep 17 23:35:42 2013 -0400
@@ -62,15 +62,13 @@
   if (JNIHandles::resolve(_graalRuntimePermObject) == NULL) {
 #ifdef AMD64
     Symbol* name = NULL;
-    if (UseGPU) {
-      // Set name to PTXHotSpotRuntime if nVidia GPU was detected.
-      if ((gpu::get_target_il_type() == gpu::PTX) &&
-          gpu::is_available() && gpu::has_gpu_linkage()) {
-        name = vmSymbols::com_oracle_graal_hotspot_ptx_PTXHotSpotGraalRuntime();
-      }
-      // Set name to corresponding runtime classname for other
-      // supported GPU runtimes, here.
-    } else {
+    // Set name to PTXHotSpotGraalRuntime if GPU use is enabled and an NVIDIA (PTX) GPU is available.
+    if (UseGPU && (gpu::get_target_il_type() == gpu::PTX) &&
+        gpu::is_available() && gpu::has_gpu_linkage()) {
+      name = vmSymbols::com_oracle_graal_hotspot_ptx_PTXHotSpotGraalRuntime();
+    }
+    
+    if (name == NULL) {
       name = vmSymbols::com_oracle_graal_hotspot_amd64_AMD64HotSpotGraalRuntime();
     }
 #endif