changeset 13963:dff4ff4d40c8

Merge.
author Thomas Wuerthinger <thomas.wuerthinger@oracle.com>
date Thu, 06 Feb 2014 14:50:45 +0100
parents 258a09b6449b (current diff) bc471f405eb8 (diff)
children 8df361535530
files
diffstat 59 files changed, 554 insertions(+), 189 deletions(-) [+]
line wrap: on
line diff
--- a/graal/com.oracle.graal.asm.hsail/src/com/oracle/graal/asm/hsail/HSAILAssembler.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.graal.asm.hsail/src/com/oracle/graal/asm/hsail/HSAILAssembler.java	Thu Feb 06 14:50:45 2014 +0100
@@ -120,7 +120,13 @@
     }
 
     private void emitAddrOp(String instr, Value reg, HSAILAddress addr) {
-        emitString(instr + " " + HSAIL.mapRegister(reg) + ", " + mapAddress(addr) + ";");
+        String storeValue = null;
+        if (reg instanceof RegisterValue) {
+            storeValue = HSAIL.mapRegister(reg);
+        } else if (reg instanceof Constant) {
+            storeValue = ((Constant) reg).asBoxedValue().toString();
+        }
+        emitString(instr + " " + storeValue + ", " + mapAddress(addr) + ";");
     }
 
     /**
@@ -160,10 +166,31 @@
         emitAddrOp("st_global_" + argTypeStr, dest, addr);
     }
 
+    private void storeImmediateImpl(String storeType, String value, HSAILAddress addr) {
+        emitString("st_global_" + storeType + " " + value + ", " + mapAddress(addr) + ";");
+    }
+
+    public final void emitStoreImmediate(Kind kind, long src, HSAILAddress addr) {
+        assert (kind != Kind.Float && kind != Kind.Double);
+        storeImmediateImpl(getArgTypeFromKind(kind), Long.toString(src), addr);
+    }
+
+    public final void emitStoreImmediate(float src, HSAILAddress addr) {
+        storeImmediateImpl("f32", Float.toString(src), addr);
+    }
+
+    public final void emitStoreImmediate(double src, HSAILAddress addr) {
+        storeImmediateImpl("f64", Double.toString(src), addr);
+    }
+
     public final void emitSpillLoad(Value dest, Value src) {
         emitString("ld_spill_" + getArgType(dest) + " " + HSAIL.mapRegister(dest) + ", " + mapStackSlot(src, getArgSize(dest)) + ";");
     }
 
+    public final void emitStore(Value src, HSAILAddress addr) {
+        emitString("st_global_" + getArgType(src) + " " + HSAIL.mapRegister(src) + ", " + mapAddress(addr) + ";");
+    }
+
     public final void emitSpillStore(Value src, Value dest) {
         int sizestored = getArgSize(src);
         if (maxDataTypeSize < sizestored) {
@@ -206,7 +233,7 @@
         }
     }
 
-    public static final String getArgType(Value src) {
+    private static String getArgType(Value src) {
         return getArgTypeFromKind(src.getKind());
     }
 
@@ -237,6 +264,9 @@
             case Byte:
                 prefix = "s8";
                 break;
+            case NarrowOop:
+                prefix = "u32";
+                break;
             default:
                 throw GraalInternalError.shouldNotReachHere();
         }
@@ -506,4 +536,8 @@
     public void emitComment(String comment) {
         emitString(comment);
     }
+
+    public void emitStoreRelease(Value src, HSAILAddress address) {
+        emitAddrOp("st_global_rel_u" + getArgSize(src), src, address);
+    }
 }
--- a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotLIRGenerator.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotLIRGenerator.java	Thu Feb 06 14:50:45 2014 +0100
@@ -26,6 +26,8 @@
 import sun.misc.*;
 
 import com.oracle.graal.api.code.*;
+import static com.oracle.graal.api.code.ValueUtil.asConstant;
+import static com.oracle.graal.api.code.ValueUtil.isConstant;
 import com.oracle.graal.api.meta.*;
 import com.oracle.graal.compiler.hsail.*;
 import com.oracle.graal.hotspot.*;
@@ -33,11 +35,12 @@
 import com.oracle.graal.lir.hsail.*;
 import com.oracle.graal.lir.hsail.HSAILControlFlow.*;
 import com.oracle.graal.lir.hsail.HSAILMove.*;
+import com.oracle.graal.phases.util.*;
 import com.oracle.graal.nodes.*;
 import com.oracle.graal.nodes.calc.*;
 import com.oracle.graal.nodes.extended.*;
 import com.oracle.graal.nodes.java.*;
-import com.oracle.graal.phases.util.*;
+import com.oracle.graal.graph.*;
 
 /**
  * The HotSpot specific portion of the HSAIL LIR generator.
@@ -79,6 +82,11 @@
         return access != null && access.isCompressible();
     }
 
+    @Override
+    public boolean canStoreConstant(Constant c, boolean isCompressed) {
+        return true;
+    }
+
     /**
      * Appends either a {@link CompareAndSwapOp} or a {@link CompareAndSwapCompressedOp} depending
      * on whether the memory location of a given {@link LoweredCompareAndSwapNode} contains a
@@ -114,6 +122,13 @@
         setResult(node, nodeResult);
     }
 
+    /**
+     * Returns whether or not the input access should be (de)compressed.
+     */
+    private boolean isCompressedOperation(Kind kind, Access access) {
+        return access != null && access.isCompressible() && ((kind == Kind.Long && config.useCompressedClassPointers) || (kind == Kind.Object && config.useCompressedOops));
+    }
+
     @Override
     public Variable emitLoad(Kind kind, Value address, Access access) {
         HSAILAddressValue loadAddress = asAddressValue(address);
@@ -141,6 +156,23 @@
         if (access instanceof DeoptimizingNode) {
             state = state((DeoptimizingNode) access);
         }
+        boolean isCompressed = isCompressedOperation(kind, access);
+        if (isConstant(inputVal)) {
+            Constant c = asConstant(inputVal);
+            if (canStoreConstant(c, isCompressed)) {
+                if (isCompressed) {
+                    if ((c.getKind() == Kind.Object) && c.isNull()) {
+                        append(new StoreConstantOp(Kind.NarrowOop, storeAddress, c, state));
+                    } else {
+                        throw GraalInternalError.shouldNotReachHere("can't handle: " + access);
+                    }
+                    return;
+                } else {
+                    append(new StoreConstantOp(kind, storeAddress, c, state));
+                    return;
+                }
+            }
+        }
         Variable input = load(inputVal);
         if (isCompressCandidate(access) && config.useCompressedOops && kind == Kind.Object) {
             Variable scratch = newVariable(Kind.Long);
--- a/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotBackend.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotBackend.java	Thu Feb 06 14:50:45 2014 +0100
@@ -141,7 +141,7 @@
     }
 
     /**
-     * Gets the address of {@code gpu::Ptx::execute_kernel_from_vm()}.
+     * Gets the address of {@code Ptx::execute_kernel_from_vm()}.
      */
     private static native long getLaunchKernelAddress();
 
--- a/graal/com.oracle.graal.lir.hsail/src/com/oracle/graal/lir/hsail/HSAILMove.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.graal.lir.hsail/src/com/oracle/graal/lir/hsail/HSAILMove.java	Thu Feb 06 14:50:45 2014 +0100
@@ -200,6 +200,56 @@
         }
     }
 
+    public static class StoreConstantOp extends MemOp {
+
+        protected final Constant input;
+
+        public StoreConstantOp(Kind kind, HSAILAddressValue address, Constant input, LIRFrameState state) {
+            super(kind, address, state);
+            this.input = input;
+        }
+
+        @Override
+        public void emitMemAccess(HSAILAssembler masm) {
+            switch (kind) {
+                case Boolean:
+                case Byte:
+                    masm.emitStoreImmediate(kind, input.asLong() & 0xFF, address.toAddress());
+                    break;
+                case Char:
+                case Short:
+                    masm.emitStoreImmediate(kind, input.asLong() & 0xFFFF, address.toAddress());
+                    break;
+                case Int:
+                case Long:
+                    masm.emitStoreImmediate(kind, input.asLong(), address.toAddress());
+                    break;
+                case Float:
+                    masm.emitStoreImmediate(input.asFloat(), address.toAddress());
+                    break;
+                case Double:
+                    masm.emitStoreImmediate(input.asDouble(), address.toAddress());
+                    break;
+                case Object:
+                    if (input.isNull()) {
+                        masm.emitStoreImmediate(kind, 0L, address.toAddress());
+                    } else {
+                        throw GraalInternalError.shouldNotReachHere("Cannot store 64-bit constants to object ref");
+                    }
+                    break;
+                case NarrowOop:
+                    if (input.isNull()) {
+                        masm.emitStoreImmediate(kind, 0L, address.toAddress());
+                    } else {
+                        throw GraalInternalError.shouldNotReachHere("Cannot store 64-bit constants to object ref");
+                    }
+                    break;
+                default:
+                    throw GraalInternalError.shouldNotReachHere();
+            }
+        }
+    }
+
     public static class LoadCompressedPointer extends LoadOp {
 
         private final long base;
--- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/ConstantNode.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/ConstantNode.java	Thu Feb 06 14:50:45 2014 +0100
@@ -70,7 +70,7 @@
      * Used to measure the impact of ConstantNodes not recording their usages. This and all code
      * predicated on this value being true will be removed at some point.
      */
-    public static final boolean ConstantNodeRecordsUsages = Boolean.getBoolean("graal.constantNodeRecordsUsages");
+    public static final boolean ConstantNodeRecordsUsages = Boolean.parseBoolean(System.getProperty("graal.constantNodeRecordsUsages", "true"));
 
     @Override
     public boolean recordsUsages() {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.truffle.sl.test/tests/ControlFlow.output	Thu Feb 06 14:50:45 2014 +0100
@@ -0,0 +1,1 @@
+1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.truffle.sl.test/tests/ControlFlow.sl	Thu Feb 06 14:50:45 2014 +0100
@@ -0,0 +1,10 @@
+function foo() {}
+function bar() {}
+
+function main() {  
+  foo();
+  if (1 < 2) {
+    bar();
+    return 1;
+  }
+}  
--- a/graal/com.oracle.truffle.sl.test/tests/Fibonacci.output	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl.test/tests/Fibonacci.output	Thu Feb 06 14:50:45 2014 +0100
@@ -1,1 +1,10 @@
-267914296
+1: 1
+2: 1
+3: 2
+4: 3
+5: 5
+6: 8
+7: 13
+8: 21
+9: 34
+10: 55
--- a/graal/com.oracle.truffle.sl.test/tests/Fibonacci.sl	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl.test/tests/Fibonacci.sl	Thu Feb 06 14:50:45 2014 +0100
@@ -13,5 +13,9 @@
 }
 
 function main() {  
-  println(fib(42));
+  i = 1;
+  while (i <= 10) {
+    println(i + ": " + fib(i));
+    i = i + 1;
+  }
 }  
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.truffle.sl.test/tests/FunctionLiteral.output	Thu Feb 06 14:50:45 2014 +0100
@@ -0,0 +1,2 @@
+42
+38
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.truffle.sl.test/tests/FunctionLiteral.sl	Thu Feb 06 14:50:45 2014 +0100
@@ -0,0 +1,16 @@
+function add(a, b) {
+  return a + b;
+}
+
+function sub(a, b) {
+  return a - b;
+}
+
+function foo(f) {
+  println(f(40, 2));
+}
+
+function main() {
+  foo(add);
+  foo(sub);
+}  
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.truffle.sl.test/tests/HelloWorld.output	Thu Feb 06 14:50:45 2014 +0100
@@ -0,0 +1,1 @@
+Hello World!
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.truffle.sl.test/tests/HelloWorld.sl	Thu Feb 06 14:50:45 2014 +0100
@@ -0,0 +1,3 @@
+function main() {  
+  println("Hello World!");  
+}  
--- a/graal/com.oracle.truffle.sl.test/tests/LoopCall.sl	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl.test/tests/LoopCall.sl	Thu Feb 06 14:50:45 2014 +0100
@@ -3,13 +3,18 @@
 }
 
 function loop(n) {
-  i = 0;  
+  i = 0;
   while (i < n) {  
     i = add(i, 1);  
-  }  
+  }
   return i;
-}  
+}
 
 function main() {
+  i = 0;
+  while (i < 20) {
+    loop(1000);
+    i = i + 1;
+  }
   println(loop(1000));  
-}  
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.truffle.sl.test/tests/LoopInvalidate.output	Thu Feb 06 14:50:45 2014 +0100
@@ -0,0 +1,1 @@
+1000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.truffle.sl.test/tests/LoopInvalidate.sl	Thu Feb 06 14:50:45 2014 +0100
@@ -0,0 +1,21 @@
+function add(a, b) {
+  return a + b;
+}
+
+function loop(n) {
+  i = 0;  
+  while (i < n) {  
+    i = add(i, 1);  
+  }  
+  return i;
+}  
+
+function main() {
+  i = 0;
+  while (i < 20) {
+    loop(1000);
+    i = i + 1;
+  }
+  add("a", "b");
+  println(loop(1000));  
+}  
--- a/graal/com.oracle.truffle.sl.test/tests/LoopPolymorphic.sl	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl.test/tests/LoopPolymorphic.sl	Thu Feb 06 14:50:45 2014 +0100
@@ -3,14 +3,20 @@
 }
 
 function loop(n) {
-  i = 0;  
-  while (i < n) {  
-    i = add(i, 1);  
-  }  
+  i = 0;
+  while (i < n) {
+    i = add(i, 1); 
+  }
   return i;
-}  
+}
 
 function main() {
   add("a", "b");
+
+  i = 0;
+  while (i < 20) {
+    loop(1000);
+    i = i + 1;
+  }
   println(loop(1000));  
-}  
+}
--- a/graal/com.oracle.truffle.sl.test/tests/LoopPrint.sl	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl.test/tests/LoopPrint.sl	Thu Feb 06 14:50:45 2014 +0100
@@ -7,5 +7,10 @@
 }  
 
 function main() {
+  i = 0;
+  while (i < 20) {
+    loop(1000);
+    i = i + 1;
+  }
   println(loop(1000));  
 }  
--- a/graal/com.oracle.truffle.sl.test/tests/String.output	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl.test/tests/String.output	Thu Feb 06 14:50:45 2014 +0100
@@ -6,3 +6,5 @@
 nulls
 bars
 foos
+2 < 4: true
+Type error at String.sl line 9 col 36: operation "<" not defined for Number 2, String "4"
--- a/graal/com.oracle.truffle.sl.test/tests/String.sl	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl.test/tests/String.sl	Thu Feb 06 14:50:45 2014 +0100
@@ -5,6 +5,10 @@
   return "bar";
 }
 
+function f(a, b) {
+  return a + " < " + b + ": " + (a < b);
+}
+
 function main() {  
   println("s" + null());  
   println("s" + null);  
@@ -15,4 +19,7 @@
   println(null() + "s");  
   println(foo() + "s");  
   println(foo + "s");
+
+  println(f(2, 4));
+  println(f(2, "4"));
 }  
--- a/graal/com.oracle.truffle.sl.test/tests/Sum.output	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl.test/tests/Sum.output	Thu Feb 06 14:50:45 2014 +0100
@@ -1,1 +1,1 @@
-100000000000
+50005000
--- a/graal/com.oracle.truffle.sl.test/tests/Sum.sl	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl.test/tests/Sum.sl	Thu Feb 06 14:50:45 2014 +0100
@@ -1,8 +1,8 @@
 function main() {  
   i = 0;  
   sum = 0;  
-  while (i < 100000) {  
-    sum = sum + 1000000;  
+  while (i <= 10000) {  
+    sum = sum + i;  
     i = i + 1;  
   }  
   return sum;  
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.truffle.sl.test/tests/error/TypeError03.output	Thu Feb 06 14:50:45 2014 +0100
@@ -0,0 +1,1 @@
+Type error at TypeError03.sl line 2 col 7: operation "&&" not defined for String "4", ANY
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.truffle.sl.test/tests/error/TypeError03.sl	Thu Feb 06 14:50:45 2014 +0100
@@ -0,0 +1,3 @@
+function main() {  
+  "4" && 4;  
+}  
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.truffle.sl.test/tests/error/TypeError04.output	Thu Feb 06 14:50:45 2014 +0100
@@ -0,0 +1,1 @@
+Type error at TypeError04.sl line 2 col 11: operation "||" not defined for Boolean false, Number 4
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.truffle.sl.test/tests/error/TypeError04.sl	Thu Feb 06 14:50:45 2014 +0100
@@ -0,0 +1,3 @@
+function main() {  
+  (1 > 2) || 4;  
+}  
--- a/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/SLMain.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/SLMain.java	Thu Feb 06 14:50:45 2014 +0100
@@ -183,8 +183,10 @@
     }
 
     /**
-     * Dumps the AST of all functions to the IGV visualizer, via a socket connection. IGV can be
-     * started with the mx command "mx igv". Optionally, also prints the ASTs to the console.
+     * When dumpASTToIGV is true: dumps the AST of all functions to the IGV visualizer, via a socket
+     * connection. IGV can be started with the mx command "mx igv".
+     * <p>
+     * When printASTToLog is true: prints the ASTs to the console.
      */
     private static void printScript(String groupName, SLContext context, PrintStream logOutput, boolean printASTToLog, boolean dumpASTToIGV) {
         if (dumpASTToIGV) {
--- a/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/builtins/SLDefineFunctionBuiltin.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/builtins/SLDefineFunctionBuiltin.java	Thu Feb 06 14:50:45 2014 +0100
@@ -22,10 +22,12 @@
  */
 package com.oracle.truffle.sl.builtins;
 
+import com.oracle.truffle.api.CompilerDirectives.SlowPath;
 import com.oracle.truffle.api.*;
 import com.oracle.truffle.api.dsl.*;
 import com.oracle.truffle.api.nodes.*;
 import com.oracle.truffle.sl.parser.*;
+import com.oracle.truffle.sl.runtime.*;
 
 /**
  * Builtin function to define (or redefine) functions. The provided source code is parsed the same
@@ -36,9 +38,14 @@
 
     @Specialization
     public String defineFunction(String code) {
-        Source source = getContext().getSourceManager().get("[defineFunction]", code);
+        return doDefineFunction(getContext(), code);
+    }
+
+    @SlowPath
+    private static String doDefineFunction(SLContext context, String code) {
+        Source source = context.getSourceManager().get("[defineFunction]", code);
         /* The same parsing code as for parsing the initial source. */
-        Parser.parseSL(getContext(), source);
+        Parser.parseSL(context, source);
         return code;
     }
 }
--- a/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/builtins/SLPrintlnBuiltin.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/builtins/SLPrintlnBuiltin.java	Thu Feb 06 14:50:45 2014 +0100
@@ -22,6 +22,9 @@
  */
 package com.oracle.truffle.sl.builtins;
 
+import java.io.*;
+
+import com.oracle.truffle.api.CompilerDirectives.SlowPath;
 import com.oracle.truffle.api.dsl.*;
 import com.oracle.truffle.api.nodes.*;
 import com.oracle.truffle.sl.runtime.*;
@@ -30,31 +33,55 @@
  * Builtin function to write a value to the {@link SLContext#getOutput() standard output}. The
  * different specializations leverage the typed {@code println} methods available in Java, i.e.,
  * primitive values are printed without converting them to a {@link String} first.
+ * <p>
+ * Printing involves a lot of Java code, so we need to tell the optimizing system that it should not
+ * unconditionally inline everything reachable from the println() method. This is done via the
+ * {@link SlowPath} annotations.
  */
 @NodeInfo(shortName = "println")
 public abstract class SLPrintlnBuiltin extends SLBuiltinNode {
 
     @Specialization
     public long println(long value) {
-        getContext().getOutput().println(value);
+        doPrint(getContext().getOutput(), value);
         return value;
     }
 
+    @SlowPath
+    private static void doPrint(PrintStream out, long value) {
+        out.println(value);
+    }
+
     @Specialization
     public boolean println(boolean value) {
-        getContext().getOutput().println(value);
+        doPrint(getContext().getOutput(), value);
         return value;
     }
 
+    @SlowPath
+    private static void doPrint(PrintStream out, boolean value) {
+        out.println(value);
+    }
+
     @Specialization
     public String println(String value) {
-        getContext().getOutput().println(value);
+        doPrint(getContext().getOutput(), value);
         return value;
     }
 
+    @SlowPath
+    private static void doPrint(PrintStream out, String value) {
+        out.println(value);
+    }
+
     @Specialization
     public Object println(Object value) {
-        getContext().getOutput().println(value);
+        doPrint(getContext().getOutput(), value);
         return value;
     }
+
+    @SlowPath
+    private static void doPrint(PrintStream out, Object value) {
+        out.println(value);
+    }
 }
--- a/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/builtins/SLReadlnBuiltin.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/builtins/SLReadlnBuiltin.java	Thu Feb 06 14:50:45 2014 +0100
@@ -24,6 +24,7 @@
 
 import java.io.*;
 
+import com.oracle.truffle.api.CompilerDirectives.SlowPath;
 import com.oracle.truffle.api.dsl.*;
 import com.oracle.truffle.api.nodes.*;
 import com.oracle.truffle.sl.*;
@@ -37,17 +38,22 @@
 
     @Specialization
     public String readln() {
+        String result = doRead(getContext().getInput());
+        if (result == null) {
+            /*
+             * We do not have a sophisticated end of file handling, so returning an empty string is
+             * a reasonable alternative. Note that the Java null value should never be used, since
+             * it can interfere with the specialization logic in generated source code.
+             */
+            result = "";
+        }
+        return result;
+    }
+
+    @SlowPath
+    private static String doRead(BufferedReader in) {
         try {
-            String result = getContext().getInput().readLine();
-            if (result == null) {
-                /*
-                 * We do not have a sophisticated end of file handling, so returning an empty string
-                 * is a reasonable alternative. Note that the Java null value should never be used,
-                 * since it can interfere with the specialization logic in generated source code.
-                 */
-                result = "";
-            }
-            return result;
+            return in.readLine();
         } catch (IOException ex) {
             throw new SLException(ex.getMessage());
         }
--- a/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/SLBinaryNode.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/SLBinaryNode.java	Thu Feb 06 14:50:45 2014 +0100
@@ -26,8 +26,8 @@
 
 /**
  * Utility base class for operations that take two arguments (per convention called "left" and
- * "right). For concrete subclasses of this class, the Truffle DSL creates two child fields, and the
- * necessary constructors and logic to set them.
+ * "right"). For concrete subclasses of this class, the Truffle DSL creates two child fields, and
+ * the necessary constructors and logic to set them.
  */
 @NodeChildren({@NodeChild("leftNode"), @NodeChild("rightNode")})
 public abstract class SLBinaryNode extends SLExpressionNode {
--- a/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/SLExpressionNode.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/SLExpressionNode.java	Thu Feb 06 14:50:45 2014 +0100
@@ -58,10 +58,6 @@
      * subclasses overwrite the appropriate methods.
      */
 
-    public boolean executeBoolean(VirtualFrame frame) throws UnexpectedResultException {
-        return SLTypesGen.SLTYPES.expectBoolean(executeGeneric(frame));
-    }
-
     public long executeLong(VirtualFrame frame) throws UnexpectedResultException {
         return SLTypesGen.SLTYPES.expectLong(executeGeneric(frame));
     }
@@ -70,6 +66,10 @@
         return SLTypesGen.SLTYPES.expectBigInteger(executeGeneric(frame));
     }
 
+    public boolean executeBoolean(VirtualFrame frame) throws UnexpectedResultException {
+        return SLTypesGen.SLTYPES.expectBoolean(executeGeneric(frame));
+    }
+
     public String executeString(VirtualFrame frame) throws UnexpectedResultException {
         return SLTypesGen.SLTYPES.expectString(executeGeneric(frame));
     }
--- a/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/SLTypes.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/SLTypes.java	Thu Feb 06 14:50:45 2014 +0100
@@ -29,10 +29,10 @@
 import com.oracle.truffle.sl.runtime.*;
 
 /**
- * The type system of SL, as explained in {@link SLMain}. Based on the annotation {@link TypeSystem}
- * , the Truffle DSL generates the subclass {@link SLTypesGen} with type test and type conversion
- * methods for all types. In this class, we only cover types where the automatically generated ones
- * would not be sufficient.
+ * The type system of SL, as explained in {@link SLMain}. Based on the {@link TypeSystem}
+ * annotation, the Truffle DSL generates the subclass {@link SLTypesGen} with type test and type
+ * conversion methods for all types. In this class, we only cover types where the automatically
+ * generated ones would not be sufficient.
  */
 @TypeSystem({long.class, BigInteger.class, boolean.class, String.class, SLFunction.class, SLNull.class})
 public abstract class SLTypes {
--- a/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/call/SLCallNode.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/call/SLCallNode.java	Thu Feb 06 14:50:45 2014 +0100
@@ -22,6 +22,7 @@
  */
 package com.oracle.truffle.sl.nodes.call;
 
+import com.oracle.truffle.api.*;
 import com.oracle.truffle.api.dsl.*;
 import com.oracle.truffle.api.frame.*;
 import com.oracle.truffle.api.nodes.*;
@@ -57,6 +58,14 @@
     public Object executeGeneric(VirtualFrame frame) {
         SLFunction function = evaluateFunction(frame);
 
+        /*
+         * The number of arguments is constant for one call node. During compilation, the loop is
+         * unrolled and the execute methods of all arguments are inlined. This is triggered by the
+         * ExplodeLoop annotation on the method. The compiler assertion below illustrates that the
+         * array length is really constant.
+         */
+        CompilerAsserts.compilationConstant(argumentNodes.length);
+
         Object[] argumentValues = new Object[argumentNodes.length];
         for (int i = 0; i < argumentNodes.length; i++) {
             argumentValues[i] = argumentNodes[i].executeGeneric(frame);
--- a/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/controlflow/SLBlockNode.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/controlflow/SLBlockNode.java	Thu Feb 06 14:50:45 2014 +0100
@@ -22,6 +22,7 @@
  */
 package com.oracle.truffle.sl.nodes.controlflow;
 
+import com.oracle.truffle.api.*;
 import com.oracle.truffle.api.frame.*;
 import com.oracle.truffle.api.nodes.*;
 import com.oracle.truffle.sl.nodes.*;
@@ -30,7 +31,7 @@
  * A statement node that just executes a list of other statements.
  */
 @NodeInfo(shortName = "block")
-public class SLBlockNode extends SLStatementNode {
+public final class SLBlockNode extends SLStatementNode {
 
     /**
      * The array of child nodes. The annotation {@link com.oracle.truffle.api.nodes.Node.Children
@@ -55,6 +56,12 @@
     @Override
     @ExplodeLoop
     public void executeVoid(VirtualFrame frame) {
+        /*
+         * This assertion illustrates that the array length is really a constant during
+         * compilation.
+         */
+        CompilerAsserts.compilationConstant(bodyNodes.length);
+
         for (SLStatementNode statement : bodyNodes) {
             statement.executeVoid(frame);
         }
--- a/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/controlflow/SLFunctionBodyNode.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/controlflow/SLFunctionBodyNode.java	Thu Feb 06 14:50:45 2014 +0100
@@ -36,7 +36,7 @@
  * {@link SLNull#SINGLETON default null value}.
  */
 @NodeInfo(shortName = "body")
-public class SLFunctionBodyNode extends SLExpressionNode {
+public final class SLFunctionBodyNode extends SLExpressionNode {
 
     /** The body of the function. */
     @Child private SLStatementNode bodyNode;
--- a/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/controlflow/SLIfNode.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/controlflow/SLIfNode.java	Thu Feb 06 14:50:45 2014 +0100
@@ -29,7 +29,7 @@
 import com.oracle.truffle.sl.nodes.*;
 
 @NodeInfo(shortName = "if")
-public class SLIfNode extends SLStatementNode {
+public final class SLIfNode extends SLStatementNode {
 
     /**
      * The condition of the {@code if}. This is a {@link SLExpressionNode} because we require a
--- a/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/controlflow/SLReturnNode.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/controlflow/SLReturnNode.java	Thu Feb 06 14:50:45 2014 +0100
@@ -35,7 +35,7 @@
  * the return value.
  */
 @NodeInfo(shortName = "return")
-public class SLReturnNode extends SLStatementNode {
+public final class SLReturnNode extends SLStatementNode {
 
     @Child private SLExpressionNode valueNode;
 
--- a/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/controlflow/SLWhileNode.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/controlflow/SLWhileNode.java	Thu Feb 06 14:50:45 2014 +0100
@@ -30,7 +30,7 @@
 import com.oracle.truffle.sl.nodes.*;
 
 @NodeInfo(shortName = "while")
-public class SLWhileNode extends SLStatementNode {
+public final class SLWhileNode extends SLStatementNode {
 
     /**
      * The condition of the loop. This is a {@link SLExpressionNode} because we require a result
--- a/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/expression/SLAddNode.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/expression/SLAddNode.java	Thu Feb 06 14:50:45 2014 +0100
@@ -78,16 +78,6 @@
     }
 
     /**
-     * Specialization for String concatenation. This specialization is not strictly necessary, since
-     * {@link #add(Object, Object)} covers this case too. But it leads to slightly better code,
-     * since we do not require the {@link Object#toString()} calls in this specialization.
-     */
-    @Specialization
-    protected String add(String left, String right) {
-        return left + right;
-    }
-
-    /**
      * Specialization for String concatenation. The SL specification says that String concatenation
      * works if either the left or the right operand is a String. The non-string operand is
      * then automatically converted to a String.
--- a/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/expression/SLDivNode.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/expression/SLDivNode.java	Thu Feb 06 14:50:45 2014 +0100
@@ -29,13 +29,16 @@
 import com.oracle.truffle.sl.nodes.*;
 
 /**
- * This class is similar to the extensively documented {@link SLAddNode}.
+ * This class is similar to the extensively documented {@link SLAddNode}. Divisions by 0 throw the
+ * same {@link ArithmeticException exception} as in Java; SL has no special handling for it to keep
+ * the code simple.
  */
 @NodeInfo(shortName = "/")
 public abstract class SLDivNode extends SLBinaryNode {
 
-    @Specialization(rewriteOn = ArithmeticException.class)
+    @Specialization
     protected long div(long left, long right) {
+        /* Only Long.MIN_VALUE / -1 overflows; Java silently returns Long.MIN_VALUE for it. */
         return left / right;
     }
 
--- a/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/expression/SLEqualNode.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/expression/SLEqualNode.java	Thu Feb 06 14:50:45 2014 +0100
@@ -30,7 +30,7 @@
 import com.oracle.truffle.sl.runtime.*;
 
 /**
- * The {@code ==} operator of SL is defined on all types. Therefore, we need a (@link
+ * The {@code ==} operator of SL is defined on all types. Therefore, we need a
  * {@link #equal(Object, Object) generic implementation} that can handle all possible types. But
  * since {@code ==} can only return {@code true} when the type of the left and right operand are the
  * same, the specializations already cover all possible cases that can return {@code true} and the
@@ -78,7 +78,7 @@
     }
 
     /**
-     * The {@link Generic} annotation informs that Truffle DSL that this method should be executed
+     * The {@link Generic} annotation informs the Truffle DSL that this method should be executed
      * when no {@link Specialization specialized method} matches. The operand types must be
      * {@link Object}.
      */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/expression/demo/SLAddWithoutSpecializationNode.java	Thu Feb 06 14:50:45 2014 +0100
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2014, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.truffle.sl.nodes.expression.demo;
+
+import java.math.*;
+
+import com.oracle.truffle.api.*;
+import com.oracle.truffle.api.dsl.*;
+import com.oracle.truffle.api.frame.*;
+import com.oracle.truffle.api.nodes.*;
+import com.oracle.truffle.sl.nodes.*;
+import com.oracle.truffle.sl.nodes.expression.*;
+
+/**
+ * This is an example of how the add operation would be implemented without specializations and
+ * without the Truffle DSL. Do not write such code in your language! See {@link SLAddNode} for how
+ * the add operation is implemented correctly.
+ */
+public class SLAddWithoutSpecializationNode extends SLExpressionNode {
+
+    @Child private SLExpressionNode leftNode;
+    @Child private SLExpressionNode rightNode;
+
+    public SLAddWithoutSpecializationNode(SLExpressionNode leftNode, SLExpressionNode rightNode) {
+        this.leftNode = adoptChild(leftNode);
+        this.rightNode = adoptChild(rightNode);
+    }
+
+    @Override
+    public Object executeGeneric(VirtualFrame frame) {
+        /* Evaluate both child nodes up front; all checks below work on the resulting values. */
+        Object left = leftNode.executeGeneric(frame);
+        Object right = rightNode.executeGeneric(frame);
+
+        if (left instanceof Long && right instanceof Long) {
+            /* Fast path of the arbitrary-precision arithmetic. We need to check for overflows. */
+            try {
+                return ExactMath.addExact((Long) left, (Long) right);
+            } catch (ArithmeticException ex) {
+                /* The exact addition failed: fall through to the BigInteger case. */
+            }
+        }
+
+        /* Implicit type conversions: widen any Long operand to BigInteger. */
+        if (left instanceof Long) {
+            left = BigInteger.valueOf((Long) left);
+        }
+        if (right instanceof Long) {
+            right = BigInteger.valueOf((Long) right);
+        }
+        if (left instanceof BigInteger && right instanceof BigInteger) {
+            /* Slow path of the arbitrary-precision arithmetic. */
+            return ((BigInteger) left).add((BigInteger) right);
+        }
+
+        /* String concatenation if either the left or the right operand is a String. */
+        if (left instanceof String || right instanceof String) {
+            return left.toString() + right.toString();
+        }
+
+        /* Type error: neither an integer addition nor a String concatenation applies. */
+        throw new UnsupportedSpecializationException(this, new Node[]{leftNode, rightNode}, new Object[]{left, right});
+    }
+}
--- a/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/local/SLReadArgumentNode.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/local/SLReadArgumentNode.java	Thu Feb 06 14:50:45 2014 +0100
@@ -32,10 +32,10 @@
 /**
  * Reads a function argument. Arguments are passed in as a {@link SLArguments} object, which
  * encapsulates an {@link SLArguments#getFromFrame Object[] array}. Language-defined subclasses of
- * {@link Arguments} are the standard Truffle way to pass values between function.
+ * {@link Arguments} are the standard Truffle way to pass values between functions.
  * <p>
  * Arguments are not type-specialized. To ensure that repeated accesses within a method are
- * specialized and can, e.g., accessed without unboxing, all arguments are loaded into local
+ * specialized and can, e.g., be accessed without unboxing, all arguments are loaded into local
  * variables {@link SLNodeFactory#addFormalParameter in the method prologue}.
  */
 public class SLReadArgumentNode extends SLExpressionNode {
--- a/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/local/SLReadLocalVariableNode.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/local/SLReadLocalVariableNode.java	Thu Feb 06 14:50:45 2014 +0100
@@ -33,7 +33,6 @@
  * variable changes its type, the frame access method throws an {@link FrameSlotTypeException},
  * which causes not rewriting. The rewriting code is generated by the Truffle DSL.
  */
-@PolymorphicLimit(1)
 @NodeField(name = "slot", type = FrameSlot.class)
 public abstract class SLReadLocalVariableNode extends SLExpressionNode {
 
@@ -53,13 +52,17 @@
         return frame.getBoolean(getSlot());
     }
 
-    @Specialization(rewriteOn = {FrameSlotTypeException.class})
+    @Specialization(order = 1, rewriteOn = {FrameSlotTypeException.class})
     protected Object readObject(VirtualFrame frame) throws FrameSlotTypeException {
         return frame.getObject(getSlot());
     }
 
-    @Generic
-    protected Object readGeneric(VirtualFrame frame) {
+    /**
+     * This is the generic case that always succeeds. Since we already have another specialization
+     * with the same signature above, we need to order them explicitly with the order attribute.
+     */
+    @Specialization(order = 2)
+    protected Object read(VirtualFrame frame) {
         return frame.getValue(getSlot());
     }
 }
--- a/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/local/SLWriteLocalVariableNode.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/nodes/local/SLWriteLocalVariableNode.java	Thu Feb 06 14:50:45 2014 +0100
@@ -42,7 +42,7 @@
     protected abstract FrameSlot getSlot();
 
     /**
-     * Specialized method to write a primitive {@code long} value}. This is only possible if the
+     * Specialized method to write a primitive {@code long} value. This is only possible if the
      * local variable also has currently the type {@code long}, therefore a Truffle DSL
      * {@link #isLongKind() custom guard} is specified.
      */
--- a/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/parser/SimpleLanguage.atg	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/parser/SimpleLanguage.atg	Thu Feb 06 14:50:45 2014 +0100
@@ -21,6 +21,12 @@
  * questions.
  */
 
+/*
+ * This is the grammar of SL that is used to automatically generate the Parser.java and Scanner.java
+ * files. You can download the parser generator Coco/R from http://ssw.jku.at/coco/. Then run
+ * "java -jar Coco.jar SimpleLanguage.atg"
+ */
+
 COMPILER SimpleLanguage
 
 CHARACTERS
--- a/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/runtime/SLContext.java	Thu Feb 06 14:50:36 2014 +0100
+++ b/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/runtime/SLContext.java	Thu Feb 06 14:50:45 2014 +0100
@@ -39,9 +39,9 @@
  * context is used during {@link SLNodeFactory parsing} and by {@link SLBuiltinNode#getContext()
  * builtin functions}.
  * <p>
- * It would be an error to have two different context instances at the same. From a software
- * engineering point of view, it is better to pass around this encapsulated context object instead
- * of storing the data in static Java fields.
+ * It would be an error to have two different context instances during the execution of one script.
+ * However, if two separate scripts run in one Java VM at the same time, they have different
+ * contexts. Therefore, the context is not a singleton.
  */
 public final class SLContext {
     private final SourceManager sourceManager;
--- a/mx/projects	Thu Feb 06 14:50:36 2014 +0100
+++ b/mx/projects	Thu Feb 06 14:50:45 2014 +0100
@@ -24,15 +24,15 @@
 library@DACAPO_SCALA@path=lib/dacapo-scala-0.1.0-20120216.jar
 library@DACAPO_SCALA@urls=http://repo.scalabench.org/snapshots/org/scalabench/benchmarks/scala-benchmark-suite/0.1.0-SNAPSHOT/scala-benchmark-suite-0.1.0-20120216.103539-3.jar
 
-library@OKRA@path=lib/okra-1.6.jar
-library@OKRA@urls=http://cr.openjdk.java.net/~tdeneau/okra-1.6.jar
-library@OKRA@sourcePath=lib/okra-1.6-src.jar
-library@OKRA@sourceUrls=http://cr.openjdk.java.net/~tdeneau/okra-1.6-src.jar
+library@OKRA@path=lib/okra-1.7.jar
+library@OKRA@urls=http://cr.openjdk.java.net/~tdeneau/okra-1.7.jar
+library@OKRA@sourcePath=lib/okra-1.7-src.jar
+library@OKRA@sourceUrls=http://cr.openjdk.java.net/~tdeneau/okra-1.7-src.jar
 
-library@OKRA_WITH_SIM@path=lib/okra-1.6-with-sim.jar
-library@OKRA_WITH_SIM@urls=http://cr.openjdk.java.net/~tdeneau/okra-1.6-with-sim.jar
-library@OKRA_WITH_SIM@sourcePath=lib/okra-1.6-with-sim-src.jar
-library@OKRA_WITH_SIM@sourceUrls=http://cr.openjdk.java.net/~tdeneau/okra-1.6-with-sim-src.jar
+library@OKRA_WITH_SIM@path=lib/okra-1.7-with-sim.jar
+library@OKRA_WITH_SIM@urls=http://cr.openjdk.java.net/~tdeneau/okra-1.7-with-sim.jar
+library@OKRA_WITH_SIM@sourcePath=lib/okra-1.7-with-sim-src.jar
+library@OKRA_WITH_SIM@sourceUrls=http://cr.openjdk.java.net/~tdeneau/okra-1.7-with-sim-src.jar
 
 library@JRUBYPARSER@path=lib/jrubyparser-0.5.0.jar
 library@JRUBYPARSER@urls=http://repo1.maven.org/maven2/org/jruby/jrubyparser/0.5.0/jrubyparser-0.5.0.jar
--- a/src/gpu/hsail/vm/gpu_hsail.cpp	Thu Feb 06 14:50:36 2014 +0100
+++ b/src/gpu/hsail/vm/gpu_hsail.cpp	Thu Feb 06 14:50:45 2014 +0100
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "runtime/javaCalls.hpp"
 #include "runtime/gpu.hpp"
+#include "hsail/vm/gpu_hsail.hpp"
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/ostream.hpp"
 #include "memory/allocation.hpp"
@@ -55,29 +56,29 @@
 
 //  public native void executeKernel(HotSpotNmethod kernel, int jobSize, int i, int j, Object[] args) throws InvalidInstalledCodeException;
 
-JNINativeMethod gpu::Hsail::HSAIL_methods[] = {
-  {CC"initialize",       CC"()Z",                               FN_PTR(gpu::Hsail::initialize)},
-  {CC"generateKernel",   CC"([B" STRING ")J",                   FN_PTR(gpu::Hsail::generate_kernel)},
-  {CC"executeKernel0",   CC"("HS_INSTALLED_CODE"I["OBJECT")Z",  FN_PTR(gpu::Hsail::execute_kernel_void_1d)},
+JNINativeMethod Hsail::HSAIL_methods[] = {
+  {CC"initialize",       CC"()Z",                               FN_PTR(Hsail::initialize)},
+  {CC"generateKernel",   CC"([B" STRING ")J",                   FN_PTR(Hsail::generate_kernel)},
+  {CC"executeKernel0",   CC"("HS_INSTALLED_CODE"I["OBJECT")Z",  FN_PTR(Hsail::execute_kernel_void_1d)},
 };
 
-void * gpu::Hsail::_device_context = NULL;
+void * Hsail::_device_context = NULL;
 
-gpu::Hsail::okra_create_context_func_t  gpu::Hsail::_okra_create_context;
-gpu::Hsail::okra_create_kernel_func_t   gpu::Hsail::_okra_create_kernel;
-gpu::Hsail::okra_push_object_func_t     gpu::Hsail::_okra_push_object;
-gpu::Hsail::okra_push_boolean_func_t    gpu::Hsail::_okra_push_boolean;
-gpu::Hsail::okra_push_byte_func_t       gpu::Hsail::_okra_push_byte;
-gpu::Hsail::okra_push_double_func_t     gpu::Hsail::_okra_push_double;
-gpu::Hsail::okra_push_float_func_t      gpu::Hsail::_okra_push_float;
-gpu::Hsail::okra_push_int_func_t        gpu::Hsail::_okra_push_int;
-gpu::Hsail::okra_push_long_func_t       gpu::Hsail::_okra_push_long;
-gpu::Hsail::okra_execute_with_range_func_t    gpu::Hsail::_okra_execute_with_range;
-gpu::Hsail::okra_clearargs_func_t       gpu::Hsail::_okra_clearargs;
-gpu::Hsail::okra_register_heap_func_t   gpu::Hsail::_okra_register_heap;
+Hsail::okra_create_context_func_t  Hsail::_okra_create_context;
+Hsail::okra_create_kernel_func_t   Hsail::_okra_create_kernel;
+Hsail::okra_push_object_func_t     Hsail::_okra_push_object;
+Hsail::okra_push_boolean_func_t    Hsail::_okra_push_boolean;
+Hsail::okra_push_byte_func_t       Hsail::_okra_push_byte;
+Hsail::okra_push_double_func_t     Hsail::_okra_push_double;
+Hsail::okra_push_float_func_t      Hsail::_okra_push_float;
+Hsail::okra_push_int_func_t        Hsail::_okra_push_int;
+Hsail::okra_push_long_func_t       Hsail::_okra_push_long;
+Hsail::okra_execute_with_range_func_t    Hsail::_okra_execute_with_range;
+Hsail::okra_clearargs_func_t       Hsail::_okra_clearargs;
+Hsail::okra_register_heap_func_t   Hsail::_okra_register_heap;
 
 
-void gpu::Hsail::register_heap() {
+void Hsail::register_heap() {
   // After the okra functions are set up and the heap is initialized, register the java heap with HSA
   guarantee(Universe::heap() != NULL, "heap should be there by now.");
   if (TraceGPUInteraction) {
@@ -87,7 +88,7 @@
   _okra_register_heap(Universe::heap()->base(), Universe::heap()->capacity());
 }
 
-GPU_VMENTRY(jboolean, gpu::Hsail::execute_kernel_void_1d, (JNIEnv* env, jclass, jobject kernel_handle, jint dimX, jobject args_handle))
+GPU_VMENTRY(jboolean, Hsail::execute_kernel_void_1d, (JNIEnv* env, jclass, jobject kernel_handle, jint dimX, jobject args_handle))
 
   ResourceMark rm;
   jlong nmethodValue = HotSpotInstalledCode::codeBlob(kernel_handle);
@@ -115,7 +116,7 @@
   return _okra_execute_with_range(kernel, dimX);
 GPU_END
 
-GPU_ENTRY(jlong, gpu::Hsail::generate_kernel, (JNIEnv *env, jclass, jbyteArray code_handle, jstring name_handle))
+GPU_ENTRY(jlong, Hsail::generate_kernel, (JNIEnv *env, jclass, jbyteArray code_handle, jstring name_handle))
   guarantee(_okra_create_kernel != NULL, "[HSAIL] Okra not linked");
   ResourceMark rm;
   jsize name_len = env->GetStringLength(name_handle);
@@ -158,7 +159,7 @@
         return false; \
   } \
 
-GPU_ENTRY(jboolean, gpu::Hsail::initialize, (JNIEnv *env, jclass))
+GPU_ENTRY(jboolean, Hsail::initialize, (JNIEnv *env, jclass))
   if (okra_library_name == NULL) {
     if (TraceGPUInteraction) {
       tty->print_cr("Unsupported HSAIL platform");
@@ -211,7 +212,7 @@
   return true;
 GPU_END
 
-bool gpu::Hsail::register_natives(JNIEnv* env) {
+bool Hsail::register_natives(JNIEnv* env) {
   jclass klass = env->FindClass("com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend");
   if (klass == NULL) {
     if (TraceGPUInteraction) {
--- a/src/gpu/hsail/vm/hsailKernelArguments.cpp	Thu Feb 06 14:50:36 2014 +0100
+++ b/src/gpu/hsail/vm/hsailKernelArguments.cpp	Thu Feb 06 14:50:45 2014 +0100
@@ -46,7 +46,7 @@
   jvalue jValue;
   java_lang_boxing_object::get_value(arg, &jValue);
   
-  bool pushed = gpu::Hsail::_okra_push_boolean(_kernel, jValue.z);
+  bool pushed = Hsail::_okra_push_boolean(_kernel, jValue.z);
   assert(pushed == true, "arg push failed");
 }
 
@@ -58,7 +58,7 @@
   jvalue jValue;
   java_lang_boxing_object::get_value(arg, &jValue);
   
-  bool pushed = gpu::Hsail::_okra_push_byte(_kernel, jValue.b);
+  bool pushed = Hsail::_okra_push_byte(_kernel, jValue.b);
   assert(pushed == true, "arg push failed");
 }
 
@@ -72,7 +72,7 @@
   if (TraceGPUInteraction) {
     tty->print_cr("[HSAIL] HSAILKernelArguments::double value = %e", jValue.d);
   }  
-  bool pushed = gpu::Hsail::_okra_push_double(_kernel, jValue.d);
+  bool pushed = Hsail::_okra_push_double(_kernel, jValue.d);
   assert(pushed == true, "arg push failed");
 }
 
@@ -86,7 +86,7 @@
   if (TraceGPUInteraction) {
     tty->print_cr("[HSAIL] HSAILKernelArguments::float value = %f", jValue.f);
   }    
-  bool pushed = gpu::Hsail::_okra_push_float(_kernel, jValue.f);
+  bool pushed = Hsail::_okra_push_float(_kernel, jValue.f);
   assert(pushed == true, "float push failed");
 }
 
@@ -107,7 +107,7 @@
   jvalue jValue;
   java_lang_boxing_object::get_value(arg, &jValue);
   
-  bool pushed = gpu::Hsail::_okra_push_int(_kernel, jValue.i);
+  bool pushed = Hsail::_okra_push_int(_kernel, jValue.i);
   assert(pushed == true, "arg push failed");
 }
 
@@ -119,7 +119,7 @@
   jvalue jValue;
   java_lang_boxing_object::get_value(arg, &jValue);
   
-  bool pushed = gpu::Hsail::_okra_push_long(_kernel, jValue.j);
+  bool pushed = Hsail::_okra_push_long(_kernel, jValue.j);
   assert(pushed == true, "arg push failed");  
 }
 
@@ -130,7 +130,7 @@
     tty->print_cr("[HSAIL] HSAILKernelArguments::do_array 0x%08x, is a %s", (address) arg, arg->klass()->external_name());
   }
     
-  bool pushed = gpu::Hsail::_okra_push_object(_kernel, arg);
+  bool pushed = Hsail::_okra_push_object(_kernel, arg);
   assert(pushed == true, "arg push failed");  
 }
 
@@ -153,7 +153,7 @@
     tty->print_cr("[HSAIL] HSAILKernelArguments::do_object, 0x%08x is a %s", (address) arg, arg->klass()->external_name());
   }
     
-  bool pushed = gpu::Hsail::_okra_push_object(_kernel, arg);
+  bool pushed = Hsail::_okra_push_object(_kernel, arg);
   assert(pushed == true, "arg push failed");  
 }
 
--- a/src/gpu/hsail/vm/hsailKernelArguments.hpp	Thu Feb 06 14:50:36 2014 +0100
+++ b/src/gpu/hsail/vm/hsailKernelArguments.hpp	Thu Feb 06 14:50:45 2014 +0100
@@ -26,10 +26,11 @@
 #define KERNEL_ARGUMENTS_HSAIL_HPP
 
 #include "runtime/gpu.hpp"
+#include "hsail/vm/gpu_hsail.hpp"
 #include "runtime/signature.hpp"
 
 class HSAILKernelArguments : public SignatureIterator {
-  friend class gpu::Hsail;
+  friend class Hsail;
 
 public:
 
@@ -71,7 +72,7 @@
       if (TraceGPUInteraction) {
         tty->print_cr("[HSAIL] instance method, this 0x%08x, is a %s", (address) arg, arg->klass()->external_name());
       }
-      bool pushed = gpu::Hsail::_okra_push_object(kernel, arg);
+      bool pushed = Hsail::_okra_push_object(kernel, arg);
       assert(pushed == true, "'this' push failed");
     } else {
       if (TraceGPUInteraction) {
--- a/src/gpu/ptx/vm/gpu_ptx.cpp	Thu Feb 06 14:50:36 2014 +0100
+++ b/src/gpu/ptx/vm/gpu_ptx.cpp	Thu Feb 06 14:50:45 2014 +0100
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "runtime/javaCalls.hpp"
 #include "runtime/gpu.hpp"
+#include "ptx/vm/gpu_ptx.hpp"
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/ostream.hpp"
 #include "memory/allocation.hpp"
@@ -47,12 +48,14 @@
 // Entry to GPU native method implementation that transitions current thread to '_thread_in_vm'.
 #define GPU_VMENTRY(result_type, name, signature) \
   JNIEXPORT result_type JNICALL name signature { \
+  if (TraceGPUInteraction) tty->print_cr("[CUDA] Ptx::" #name); \
   GRAAL_VM_ENTRY_MARK; \
 
 // Entry to GPU native method implementation that calls a JNI function
 // and hence cannot transition current thread to '_thread_in_vm'.
 #define GPU_ENTRY(result_type, name, signature) \
   JNIEXPORT result_type JNICALL name signature { \
+  if (TraceGPUInteraction) tty->print_cr("[CUDA] Ptx::" #name); \
 
 #define GPU_END }
 
@@ -61,37 +64,37 @@
 
 #define STRING                "Ljava/lang/String;"
 
-JNINativeMethod gpu::Ptx::PTX_methods[] = {
-  {CC"initialize",              CC"()Z",               FN_PTR(gpu::Ptx::initialize)},
-  {CC"generateKernel",          CC"([B" STRING ")J",   FN_PTR(gpu::Ptx::generate_kernel)},
-  {CC"getLaunchKernelAddress",  CC"()J",               FN_PTR(gpu::Ptx::get_execute_kernel_from_vm_address)},
-  {CC"getAvailableProcessors0", CC"()I",               FN_PTR(gpu::Ptx::get_total_cores)},
+JNINativeMethod Ptx::PTX_methods[] = {
+  {CC"initialize",              CC"()Z",               FN_PTR(Ptx::initialize)},
+  {CC"generateKernel",          CC"([B" STRING ")J",   FN_PTR(Ptx::generate_kernel)},
+  {CC"getLaunchKernelAddress",  CC"()J",               FN_PTR(Ptx::get_execute_kernel_from_vm_address)},
+  {CC"getAvailableProcessors0", CC"()I",               FN_PTR(Ptx::get_total_cores)},
 };
 
-void * gpu::Ptx::_device_context;
-int    gpu::Ptx::_cu_device = 0;
+void * Ptx::_device_context;
+int    Ptx::_cu_device = 0;
 
-gpu::Ptx::cuda_cu_init_func_t gpu::Ptx::_cuda_cu_init;
-gpu::Ptx::cuda_cu_ctx_create_func_t gpu::Ptx::_cuda_cu_ctx_create;
-gpu::Ptx::cuda_cu_ctx_destroy_func_t gpu::Ptx::_cuda_cu_ctx_destroy;
-gpu::Ptx::cuda_cu_ctx_synchronize_func_t gpu::Ptx::_cuda_cu_ctx_synchronize;
-gpu::Ptx::cuda_cu_ctx_get_current_func_t gpu::Ptx::_cuda_cu_ctx_get_current;
-gpu::Ptx::cuda_cu_ctx_set_current_func_t gpu::Ptx::_cuda_cu_ctx_set_current;
-gpu::Ptx::cuda_cu_device_get_count_func_t gpu::Ptx::_cuda_cu_device_get_count;
-gpu::Ptx::cuda_cu_device_get_name_func_t gpu::Ptx::_cuda_cu_device_get_name;
-gpu::Ptx::cuda_cu_device_get_func_t gpu::Ptx::_cuda_cu_device_get;
-gpu::Ptx::cuda_cu_device_compute_capability_func_t gpu::Ptx::_cuda_cu_device_compute_capability;
-gpu::Ptx::cuda_cu_device_get_attribute_func_t gpu::Ptx::_cuda_cu_device_get_attribute;
-gpu::Ptx::cuda_cu_launch_kernel_func_t gpu::Ptx::_cuda_cu_launch_kernel;
-gpu::Ptx::cuda_cu_module_get_function_func_t gpu::Ptx::_cuda_cu_module_get_function;
-gpu::Ptx::cuda_cu_module_load_data_ex_func_t gpu::Ptx::_cuda_cu_module_load_data_ex;
-gpu::Ptx::cuda_cu_memcpy_htod_func_t gpu::Ptx::_cuda_cu_memcpy_htod;
-gpu::Ptx::cuda_cu_memcpy_dtoh_func_t gpu::Ptx::_cuda_cu_memcpy_dtoh;
-gpu::Ptx::cuda_cu_memalloc_func_t gpu::Ptx::_cuda_cu_memalloc;
-gpu::Ptx::cuda_cu_memfree_func_t gpu::Ptx::_cuda_cu_memfree;
-gpu::Ptx::cuda_cu_mem_host_register_func_t gpu::Ptx::_cuda_cu_mem_host_register;
-gpu::Ptx::cuda_cu_mem_host_get_device_pointer_func_t gpu::Ptx::_cuda_cu_mem_host_get_device_pointer;
-gpu::Ptx::cuda_cu_mem_host_unregister_func_t gpu::Ptx::_cuda_cu_mem_host_unregister;
+Ptx::cuda_cu_init_func_t Ptx::_cuda_cu_init;
+Ptx::cuda_cu_ctx_create_func_t Ptx::_cuda_cu_ctx_create;
+Ptx::cuda_cu_ctx_destroy_func_t Ptx::_cuda_cu_ctx_destroy;
+Ptx::cuda_cu_ctx_synchronize_func_t Ptx::_cuda_cu_ctx_synchronize;
+Ptx::cuda_cu_ctx_get_current_func_t Ptx::_cuda_cu_ctx_get_current;
+Ptx::cuda_cu_ctx_set_current_func_t Ptx::_cuda_cu_ctx_set_current;
+Ptx::cuda_cu_device_get_count_func_t Ptx::_cuda_cu_device_get_count;
+Ptx::cuda_cu_device_get_name_func_t Ptx::_cuda_cu_device_get_name;
+Ptx::cuda_cu_device_get_func_t Ptx::_cuda_cu_device_get;
+Ptx::cuda_cu_device_compute_capability_func_t Ptx::_cuda_cu_device_compute_capability;
+Ptx::cuda_cu_device_get_attribute_func_t Ptx::_cuda_cu_device_get_attribute;
+Ptx::cuda_cu_launch_kernel_func_t Ptx::_cuda_cu_launch_kernel;
+Ptx::cuda_cu_module_get_function_func_t Ptx::_cuda_cu_module_get_function;
+Ptx::cuda_cu_module_load_data_ex_func_t Ptx::_cuda_cu_module_load_data_ex;
+Ptx::cuda_cu_memcpy_htod_func_t Ptx::_cuda_cu_memcpy_htod;
+Ptx::cuda_cu_memcpy_dtoh_func_t Ptx::_cuda_cu_memcpy_dtoh;
+Ptx::cuda_cu_memalloc_func_t Ptx::_cuda_cu_memalloc;
+Ptx::cuda_cu_memfree_func_t Ptx::_cuda_cu_memfree;
+Ptx::cuda_cu_mem_host_register_func_t Ptx::_cuda_cu_mem_host_register;
+Ptx::cuda_cu_mem_host_get_device_pointer_func_t Ptx::_cuda_cu_mem_host_get_device_pointer;
+Ptx::cuda_cu_mem_host_unregister_func_t Ptx::_cuda_cu_mem_host_unregister;
 
 #define STRINGIFY(x)     #x
 
@@ -108,7 +111,7 @@
 /*
  * see http://en.wikipedia.org/wiki/CUDA#Supported_GPUs
  */
-int gpu::Ptx::ncores(int major, int minor) {
+int Ptx::ncores(int major, int minor) {
     int device_type = (major << 4) + minor;
 
     switch (device_type) {
@@ -126,7 +129,7 @@
     }
 }
 
-bool gpu::Ptx::register_natives(JNIEnv* env) {
+bool Ptx::register_natives(JNIEnv* env) {
   jclass klass = env->FindClass("com/oracle/graal/hotspot/ptx/PTXHotSpotBackend");
   if (klass == NULL) {
     if (TraceGPUInteraction) {
@@ -136,7 +139,7 @@
   }
   jint status = env->RegisterNatives(klass, PTX_methods, sizeof(PTX_methods) / sizeof(JNINativeMethod));
   if (status != JNI_OK) {
-    if (TraceGPUInteraction) {
+    if (true || TraceGPUInteraction) {
       tty->print_cr("Error registering natives for PTXHotSpotBackend: %d", status);
     }
     return false;
@@ -144,7 +147,7 @@
   return true;
 }
 
-GPU_ENTRY(jboolean, gpu::Ptx::initialize, (JNIEnv *env, jclass))
+GPU_ENTRY(jboolean, Ptx::initialize, (JNIEnv *env, jclass))
 
   if (!link()) {
     return false;
@@ -255,7 +258,7 @@
   return true;
 GPU_END
 
-GPU_ENTRY(jint, gpu::Ptx::get_total_cores, (JNIEnv *env, jobject))
+GPU_ENTRY(jint, Ptx::get_total_cores, (JNIEnv *env, jobject))
 
     int minor, major, nmp;
     int status = _cuda_cu_device_get_attribute(&minor,
@@ -342,7 +345,7 @@
     return total;
 GPU_END
 
-GPU_ENTRY(jlong, gpu::Ptx::generate_kernel, (JNIEnv *env, jclass, jbyteArray code_handle, jstring name_handle))
+GPU_ENTRY(jlong, Ptx::generate_kernel, (JNIEnv *env, jclass, jbyteArray code_handle, jstring name_handle))
   ResourceMark rm;
   jsize name_len = env->GetStringLength(name_handle);
   jsize code_len = env->GetArrayLength(code_handle);
@@ -440,7 +443,7 @@
   int          _buffer_size;   // size (in bytes) of _buffer
   oop*         _pinned;        // objects that have been pinned with cuMemHostRegister
   int          _pinned_length; // length of _pinned
-  gpu::Ptx::CUdeviceptr  _ret_value;     // pointer to slot in GPU memory holding the return value
+  Ptx::CUdeviceptr  _ret_value;     // pointer to slot in GPU memory holding the return value
   int          _ret_type_size; // size of the return type value
   bool         _ret_is_object; // specifies if the return type is Object
   bool         _gc_locked;     // denotes when execution has locked GC
@@ -474,8 +477,8 @@
 
   void alloc_return_value() {
     if (_ret_type_size != 0) {
-      if (check(gpu::Ptx::_cuda_cu_memalloc(&_ret_value, _ret_type_size), "Allocate device memory for return value")) {
-        gpu::Ptx::CUdeviceptr* retValuePtr = (gpu::Ptx::CUdeviceptr*) ((_buffer + _buffer_size) - sizeof(_ret_value));
+      if (check(Ptx::_cuda_cu_memalloc(&_ret_value, _ret_type_size), "Allocate device memory for return value")) {
+        Ptx::CUdeviceptr* retValuePtr = (Ptx::CUdeviceptr*) ((_buffer + _buffer_size) - sizeof(_ret_value));
         *retValuePtr = _ret_value;
       }
     }
@@ -503,7 +506,7 @@
         // Size (in bytes) of object
         int objSize = obj->size() * HeapWordSize;
         //tty->print_cr("Pinning object %d at offset %d: %p", i, offset, obj);
-        if (!check(gpu::Ptx::_cuda_cu_mem_host_register(obj, objSize, GRAAL_CU_MEMHOSTREGISTER_DEVICEMAP), "Pin object")) {
+        if (!check(Ptx::_cuda_cu_mem_host_register(obj, objSize, GRAAL_CU_MEMHOSTREGISTER_DEVICEMAP), "Pin object")) {
           return;
         }
 
@@ -512,7 +515,7 @@
 
         // Replace host pointer to object with device pointer
         // to object in kernel parameters buffer
-        if (!check(gpu::Ptx::_cuda_cu_mem_host_get_device_pointer((gpu::Ptx::CUdeviceptr*) argPtr, obj, 0), "Get device pointer for pinned object")) {
+        if (!check(Ptx::_cuda_cu_mem_host_get_device_pointer((Ptx::CUdeviceptr*) argPtr, obj, 0), "Get device pointer for pinned object")) {
           return;
         }
       }
@@ -529,7 +532,7 @@
       GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE, &_buffer_size,
       GRAAL_CU_LAUNCH_PARAM_END
     };
-    if (check(gpu::Ptx::_cuda_cu_launch_kernel((struct CUfunc_st*) (address) kernel,
+    if (check(Ptx::_cuda_cu_launch_kernel((struct CUfunc_st*) (address) kernel,
                                       gridX, gridY, gridZ,
                                       dimX, dimY, dimZ,
                                       0, NULL, NULL, (void**) &config), "Launch kernel")) {
@@ -537,7 +540,7 @@
   }
 
   void synchronize() {
-    check(gpu::Ptx::_cuda_cu_ctx_synchronize(), "Synchronize kernel");
+    check(Ptx::_cuda_cu_ctx_synchronize(), "Synchronize kernel");
   }
 
   void unpin_objects() {
@@ -545,7 +548,7 @@
       oop obj = _pinned[--_pinned_length];
       assert(obj != NULL, "npe");
       //tty->print_cr("Unpinning object %d: %p", _pinned_length, obj);
-      if (!check(gpu::Ptx::_cuda_cu_mem_host_unregister(obj), "Unpin object")) {
+      if (!check(Ptx::_cuda_cu_mem_host_unregister(obj), "Unpin object")) {
         return;
       }
     }
@@ -553,27 +556,27 @@
 
   oop get_object_return_value() {
     oop return_val;
-    check(gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, _ret_value, T_OBJECT_BYTE_SIZE), "Copy return value from device");
+    check(Ptx::_cuda_cu_memcpy_dtoh(&return_val, _ret_value, T_OBJECT_BYTE_SIZE), "Copy return value from device");
     return return_val;
   }
 
   jlong get_primitive_return_value() {
     jlong return_val;
-    check(gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, _ret_value, _ret_type_size), "Copy return value from device");
+    check(Ptx::_cuda_cu_memcpy_dtoh(&return_val, _ret_value, _ret_type_size), "Copy return value from device");
     return return_val;
   }
 
   void free_return_value() {
     if (_ret_value != 0) {
-      check(gpu::Ptx::_cuda_cu_memfree(_ret_value), "Free device memory");
+      check(Ptx::_cuda_cu_memfree(_ret_value), "Free device memory");
       _ret_value = 0;
     }
   }
 
   void destroy_context() {
-    if (gpu::Ptx::_device_context != NULL) {
-      check(gpu::Ptx::_cuda_cu_ctx_destroy(gpu::Ptx::_device_context), "Destroy context");
-      gpu::Ptx::_device_context = NULL;
+    if (Ptx::_device_context != NULL) {
+      check(Ptx::_cuda_cu_ctx_destroy(Ptx::_device_context), "Destroy context");
+      Ptx::_device_context = NULL;
     }
   }
 
@@ -666,11 +669,11 @@
   }
 }
 
-GPU_VMENTRY(jlong, gpu::Ptx::get_execute_kernel_from_vm_address, (JNIEnv *env, jclass))
-  return (jlong) gpu::Ptx::execute_kernel_from_vm;
+GPU_VMENTRY(jlong, Ptx::get_execute_kernel_from_vm_address, (JNIEnv *env, jclass))
+  return (jlong) Ptx::execute_kernel_from_vm;
 GPU_END
 
-JRT_ENTRY(jlong, gpu::Ptx::execute_kernel_from_vm(JavaThread* thread, jlong kernel, jint dimX, jint dimY, jint dimZ,
+JRT_ENTRY(jlong, Ptx::execute_kernel_from_vm(JavaThread* thread, jlong kernel, jint dimX, jint dimY, jint dimZ,
                                                   jlong buffer,
                                                   jint bufferSize,
                                                   jint objectParametersCount,
@@ -724,7 +727,7 @@
 static char const cuda_library_name[] = "";
 #endif
 
-bool gpu::Ptx::link() {
+bool Ptx::link() {
   if (cuda_library_name == NULL) {
     if (TraceGPUInteraction) {
       tty->print_cr("Failed to find CUDA linkage");
--- a/src/gpu/ptx/vm/gpu_ptx.hpp	Thu Feb 06 14:50:36 2014 +0100
+++ b/src/gpu/ptx/vm/gpu_ptx.hpp	Thu Feb 06 14:50:45 2014 +0100
@@ -155,12 +155,12 @@
                                               unsigned int, void*, void**, void**);
   typedef int (*cuda_cu_module_get_function_func_t)(void*, void*, const char*);
   typedef int (*cuda_cu_module_load_data_ex_func_t)(void*, void*, unsigned int, void*, void**);
-  typedef int (*cuda_cu_memalloc_func_t)(gpu::Ptx::CUdeviceptr*, size_t);
-  typedef int (*cuda_cu_memfree_func_t)(gpu::Ptx::CUdeviceptr);
-  typedef int (*cuda_cu_memcpy_htod_func_t)(gpu::Ptx::CUdeviceptr, const void*, unsigned int);
-  typedef int (*cuda_cu_memcpy_dtoh_func_t)(const void*, gpu::Ptx::CUdeviceptr,  unsigned int);
+  typedef int (*cuda_cu_memalloc_func_t)(Ptx::CUdeviceptr*, size_t);
+  typedef int (*cuda_cu_memfree_func_t)(Ptx::CUdeviceptr);
+  typedef int (*cuda_cu_memcpy_htod_func_t)(Ptx::CUdeviceptr, const void*, unsigned int);
+  typedef int (*cuda_cu_memcpy_dtoh_func_t)(const void*, Ptx::CUdeviceptr,  unsigned int);
   typedef int (*cuda_cu_mem_host_register_func_t)(void*, size_t, unsigned int);
-  typedef int (*cuda_cu_mem_host_get_device_pointer_func_t)(gpu::Ptx::CUdeviceptr*, void*, unsigned int);
+  typedef int (*cuda_cu_mem_host_get_device_pointer_func_t)(Ptx::CUdeviceptr*, void*, unsigned int);
   typedef int (*cuda_cu_mem_host_unregister_func_t)(void*);
 
 public:
--- a/src/os/bsd/vm/gpu_bsd.cpp	Thu Feb 06 14:50:36 2014 +0100
+++ b/src/os/bsd/vm/gpu_bsd.cpp	Thu Feb 06 14:50:45 2014 +0100
@@ -23,6 +23,8 @@
  */
 
 #include "runtime/gpu.hpp"
+#include "ptx/vm/gpu_ptx.hpp"
+#include "hsail/vm/gpu_hsail.hpp"
 #include "utilities/ostream.hpp"
 
 jobject gpu::probe_gpus(JNIEnv* env) {
@@ -31,7 +33,7 @@
    * Let the CUDA driver initialization be the gate to GPU for now, pending
    * a better detection solution for NVIDA PTX and AMD HSAIL.
    */
-  if (gpu::Ptx::register_natives(env)) {
+  if (Ptx::register_natives(env)) {
     if (TraceGPUInteraction) {
       tty->print_cr("Assuming NVidia/PTX support (APPLE)");
     }
--- a/src/os/linux/vm/gpu_linux.cpp	Thu Feb 06 14:50:36 2014 +0100
+++ b/src/os/linux/vm/gpu_linux.cpp	Thu Feb 06 14:50:45 2014 +0100
@@ -23,6 +23,8 @@
  */
 
 #include "runtime/gpu.hpp"
+#include "ptx/vm/gpu_ptx.hpp"
+#include "hsail/vm/gpu_hsail.hpp"
 #include "utilities/ostream.hpp"
 
 /*
@@ -40,7 +42,7 @@
   bool hsail = false;
   bool ptx = false;
 
-  if (UseHSAILSimulator && gpu::Hsail::register_natives(env)) {
+  if (Hsail::register_natives(env)) {
     hsail = true;
   }
 
@@ -71,7 +73,7 @@
         if (TraceGPUInteraction) {
           tty->print_cr("Found supported nVidia device [vendor=0x%04x, device=0x%04x]", vendor, device);
         }
-        if (!ptx && gpu::Ptx::register_natives(env)) {
+        if (!ptx && Ptx::register_natives(env)) {
           ptx = true;
         }
       }
--- a/src/os/windows/vm/gpu_windows.cpp	Thu Feb 06 14:50:36 2014 +0100
+++ b/src/os/windows/vm/gpu_windows.cpp	Thu Feb 06 14:50:45 2014 +0100
@@ -24,11 +24,12 @@
 
 #include "precompiled.hpp"
 #include "runtime/gpu.hpp"
+#include "hsail/vm/gpu_hsail.hpp"
 #include "utilities/ostream.hpp"
 
 jobject gpu::probe_gpus(JNIEnv* env) {
   // TODO: add detection of PTX/NVidia
-  if (UseHSAILSimulator && gpu::Hsail::register_natives(env)) {
+  if (Hsail::register_natives(env)) {
     return env->NewStringUTF("HSAIL");
   }
   return env->NewStringUTF("");
--- a/src/share/vm/runtime/globals.hpp	Thu Feb 06 14:50:36 2014 +0100
+++ b/src/share/vm/runtime/globals.hpp	Thu Feb 06 14:50:45 2014 +0100
@@ -3840,9 +3840,6 @@
   product(bool , AllowNonVirtualCalls, false,                               \
           "Obey the ACC_SUPER flag and allow invokenonvirtual calls")       \
                                                                             \
-  product(bool, UseHSAILSimulator, false,                                   \
-          "Run code on HSAIL Simulator")                                    \
-                                                                            \
   diagnostic(ccstr, SharedArchiveFile, NULL,                                \
           "Override the default location of the CDS archive file")          \
                                                                             \
--- a/src/share/vm/runtime/gpu.hpp	Thu Feb 06 14:50:36 2014 +0100
+++ b/src/share/vm/runtime/gpu.hpp	Thu Feb 06 14:50:45 2014 +0100
@@ -32,21 +32,18 @@
 // Defines the interface to the graphics processor(s).
 class gpu : AllStatic {
  private:
-  static int _initialized_gpus;
-
-  // Notifies that a GPU device has been initialized.
-  static void initialized_gpu(const char* name);
+  static int _initialized_gpus;  // number of initialized GPU devices
 
  public:
 
+  // Notification of a GPU device that has been initialized.
+  static void initialized_gpu(const char* name);
+
   // Gets a comma separated list of supported GPU architecture names.
   static jobject probe_gpus(JNIEnv* env);
   
+  // Gets the number of GPU devices that have been initialized.
   static int initialized_gpus() { return _initialized_gpus; }
-
-# include "ptx/vm/gpu_ptx.hpp"
-# include "hsail/vm/gpu_hsail.hpp"
-
 };
 
 #endif // SHARE_VM_RUNTIME_GPU_HPP