changeset 12536:16e507054ebb

Merge
author Matthias Grimmer <grimmer@ssw.jku.at>
date Wed, 23 Oct 2013 13:41:10 +0200
parents cee7f686c470 (current diff) 2583afcd26ee (diff)
children c95e11c431b0
files graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotCodeCacheProvider.java
diffstat 19 files changed, 493 insertions(+), 403 deletions(-) [+]
line wrap: on
line diff
--- a/graal/com.oracle.graal.api.code/src/com/oracle/graal/api/code/CompilationResult.java	Wed Oct 23 13:40:56 2013 +0200
+++ b/graal/com.oracle.graal.api.code/src/com/oracle/graal/api/code/CompilationResult.java	Wed Oct 23 13:41:10 2013 +0200
@@ -326,6 +326,8 @@
     private int customStackAreaOffset = -1;
     private int registerRestoreEpilogueOffset = -1;
 
+    private final String name;
+
     /**
      * The buffer containing the emitted machine code.
      */
@@ -347,6 +349,14 @@
      */
     private long[] leafGraphIds;
 
+    public CompilationResult() {
+        this(null);
+    }
+
+    public CompilationResult(String name) {
+        this.name = name;
+    }
+
     public void setAssumptions(Assumptions assumptions) {
         this.assumptions = assumptions;
     }
@@ -620,4 +630,8 @@
         }
         return unmodifiableList(marks);
     }
+
+    public String getName() {
+        return name;
+    }
 }
--- a/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java	Wed Oct 23 13:40:56 2013 +0200
+++ b/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java	Wed Oct 23 13:41:10 2013 +0200
@@ -360,18 +360,18 @@
             assert var instanceof Variable;
             assert val instanceof Constant;
             Constant constant = (Constant) val;
-            return ("[" + emitRegister((Variable) var, false) + " + " + constant.asBoxedValue() + "]");
+            return ("[" + ((space == PTXStateSpace.Parameter) ? emitParameter((Variable) var) : emitRegister((Variable) var, false)) + " + " + constant.asBoxedValue() + "]");
         }
 
         @Override
         public String emitRegister(Variable var, boolean comma) {
-            /*
-             * if (space == Parameter) { return ("param" + var.index); } else { return ("%r" +
-             * var.index); }
-             */
             return ("%r" + var.index);
         }
 
+        public String emitParameter(Variable v) {
+            return ("param" + v.index);
+        }
+
         public String emit(boolean isLoad) {
             if (isLoad) {
                 return (space.getStateName() + "." + typeForKind(valueKind) + " " + emitRegister(dest, false) + ", " + emitAddress(source1, source2) + ";");
@@ -671,7 +671,7 @@
         }
 
         public String emitParameter(Variable v) {
-            return (" %r" + v.index);
+            return (" param" + v.index);
         }
 
         public void emit(PTXAssembler asm) {
--- a/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/GraalDebugConfig.java	Wed Oct 23 13:40:56 2013 +0200
+++ b/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/GraalDebugConfig.java	Wed Oct 23 13:41:10 2013 +0200
@@ -69,7 +69,11 @@
     // @formatter:on
 
     public static boolean areDebugScopePatternsEnabled() {
-        return DumpOnError.getValue() || Dump.getValue() != null || Meter.getValue() != null || Time.getValue() != null || Log.getValue() != null;
+        return DumpOnError.getValue() || Dump.getValue() != null || Log.getValue() != null || areMetricsOrTimersEnabled();
+    }
+
+    public static boolean areMetricsOrTimersEnabled() {
+        return Meter.getValue() != null || Time.getValue() != null;
     }
 
     private final DebugFilter logFilter;
--- a/graal/com.oracle.graal.graph/src/com/oracle/graal/graph/NodeClass.java	Wed Oct 23 13:40:56 2013 +0200
+++ b/graal/com.oracle.graal.graph/src/com/oracle/graal/graph/NodeClass.java	Wed Oct 23 13:41:10 2013 +0200
@@ -1194,7 +1194,7 @@
                     }
 
                     public Position next() {
-                        Position pos = new Position(true, i, i >= directInputCount ? 0 : NOT_ITERABLE);
+                        Position pos = new Position(true, i, i >= directInputCount ? NODE_LIST : NOT_ITERABLE);
                         i++;
                         return pos;
                     }
@@ -1224,7 +1224,7 @@
                     }
 
                     public Position next() {
-                        Position pos = new Position(false, i, i >= directSuccessorCount ? 0 : NOT_ITERABLE);
+                        Position pos = new Position(false, i, i >= directSuccessorCount ? NODE_LIST : NOT_ITERABLE);
                         i++;
                         return pos;
                     }
--- a/graal/com.oracle.graal.hotspot.test/src/com/oracle/graal/hotspot/test/HotSpotCryptoSubstitutionTest.java	Wed Oct 23 13:40:56 2013 +0200
+++ b/graal/com.oracle.graal.hotspot.test/src/com/oracle/graal/hotspot/test/HotSpotCryptoSubstitutionTest.java	Wed Oct 23 13:41:10 2013 +0200
@@ -48,7 +48,7 @@
     @Override
     protected InstalledCode addMethod(ResolvedJavaMethod method, CompilationResult compResult) {
         HotSpotResolvedJavaMethod hsMethod = (HotSpotResolvedJavaMethod) method;
-        HotSpotNmethod installedCode = new HotSpotNmethod(hsMethod, true);
+        HotSpotNmethod installedCode = new HotSpotNmethod(hsMethod, compResult.getName(), true);
         HotSpotCompiledNmethod compiledNmethod = new HotSpotCompiledNmethod(hsMethod, StructuredGraph.INVOCATION_ENTRY_BCI, compResult);
         CodeInstallResult result = runtime().getCompilerToVM().installCode(compiledNmethod, installedCode, null);
         Assert.assertEquals("Error installing method " + method + ": " + result, result, CodeInstallResult.OK);
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/VMToCompilerImpl.java	Wed Oct 23 13:40:56 2013 +0200
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/VMToCompilerImpl.java	Wed Oct 23 13:41:10 2013 +0200
@@ -401,7 +401,7 @@
             CompilationTask.withinEnqueue.set(Boolean.FALSE);
         }
 
-        if (Debug.isEnabled() && areDebugScopePatternsEnabled()) {
+        if (Debug.isEnabled() && areMetricsOrTimersEnabled()) {
             List<DebugValueMap> topLevelMaps = DebugValueMap.getTopLevelMaps();
             List<DebugValue> debugValues = KeyRegistry.getDebugValues();
             if (debugValues.size() > 0) {
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotCodeCacheProvider.java	Wed Oct 23 13:40:56 2013 +0200
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotCodeCacheProvider.java	Wed Oct 23 13:41:10 2013 +0200
@@ -158,7 +158,7 @@
     }
 
     public HotSpotInstalledCode installMethod(HotSpotResolvedJavaMethod method, int entryBCI, CompilationResult compResult) {
-        HotSpotInstalledCode installedCode = new HotSpotNmethod(method, true);
+        HotSpotInstalledCode installedCode = new HotSpotNmethod(method, compResult.getName(), true);
         runtime.getCompilerToVM().installCode(new HotSpotCompiledNmethod(method, entryBCI, compResult), installedCode, method.getSpeculationLog());
         return installedCode;
     }
@@ -166,7 +166,7 @@
     @Override
     public InstalledCode addMethod(ResolvedJavaMethod method, CompilationResult compResult) {
         HotSpotResolvedJavaMethod hotspotMethod = (HotSpotResolvedJavaMethod) method;
-        HotSpotInstalledCode code = new HotSpotNmethod(hotspotMethod, false);
+        HotSpotInstalledCode code = new HotSpotNmethod(hotspotMethod, compResult.getName(), false);
         CodeInstallResult result = runtime.getCompilerToVM().installCode(new HotSpotCompiledNmethod(hotspotMethod, -1, compResult), code, null);
         if (result != CodeInstallResult.OK) {
             return null;
@@ -182,7 +182,7 @@
     public InstalledCode addExternalMethod(ResolvedJavaMethod method, CompilationResult compResult) {
 
         HotSpotResolvedJavaMethod javaMethod = (HotSpotResolvedJavaMethod) method;
-        HotSpotInstalledCode icode = new HotSpotNmethod(javaMethod, false, true);
+        HotSpotInstalledCode icode = new HotSpotNmethod(javaMethod, compResult.getName(), false, true);
         HotSpotCompiledNmethod compiled = new HotSpotCompiledNmethod(javaMethod, -1, compResult);
         CompilerToVM vm = runtime.getCompilerToVM();
         CodeInstallResult result = vm.installCode(compiled, icode, null);
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotNmethod.java	Wed Oct 23 13:40:56 2013 +0200
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotNmethod.java	Wed Oct 23 13:41:10 2013 +0200
@@ -46,17 +46,17 @@
     private final HotSpotResolvedJavaMethod method;
     private final boolean isDefault;
     private final boolean isExternal;
+    private final String name;
 
-    public HotSpotNmethod(HotSpotResolvedJavaMethod method, boolean isDefault) {
-        this.method = method;
-        this.isDefault = isDefault;
-        this.isExternal = false;
+    public HotSpotNmethod(HotSpotResolvedJavaMethod method, String name, boolean isDefault) {
+        this(method, name, isDefault, false);
     }
 
-    public HotSpotNmethod(HotSpotResolvedJavaMethod method, boolean isDefault, boolean isExternal) {
+    public HotSpotNmethod(HotSpotResolvedJavaMethod method, String name, boolean isDefault, boolean isExternal) {
         this.method = method;
         this.isDefault = isDefault;
         this.isExternal = isExternal;
+        this.name = name;
     }
 
     public boolean isDefault() {
@@ -84,7 +84,7 @@
 
     @Override
     public String toString() {
-        return String.format("InstalledNmethod[method=%s, codeBlob=0x%x, isDefault=%b]", method, getCodeBlob(), isDefault);
+        return String.format("InstalledNmethod[method=%s, codeBlob=0x%x, isDefault=%b, name=%s]", method, getCodeBlob(), isDefault, name);
     }
 
     @Override
--- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/GuardNode.java	Wed Oct 23 13:40:56 2013 +0200
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/GuardNode.java	Wed Oct 23 13:41:10 2013 +0200
@@ -45,7 +45,7 @@
 
     @Input private LogicNode condition;
     private final DeoptimizationReason reason;
-    private final DeoptimizationAction action;
+    private DeoptimizationAction action;
     private boolean negated;
 
     public GuardNode(LogicNode condition, GuardingNode anchor, DeoptimizationReason reason, DeoptimizationAction action, boolean negated) {
@@ -107,4 +107,8 @@
     public void negate() {
         negated = !negated;
     }
+
+    public void setAction(DeoptimizationAction action) {
+        this.action = action;
+    }
 }
--- a/graal/com.oracle.graal.printer/src/com/oracle/graal/printer/BinaryGraphPrinter.java	Wed Oct 23 13:40:56 2013 +0200
+++ b/graal/com.oracle.graal.printer/src/com/oracle/graal/printer/BinaryGraphPrinter.java	Wed Oct 23 13:41:10 2013 +0200
@@ -407,6 +407,7 @@
             writeInt(node.getId());
             writePoolObject(nodeClass);
             writeByte(node.predecessor() == null ? 0 : 1);
+            // properties
             writeShort((char) props.size());
             for (Entry<Object, Object> entry : props.entrySet()) {
                 String key = entry.getKey().toString();
@@ -414,54 +415,42 @@
                 writePropertyObject(entry.getValue());
             }
             // inputs
-            Collection<Position> directInputPositions = nodeClass.getFirstLevelInputPositions();
-            for (Position pos : directInputPositions) {
-                if (pos.subIndex == NodeClass.NOT_ITERABLE) {
-                    Node in = nodeClass.get(node, pos);
-                    if (in != null) {
-                        writeInt(in.getId());
-                    } else {
-                        writeInt(-1);
-                    }
+            writeEdges(node, nodeClass.getFirstLevelInputPositions());
+            // successors
+            writeEdges(node, nodeClass.getFirstLevelSuccessorPositions());
+
+            props.clear();
+        }
+    }
+
+    private void writeEdges(Node node, Collection<Position> positions) throws IOException {
+        NodeClass nodeClass = node.getNodeClass();
+        for (Position pos : positions) {
+            if (pos.subIndex == NodeClass.NOT_ITERABLE) {
+                Node edge = nodeClass.get(node, pos);
+                writeNodeRef(edge);
+            } else {
+                NodeList<?> list = nodeClass.getNodeList(node, pos);
+                if (list == null) {
+                    writeShort((char) 0);
                 } else {
-                    NodeList<?> list = nodeClass.getNodeList(node, pos);
                     int listSize = list.count();
                     assert listSize == ((char) listSize);
                     writeShort((char) listSize);
-                    for (Node in : list) {
-                        if (in != null) {
-                            writeInt(in.getId());
-                        } else {
-                            writeInt(-1);
-                        }
+                    for (Node edge : list) {
+                        writeNodeRef(edge);
                     }
                 }
             }
-            // successors
-            Collection<Position> directSuccessorPositions = nodeClass.getFirstLevelSuccessorPositions();
-            for (Position pos : directSuccessorPositions) {
-                if (pos.subIndex == NodeClass.NOT_ITERABLE) {
-                    Node sux = nodeClass.get(node, pos);
-                    if (sux != null) {
-                        writeInt(sux.getId());
-                    } else {
-                        writeInt(-1);
-                    }
-                } else {
-                    NodeList<?> list = nodeClass.getNodeList(node, pos);
-                    int listSize = list.count();
-                    assert listSize == ((char) listSize);
-                    writeShort((char) listSize);
-                    for (Node sux : list) {
-                        if (sux != null) {
-                            writeInt(sux.getId());
-                        } else {
-                            writeInt(-1);
-                        }
-                    }
-                }
-            }
-            props.clear();
+        }
+    }
+
+    @SuppressWarnings("deprecation")
+    private void writeNodeRef(Node edge) throws IOException {
+        if (edge != null) {
+            writeInt(edge.getId());
+        } else {
+            writeInt(-1);
         }
     }
 
--- a/graal/com.oracle.graal.truffle/src/com/oracle/graal/truffle/TruffleCompilerImpl.java	Wed Oct 23 13:40:56 2013 +0200
+++ b/graal/com.oracle.graal.truffle/src/com/oracle/graal/truffle/TruffleCompilerImpl.java	Wed Oct 23 13:41:10 2013 +0200
@@ -154,7 +154,7 @@
                     CodeCacheProvider codeCache = providers.getCodeCache();
                     CallingConvention cc = getCallingConvention(codeCache, Type.JavaCallee, graph.method(), false);
                     return GraalCompiler.compileGraph(graph, cc, graph.method(), providers, backend, codeCache.getTarget(), null, plan, OptimisticOptimizations.ALL, new SpeculationLog(), suites,
-                                    new CompilationResult());
+                                    new CompilationResult(compilable.toString()));
                 }
             }
         });
--- a/mxtool/mx.py	Wed Oct 23 13:40:56 2013 +0200
+++ b/mxtool/mx.py	Wed Oct 23 13:41:10 2013 +0200
@@ -156,7 +156,8 @@
 _dists = dict()
 _suites = dict()
 _annotationProcessors = None
-_mainSuite = None
+_primary_suite_path = None
+_primary_suite = None
 _src_suitemodel = None
 _dst_suitemodel = None
 _opts = None
@@ -504,44 +505,45 @@
     """
     def __init__(self):
         self.missing = 'no hg executable found'
-        try:
-            subprocess.check_output(['hg'])
-            self.has_hg = True
-        except OSError:
-            self.has_hg = False
-            warn(self.missing)
-
-    def _check(self, abortOnFail=True):
+        self.has_hg = None
+
+    def check(self, abortOnFail=True):
+        if self.has_hg is None:
+            try:
+                subprocess.check_output(['hg'])
+                self.has_hg = True
+            except OSError:
+                self.has_hg = False
+                warn(self.missing)
+
         if not self.has_hg:
             if abortOnFail:
                 abort(self.missing)
             else:
                 warn(self.missing)
-        return self.has_hg
-
-    def _tip(self, s, abortOnError=True):
-        if not self.has_hg:
-            return None
+
+    def tip(self, sDir, abortOnError=True):
         try:
-            version = subprocess.check_output(['hg', 'tip', '-R', s.dir, '--template', '{node}'])
-            if s.version is not None and s.version != version:
-                abort('version of suite ' + s.name +' has changed during run')
-            return version
+            return subprocess.check_output(['hg', 'tip', '-R', sDir, '--template', '{node}'])
+        except OSError:
+            warn(self.missing)
         except subprocess.CalledProcessError:
             if abortOnError:
                 abort('failed to get tip revision id')
             else:
                 return None
 
-    def _canpush(self, s, strict=True):
+    def can_push(self, s, strict=True):
         try:
             output = subprocess.check_output(['hg', '-R', s.dir, 'status'])
             # super strict
             return output == ''
+        except OSError:
+            warn(self.missing)
         except subprocess.CalledProcessError:
             return False
 
-    def _default_push(self, sdir):
+    def default_push(self, sdir):
         with open(join(sdir, '.hg', 'hgrc')) as f:
             for line in f:
                 line = line.rstrip()
@@ -560,19 +562,19 @@
         self.primaryDir = None
         self.suitenamemap = {}
 
-    def _find_suite_dir(self, suitename):
+    def find_suite_dir(self, suitename):
         """locates the URL/path for suitename or None if not found"""
-        abort('_find_suite_dir not implemented')
-
-    def _set_primary_dir(self, d):
+        abort('find_suite_dir not implemented')
+
+    def set_primary_dir(self, d):
         """informs that d is the primary suite directory"""
         self._primaryDir = d
 
-    def _importee_dir(self, importer_dir, suitename):
+    def importee_dir(self, importer_dir, suitename):
         """returns the directory path for an import of suitename, given importer_dir"""
-        abort('_importee_dir not implemented')
-
-    def _nestedsuites_dirname(self):
+        abort('importee_dir not implemented')
+
+    def nestedsuites_dirname(self):
         """Returns the dirname that contains any nested suites if the model supports that"""
         return None
 
@@ -609,7 +611,7 @@
             self.suitenamemap[mappair[0]] = mappair[1]
 
     @staticmethod
-    def _set_suitemodel(option, suitemap):
+    def set_suitemodel(option, suitemap):
         if option.startswith('sibling'):
             return SiblingSuiteModel(os.getcwd(), option, suitemap)
         elif option.startswith('nested'):
@@ -620,7 +622,7 @@
             abort('unknown suitemodel type: ' + option)
 
     @staticmethod
-    def _parse_options():
+    def parse_options():
         # suite-specific args may match the known args so there is no way at this early stage
         # to use ArgParser to handle the suite model global arguments, so we just do it manually.
         def _get_argvalue(arg, args, i):
@@ -646,13 +648,15 @@
                 # to get warnings on suite loading issues before command line is parsed
                 global _warn
                 _warn = True
-
+            elif arg == '-p' or arg == '--primary-suite-path':
+                global _primary_suite_path
+                _primary_suite_path = os.path.abspath(_get_argvalue(arg, args, i + 1))
             i = i + 1
 
         global _src_suitemodel
-        _src_suitemodel = SuiteModel._set_suitemodel(src_suitemodel_arg, suitemap_arg)
+        _src_suitemodel = SuiteModel.set_suitemodel(src_suitemodel_arg, suitemap_arg)
         global _dst_suitemodel
-        _dst_suitemodel = SuiteModel._set_suitemodel(dst_suitemodel_arg, suitemap_arg)
+        _dst_suitemodel = SuiteModel.set_suitemodel(dst_suitemodel_arg, suitemap_arg)
 
 
 class SiblingSuiteModel(SuiteModel):
@@ -662,14 +666,14 @@
         self._suiteRootDir = suiteRootDir
         self._create_suitenamemap(option[len('sibling:'):], suitemap)
 
-    def _find_suite_dir(self, name):
+    def find_suite_dir(self, name):
         return self._search_dir(self._suiteRootDir, self._mxDirName(name))
 
-    def _set_primary_dir(self, d):
-        SuiteModel._set_primary_dir(self, d)
+    def set_primary_dir(self, d):
+        SuiteModel.set_primary_dir(self, d)
         self._suiteRootDir = dirname(d)
 
-    def _importee_dir(self, importer_dir, suitename):
+    def importee_dir(self, importer_dir, suitename):
         if self.suitenamemap.has_key(suitename):
             suitename = self.suitenamemap[suitename]
         return join(dirname(importer_dir), suitename)
@@ -684,10 +688,10 @@
         self._primaryDir = primaryDir
         self._create_suitenamemap(option[len('nested:'):], suitemap)
 
-    def _find_suite_dir(self, name):
+    def find_suite_dir(self, name):
         return self._search_dir(join(self._primaryDir, self._imported_suites_dirname()), self._mxDirName(name))
 
-    def _importee_dir(self, importer_dir, suitename):
+    def importee_dir(self, importer_dir, suitename):
         if self.suitenamemap.has_key(suitename):
             suitename = self.suitenamemap[suitename]
         if basename(importer_dir) == basename(self._primaryDir):
@@ -699,7 +703,7 @@
         else:
             return join(dirname(importer_dir), suitename)
 
-    def _nestedsuites_dirname(self):
+    def nestedsuites_dirname(self):
         return self._imported_suites_dirname()
 
 class PathSuiteModel(SuiteModel):
@@ -718,13 +722,13 @@
                 suiteurl = pair[0]
             self.suit_to_url[suitename] = suiteurl
 
-    def _find_suite_dir(self, suitename):
+    def find_suite_dir(self, suitename):
         if self.suit_to_url.has_key(suitename):
             return self.suit_to_url[suitename]
         else:
             return None
 
-    def _importee_dir(self, importer_dir, suitename):
+    def importee_dir(self, importer_dir, suitename):
         if suitename in self.suit_to_url:
             return self.suit_to_url[suitename]
         else:
@@ -736,7 +740,7 @@
         self.version = version
 
     @staticmethod
-    def _parse_specification(specification):
+    def parse_specification(specification):
         pair = specification.split(',')
         name = pair[0]
         if len(pair) > 1:
@@ -746,7 +750,7 @@
         return SuiteImport(name, version)
 
     @staticmethod
-    def _tostring(name, version):
+    def tostring(name, version):
         return name + ',' + version
 
     def __str__(self):
@@ -763,8 +767,6 @@
         self.commands = None
         self.primary = primary
         self.name = _suitename(mxDir)  # validated in _load_projects
-        self.version = None  # _hg._tip checks current version if not None
-        self.version = _hg._tip(self, False)
         if load:
             # load suites bottom up to make sure command overriding works properly
             self._load_imports()
@@ -775,6 +777,10 @@
     def __str__(self):
         return self.name
 
+    def version(self, abortOnError=True):
+        # we do not cache the version
+        return _hg.tip(self.dir, abortOnError)
+
     def _load_projects(self):
         libsMap = dict()
         projsMap = dict()
@@ -872,7 +878,7 @@
             abort('Missing "suite=<name>" in ' + projectsFile)
 
     def _commands_name(self):
-        return 'mx_' + self.name
+        return 'mx_' + self.name.replace('-','_')
 
     def _find_commands(self, name):
         commandsPath = join(self.mxDir, name + '.py')
@@ -907,7 +913,7 @@
             mod.mx_init(self)
             self.commands = mod
 
-    def _visit_imports(self, visitor, **extra_args):
+    def visit_imports(self, visitor, **extra_args):
         """
         Visitor support for the imports file.
         For each line of the imports file that specifies an import, the visitor function is
@@ -917,7 +923,7 @@
         for writing a (possibly) updated import line to the file, and the file is (possibly) updated after
         all imports are processed.
         N.B. There is no built-in support for avoiding visiting the same suite multiple times,
-        as this function only visits the imports of a singkle suite. If a (recursive) visitor function
+        as this function only visits the imports of a single suite. If a (recursive) visitor function
         wishes to visit a suite exactly once, it must manage that through extra_args.
         """
         importsFile = join(self.mxDir, 'imports')
@@ -932,25 +938,25 @@
                         if out is not None:
                             out.write(sline + '\n')
                         continue
-                    suite_import = SuiteImport._parse_specification(line.strip())
+                    suite_import = SuiteImport.parse_specification(line.strip())
                     visitor(self, suite_import, **extra_args)
 
             if out is not None:
                 update_file(importsFile, out.getvalue())
 
     @staticmethod
-    def _find_and_loadsuite(suite, suite_import, **extra_args):
+    def _find_and_loadsuite(importing_suite, suite_import, **extra_args):
         """visitor for the initial suite load"""
-        importMxDir = _src_suitemodel._find_suite_dir(suite_import.name)
+        importMxDir = _src_suitemodel.find_suite_dir(suite_import.name)
         if importMxDir is None:
             abort('import ' + suite_import.name + ' not found')
-        suite.imports.append(suite_import)
-        imported_suite = _loadSuite(importMxDir, False)
-        if imported_suite.version != suite.version:
-            warn('import version of ' + imported_suite.name +' does not match tip of ' + suite.version)
+        importing_suite.imports.append(suite_import)
+        _loadSuite(importMxDir, False)
+        # we do not check at this stage whether the tip version of imported_suite
+        # matches that of the import, since during development, this can and will change
 
     def _load_imports(self):
-        self._visit_imports(self._find_and_loadsuite)
+        self.visit_imports(self._find_and_loadsuite)
 
     def _load_env(self):
         e = join(self.mxDir, 'env')
@@ -1315,7 +1321,7 @@
         else:
             break
 
-    envPath = join(_mainSuite.mxDir, 'env')
+    envPath = join(_primary_suite.mxDir, 'env')
     if ask_yes_no('Persist this setting by adding "JAVA_HOME=' + javaHome + '" to ' + envPath, 'y'):
         with open(envPath, 'a') as fp:
             print >> fp, 'JAVA_HOME=' + javaHome
@@ -1336,6 +1342,7 @@
         self.add_argument('-v', action='store_true', dest='verbose', help='enable verbose output')
         self.add_argument('-V', action='store_true', dest='very_verbose', help='enable very verbose output')
         self.add_argument('-w', action='store_true', dest='warn', help='enable warning messages')
+        self.add_argument('-p', '--primary-suite-path', help='set the primary suite directory', metavar='<path>')
         self.add_argument('--dbg', type=int, dest='java_dbg_port', help='make Java processes wait on <port> for a debugger', metavar='<port>')
         self.add_argument('-d', action='store_const', const=8000, dest='java_dbg_port', help='alias for "-dbg 8000"')
         self.add_argument('--cp-pfx', dest='cp_prefix', help='class path prefix', metavar='<arg>')
@@ -1904,7 +1911,7 @@
 
     javaCompliance = java().javaCompliance
 
-    defaultEcjPath = join(_mainSuite.mxDir, 'ecj.jar')
+    defaultEcjPath = join(_primary_suite.mxDir, 'ecj.jar')
 
     parser = parser if parser is not None else ArgumentParser(prog='mx build')
     parser.add_argument('-f', action='store_true', dest='force', help='force build (disables timestamp checking)')
@@ -2223,7 +2230,7 @@
     if len(modified) != 0:
         if args.backup:
             backup = os.path.abspath('eclipseformat.backup.zip')
-            arcbase = _mainSuite.dir
+            arcbase = _primary_suite.dir
             zf = zipfile.ZipFile(backup, 'w', zipfile.ZIP_DEFLATED)
             for fi in modified:
                 arcname = os.path.relpath(fi.path, arcbase).replace(os.sep, '/')
@@ -2477,7 +2484,7 @@
         self.path = path
         self.timestamp = os.path.getmtime(path) if exists(path) else None
 
-    def outOfDate(self, arg):
+    def isOlderThan(self, arg):
         if not self.timestamp:
             return True
         if isinstance(arg, types.ListType):
@@ -2537,7 +2544,7 @@
             timestamp = TimeStampFile(join(p.suite.mxDir, 'checkstyle-timestamps', sourceDir[len(p.suite.dir) + 1:].replace(os.sep, '_') + '.timestamp'))
             mustCheck = False
             if not args.force and timestamp.exists():
-                mustCheck = timestamp.outOfDate(javafilelist)
+                mustCheck = timestamp.isOlderThan(javafilelist)
             else:
                 mustCheck = True
 
@@ -2848,7 +2855,7 @@
     if refreshOnly and not timestamp.exists():
         return
 
-    if not timestamp.outOfDate(projectsFile):
+    if not timestamp.isOlderThan(projectsFile) and not TimeStampFile(projectsFile).isOlderThan(__file__):
         logv('[Eclipse configurations are up to date - skipping]')
         return
 
@@ -2979,12 +2986,12 @@
                 out.close('buildCommand')
 
         if _isAnnotationProcessorDependency(p):
-            _genEclipseBuilder(out, p, 'Jar.launch', 'archive ' + p.name, refresh=False, async=False, xmlIndent='', xmlStandalone='no')
-            _genEclipseBuilder(out, p, 'Refresh.launch', '', refresh=True, async=True)
+            _genEclipseBuilder(out, p, 'Jar', 'archive ' + p.name, refresh=False, async=False, xmlIndent='', xmlStandalone='no')
+            _genEclipseBuilder(out, p, 'Refresh', '', refresh=True, async=True)
 
         if projToDist.has_key(p.name):
             dist, distDeps = projToDist[p.name]
-            _genEclipseBuilder(out, p, 'Create' + dist.name + 'Dist.launch', 'archive @' + dist.name, refresh=False, async=True)
+            _genEclipseBuilder(out, p, 'Create' + dist.name + 'Dist', 'archive @' + dist.name, logToFile=True, refresh=False, async=True)
 
         out.close('buildSpec')
         out.open('natures')
@@ -3046,7 +3053,8 @@
     """
     return p in sorted_deps(annotation_processors())
 
-def _genEclipseBuilder(dotProjectDoc, p, name, mxCommand, refresh=True, async=False, logToConsole=False, xmlIndent='\t', xmlStandalone=None):
+def _genEclipseBuilder(dotProjectDoc, p, name, mxCommand, refresh=True, async=False, logToConsole=False, logToFile=False, appendToLogFile=True, xmlIndent='\t', xmlStandalone=None):
+    externalToolDir = join(p.dir, '.externalToolBuilders')
     launchOut = XMLDoc()
     consoleOn = 'true' if logToConsole else 'false'
     launchOut.open('launchConfiguration', {'type' : 'org.eclipse.ui.externaltools.ProgramBuilderLaunchConfigurationType'})
@@ -3059,6 +3067,10 @@
         launchOut.element('stringAttribute', {'key' : 'org.eclipse.debug.core.ATTR_REFRESH_SCOPE', 'value': '${project}'})
     launchOut.element('booleanAttribute', {'key' : 'org.eclipse.debug.ui.ATTR_CONSOLE_OUTPUT_ON', 'value': consoleOn})
     launchOut.element('booleanAttribute', {'key' : 'org.eclipse.debug.ui.ATTR_LAUNCH_IN_BACKGROUND', 'value': 'true' if async else 'false'})
+    if logToFile:
+        logFile = join(externalToolDir, name + '.log')
+        launchOut.element('stringAttribute', {'key' : 'org.eclipse.debug.ui.ATTR_CAPTURE_IN_FILE', 'value': logFile})
+        launchOut.element('booleanAttribute', {'key' : 'org.eclipse.debug.ui.ATTR_APPEND_TO_FILE', 'value': 'true' if appendToLogFile else 'false'})
 
     # expect to find the OS command to invoke mx in the same directory
     baseDir = dirname(os.path.abspath(__file__))
@@ -3082,11 +3094,9 @@
 
     launchOut.close('launchConfiguration')
 
-    externalToolDir = join(p.dir, '.externalToolBuilders')
-
     if not exists(externalToolDir):
         os.makedirs(externalToolDir)
-    update_file(join(externalToolDir, name), launchOut.xml(indent=xmlIndent, standalone=xmlStandalone, newl='\n'))
+    update_file(join(externalToolDir, name + '.launch'), launchOut.xml(indent=xmlIndent, standalone=xmlStandalone, newl='\n'))
 
     dotProjectDoc.open('buildCommand')
     dotProjectDoc.element('name', data='org.eclipse.ui.externaltools.ExternalToolBuilder')
@@ -3094,7 +3104,7 @@
     dotProjectDoc.open('arguments')
     dotProjectDoc.open('dictionary')
     dotProjectDoc.element('key', data='LaunchConfigHandle')
-    dotProjectDoc.element('value', data='<project>/.externalToolBuilders/' + name)
+    dotProjectDoc.element('value', data='<project>/.externalToolBuilders/' + name + '.launch')
     dotProjectDoc.close('dictionary')
     dotProjectDoc.open('dictionary')
     dotProjectDoc.element('key', data='incclean')
@@ -3117,7 +3127,7 @@
     if os.environ.has_key('WORKSPACE'):
         expected_wsroot = os.environ['WORKSPACE']
     else:
-        expected_wsroot = _mainSuite.dir
+        expected_wsroot = _primary_suite.dir
 
     wsroot = _find_eclipse_wsroot(expected_wsroot)
     if wsroot is None:
@@ -3255,7 +3265,7 @@
     if refreshOnly and not timestamp.exists():
         return
 
-    if not timestamp.outOfDate(projectsFile):
+    if not timestamp.isOlderThan(projectsFile) and not TimeStampFile(projectsFile).isOlderThan(__file__):
         logv('[NetBeans configurations are up to date - skipping]')
         return
 
@@ -3521,8 +3531,8 @@
                 if '.hg' in dirnames:
                     dirnames.remove('.hg')
                 # if there are nested suites must not scan those now, as they are not in projectDirs
-                if _src_suitemodel._nestedsuites_dirname() in dirnames:
-                    dirnames.remove(_src_suitemodel._nestedsuites_dirname())
+                if _src_suitemodel.nestedsuites_dirname() in dirnames:
+                    dirnames.remove(_src_suitemodel.nestedsuites_dirname())
             elif dirpath in projectDirs:
                 # don't traverse subdirs of an existing project in this suite
                 dirnames[:] = []
@@ -3674,7 +3684,7 @@
             names.append(p.name)
 
         links = ['-link', 'http://docs.oracle.com/javase/' + str(_java.javaCompliance.value) + '/docs/api/']
-        out = join(_mainSuite.dir, docDir)
+        out = join(_primary_suite.dir, docDir)
         if args.base is not None:
             out = join(args.base, docDir)
         cp = classpath()
@@ -3943,7 +3953,7 @@
 
 def sclone(args):
     """clone a suite repository, and its imported suites"""
-    _hg._check(True)
+    _hg.check()
     parser = ArgumentParser(prog='mx sclone')
     parser.add_argument('--source', help='url/path of repo containing suite', metavar='<url>')
     parser.add_argument('--dest', help='destination directory (default basename of source)', metavar='<path>')
@@ -3960,11 +3970,11 @@
 
     if args.source is None:
         # must be primary suite and dest is required
-        if _mainSuite is None:
+        if _primary_suite is None:
             abort('--source missing and no primary suite found')
         if args.dest is None:
             abort('--dest required when --source is not given')
-        source = _mainSuite.dir
+        source = _primary_suite.dir
     else:
         source = args.source
 
@@ -3975,8 +3985,8 @@
 
     dest = os.path.abspath(dest)
     # We can now set the primary dir for the src/dst suitemodel
-    _dst_suitemodel._set_primary_dir(dest)
-    _src_suitemodel._set_primary_dir(source)
+    _dst_suitemodel.set_primary_dir(dest)
+    _src_suitemodel.set_primary_dir(source)
 
     _sclone(source, dest, None, args.no_imports)
 
@@ -3998,12 +4008,12 @@
     # create a Suite (without loading) to enable imports visitor
     s = Suite(mxDir, False, load=False)
     if not no_imports:
-        s._visit_imports(_scloneimports_visitor, source=source)
+        s.visit_imports(_scloneimports_visitor, source=source)
     return s
 
 def _scloneimports_visitor(s, suite_import, source, **extra_args):
     """
-    cloneimports visitor for Suite._visit_imports.
+    cloneimports visitor for Suite.visit_imports.
     The destination information is encapsulated by 's'
     """
     _scloneimports(s, suite_import, source)
@@ -4018,18 +4028,21 @@
 
 def _scloneimports(s, suite_import, source):
     # clone first, then visit imports once we can locate them
-    importee_source = _src_suitemodel._importee_dir(source, suite_import.name)
-    importee_dest = _dst_suitemodel._importee_dir(s.dir, suite_import.name)
+    importee_source = _src_suitemodel.importee_dir(source, suite_import.name)
+    importee_dest = _dst_suitemodel.importee_dir(s.dir, suite_import.name)
     if exists(importee_dest):
+        # already exists in the suite model, but may be wrong version
         importee_suite = _scloneimports_suitehelper(importee_dest)
-        importee_suite._visit_imports(_scloneimports_visitor, source=importee_source)
+        if suite_import.version is not None and importee_suite.version() != suite_import.version:
+            abort("imported version of " + suite_import.name + " in " + s.name + " does not match the version in already existing suite: " + importee_suite.dir)
+        importee_suite.visit_imports(_scloneimports_visitor, source=importee_source)
     else:
         _sclone(importee_source, importee_dest, suite_import.version, False)
         # _clone handles the recursive visit of the new imports
 
 def scloneimports(args):
     """clone the imports of an existing suite"""
-    _hg._check(True)
+    _hg.check()
     parser = ArgumentParser(prog='mx scloneimports')
     parser.add_argument('--source', help='url/path of repo containing suite', metavar='<url>')
     parser.add_argument('nonKWArgs', nargs=REMAINDER, metavar='source [dest]...')
@@ -4043,40 +4056,40 @@
 
     s = _scloneimports_suitehelper(args.source)
 
-    default_path = _hg._default_push(args.source)
+    default_path = _hg.default_push(args.source)
 
     if default_path is None:
         abort('no default path in ' + join(args.source, '.hg', 'hgrc'))
 
     # We can now set the primary dir for the dst suitemodel
     # N.B. source is effectively the destination and the default_path is the (original) source
-    _dst_suitemodel._set_primary_dir(args.source)
-
-    s._visit_imports(_scloneimports_visitor, source=default_path)
+    _dst_suitemodel.set_primary_dir(args.source)
+
+    s.visit_imports(_scloneimports_visitor, source=default_path)
 
 def _spush_import_visitor(s, suite_import, dest, checks, clonemissing, **extra_args):
-    """push visitor for Suite._visit_imports"""
+    """push visitor for Suite.visit_imports"""
     if dest is not None:
-        dest = _dst_suitemodel._importee_dir(dest, suite_import.name)
+        dest = _dst_suitemodel.importee_dir(dest, suite_import.name)
     _spush(suite(suite_import.name), suite_import, dest, checks, clonemissing)
 
 def _spush_check_import_visitor(s, suite_import, **extra_args):
-    """push check visitor for Suite._visit_imports"""
-    currentTip = _hg._tip(suite(suite_import.name))
+    """push check visitor for Suite.visit_imports"""
+    currentTip = suite(suite_import.name).version()
     if currentTip != suite_import.version:
-        abort('import version of ' + suite_import.name + ' in suite ' + s.name + ' does not match tip')
+        abort('imported version of ' + suite_import.name + ' in suite ' + s.name + ' does not match tip')
 
 def _spush(s, suite_import, dest, checks, clonemissing):
     if checks:
-        if not _hg._canpush(s):
+        if not _hg.can_push(s):
             abort('working directory ' + s.dir + ' contains uncommitted changes, push aborted')
 
     # check imports first
     if checks:
-        s._visit_imports(_spush_check_import_visitor)
+        s.visit_imports(_spush_check_import_visitor)
 
     # ok, push imports
-    s._visit_imports(_spush_import_visitor, dest=dest, checks=checks, clonemissing=clonemissing)
+    s.visit_imports(_spush_import_visitor, dest=dest, checks=checks, clonemissing=clonemissing)
 
     dest_exists = True
 
@@ -4108,7 +4121,7 @@
 
 def spush(args):
     """push primary suite and all its imports"""
-    _hg._check(True)
+    _hg.check()
     parser = ArgumentParser(prog='mx spush')
     parser.add_argument('--dest', help='url/path of repo to push to (default as per hg push)', metavar='<path>')
     parser.add_argument('--no-checks', action='store_true', help='checks on status, versions are disabled')
@@ -4131,7 +4144,7 @@
         args.nochecks = True
 
     if args.dest is not None:
-        _dst_suitemodel._set_primary_dir(args.dest)
+        _dst_suitemodel.set_primary_dir(args.dest)
 
     _spush(s, None, args.dest, not args.no_checks, args.clonemissing)
 
@@ -4139,29 +4152,29 @@
     _supdate(suite(suite_import.name), suite_import)
 
 def _supdate(s, suite_import):
-    s._visit_imports(_supdate_import_visitor)
+    s.visit_imports(_supdate_import_visitor)
 
     run(['hg', '-R', s.dir, 'update'])
 
 def supdate(args):
     """update primary suite and all its imports"""
 
-    _hg._check(True)
+    _hg.check()
     s = _check_primary_suite()
 
     _supdate(s, None)
 
 def _scheck_imports_visitor(s, suite_import, update_versions, updated_imports):
-    """checkimportversions visitor for Suite._visit_imports"""
-    _scheck_imports(suite(suite_import.name), suite_import, update_versions, updated_imports)
-
-def _scheck_imports(s, suite_import, update_versions, updated_imports):
+    """scheckimports visitor for Suite.visit_imports"""
+    _scheck_imports(s, suite(suite_import.name), suite_import, update_versions, updated_imports)
+
+def _scheck_imports(importing_suite, imported_suite, suite_import, update_versions, updated_imports):
     # check imports recursively
-    s._visit_imports(_scheck_imports_visitor, update_versions=update_versions)
-
-    currentTip = _hg._tip(s)
+    imported_suite.visit_imports(_scheck_imports_visitor, update_versions=update_versions)
+
+    currentTip = imported_suite.version()
     if currentTip != suite_import.version:
-        print('import version of ' + s.name + ' does not match tip' + (': updating' if update_versions else ''))
+        print('imported version of ' + imported_suite.name + ' in ' + importing_suite.name + ' does not match tip' + (': updating' if update_versions else ''))
 
     if update_versions:
         suite_import.version = currentTip
@@ -4173,25 +4186,25 @@
     parser = ArgumentParser(prog='mx scheckimports')
     parser.add_argument('--update-versions', help='update imported version ids', action='store_true')
     args = parser.parse_args(args)
-    _check_primary_suite()._visit_imports(_scheck_imports_visitor, update_versions=args.update_versions)
+    _check_primary_suite().visit_imports(_scheck_imports_visitor, update_versions=args.update_versions)
 
 def _spull_import_visitor(s, suite_import, update_versions, updated_imports):
-    """pull visitor for Suite._visit_imports"""
+    """pull visitor for Suite.visit_imports"""
     _spull(suite(suite_import.name), update_versions, updated_imports)
 
 def _spull(s, update_versions, updated_imports):
-    _hg._check(True)
+    _hg.check()
     # pull imports first
-    s._visit_imports(_spull_import_visitor, update_versions=update_versions)
+    s.visit_imports(_spull_import_visitor, update_versions=update_versions)
 
     run(['hg', '-R', s.dir, 'pull', '-u'])
     if update_versions and updated_imports is not None:
-        tip = _hg._tip(s)
-        updated_imports.write(SuiteImport._tostring(s.name, tip) + '\n')
+        tip = s.version()
+        updated_imports.write(SuiteImport.tostring(s.name, tip) + '\n')
 
 def spull(args):
     """pull primary suite and all its imports"""
-    _hg._check(True)
+    _hg.check()
     parser = ArgumentParser(prog='mx spull')
     parser.add_argument('--update-versions', action='store_true', help='update version ids of imported suites')
     args = parser.parse_args(args)
@@ -4372,48 +4385,74 @@
                     return mxDir
 
 def _check_primary_suite():
-    if _mainSuite is None:
+    if _primary_suite is None:
         abort('no primary suite found')
     else:
-        return _mainSuite
+        return _primary_suite
 
 def _needs_primary_suite(command):
     return not command.startswith("sclone")
 
-def _findPrimarySuiteMxDir():
-    # try current working directory first, the look up the tree
-    curdir = os.getcwd()
-    while curdir:
-        mxDir = _is_suite_dir(curdir)
+def _needs_primary_suite_cl():
+    return not any("sclone" in s for s in sys.argv[1:])
+
+def _findPrimarySuiteMxDirFrom(d):
+    """ search for a suite directory upwards from 'd' """
+    while d:
+        mxDir = _is_suite_dir(d)
         if mxDir is not None:
             return mxDir
-        parent = dirname(curdir)
-        if curdir == parent:
+        parent = dirname(d)
+        if d == parent:
             return None
-        curdir = parent
+        d = parent
 
     return None
 
+def _findPrimarySuiteMxDir():
+    # check for explicit setting
+    if _primary_suite_path is not None:
+        mxDir = _is_suite_dir(_primary_suite_path)
+        if mxDir is not None:
+            return mxDir
+        else:
+            abort(_primary_suite_path + ' does not contain an mx suite')
+
+    # try current working directory first
+    mxDir = _findPrimarySuiteMxDirFrom(os.getcwd())
+    if mxDir is not None:
+        return mxDir
+    # backwards compatibility: search from path of this file
+    return _findPrimarySuiteMxDirFrom(dirname(__file__))
+
 def main():
-    SuiteModel._parse_options()
+    SuiteModel.parse_options()
 
     global _hg
     _hg = HgConfig()
 
+    primary_suite_error = 'no primary suite found'
     primarySuiteMxDir = _findPrimarySuiteMxDir()
     if primarySuiteMxDir:
-        _src_suitemodel._set_primary_dir(dirname(primarySuiteMxDir))
-        global _mainSuite
-        _mainSuite = _loadSuite(primarySuiteMxDir, True)
+        _src_suitemodel.set_primary_dir(dirname(primarySuiteMxDir))
+        global _primary_suite
+        _primary_suite = _loadSuite(primarySuiteMxDir, True)
+    else:
+        # in general this is an error, except for the sclone/scloneimports commands,
+        # and an extensions command will likely not parse in this case, as any extra arguments
+        # will not have been added to _argParser.
+        # If the command line does not contain a string matching one of the exceptions, we can safely abort,
+        # but not otherwise, as we can't be sure the string isn't in a value for some other option.
+        if _needs_primary_suite_cl():
+            abort(primary_suite_error)
 
     opts, commandAndArgs = _argParser._parse_cmd_line()
 
     if primarySuiteMxDir is None:
-        msg = 'no primary suite found'
         if len(commandAndArgs) > 0 and _needs_primary_suite(commandAndArgs[0]):
-            abort(msg)
+            abort(primary_suite_error)
         else:
-            warn(msg)
+            warn(primary_suite_error)
 
     global _opts, _java
     _opts = opts
--- a/src/gpu/ptx/vm/gpu_ptx.cpp	Wed Oct 23 13:40:56 2013 +0200
+++ b/src/gpu/ptx/vm/gpu_ptx.cpp	Wed Oct 23 13:41:10 2013 +0200
@@ -385,7 +385,7 @@
      case T_INT:
        {
          int return_val;
-         status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._return_value_ptr, T_INT_BYTE_SIZE);
+         status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._dev_return_value, T_INT_BYTE_SIZE);
          if (status != GRAAL_CUDA_SUCCESS) {
            tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status);
            return false;
@@ -396,7 +396,7 @@
      case T_BOOLEAN:
        {
          int return_val;
-         status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._return_value_ptr, T_INT_BYTE_SIZE);
+         status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._dev_return_value, T_INT_BYTE_SIZE);
          if (status != GRAAL_CUDA_SUCCESS) {
            tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status);
            return false;
@@ -407,7 +407,7 @@
      case T_FLOAT:
        {
          float return_val;
-         status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._return_value_ptr, T_FLOAT_BYTE_SIZE);
+         status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._dev_return_value, T_FLOAT_BYTE_SIZE);
          if (status != GRAAL_CUDA_SUCCESS) {
            tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status);
            return false;
@@ -418,7 +418,7 @@
      case T_DOUBLE:
        {
          double return_val;
-         status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._return_value_ptr, T_DOUBLE_BYTE_SIZE);
+         status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._dev_return_value, T_DOUBLE_BYTE_SIZE);
          if (status != GRAAL_CUDA_SUCCESS) {
            tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status);
            return false;
@@ -429,7 +429,7 @@
      case T_LONG:
        {
          long return_val;
-         status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._return_value_ptr, T_LONG_BYTE_SIZE);
+         status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._dev_return_value, T_LONG_BYTE_SIZE);
          if (status != GRAAL_CUDA_SUCCESS) {
            tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status);
            return false;
@@ -443,11 +443,11 @@
        tty->print_cr("[CUDA] TODO *** Unhandled return type: %d", return_type);
   }
 
-  // handle post-invocation object and array arguemtn
-  ptxka.reiterate();
+  // Copy all reference arguments from device to host memory.
+  ptxka.copyRefArgsFromDtoH();
 
   // Free device memory allocated for result
-  status = gpu::Ptx::_cuda_cu_memfree(ptxka._return_value_ptr);
+  status = gpu::Ptx::_cuda_cu_memfree(ptxka._dev_return_value);
   if (status != GRAAL_CUDA_SUCCESS) {
     tty->print_cr("[CUDA] *** Error (%d) Failed to free device memory of return value", status);
     return false;
--- a/src/gpu/ptx/vm/ptxKernelArguments.cpp	Wed Oct 23 13:40:56 2013 +0200
+++ b/src/gpu/ptx/vm/ptxKernelArguments.cpp	Wed Oct 23 13:41:10 2013 +0200
@@ -32,127 +32,132 @@
 // Get next java argument
 oop PTXKernelArguments::next_arg(BasicType expectedType) {
   assert(_index < _args->length(), "out of bounds");
-
   oop arg = ((objArrayOop) (_args))->obj_at(_index++);
   assert(expectedType == T_OBJECT ||
          java_lang_boxing_object::is_instance(arg, expectedType), "arg type mismatch");
-
   return arg;
 }
 
 void PTXKernelArguments::do_int() {
-    if (is_after_invocation()) {
+  // If the parameter is a return value,
+  if (is_return_type()) {
+    if (is_kernel_arg_setup()) {
+      // Allocate device memory for T_INT return value pointer on device. Size in bytes
+      int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_INT_BYTE_SIZE);
+      if (status != GRAAL_CUDA_SUCCESS) {
+        tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+        _success = false;
         return;
+      }
+      // Push _dev_return_value to _kernelArgBuffer
+      *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
     }
-    // If the parameter is a return value,
-    if (is_return_type()) {
-        // Allocate device memory for T_INT return value pointer on device. Size in bytes
-        int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_INT_BYTE_SIZE);
-        if (status != GRAAL_CUDA_SUCCESS) {
-            tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-            _success = false;
-            return;
-        }
-        // Push _return_value_ptr to _kernelBuffer
-        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
-        _bufferOffset += sizeof(_return_value_ptr);
-    } else {
-        // Get the next java argument and its value which should be a T_INT
-        oop arg = next_arg(T_INT);
-        // Copy the java argument value to kernelArgBuffer
-        jvalue intval;
-        if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) {
-            tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT");
-            _success = false;
-            return;
-        }
-        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = intval.i;
-        _bufferOffset += sizeof(intval.i);
+    _bufferOffset += sizeof(_dev_return_value);
+  } else {
+    // Get the next java argument and its value which should be a T_INT
+    oop arg = next_arg(T_INT);
+    // Copy the java argument value to kernelArgBuffer
+    jvalue intval;
+    if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) {
+      tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT");
+      _success = false;
+      return;
     }
-    return;
+    if (is_kernel_arg_setup()) {
+      *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = intval.i;
+    }
+    // Advance _bufferOffset
+    _bufferOffset += sizeof(intval.i);
+  }
+  return;
 }
 
 void PTXKernelArguments::do_float() {
-    if (is_after_invocation()) {
+  // If the parameter is a return value,
+  if (is_return_type()) {
+    if (is_kernel_arg_setup()) {
+      // Allocate device memory for T_FLOAT return value pointer on device. Size in bytes
+      int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_FLOAT_BYTE_SIZE);
+      if (status != GRAAL_CUDA_SUCCESS) {
+        tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+        _success = false;
         return;
+      }
+      // Push _dev_return_value to _kernelArgBuffer
+      *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
     }
-    // If the parameter is a return value,
-    if (is_return_type()) {
-        // Allocate device memory for T_INT return value pointer on device. Size in bytes
-        int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_FLOAT_BYTE_SIZE);
-        if (status != GRAAL_CUDA_SUCCESS) {
-            tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-            _success = false;
-            return;
-        }
-        // Push _return_value_ptr to _kernelBuffer
-        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
-        _bufferOffset += sizeof(_return_value_ptr);
-    } else {
-        // Get the next java argument and its value which should be a T_INT
-        oop arg = next_arg(T_FLOAT);
-        // Copy the java argument value to kernelArgBuffer
-        jvalue floatval;
-        if (java_lang_boxing_object::get_value(arg, &floatval) != T_FLOAT) {
-            tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT");
-            _success = false;
-            return;
-        }
-        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = (gpu::Ptx::CUdeviceptr) floatval.f;
-        _bufferOffset += sizeof(floatval.f);
+    // Advance _bufferOffset
+    _bufferOffset += sizeof(_dev_return_value);
+  } else {
+    // Get the next java argument and its value which should be a T_FLOAT
+    oop arg = next_arg(T_FLOAT);
+    // Copy the java argument value to kernelArgBuffer
+    jvalue floatval;
+    if (java_lang_boxing_object::get_value(arg, &floatval) != T_FLOAT) {
+      tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_FLOAT");
+      _success = false;
+      return;
     }
-    return;
+    if (is_kernel_arg_setup()) {
+      *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = floatval.f;
+    }
+    // Advance _bufferOffset
+    _bufferOffset += sizeof(floatval.f);
+  }
+  return;
 }
 
 void PTXKernelArguments::do_double() {
-    if (is_after_invocation()) {
+  // If the parameter is a return value,
+  jvalue doubleval;
+  if (is_return_type()) {
+    if (is_kernel_arg_setup()) {
+      // Allocate device memory for T_DOUBLE return value pointer on device. Size in bytes
+      int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_DOUBLE_BYTE_SIZE);
+      if (status != GRAAL_CUDA_SUCCESS) {
+        tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+        _success = false;
         return;
+      }
+      // Push _dev_return_value to _kernelArgBuffer
+      *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
     }
-    // If the parameter is a return value,
-    jvalue doubleval;
-    if (is_return_type()) {
-        // Allocate device memory for T_INT return value pointer on device. Size in bytes
-        int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_DOUBLE_BYTE_SIZE);
-        if (status != GRAAL_CUDA_SUCCESS) {
-            tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-            _success = false;
-            return;
-        }
-        // Push _return_value_ptr to _kernelBuffer
-        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
-        // _bufferOffset += sizeof(_return_value_ptr);
-        _bufferOffset += sizeof(doubleval.d);
-    } else {
-        // Get the next java argument and its value which should be a T_INT
-        oop arg = next_arg(T_FLOAT);
-        // Copy the java argument value to kernelArgBuffer
-        if (java_lang_boxing_object::get_value(arg, &doubleval) != T_DOUBLE) {
-            tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT");
-            _success = false;
-            return;
-        }
-        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = (gpu::Ptx::CUdeviceptr) doubleval.d;
-        _bufferOffset += sizeof(doubleval.d);
+    // Advance _bufferOffset
+    _bufferOffset += sizeof(doubleval.d);
+  } else {
+    // Get the next java argument and its value which should be a T_DOUBLE
+    oop arg = next_arg(T_FLOAT);
+    // Copy the java argument value to kernelArgBuffer
+    if (java_lang_boxing_object::get_value(arg, &doubleval) != T_DOUBLE) {
+      tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT");
+      _success = false;
+      return;
     }
-    return;
+    if (is_kernel_arg_setup()) {
+      *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = doubleval.d;
+    }
+    // Advance _bufferOffset
+    _bufferOffset += sizeof(doubleval.d);
+  }
+  return;
 }
 
 void PTXKernelArguments::do_long() {
-  if (is_after_invocation()) {
-    return;
-  }
   // If the parameter is a return value,
   if (is_return_type()) {
-    // Allocate device memory for T_LONG return value pointer on device. Size in bytes
-    int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_LONG_BYTE_SIZE);
-    if (status != GRAAL_CUDA_SUCCESS) {
-      tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-      _success = false;
-      return;
+    if (is_kernel_arg_setup()) {
+      // Allocate device memory for T_LONG return value pointer on device. Size in bytes
+      int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_LONG_BYTE_SIZE);
+      if (status != GRAAL_CUDA_SUCCESS) {
+        tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+        _success = false;
+        return;
+      }
+      // Push _dev_return_value to _kernelArgBuffer
+      *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
     }
-    // Push _return_value_ptr to _kernelBuffer
-    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
-    _bufferOffset += sizeof(_return_value_ptr);
+    // Advance _bufferOffset
+    _bufferOffset += sizeof(_dev_return_value);
   } else {
     // Get the next java argument and its value which should be a T_LONG
     oop arg = next_arg(T_LONG);
@@ -163,119 +168,132 @@
       _success = false;
       return;
     }
-    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.j;
+    if (is_kernel_arg_setup()) {
+      *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.j;
+    }
+    // Advance _bufferOffset
     _bufferOffset += sizeof(val.j);
   }
   return;
 }
 
 void PTXKernelArguments::do_byte() {
-    if (is_after_invocation()) {
+  // If this slot is the return type, it carries a device pointer to the result rather than a value.
+  if (is_return_type()) {
+    if (is_kernel_arg_setup()) {
+      // Setup pass only: allocate T_BYTE_SIZE bytes of device memory for the T_BYTE return value.
+      int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_BYTE_SIZE);
+      if (status != GRAAL_CUDA_SUCCESS) {
+        tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+        _success = false;
         return;
+      }
+      // Store the device pointer in _kernelArgBuffer at the current offset.
+      *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
     }
-    // If the parameter is a return value,
-    if (is_return_type()) {
-        // Allocate device memory for T_BYTE return value pointer on device. Size in bytes
-        int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BYTE_SIZE);
-        if (status != GRAAL_CUDA_SUCCESS) {
-            tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-            _success = false;
-            return;
-        }
-        // Push _return_value_ptr to _kernelBuffer
-        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
-        _bufferOffset += sizeof(_return_value_ptr);
-    } else {
-        // Get the next java argument and its value which should be a T_BYTE
-        oop arg = next_arg(T_BYTE);
-        // Copy the java argument value to kernelArgBuffer
-        jvalue val;
-        if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) {
-            tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE");
-            _success = false;
-            return;
-        }
-        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b;
-        _bufferOffset += sizeof(val.b);
+    // Advance _bufferOffset whether or not this is the setup pass, so slot offsets are the same in every pass.
+    _bufferOffset += sizeof(_dev_return_value);
+  } else {
+    // Otherwise fetch the next Java argument, which should be a boxed T_BYTE value.
+    oop arg = next_arg(T_BYTE);
+    // Unbox it; any type other than T_BYTE clears _success and stops.
+    jvalue val;
+    if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) {
+      tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE");
+      _success = false;
+      return;
     }
-    return;
+    if (is_kernel_arg_setup()) {  // write the value only while building the buffer
+      *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b;
+    }
+    // Advance _bufferOffset by the size of the byte value.
+    _bufferOffset += sizeof(val.b);
+  }
+  return;
 }
 
 void PTXKernelArguments::do_bool() {
-    if (is_after_invocation()) {
+  // If this slot is the return type, it carries a device pointer to the result rather than a value.
+  if (is_return_type()) {
+    if (is_kernel_arg_setup()) {
+      // Setup pass only: allocate T_BOOLEAN_SIZE bytes of device memory for the T_BOOLEAN return value.
+      int status = gpu::Ptx::_cuda_cu_memalloc(&_dev_return_value, T_BOOLEAN_SIZE);
+      if (status != GRAAL_CUDA_SUCCESS) {
+        tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+        _success = false;
         return;
+      }
+      // Store the device pointer in _kernelArgBuffer at the current offset.
+      *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _dev_return_value;
     }
-    // If the parameter is a return value,
-    if (is_return_type()) {
-        // Allocate device memory for T_BYTE return value pointer on device. Size in bytes
-        int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BOOLEAN_SIZE);
-        if (status != GRAAL_CUDA_SUCCESS) {
-            tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-            _success = false;
-            return;
-        }
-        // Push _return_value_ptr to _kernelBuffer
-        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
-        _bufferOffset += sizeof(_return_value_ptr);
-    } else {
-        // Get the next java argument and its value which should be a T_BYTE
-        oop arg = next_arg(T_BYTE);
-        // Copy the java argument value to kernelArgBuffer
-        jvalue val;
-        if (java_lang_boxing_object::get_value(arg, &val) != T_BOOLEAN) {
-            tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE");
-            _success = false;
-            return;
-        }
-        *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.z;
-        _bufferOffset += sizeof(val.z);
+    // Advance _bufferOffset whether or not this is the setup pass, so slot offsets are the same in every pass.
+    _bufferOffset += sizeof(_dev_return_value);
+  } else {
+    // Otherwise fetch the next Java argument, which should be a boxed T_BOOLEAN value.
+    oop arg = next_arg(T_BOOLEAN);
+    // Unbox it; any type other than T_BOOLEAN clears _success and stops.
+    jvalue val;
+    if (java_lang_boxing_object::get_value(arg, &val) != T_BOOLEAN) {
+      tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BOOLEAN");
+      _success = false;
+      return;
     }
-    return;
+    if (is_kernel_arg_setup()) {  // write the value only while building the buffer
+      *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.z;
+    }
+    // Advance _bufferOffset by the size of the boolean value.
+    _bufferOffset += sizeof(val.z);
+  }
+  return;
 }
 
 void PTXKernelArguments::do_array(int begin, int end) {
-    gpu::Ptx::CUdeviceptr _array_ptr;
-    int status;
-
-    // Get the next java argument and its value which should be a T_ARRAY
-    oop arg = next_arg(T_OBJECT);
-    int array_size = arg->size() * HeapWordSize;
+  // Get the next java argument, fetched as T_OBJECT and expected to be an array; begin/end are not used here.
+  oop arg = next_arg(T_OBJECT);
+  assert(arg->is_array(), "argument value not an array");
+  // Size of the array argument, computed as arg->size() scaled by HeapWordSize
+  int argSize = arg->size() * HeapWordSize;
+  // Device pointer to the array argument: allocated in the setup pass, read back from the buffer otherwise.
+  gpu::Ptx::CUdeviceptr arrayArgOnDev;
+  int status;
 
-    if (is_after_invocation()) {
-        _array_ptr = *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]);
-        status = gpu::Ptx::_cuda_cu_memcpy_dtoh(arg, _array_ptr, array_size);
-        if (status != GRAAL_CUDA_SUCCESS) {
-            tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument to host", status);
-            _success = false;
-            return;
-        } else {
-            // tty->print_cr("device: %x host: %x size: %d", _array_ptr, arg, array_size);
-        }
-        return;
+  if (is_kernel_arg_setup()) {
+    // Allocate argSize bytes of device memory for the array argument.
+    status = gpu::Ptx::_cuda_cu_memalloc(&arrayArgOnDev, argSize);
+    if (status != GRAAL_CUDA_SUCCESS) {
+      tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for array argument on device",
+                    status);
+      _success = false;
+      return;
     }
-    // Allocate device memory for T_ARRAY return value pointer on device. Size in bytes
-    status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, array_size);
-    if (status != GRAAL_CUDA_SUCCESS) {
-        tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
-        _success = false;
-        return;
-    }
-    status = gpu::Ptx::_cuda_cu_memcpy_htod(_return_value_ptr, arg, array_size);
+    // Copy the array argument from host to the device allocation.
+    status = gpu::Ptx::_cuda_cu_memcpy_htod(arrayArgOnDev, arg, argSize);
     if (status != GRAAL_CUDA_SUCCESS) {
-        tty->print_cr("[CUDA] *** Error (%d) Failed to copy array to device argument", status);
-        _success = false;
-        return;
-    } else {
-        // tty->print_cr("host: %x device: %x size: %d", arg, _return_value_ptr, array_size);
+      tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument content to device memory",
+                    status);
+      _success = false;
+      return;
     }
-    // Push _return_value_ptr to _kernelBuffer
-    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
-    _bufferOffset += sizeof(_return_value_ptr);
-    return;
+
+    // Store the device array pointer in _kernelArgBuffer at the current offset.
+    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = arrayArgOnDev;
+  } else {  // not the setup pass: read the device pointer back from the buffer and copy device -> host
+    arrayArgOnDev = *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]);
+    status = gpu::Ptx::_cuda_cu_memcpy_dtoh(arg, arrayArgOnDev, argSize);
+    if (status != GRAAL_CUDA_SUCCESS) {
+      tty->print_cr("[CUDA] *** Error (%d) Failed to copy array argument to host", status);
+      _success = false;
+      return;
+    }
+  }
+
+  // Advance _bufferOffset past the device pointer slot (done in both branches).
+  _bufferOffset += sizeof(arrayArgOnDev);
+  return;
 }
 
 void PTXKernelArguments::do_void() {
-    return;
+  return;  // nothing is written to _kernelArgBuffer and _bufferOffset is not advanced for void
 }
 
 // TODO implement other do_*
--- a/src/gpu/ptx/vm/ptxKernelArguments.hpp	Wed Oct 23 13:40:56 2013 +0200
+++ b/src/gpu/ptx/vm/ptxKernelArguments.hpp	Wed Oct 23 13:41:10 2013 +0200
@@ -42,7 +42,13 @@
   char _kernelArgBuffer[1024];
   // Current offset into _kernelArgBuffer
   size_t _bufferOffset;
-  gpu::Ptx::CUdeviceptr _return_value_ptr;
+  // Device pointer holding return value
+  gpu::Ptx::CUdeviceptr _dev_return_value;
+
+  // Indicates if signature iteration is being done during kernel
+  // setup i.e., java arguments are being copied to device pointers.
+  bool _kernelArgSetup;
+
 private:
   // Array of java argument oops
   arrayOop _args;
@@ -51,7 +57,6 @@
   // Flag to indicate successful creation of kernel argument buffer
   bool _success;
 
-    bool _afterInvoocation;
   // Get next java argument
   oop next_arg(BasicType expectedType);
 
@@ -62,7 +67,9 @@
     _args = args;
     _success = true;
     _bufferOffset = 0;
-    _return_value_ptr = 0;
+    _dev_return_value = 0;
+    _kernelArgSetup = true;
+    //_dev_call_by_reference_args_index = 0;
     if (!is_static) {
       // TODO : Create a device argument for receiver object and add it to _kernelBuffer
       tty->print_cr("{CUDA] ****** TODO: Support for execution of non-static java methods not implemented yet.");
@@ -80,23 +87,23 @@
     return _bufferOffset;
   }
 
-    void reiterate() {
-        _afterInvoocation = true;
-        _bufferOffset = 0;
-        _index = 0;
-        iterate();
-    }
+  void copyRefArgsFromDtoH() {  // second pass over the arguments with setup mode switched off
+    _kernelArgSetup = false;    // is_kernel_arg_setup() now returns false, so do_array copies device -> host
+    _bufferOffset = 0;          // restart at the first slot of _kernelArgBuffer
+    _index = 0;                 // NOTE(review): presumably the argument cursor used by next_arg() - confirm
+    iterate();                  // NOTE(review): presumably re-runs the do_* visitors over the signature - confirm
+  }
 
-    inline bool is_after_invocation() {
-        return _afterInvoocation;
-    }
+  inline bool is_kernel_arg_setup() {
+    return _kernelArgSetup;  // initialized to true; cleared by copyRefArgsFromDtoH()
+  }
 
   // Get the return oop value
   oop get_return_oop();
 
   // get device return value ptr
-  gpu::Ptx::CUdeviceptr get_return_value_ptr() {
-      return _return_value_ptr;
+  gpu::Ptx::CUdeviceptr get_dev_return_value() {
+      return _dev_return_value;  // initialized to 0; set by the return-type branch of a do_* visitor during setup
   }
 
 
--- a/src/share/tools/IdealGraphVisualizer/nbproject/project.properties	Wed Oct 23 13:40:56 2013 +0200
+++ b/src/share/tools/IdealGraphVisualizer/nbproject/project.properties	Wed Oct 23 13:41:10 2013 +0200
@@ -40,5 +40,5 @@
 
 # Disable assertions for RequestProcessor to prevent annoying messages in case
 # of multiple SceneAnimator update tasks in the default RequestProcessor.
-run.args.extra = -J-server -J-da:org.openide.util.RequestProcessor -J-Xms2g -J-Xmx4g
+run.args.extra = -J-server -J-da:org.openide.util.RequestProcessor -J-Xms2g -J-Xmx8g
 debug.args.extra = -J-server -J-da:org.openide.util.RequestProcessor
--- a/src/share/vm/graal/graalGlobals.hpp	Wed Oct 23 13:40:56 2013 +0200
+++ b/src/share/vm/graal/graalGlobals.hpp	Wed Oct 23 13:41:10 2013 +0200
@@ -55,7 +55,7 @@
   product(intx, TraceGraal, 0,                                              \
           "Trace level for Graal")                                          \
                                                                             \
-  product(bool, GraalDeferredInitBarriers, true,                            \
+  product(bool, GraalDeferredInitBarriers, false,                           \
           "Defer write barriers of young objects")                          \
                                                                             \
   develop(bool, GraalUseFastLocking, true,                                  \
--- a/src/share/vm/graal/graalJavaAccess.hpp	Wed Oct 23 13:40:56 2013 +0200
+++ b/src/share/vm/graal/graalJavaAccess.hpp	Wed Oct 23 13:41:10 2013 +0200
@@ -85,6 +85,7 @@
   start_class(HotSpotNmethod)                                                                                                                                  \
     boolean_field(HotSpotNmethod, isDefault)                                                                                                                   \
     boolean_field(HotSpotNmethod, isExternal)                                                                                                                  \
+    oop_field(HotSpotNmethod, name, "Ljava/lang/String;")                                                                                                      \
   end_class                                                                                                                                                    \
   start_class(HotSpotCompiledCode)                                                                                                                             \
     oop_field(HotSpotCompiledCode, comp, "Lcom/oracle/graal/api/code/CompilationResult;")                                                                      \
--- a/src/share/vm/runtime/deoptimization.cpp	Wed Oct 23 13:40:56 2013 +0200
+++ b/src/share/vm/runtime/deoptimization.cpp	Wed Oct 23 13:41:10 2013 +0200
@@ -89,6 +89,7 @@
 
 #ifdef GRAAL
 #include "graal/graalCompiler.hpp"
+#include "graal/graalJavaAccess.hpp"
 #endif
 
 
@@ -1420,6 +1421,19 @@
       if (TraceDeoptimization) {  // make noise on the tty
         tty->print("Uncommon trap occurred in");
         nm->method()->print_short_name(tty);
+#ifdef GRAAL
+        oop installedCode = nm->graal_installed_code();
+        if (installedCode != NULL) {
+          oop installedCodeName = HotSpotNmethod::name(installedCode);
+          if (installedCodeName != NULL) {
+            tty->print(" (Graal: installedCodeName=%s) ", java_lang_String::as_utf8_string(installedCodeName));
+          } else {
+            tty->print(" (Graal: installed code has no name) ");
+          }
+        } else {
+          tty->print(" (Graal: no installed code) ");
+        }
+#endif //GRAAL
         tty->print(" (@" INTPTR_FORMAT ") thread=" UINTX_FORMAT " reason=%s action=%s unloaded_class_index=%d",
                    fr.pc(),
                    os::current_thread_id(),