view graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java @ 18443:1c92d437179b

FrameMapBuilder: move into package.
author Josef Eisl <josef.eisl@jku.at>
date Mon, 17 Nov 2014 16:41:44 +0100
parents 6b58802e45b2
children 6c7efa29b626
line wrap: on
line source

/*
 * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package com.oracle.graal.hotspot.hsail;

import static com.oracle.graal.api.code.CallingConvention.Type.*;
import static com.oracle.graal.api.code.CodeUtil.*;
import static com.oracle.graal.api.code.ValueUtil.*;
import static com.oracle.graal.api.meta.LocationIdentity.*;
import static com.oracle.graal.compiler.GraalCompiler.*;
import static com.oracle.graal.hotspot.hsail.HSAILHotSpotBackend.Options.*;
import static com.oracle.graal.hotspot.hsail.replacements.HSAILNewObjectSnippets.Options.*;

import java.lang.reflect.*;
import java.util.*;
import java.util.Map.Entry;
import java.util.function.*;
import java.util.stream.*;

import com.amd.okra.*;
import com.oracle.graal.api.code.*;
import com.oracle.graal.api.code.Assumptions.Assumption;
import com.oracle.graal.api.code.CallingConvention.Type;
import com.oracle.graal.api.code.CompilationResult.Call;
import com.oracle.graal.api.code.CompilationResult.CodeAnnotation;
import com.oracle.graal.api.code.CompilationResult.DataPatch;
import com.oracle.graal.api.code.CompilationResult.DataSectionReference;
import com.oracle.graal.api.code.CompilationResult.ExceptionHandler;
import com.oracle.graal.api.code.CompilationResult.Infopoint;
import com.oracle.graal.api.code.CompilationResult.Mark;
import com.oracle.graal.api.code.DataSection.Data;
import com.oracle.graal.api.meta.*;
import com.oracle.graal.asm.*;
import com.oracle.graal.asm.hsail.*;
import com.oracle.graal.compiler.common.*;
import com.oracle.graal.compiler.common.cfg.*;
import com.oracle.graal.compiler.common.type.*;
import com.oracle.graal.debug.*;
import com.oracle.graal.debug.Debug.Scope;
import com.oracle.graal.gpu.*;
import com.oracle.graal.hotspot.*;
import com.oracle.graal.hotspot.bridge.CompilerToVM.CodeInstallResult;
import com.oracle.graal.hotspot.meta.*;
import com.oracle.graal.hotspot.nodes.*;
import com.oracle.graal.hsail.*;
import com.oracle.graal.java.*;
import com.oracle.graal.lir.*;
import com.oracle.graal.lir.asm.*;
import com.oracle.graal.lir.framemap.*;
import com.oracle.graal.lir.gen.*;
import com.oracle.graal.lir.hsail.*;
import com.oracle.graal.lir.hsail.HSAILControlFlow.DeoptimizingOp;
import com.oracle.graal.nodes.*;
import com.oracle.graal.nodes.StructuredGraph.GuardsStage;
import com.oracle.graal.nodes.extended.*;
import com.oracle.graal.nodes.java.*;
import com.oracle.graal.nodes.spi.*;
import com.oracle.graal.nodes.virtual.*;
import com.oracle.graal.options.*;
import com.oracle.graal.phases.*;
import com.oracle.graal.phases.tiers.*;
import com.oracle.graal.virtual.nodes.*;

/**
 * HSAIL specific backend.
 */
public class HSAILHotSpotBackend extends HotSpotBackend {

    /**
     * Command line options specific to the HSAIL HotSpot backend.
     */
    public static class Options {

        // The number of TLABs defaults to 4. The value is read when a kernel that allocates is
        // executed and when the kernel prologue is emitted.
        // @formatter:off
        @Option(help = "Number of TLABs used for HSAIL kernels which allocate")
        public static  final OptionValue<Integer> HsailKernelTlabs = new OptionValue<>(4);
        // @formatter:on
    }

    // Maps the string form of a parameter's Java type to the HSAIL type suffix that is appended
    // to "kernarg_" when kernel parameters are emitted. Populated in the constructor; types
    // without an entry are emitted as "u64".
    private Map<String, String> paramTypeMap = new HashMap<>();
    // Outcome of the device initialization attempted once in the constructor.
    private final boolean deviceInitialized;
    // TODO: get maximum Concurrency from okra
    private int maxDeoptIndex = 8 * 40 * 64;   // see gpu_hsail.hpp

    /**
     * Creates the HSAIL backend, registers the HSAIL type suffixes of the primitive kernel
     * parameter types and attempts to initialize the GPU device.
     *
     * @param runtime the runtime handed on to the superclass
     * @param providers the providers handed on to the superclass
     */
    public HSAILHotSpotBackend(HotSpotGraalRuntime runtime, HotSpotProviders providers) {
        super(runtime, providers);

        // String form of a primitive parameter type -> HSAIL type suffix.
        paramTypeMap.put("HotSpotResolvedPrimitiveType<long>", "s64");
        paramTypeMap.put("HotSpotResolvedPrimitiveType<double>", "f64");
        paramTypeMap.put("HotSpotResolvedPrimitiveType<float>", "f32");
        paramTypeMap.put("HotSpotResolvedPrimitiveType<int>", "s32");

        /*
         * The library check has to come first: the OkraUtil call may provision the native library
         * required by the initialize() call. initialize() is not attempted at all when the
         * library is missing.
         */
        if (OkraUtil.okraLibExists()) {
            deviceInitialized = initialize();
        } else {
            deviceInitialized = false;
        }
    }

    /**
     * Answers the register allocation query for this backend.
     *
     * @return always {@code true}
     */
    @Override
    public boolean shouldAllocateRegisters() {
        return true;
    }

    /**
     * Initializes the GPU device. Implemented in native code; the constructor only calls it after
     * {@code OkraUtil.okraLibExists()} has returned {@code true}.
     *
     * @return whether or not initialization was successful
     */
    private static native boolean initialize();

    /**
     * Determines if the GPU device (or simulator) is available and initialized.
     *
     * @return the outcome of the initialization attempt made when this backend was constructed
     */
    public boolean isDeviceInitialized() {
        return deviceInitialized;
    }

    /**
     * Finishes setting up the HSAIL backend: the lowering provider is initialized first, then the
     * HSAIL replacements complete their own initialization (which registers the method
     * substitutions specified by the HSAIL backend).
     */
    @Override
    public void completeInitialization() {
        HotSpotProviders hsailProviders = getProviders();
        HotSpotVMConfig vmConfig = getRuntime().getConfig();

        // Step 1: the lowering provider.
        HotSpotLoweringProvider loweringProvider = (HotSpotLoweringProvider) hsailProviders.getLowerer();
        loweringProvider.initialize(hsailProviders, vmConfig);

        // Step 2: the replacements used by the HSAIL backend.
        ((HSAILHotSpotReplacementsImpl) hsailProviders.getReplacements()).completeInitialization();
    }

    /**
     * Compiles and installs a given method to a GPU binary. When logging is enabled, the
     * disassembly of the generated host code is logged as well.
     *
     * @param method the method to compile as an HSAIL kernel
     * @return a handle to the installed code, or {@code null} if {@link #installKernel} could not
     *         install it
     */
    public HotSpotNmethod compileAndInstallKernel(Method method) {
        ResolvedJavaMethod javaMethod = getProviders().getMetaAccess().lookupJavaMethod(method);
        HotSpotNmethod nm = installKernel(javaMethod, compileKernel(javaMethod, true));
        try (Scope s = Debug.scope("HostCodeGen")) {
            if (Debug.isLogEnabled()) {
                if (nm == null) {
                    // installKernel signals a failed installation with null; there is nothing
                    // to hand to the disassembler in that case.
                    Debug.log("host code installation failed for %s", javaMethod);
                } else {
                    DisassemblerProvider dis = getRuntime().getHostBackend().getDisassembler();
                    if (dis != null) {
                        String disasm = dis.disassemble(nm);
                        Debug.log("host code generated for %s%n%s", javaMethod, disasm);
                    } else {
                        Debug.log("host code disassembler is null");
                    }
                }
            }
        } catch (Throwable e) {
            throw Debug.handle(e);
        }
        return nm;
    }

    /**
     * Compiles a given method to HSAIL code.
     *
     * @param method the method whose bytecode is compiled
     * @param makeBinary specifies whether a GPU binary should also be generated for the HSAIL code.
     *            If true, the returned value is guaranteed to have a non-zero
     *            {@linkplain ExternalCompilationResult#getEntryPoint() entry point}.
     * @return the HSAIL code compiled from {@code method}'s bytecode
     * @throws GraalInternalError if {@code makeBinary} is true and the device is not initialized
     */
    public ExternalCompilationResult compileKernel(ResolvedJavaMethod method, boolean makeBinary) {
        StructuredGraph graph = new StructuredGraph(method);
        HotSpotProviders providers = getProviders();
        MetaAccessProvider metaAccess = providers.getMetaAccess();

        /*
         * Changed this from default to help us generate deopts when needed. The value returned
         * by remove() is what gets used: previously it was discarded while the receiver was the
         * shared OptimisticOptimizations.ALL constant, so the removal either had no effect here
         * or altered a global. NOTE(review): assumes remove() returns the reduced set of
         * optimizations - confirm against OptimisticOptimizations.
         */
        OptimisticOptimizations optimisticOpts = OptimisticOptimizations.ALL.remove(OptimisticOptimizations.Optimization.UseExceptionProbabilityForOperations);
        new GraphBuilderPhase.Instance(metaAccess, GraphBuilderConfiguration.getSnippetDefault(), optimisticOpts).apply(graph);
        PhaseSuite<HighTierContext> graphBuilderSuite = providers.getSuites().getDefaultGraphBuilderSuite();
        CallingConvention cc = getCallingConvention(providers.getCodeCache(), Type.JavaCallee, graph.method(), false);

        // append special HSAILNonNullParametersPhase
        int numArgs = cc.getArguments().length;
        graphBuilderSuite.appendPhase(new HSAILNonNullParametersPhase(numArgs));

        Suites suites = providers.getSuites().getDefaultSuites();
        ExternalCompilationResult hsailCode = compileGraph(graph, null, cc, method, providers, this, this.getTarget(), null, graphBuilderSuite, optimisticOpts, getProfilingInfo(graph), null, suites,
                        new ExternalCompilationResult(), CompilationResultBuilderFactory.Default);

        // this code added to dump infopoints
        try (Scope s = Debug.scope("CodeGen")) {
            if (Debug.isLogEnabled()) {
                // show infopoints
                List<Infopoint> infoList = hsailCode.getInfopoints();
                Debug.log("%d HSAIL infopoints", infoList.size());
                for (Infopoint info : infoList) {
                    Debug.log(info.toString());
                    Debug.log(info.debugInfo.frame().toString());
                }
            }
        } catch (Throwable e) {
            throw Debug.handle(e);
        }

        if (makeBinary) {
            if (!deviceInitialized) {
                throw new GraalInternalError("Cannot generate GPU kernel if device is not initialized");
            }
            try (Scope ds = Debug.scope("GeneratingKernelBinary")) {
                long kernel = generateKernel(hsailCode.getTargetCode(), method.getName());
                // a zero handle means no binary could be produced
                if (kernel == 0) {
                    throw new GraalInternalError("Failed to compile HSAIL kernel");
                }
                hsailCode.setEntryPoint(kernel);
            } catch (Throwable e) {
                throw Debug.handle(e);
            }
        }
        return hsailCode;
    }

    /**
     * Phase that narrows the stamp of object parameters to non-null. Only parameters counted
     * before {@code numArgs} is reached are touched, which leaves out the final argument in an
     * obj stream method.
     */
    private static class HSAILNonNullParametersPhase extends Phase {
        // upper bound (exclusive) on the 1-based count of parameters that get stamped
        private final int numArgs;

        public HSAILNonNullParametersPhase(int numArgs) {
            this.numArgs = numArgs;
        }

        @Override
        protected void run(StructuredGraph graph) {
            Stamp nonNullStamp = StampFactory.objectNonNull();
            int seen = 0;
            for (ParameterNode parameter : graph.getNodes(ParameterNode.class)) {
                seen++;
                if (seen >= numArgs) {
                    // at or past the limit: leave the stamp alone
                    continue;
                }
                Stamp current = parameter.stamp();
                if (current instanceof ObjectStamp) {
                    parameter.setStamp(((ObjectStamp) current).join(nonNullStamp));
                }
            }
        }
    }

    /**
     * Generates a GPU binary from HSAIL code. Implemented in native code.
     *
     * @param hsailCode the HSAIL code to generate a binary for
     * @param name a name accompanying the code; {@code compileKernel} passes the name of the
     *            compiled method
     * @return a handle to the generated kernel; {@code compileKernel} treats {@code 0} as a
     *         failure
     */
    static native long generateKernel(byte[] hsailCode, String name);

    /**
     * Installs the {@linkplain ExternalCompilationResult#getEntryPoint() GPU binary} associated
     * with some given HSAIL code in the code cache and returns a {@link HotSpotNmethod} handle to
     * the installed code. If the HSAIL code carries a host graph, that graph is compiled with the
     * host backend and the result is merged with the HSAIL code before installation.
     *
     * @param method the method the kernel was compiled from
     * @param hsailCode HSAIL compilation result for which a GPU binary has been generated
     * @return a handle to the binary as installed in the HotSpot code cache, or {@code null} if
     *         the installation did not report {@code CodeInstallResult.OK}
     */
    public final HotSpotNmethod installKernel(ResolvedJavaMethod method, ExternalCompilationResult hsailCode) {
        assert hsailCode.getEntryPoint() != 0L;
        // Code here based on HotSpotCodeCacheProvider.addExternalMethod().
        HotSpotResolvedJavaMethod hsMethod = (HotSpotResolvedJavaMethod) method;
        if (hsailCode.getId() == -1) {
            hsailCode.setId(hsMethod.allocateCompileId(hsailCode.getEntryBCI()));
        }

        // Without a host graph the HSAIL result is installed as is.
        CompilationResult codeToInstall = hsailCode;
        StructuredGraph hostGraph = hsailCode.getHostGraph();
        if (hostGraph != null) {
            // TODO get rid of the unverified entry point in the host code
            try (Scope ds = Debug.scope("GeneratingHostGraph", new DebugDumpScope("HostGraph"))) {
                HotSpotBackend hostBackend = getRuntime().getHostBackend();
                HotSpotProviders hostProviders = hostBackend.getProviders();

                // Derive the host calling convention from the host graph's parameter stamps.
                int paramCount = hostGraph.getNodes(ParameterNode.class).count();
                JavaType[] parameterTypes = new JavaType[paramCount];
                Debug.log("Param count: %d", parameterTypes.length);
                for (int p = 0; p < parameterTypes.length; p++) {
                    ParameterNode parameter = hostGraph.getParameter(p);
                    Debug.log("Param [%d]=%s", p, parameter);
                    parameterTypes[p] = parameter.stamp().javaType(hostProviders.getMetaAccess());
                    Debug.log(" %s", parameterTypes[p]);
                }
                RegisterConfig hostRegisterConfig = hostProviders.getCodeCache().getRegisterConfig();
                CallingConvention cc = hostRegisterConfig.getCallingConvention(Type.JavaCallee, method.getSignature().getReturnType(null), parameterTypes, hostBackend.getTarget(), false);

                CompilationResult hostCode = compileGraph(hostGraph, null, cc, method, hostProviders, hostBackend, this.getTarget(), null,
                                hostProviders.getSuites().getDefaultGraphBuilderSuite(), OptimisticOptimizations.NONE, null, null,
                                hostProviders.getSuites().getDefaultSuites(), new CompilationResult(), CompilationResultBuilderFactory.Default);
                codeToInstall = merge(hostCode, hsailCode);
            } catch (Throwable e) {
                throw Debug.handle(e);
            }
        }

        // The nmethod handle is always populated from the HSAIL result, not the merged one.
        HSAILHotSpotNmethod installed = new HSAILHotSpotNmethod(hsMethod, hsailCode.getName(), false, true);
        installed.setOopMapArray(hsailCode.getOopMapArray());
        installed.setUsesAllocationFlag(hsailCode.getUsesAllocationFlag());
        HotSpotCompiledNmethod compiled = new HotSpotCompiledNmethod(hsMethod, codeToInstall);
        CodeInstallResult status = getRuntime().getCompilerToVM().installCode(compiled, installed, null);
        return status == CodeInstallResult.OK ? installed : null;
    }

    /**
     * Combines a host compilation result with an HSAIL compilation result into a fresh result.
     * <p>
     * Entry point, id and entry BCI come from the HSAIL code. Frame size, custom stack area
     * offset, target code, annotations, marks, exception handlers, data patches and infopoints
     * come from the host code. Assumptions of both are combined and only set on the result if
     * there is at least one.
     * <p>
     * Note that a host data patch referring to the host data section has its {@code reference}
     * field rewritten to point at the copy inserted into the result's data section.
     *
     * @param hostCode compiled host code
     * @param hsailCode compiled HSAIL code; asserted to have no marks, exception handlers or data
     *            patches
     * @return the merged result
     */
    private static ExternalCompilationResult merge(CompilationResult hostCode, ExternalCompilationResult hsailCode) {
        ExternalCompilationResult merged = new ExternalCompilationResult();

        // --- taken from the HSAIL code ---
        merged.setEntryPoint(hsailCode.getEntryPoint());
        merged.setId(hsailCode.getId());
        merged.setEntryBCI(hsailCode.getEntryBCI());
        assert hsailCode.getMarks().isEmpty();
        assert hsailCode.getExceptionHandlers().isEmpty();
        assert hsailCode.getDataPatches().isEmpty();

        // --- taken from the host code ---
        merged.setTotalFrameSize(hostCode.getTotalFrameSize());
        merged.setCustomStackAreaOffset(hostCode.getCustomStackAreaOffset());
        merged.setTargetCode(hostCode.getTargetCode(), hostCode.getTargetCodeSize());
        for (CodeAnnotation hostAnnotation : hostCode.getAnnotations()) {
            merged.addAnnotation(hostAnnotation);
        }
        for (Mark hostMark : hostCode.getMarks()) {
            merged.recordMark(hostMark.pcOffset, hostMark.id);
        }
        for (ExceptionHandler hostHandler : hostCode.getExceptionHandlers()) {
            merged.recordExceptionHandler(hostHandler.pcOffset, hostHandler.handlerPos);
        }
        for (DataPatch hostPatch : hostCode.getDataPatches()) {
            if (hostPatch.reference instanceof DataSectionReference) {
                // Re-home the referenced data in the merged result's own data section.
                Data original = hostCode.getDataSection().findData((DataSectionReference) hostPatch.reference);
                Data copy = new Data(original.getAlignment(), original.getSize(), original.getBuilder());
                hostPatch.reference = merged.getDataSection().insertData(copy);
            }
            merged.recordDataPatch(hostPatch.pcOffset, hostPatch.reference);
        }
        for (Infopoint hostInfopoint : hostCode.getInfopoints()) {
            if (!(hostInfopoint instanceof Call)) {
                merged.recordInfopoint(hostInfopoint.pcOffset, hostInfopoint.debugInfo, hostInfopoint.reason);
                continue;
            }
            Call hostCall = (Call) hostInfopoint;
            merged.recordCall(hostCall.pcOffset, hostCall.size, hostCall.target, hostCall.debugInfo, hostCall.direct);
        }

        // --- combined from both, host first ---
        Assumptions combined = new Assumptions(true);
        recordNonNullAssumptions(hostCode.getAssumptions(), combined);
        recordNonNullAssumptions(hsailCode.getAssumptions(), combined);
        if (!combined.isEmpty()) {
            merged.setAssumptions(combined);
        }
        return merged;
    }

    /**
     * Records every non-null assumption of {@code source} in {@code sink}; does nothing if
     * {@code source} is {@code null}.
     */
    private static void recordNonNullAssumptions(Assumptions source, Assumptions sink) {
        if (source == null) {
            return;
        }
        for (Assumption candidate : source.getAssumptions()) {
            if (candidate != null) {
                sink.record(candidate);
            }
        }
    }

    /**
     * Runs an installed HSAIL kernel.
     *
     * @param kernel the installed kernel; cast to {@link HSAILHotSpotNmethod}
     * @param jobSize passed through to the native execution call
     * @param args passed through to the native execution call
     * @return the value returned by the native execution call
     * @throws GraalInternalError if the device is not initialized
     * @throws InvalidInstalledCodeException as declared by the native execution call
     */
    public boolean executeKernel(HotSpotInstalledCode kernel, int jobSize, Object[] args) throws InvalidInstalledCodeException {
        if (!deviceInitialized) {
            throw new GraalInternalError("Cannot execute GPU kernel if device is not initialized");
        }
        HSAILHotSpotNmethod hsailKernel = (HSAILHotSpotNmethod) kernel;
        int[] oopMaps = hsailKernel.getOopMapArray();

        // Only kernels that allocate are given TLABs (HsailKernelTlabs of them); others get 0.
        int tlabCount = 0;
        if (hsailKernel.getUsesAllocationFlag()) {
            tlabCount = HsailKernelTlabs.getValue();
        }
        return executeKernel0(kernel, jobSize, args, tlabCount, HsailAllocBytesPerWorkitem.getValue(), oopMaps);
    }

    /**
     * Native counterpart of {@code executeKernel}, which supplies {@code numTlabs} (0 for kernels
     * that do not allocate), the value of the {@code HsailAllocBytesPerWorkitem} option and the
     * kernel's oop map array in addition to its own arguments.
     */
    private static native boolean executeKernel0(HotSpotInstalledCode kernel, int jobSize, Object[] args, int numTlabs, int allocBytesPerWorkitem, int[] oopMapArray)
                    throws InvalidInstalledCodeException;

    /**
     * Creates a {@link DelayedFrameMapBuilder} that is handed {@link #newFrameMap} as its frame
     * map factory, together with this backend's code cache and the given register configuration.
     */
    @Override
    public FrameMapBuilder newFrameMapBuilder(RegisterConfig registerConfig) {
        return new DelayedFrameMapBuilder(this::newFrameMap, getCodeCache(), registerConfig);
    }

    /**
     * Use the HSAIL register set when the compilation target is HSAIL.
     *
     * @param registerConfig the register configuration passed to the new frame map
     * @return a new {@link HSAILFrameMap} built from this backend's code cache and
     *         {@code registerConfig}
     */
    @Override
    public FrameMap newFrameMap(RegisterConfig registerConfig) {
        return new HSAILFrameMap(getCodeCache(), registerConfig);
    }

    /**
     * Creates the HSAIL LIR generator from this backend's providers, the VM configuration, the
     * given calling convention and the given LIR generation result.
     */
    @Override
    public LIRGeneratorTool newLIRGenerator(CallingConvention cc, LIRGenerationResult lirGenRes) {
        return new HSAILHotSpotLIRGenerator(getProviders(), getRuntime().getConfig(), cc, lirGenRes);
    }

    /**
     * Creates the HSAIL LIR generation result for the given LIR and frame map builder. The
     * {@code method} and {@code stub} arguments are not used.
     */
    @Override
    public LIRGenerationResult newLIRGenerationResult(LIR lir, FrameMapBuilder frameMapBuilder, ResolvedJavaMethod method, Object stub) {
        return new HSAILHotSpotLIRGenerationResult(lir, frameMapBuilder);
    }

    /**
     * Creates the HSAIL node-to-LIR builder for the given graph and LIR generator.
     */
    @Override
    public NodeLIRBuilderTool newNodeLIRBuilder(StructuredGraph graph, LIRGeneratorTool lirGen) {
        return new HSAILHotSpotNodeLIRBuilder(graph, lirGen);
    }

    /**
     * Frame context used for HSAIL kernels. It always reports a frame, and entering or leaving
     * the frame emits no code: both callbacks only write a log message.
     */
    class HotSpotFrameContext implements FrameContext {

        /**
         * @return always {@code true}
         */
        @Override
        public boolean hasFrame() {
            return true;
        }

        /**
         * Only logs; nothing is emitted on frame entry.
         */
        @Override
        public void enter(CompilationResultBuilder crb) {
            Debug.log("Nothing to do here");
        }

        /**
         * Only logs; nothing is emitted on frame exit.
         */
        @Override
        public void leave(CompilationResultBuilder crb) {
            Debug.log("Nothing to do here");
        }
    }

    /**
     * A compilation result builder that additionally keeps hold of the HSAIL LIR generation
     * result (a class to allow us to save lirGen).
     */
    static class HSAILCompilationResultBuilder extends CompilationResultBuilder {
        // The LIR generation result handed to the constructor; publicly accessible and mutable.
        public HSAILHotSpotLIRGenerationResult lirGenRes;

        /**
         * Forwards all arguments except {@code lirGenRes} to the superclass constructor and
         * stores {@code lirGenRes} in {@link #lirGenRes}.
         */
        public HSAILCompilationResultBuilder(CodeCacheProvider codeCache, ForeignCallsProvider foreignCalls, FrameMap frameMap, Assembler asm, FrameContext frameContext,
                        CompilationResult compilationResult, HSAILHotSpotLIRGenerationResult lirGenRes) {
            super(codeCache, foreignCalls, frameMap, asm, frameContext, compilationResult);
            this.lirGenRes = lirGenRes;
        }
    }

    /**
     * An nmethod handle for an HSAIL kernel that additionally carries the kernel's oop map array
     * and a flag telling whether the kernel uses allocation. Both are plain settable properties;
     * the array is stored and returned without copying.
     */
    static class HSAILHotSpotNmethod extends HotSpotNmethod {
        private boolean usesAllocation;
        private int[] oopMapArray;

        HSAILHotSpotNmethod(HotSpotResolvedJavaMethod method, String name, boolean isDefault, boolean isExternal) {
            super(method, name, isDefault, isExternal);
        }

        /** Stores the given oop map array. */
        void setOopMapArray(int[] array) {
            this.oopMapArray = array;
        }

        /** Returns the oop map array last stored, or {@code null} if none was. */
        int[] getOopMapArray() {
            return this.oopMapArray;
        }

        /** Records whether the kernel uses allocation. */
        public void setUsesAllocationFlag(boolean val) {
            this.usesAllocation = val;
        }

        /** Returns the allocation flag last stored; {@code false} if never set. */
        public boolean getUsesAllocationFlag() {
            return this.usesAllocation;
        }
    }

    /**
     * Creates the HSAIL assembler for this backend's target. The {@code frameMap} argument is not
     * used.
     */
    @Override
    protected Assembler createAssembler(FrameMap frameMap) {
        return new HSAILHotSpotAssembler(getTarget());
    }

    /**
     * Creates an {@link HSAILCompilationResultBuilder} wired up with a fresh assembler and frame
     * context, and sets its total frame size from {@code frameMap}. The {@code factory} argument
     * is not consulted.
     *
     * @param lirGenRes must be an {@link HSAILHotSpotLIRGenerationResult}; it is stored in the
     *            returned builder
     */
    @Override
    public CompilationResultBuilder newCompilationResultBuilder(LIRGenerationResult lirGenRes, FrameMap frameMap, CompilationResult compilationResult, CompilationResultBuilderFactory factory) {
        Assembler assembler = createAssembler(frameMap);
        FrameContext kernelFrameContext = new HotSpotFrameContext();
        // save lirGen for later use by setHostGraph
        HSAILHotSpotLIRGenerationResult hsailLirGenRes = (HSAILHotSpotLIRGenerationResult) lirGenRes;
        HSAILCompilationResultBuilder builder = new HSAILCompilationResultBuilder(getCodeCache(), getForeignCalls(), frameMap, assembler, kernelFrameContext, compilationResult, hsailLirGenRes);
        builder.setTotalFrameSize(frameMap.totalFrameSize());
        return builder;
    }

    @Override
    public void emitCode(CompilationResultBuilder crb, LIR lir, ResolvedJavaMethod method) {
        assert method != null : lir + " is not associated with a method";
        Kind wordKind = getProviders().getCodeCache().getTarget().wordKind;
        LIRKind wordLIRKind = LIRKind.value(wordKind);

        HotSpotVMConfig config = getRuntime().getConfig();
        boolean useHSAILDeoptimization = config.useHSAILDeoptimization;
        boolean useHSAILSafepoints = config.useHSAILSafepoints;

        if ((useHSAILSafepoints == true) && (useHSAILDeoptimization == false)) {
            Debug.log("+UseHSAILSafepoints requires +UseHSAILDeoptimization");
        }

        /*
         * See what graph nodes we have to see if we are using the thread register. If not, we don't
         * have to emit the code that sets it up. Maybe there is a better way to do this?
         */
        boolean usesAllocation = false;
        search: for (AbstractBlock<?> b : lir.linearScanOrder()) {
            for (LIRInstruction op : lir.getLIRforBlock(b)) {
                if ((op instanceof HSAILMove.LoadOp) && ((HSAILMove.LoadOp) op).usesThreadRegister()) {
                    usesAllocation = true;
                    assert useHSAILDeoptimization : "cannot use thread register if HSAIL deopt support is disabled";
                    break search;
                }
            }
        }
        // save usesAllocation flag in ExternalCompilationResult
        ((ExternalCompilationResult) crb.compilationResult).setUsesAllocationFlag(usesAllocation);

        // Emit the prologue.
        HSAILAssembler asm = (HSAILAssembler) crb.asm;
        asm.emitString0("version 0:95: $full : $large;\n");

        Signature signature = method.getSignature();
        int sigParamCount = signature.getParameterCount(false);
        // We're subtracting 1 because we're not making the final gid as a parameter.

        int nonConstantParamCount = sigParamCount - 1;
        boolean isStatic = (method.isStatic());
        // Determine if this is an object lambda.
        boolean isObjectLambda = true;

        if (signature.getParameterType(nonConstantParamCount, null).getKind() == Kind.Int) {
            isObjectLambda = false;
        } else {
            // Add space for gid int reg.
            nonConstantParamCount++;
        }

        // If this is an instance method, include the "this" parameter
        if (!isStatic) {
            nonConstantParamCount++;
        }
        // Add in any "constant" parameters (currently none).
        int totalParamCount = nonConstantParamCount;
        JavaType[] paramtypes = new JavaType[totalParamCount];
        String[] paramNames = new String[totalParamCount];
        int pidx = 0;
        MetaAccessProvider metaAccess = getProviders().getMetaAccess();
        for (int i = 0; i < totalParamCount; i++) {
            if (i == 0 && !isStatic) {
                paramtypes[i] = metaAccess.lookupJavaType(Object.class);
                paramNames[i] = "%_this";
            } else if (i < nonConstantParamCount) {
                if (isObjectLambda && (i == (nonConstantParamCount))) {
                    // Set up the gid register mapping.
                    paramtypes[i] = metaAccess.lookupJavaType(int.class);
                    paramNames[i] = "%_gid";
                } else {
                    paramtypes[i] = signature.getParameterType(pidx++, null);
                    paramNames[i] = "%_arg" + i;
                }
            }
        }

        asm.emitString0("// " + (isStatic ? "static" : "instance") + " method " + method + "\n");
        asm.emitString0("kernel &run ( \n");

        FrameMap frameMap = crb.frameMap;
        RegisterConfig regConfig = frameMap.getRegisterConfig();
        // Build list of param types which does include the gid (for cc register mapping query).
        JavaType[] ccParamTypes = new JavaType[nonConstantParamCount + 1];
        // Include the gid.
        System.arraycopy(paramtypes, 0, ccParamTypes, 0, nonConstantParamCount);

        /*
         * Last entry is always int (its register gets used in the workitemabsid instruction). This
         * is true even for object stream lambdas.
         */
        if (sigParamCount > 0) {
            ccParamTypes[ccParamTypes.length - 1] = metaAccess.lookupJavaType(int.class);
        }
        CallingConvention cc = regConfig.getCallingConvention(JavaCallee, null, ccParamTypes, getTarget(), false);

        /**
         * Compute the hsail size mappings up to but not including the last non-constant parameter
         * (which is the gid).
         *
         */
        String[] paramHsailSizes = new String[totalParamCount];
        for (int i = 0; i < totalParamCount; i++) {
            String paramtypeStr = paramtypes[i].toString();
            String sizeStr = paramTypeMap.get(paramtypeStr);
            // Catch all for any unmapped paramtype that is u64 (address of an object).
            paramHsailSizes[i] = (sizeStr != null ? sizeStr : "u64");
        }
        // Emit the kernel function parameters.
        for (int i = 0; i < totalParamCount; i++) {
            String str = "align 8 kernarg_" + paramHsailSizes[i] + " " + paramNames[i];

            if (useHSAILDeoptimization || (i != totalParamCount - 1)) {
                str += ",";
            }
            asm.emitString(str);
        }

        if (useHSAILDeoptimization) {
            // add in the deoptInfo parameter
            asm.emitString("kernarg_u64 " + asm.getDeoptInfoName());
        }

        asm.emitString(") {");

        /*
         * End of parameters start of prolog code. Emit the load instructions for loading of the
         * kernel non-constant parameters into registers. The constant class parameters will not be
         * loaded up front but will be loaded as needed.
         */
        for (int i = 0; i < nonConstantParamCount; i++) {
            asm.emitString("ld_kernarg_" + paramHsailSizes[i] + "  " + HSAIL.mapRegister(cc.getArgument(i)) + ", [" + paramNames[i] + "];");
        }

        /*
         * Emit the workitemaid instruction for loading the hidden gid parameter. This is assigned
         * the register as if it were the last of the nonConstant parameters.
         */
        String workItemReg = "$s" + Integer.toString(asRegister(cc.getArgument(nonConstantParamCount)).encoding());
        asm.emitString("workitemabsid_u32 " + workItemReg + ", 0;");

        final String deoptInProgressLabel = "@LHandleDeoptInProgress";

        if (useHSAILDeoptimization) {
            // Aliases for d16
            RegisterValue d16DeoptInfo = HSAIL.d16.asValue(wordLIRKind);

            // Aliases for d17
            RegisterValue d17TlabIndex = HSAIL.d17.asValue(wordLIRKind);
            RegisterValue d17SafepointFlagAddrIndex = d17TlabIndex;

            // Aliases for s34
            RegisterValue s34DeoptOccurred = HSAIL.s34.asValue(LIRKind.value(Kind.Int));
            RegisterValue s34TlabIndex = s34DeoptOccurred;

            asm.emitLoadKernelArg(d16DeoptInfo, asm.getDeoptInfoName(), "u64");
            asm.emitComment("// Check if a deopt or safepoint has occurred and abort if true before doing any work");

            if (useHSAILSafepoints) {
                // Load address of _notice_safepoints field
                asm.emitLoad(wordKind, d17SafepointFlagAddrIndex, new HSAILAddressValue(wordLIRKind, d16DeoptInfo, config.hsailNoticeSafepointsOffset).toAddress());
                // Load int value from that field
                asm.emitLoadAcquire(s34DeoptOccurred, new HSAILAddressValue(wordLIRKind, d17SafepointFlagAddrIndex, 0).toAddress());
                asm.emitCompare(Kind.Int, s34DeoptOccurred, JavaConstant.forInt(0), "ne", false, false);
                asm.cbr(deoptInProgressLabel);
            }
            asm.emitLoadAcquire(s34DeoptOccurred, new HSAILAddressValue(wordLIRKind, d16DeoptInfo, config.hsailDeoptOccurredOffset).toAddress());
            asm.emitCompare(Kind.Int, s34DeoptOccurred, JavaConstant.forInt(0), "ne", false, false);
            asm.cbr(deoptInProgressLabel);
            // load thread register if this kernel performs allocation
            if (usesAllocation) {
                RegisterValue threadReg = getProviders().getRegisters().getThreadRegister().asValue(wordLIRKind);
                assert HsailKernelTlabs.getValue() > 0;
                asm.emitLoad(wordKind, threadReg, new HSAILAddressValue(wordLIRKind, d16DeoptInfo, config.hsailCurTlabInfoOffset).toAddress());
                if (HsailKernelTlabs.getValue() != 1) {
                    asm.emitComment("// map workitem to a tlab");
                    asm.emitString(String.format("rem_u32  $%s, %s, %d;", s34TlabIndex.getRegister(), workItemReg, HsailKernelTlabs.getValue()));
                    asm.emitConvert(d17TlabIndex, s34TlabIndex, wordKind, Kind.Int);
                    asm.emit("mad", threadReg, d17TlabIndex, JavaConstant.forInt(8), threadReg);
                } else {
                    // workitem is already mapped to solitary tlab
                }
                asm.emitComment("// $" + getProviders().getRegisters().getThreadRegister() + " will point to holder of tlab thread info for this workitem");
            }
        }

        /*
         * Note the logic used for this spillseg size is to leave space and then go back and patch
         * in the correct size once we have generated all the instructions. This should probably be
         * done in a more robust way by implementing something like asm.insertString.
         */
        int spillsegDeclarationPosition = asm.position() + 1;
        String spillsegTemplate = "align 4 spill_u8 %spillseg[123456];";
        asm.emitString(spillsegTemplate);
        // Emit object array load prologue here.
        if (isObjectLambda) {
            boolean useCompressedOops = config.useCompressedOops;
            final int arrayElementsOffset = HotSpotGraalRuntime.getArrayBaseOffset(wordKind);
            String iterationObjArgReg = HSAIL.mapRegister(cc.getArgument(nonConstantParamCount - 1));
            /*
             * iterationObjArgReg will be the highest $d register in use (it is the last parameter)
             * so tempReg can be the next higher $d register. As of 1.0 spec, we cannot use
             * ld_global_u32 $dxx, [addr]; so we need a temporary $s register. We can use
             * workItemReg+1;
             */
            String tmpReg = "$d" + (asRegister(cc.getArgument(nonConstantParamCount - 1)).encoding() + 1);
            // Convert gid to long.
            asm.emitString("cvt_u64_s32 " + tmpReg + ", " + workItemReg + "; // Convert gid to long");
            // Adjust index for sizeof ref. Where to pull this size from?
            asm.emitString("mul_u64 " + tmpReg + ", " + tmpReg + ", " + (useCompressedOops ? 4 : 8) + "; // Adjust index for sizeof ref");
            // Adjust for actual data start.
            asm.emitString("add_u64 " + tmpReg + ", " + tmpReg + ", " + arrayElementsOffset + "; // Adjust for actual elements data start");
            // Add to array ref ptr.
            asm.emitString("add_u64 " + tmpReg + ", " + tmpReg + ", " + iterationObjArgReg + "; // Add to array ref ptr");
            // Load the object into the parameter reg.
            if (useCompressedOops) {
                int workItemRegEncoding = asRegister(cc.getArgument(nonConstantParamCount)).encoding();
                String tmpReg32 = "$s" + Integer.toString(workItemRegEncoding + 1);

                // Load u32 into the temporary $s reg since it will become an object address

                asm.emitString("ld_global_u32 " + tmpReg32 + ", " + "[" + tmpReg + "]" + "; // Load compressed ptr from array");
                asm.emitString("cvt_u64_u32 " + tmpReg + ", " + tmpReg32 + ";      // cvt to 64 bits");

                long narrowOopBase = config.narrowOopBase;
                long narrowOopShift = config.narrowOopShift;

                if (narrowOopBase == 0 && narrowOopShift == 0) {
                    // No more calculation to do, mov to target register
                    asm.emitString("mov_b64 " + iterationObjArgReg + ", " + tmpReg + "; // no shift or base addition");
                } else {
                    if (narrowOopBase == 0) {
                        asm.emitString("shl_u64 " + iterationObjArgReg + ", " + tmpReg + ", " + narrowOopShift + "; // do narrowOopShift");
                    } else if (narrowOopShift == 0) {
                        // not sure if we ever get add with 0 shift but just in case
                        asm.emitString("cmp_eq_b1_u64  $c0, " + tmpReg + ", 0x0; // avoid add if compressed is null");
                        asm.emitString("add_u64 " + iterationObjArgReg + ", " + tmpReg + ", " + narrowOopBase + "; // add narrowOopBase");
                        asm.emitString("cmov_b64 " + iterationObjArgReg + ", $c0, 0x0, " + iterationObjArgReg + "; // avoid add if compressed is null");
                    } else {
                        asm.emitString("cmp_eq_b1_u64  $c0, " + tmpReg + ", 0x0; // avoid shift-add if compressed is null");
                        asm.emitString("mad_u64 " + iterationObjArgReg + ", " + tmpReg + ", " + (1 << narrowOopShift) + ", " + narrowOopBase + "; // shift and add narrowOopBase");
                        asm.emitString("cmov_b64 " + iterationObjArgReg + ", $c0, 0x0, " + iterationObjArgReg + "; // avoid shift-add if compressed is null");
                    }
                }

            } else {
                asm.emitString("ld_global_u64 " + iterationObjArgReg + ", " + "[" + tmpReg + "]" + "; // Load from array element into parameter reg");
            }
        }
        // Prologue done, Emit code for the LIR.
        crb.emit(lir);
        // Now that code is emitted go back and figure out what the upper Bound stack size was.
        long maxStackSize = ((HSAILAssembler) crb.asm).upperBoundStackSize();
        String spillsegStringFinal;
        if (maxStackSize == 0) {
            // If no spilling, get rid of spillseg declaration.
            char[] array = new char[spillsegTemplate.length()];
            Arrays.fill(array, ' ');
            spillsegStringFinal = new String(array);
        } else {
            spillsegStringFinal = spillsegTemplate.replace("123456", String.format("%6d", maxStackSize));
        }
        asm.emitString(spillsegStringFinal, spillsegDeclarationPosition);
        // Emit the epilogue.

        HSAILHotSpotLIRGenerationResult lirGenRes = ((HSAILCompilationResultBuilder) crb).lirGenRes;

        int numSRegs = 0;
        int numDRegs = 0;
        int numStackSlotBytes = 0;
        if (useHSAILDeoptimization) {
            /*
             * Get the union of registers and stack slots needed to be saved at the infopoints.
             * While doing this compute the highest register in each category.
             */
            HSAILHotSpotRegisterConfig hsailRegConfig = (HSAILHotSpotRegisterConfig) regConfig;
            Set<Register> infoUsedRegs = new TreeSet<>();
            Set<StackSlot> infoUsedStackSlots = new HashSet<>();
            List<Infopoint> infoList = crb.compilationResult.getInfopoints();
            Queue<JavaValue[]> workList = new LinkedList<>();
            for (Infopoint info : infoList) {
                BytecodeFrame frame = info.debugInfo.frame();
                while (frame != null) {
                    workList.add(frame.values);
                    frame = frame.caller();
                }
            }
            while (!workList.isEmpty()) {
                JavaValue[] values = workList.poll();
                for (JavaValue val : values) {
                    if (!Value.ILLEGAL.equals(val)) {
                        if (val instanceof RegisterValue) {
                            Register reg = ((RegisterValue) val).getRegister();
                            infoUsedRegs.add(reg);
                            if (hsailRegConfig.isAllocatableSReg(reg)) {
                                numSRegs = Math.max(numSRegs, reg.encoding + 1);
                            } else if (hsailRegConfig.isAllocatableDReg(reg)) {
                                numDRegs = Math.max(numDRegs, reg.encoding + 1);
                            }
                        } else if (val instanceof StackSlot) {
                            StackSlot slot = (StackSlot) val;
                            Kind slotKind = slot.getKind();
                            int slotSizeBytes = (slotKind.isObject() ? 8 : slotKind.getByteCount());
                            int slotOffsetMax = HSAIL.getStackOffsetStart(slot, slotSizeBytes * 8) + slotSizeBytes;
                            numStackSlotBytes = Math.max(numStackSlotBytes, slotOffsetMax);
                            infoUsedStackSlots.add(slot);
                        } else if (val instanceof VirtualObject) {
                            workList.add(((VirtualObject) val).getValues());
                        } else {
                            assert val instanceof JavaConstant : "Unsupported value: " + val;
                        }
                    }
                }
            }

            // round up numSRegs to even number so dregs start on aligned boundary
            numSRegs += (numSRegs & 1);

            // numStackSlots is the number of 8-byte locations used for stack variables
            int numStackSlots = (numStackSlotBytes + 7) / 8;

            final int offsetToDeoptSaveStates = config.hsailDeoptimizationInfoHeaderSize;
            final int bytesPerSaveArea = 4 * numSRegs + 8 * numDRegs + 8 * numStackSlots;
            final int sizeofKernelDeopt = config.hsailKernelDeoptimizationHeaderSize + config.hsailFrameHeaderSize + bytesPerSaveArea;
            final int offsetToNeverRanArray = config.hsailNeverRanArrayOffset;
            final int offsetToDeoptNextIndex = config.hsailDeoptNextIndexOffset;
            final int offsetToDeoptimizationWorkItem = config.hsailDeoptimizationWorkItem;
            final int offsetToDeoptimizationReason = config.hsailDeoptimizationReason;
            final int offsetToDeoptimizationFrame = config.hsailKernelDeoptimizationHeaderSize;
            final int offsetToFramePc = config.hsailFramePcOffset;
            final int offsetToNumSaves = config.hsailFrameNumSRegOffset;
            final int offsetToSaveArea = config.hsailFrameHeaderSize;

            AllocatableValue scratch64 = HSAIL.d16.asValue(wordLIRKind);
            AllocatableValue cuSaveAreaPtr = HSAIL.d17.asValue(wordLIRKind);
            AllocatableValue waveMathScratch1 = HSAIL.d18.asValue(wordLIRKind);
            AllocatableValue waveMathScratch2 = HSAIL.d19.asValue(wordLIRKind);

            AllocatableValue actionAndReasonReg = HSAIL.actionAndReasonReg.asValue(LIRKind.value(Kind.Int));
            AllocatableValue codeBufferOffsetReg = HSAIL.codeBufferOffsetReg.asValue(LIRKind.value(Kind.Int));
            AllocatableValue scratch32 = HSAIL.s34.asValue(LIRKind.value(Kind.Int));
            AllocatableValue workidreg = HSAIL.s35.asValue(LIRKind.value(Kind.Int));

            HSAILAddress deoptNextIndexAddr = new HSAILAddressValue(wordLIRKind, scratch64, offsetToDeoptNextIndex).toAddress();
            HSAILAddress neverRanArrayAddr = new HSAILAddressValue(wordLIRKind, scratch64, offsetToNeverRanArray).toAddress();

            // The just-started lanes that see the deopt flag will jump here
            asm.emitString0(deoptInProgressLabel + ":\n");
            asm.emitLoad(wordKind, waveMathScratch1, neverRanArrayAddr);
            asm.emitWorkItemAbsId(workidreg);
            asm.emitConvert(waveMathScratch2, workidreg, wordKind, Kind.Int);
            asm.emit("add", waveMathScratch1, waveMathScratch1, waveMathScratch2);
            HSAILAddress neverRanStoreAddr = new HSAILAddressValue(wordLIRKind, waveMathScratch1, 0).toAddress();
            asm.emitStore(Kind.Byte, JavaConstant.forInt(1), neverRanStoreAddr);
            asm.emitString("ret;");

            // The deoptimizing lanes will jump here
            asm.emitString0(asm.getDeoptLabelName() + ":\n");
            String labelExit = asm.getDeoptLabelName() + "_Exit";

            HSAILAddress deoptInfoAddr = new HSAILAddressValue(wordLIRKind, scratch64, config.hsailDeoptOccurredOffset).toAddress();
            asm.emitLoadKernelArg(scratch64, asm.getDeoptInfoName(), "u64");

            // Set deopt occurred flag
            asm.emitMov(Kind.Int, scratch32, JavaConstant.forInt(1));
            asm.emitStoreRelease(scratch32, deoptInfoAddr);

            asm.emitComment("// Determine next deopt save slot");
            asm.emitAtomicAdd(scratch32, deoptNextIndexAddr, JavaConstant.forInt(1));
            /*
             * scratch32 now holds next index to use set error condition if no room in save area
             */
            asm.emitComment("// assert room to save deopt");
            asm.emitCompare(Kind.Int, scratch32, JavaConstant.forInt(maxDeoptIndex), "lt", false, false);
            asm.cbr("@L_StoreDeopt");
            /*
             * if assert fails, store a guaranteed negative workitemid in top level deopt occurred
             * flag
             */
            asm.emitWorkItemAbsId(scratch32);
            asm.emit("mad", scratch32, scratch32, JavaConstant.forInt(-1), JavaConstant.forInt(-1));
            asm.emitStore(scratch32, deoptInfoAddr);
            asm.emitString("ret;");

            asm.emitString0("@L_StoreDeopt" + ":\n");

            // Store deopt for this workitem into its slot in the HSAILComputeUnitSaveStates array

            asm.emitComment("// Convert id's for ptr math");
            asm.emitConvert(cuSaveAreaPtr, scratch32, wordKind, Kind.Int);
            asm.emitComment("// multiply by sizeof KernelDeoptArea");
            asm.emit("mul", cuSaveAreaPtr, cuSaveAreaPtr, JavaConstant.forInt(sizeofKernelDeopt));
            asm.emitComment("// Add computed offset to deoptInfoPtr base");
            asm.emit("add", cuSaveAreaPtr, cuSaveAreaPtr, scratch64);
            // Add offset to _deopt_save_states[0]
            asm.emit("add", scratch64, cuSaveAreaPtr, JavaConstant.forInt(offsetToDeoptSaveStates));

            HSAILAddress workItemAddr = new HSAILAddressValue(wordLIRKind, scratch64, offsetToDeoptimizationWorkItem).toAddress();
            HSAILAddress actionReasonStoreAddr = new HSAILAddressValue(wordLIRKind, scratch64, offsetToDeoptimizationReason).toAddress();

            asm.emitComment("// Get _deopt_info._first_frame");
            asm.emit("add", waveMathScratch1, scratch64, JavaConstant.forInt(offsetToDeoptimizationFrame));
            // Now scratch64 is the _deopt_info._first_frame
            HSAILAddress pcStoreAddr = new HSAILAddressValue(wordLIRKind, waveMathScratch1, offsetToFramePc).toAddress();
            HSAILAddress regCountsAddr = new HSAILAddressValue(wordLIRKind, waveMathScratch1, offsetToNumSaves).toAddress();
            asm.emitComment("// store deopting workitem");
            asm.emitWorkItemAbsId(scratch32);
            asm.emitStore(Kind.Int, scratch32, workItemAddr);
            asm.emitComment("// store actionAndReason");
            asm.emitStore(Kind.Int, actionAndReasonReg, actionReasonStoreAddr);
            asm.emitComment("// store PC");
            asm.emitStore(Kind.Int, codeBufferOffsetReg, pcStoreAddr);

            asm.emitComment("// store regCounts (" + numSRegs + " $s registers, " + numDRegs + " $d registers, " + numStackSlots + " stack slots)");
            asm.emitStore(Kind.Int, JavaConstant.forInt(numSRegs + (numDRegs << 8) + (numStackSlots << 16)), regCountsAddr);

            /*
             * Loop thru the usedValues storing each of the registers that are used. We always store
             * in a fixed location, even if some registers are skipped.
             */
            asm.emitComment("// store used regs");
            for (Register reg : infoUsedRegs) {
                if (hsailRegConfig.isAllocatableSReg(reg)) {
                    // 32 bit registers
                    Kind kind = Kind.Int;
                    int ofst = offsetToSaveArea + reg.encoding * 4;
                    HSAILAddress addr = new HSAILAddressValue(wordLIRKind, waveMathScratch1, ofst).toAddress();
                    AllocatableValue regValue = reg.asValue(LIRKind.value(kind));
                    asm.emitStore(kind, regValue, addr);
                } else if (hsailRegConfig.isAllocatableDReg(reg)) {
                    // 64 bit registers
                    Kind kind = Kind.Long;
                    // d reg ofst starts past the 32 sregs
                    int ofst = offsetToSaveArea + (numSRegs * 4) + reg.encoding * 8;
                    HSAILAddress addr = new HSAILAddressValue(wordLIRKind, waveMathScratch1, ofst).toAddress();
                    AllocatableValue regValue = reg.asValue(LIRKind.value(kind));
                    asm.emitStore(kind, regValue, addr);
                } else {
                    throw GraalInternalError.unimplemented();
                }
            }

            // loop thru the usedStackSlots creating instructions to save in the save area
            if (numStackSlotBytes > 0) {
                asm.emitComment("// store stack slots (uses " + numStackSlotBytes + " bytes)");
                for (StackSlot slot : infoUsedStackSlots) {
                    asm.emitComment("// store " + slot);
                    Kind kind = slot.getKind();
                    int sizeInBits = (kind.isObject() || kind.getByteCount() == 8 ? 64 : 32);
                    int ofst = offsetToSaveArea + (numSRegs * 4) + (numDRegs * 8) + HSAIL.getStackOffsetStart(slot, sizeInBits);
                    HSAILAddress addr = new HSAILAddressValue(wordLIRKind, waveMathScratch1, ofst).toAddress();
                    if (sizeInBits == 64) {
                        asm.emitSpillLoad(kind, scratch64, slot);
                        asm.emitStore(kind, scratch64, addr);
                    } else {
                        asm.emitSpillLoad(kind, scratch32, slot);
                        asm.emitStore(kind, scratch32, addr);
                    }
                }
            }

            asm.emitString0(labelExit + ":\n");

            // and emit the return
            crb.frameContext.leave(crb);
            asm.exit();
            // build the oopMap Array
            int[] oopMapArray = new OopMapArrayBuilder().build(infoList, numSRegs, numDRegs, numStackSlots, hsailRegConfig);
            ((ExternalCompilationResult) crb.compilationResult).setOopMapArray(oopMapArray);
        } else {
            // Deoptimization is explicitly off, so emit simple return
            asm.emitString0(asm.getDeoptLabelName() + ":\n");
            asm.emitComment("// No deoptimization");
            asm.emitString("ret;");
        }

        asm.emitString0("}; \n");

        ExternalCompilationResult compilationResult = (ExternalCompilationResult) crb.compilationResult;
        if (useHSAILDeoptimization) {
            compilationResult.setHostGraph(prepareHostGraph(method, lirGenRes.getDeopts(), getProviders(), config, numSRegs, numDRegs));
        }
    }

    /**
     * Assembles the flat {@code int[]} that describes, for every infopoint, which saved
     * $d registers and 8-byte stack slots hold object references.
     */
    private static class OopMapArrayBuilder {
        // Array layout, header part:
        // [SAVEAREACOUNTS_OFST] packed counts: numSRegs (low byte), numDRegs << 8, numStackSlots << 16
        // [INTSPERINFOPOINT_OFST] number of ints occupied by each infopoint record
        static final int SAVEAREACOUNTS_OFST = 0;
        static final int INTSPERINFOPOINT_OFST = 1;
        static final int HEADERSIZE = 2;
        // The header is followed by one record per infopoint:
        // one int holding the infopoint's pcOffset
        // one or more ints holding the oop bits ($d registers first, then stack slots)

        private int[] array;
        private int intsPerInfopoint;

        /**
         * Fills and returns the oop map array for the given infopoints.
         *
         * @param infoList infopoints of the compiled kernel, one record is written per entry in
         *            list order
         * @param numSRegs number of $s register slots in the save area
         * @param numDRegs number of $d register slots in the save area; also the bit index at
         *            which stack slot bits start
         * @param numStackSlots number of 8-byte stack slots in the save area
         * @param hsailRegConfig register configuration, used only to assert that object values
         *            live in allocatable $d registers
         * @return the populated array (also retained in this builder)
         */
        int[] build(List<Infopoint> infoList, int numSRegs, int numDRegs, int numStackSlots, HSAILHotSpotRegisterConfig hsailRegConfig) {
            /*
             * numStackSlots counts 8-byte slots, so it bounds the number of stack slots that can
             * hold an oop; together with the $d registers that gives the bits needed per record.
             */
            final int oopBitCount = numDRegs + numStackSlots;
            final int intsForBits = (oopBitCount + 31) / 32;
            // one extra int per record for the pcOffset
            intsPerInfopoint = 1 + intsForBits;
            array = new int[HEADERSIZE + (infoList.size() * intsPerInfopoint)];
            array[INTSPERINFOPOINT_OFST] = intsPerInfopoint;
            array[SAVEAREACOUNTS_OFST] = (numSRegs & 0xff) + (numDRegs << 8) + (numStackSlots << 16);

            /*
             * NOTE(review): only the locals and expression stack entries of each frame are
             * scanned below; whether lock values or values nested in virtual objects can hold
             * oops that need a bit here should be confirmed.
             */
            int infoIndex = 0;
            for (Infopoint info : infoList) {
                setOopMapPcOffset(infoIndex, info.pcOffset);
                for (BytecodeFrame frame = info.debugInfo.frame(); frame != null; frame = frame.caller()) {
                    final int scannedValues = frame.numLocals + frame.numStack;
                    for (int i = 0; i < scannedValues; i++) {
                        JavaValue val = frame.values[i];
                        if (val instanceof RegisterValue) {
                            if (val.getKind().isObject()) {
                                Register reg = ((RegisterValue) val).getRegister();
                                assert (hsailRegConfig.isAllocatableDReg(reg));
                                setOopMapBit(infoIndex, reg.encoding());
                            }
                        } else if (val instanceof StackSlot) {
                            if (val.getKind().isObject()) {
                                int slotOffset = HSAIL.getStackOffsetStart((StackSlot) val, 64);
                                assert (slotOffset % 8 == 0);
                                // stack slot bits follow the $d register bits
                                setOopMapBit(infoIndex, numDRegs + slotOffset / 8);
                            }
                        }
                    }
                }
                infoIndex++;
            }

            try (Scope s = Debug.scope("CodeGen")) {
                if (Debug.isLogEnabled()) {
                    Debug.log("numSRegs=%d, numDRegs=%d, numStackSlots=%d", numSRegs, numDRegs, numStackSlots);
                    // dump the pcOffset and raw oop bits of every infopoint record
                    for (int logged = 0; logged < infoList.size(); logged++) {
                        StringBuilder line = new StringBuilder();
                        line.append("Infopoint ").append(logged);
                        line.append(", pcOffset=").append(getOopMapPcOffset(logged));
                        line.append(",   oopmap=");
                        for (int word = 0; word < intsForBits; word++) {
                            if (word != 0) {
                                line.append(", ");
                            }
                            line.append(Integer.toHexString(getOopMapBitsAsInt(logged, word)));
                        }
                        Debug.log(line.toString());
                    }
                }
            } catch (Throwable e) {
                throw Debug.handle(e);
            }

            return array;
        }

        /** Index in {@link #array} of the first int (the pcOffset) of the given infopoint record. */
        private int recordStart(int infoIndex) {
            return HEADERSIZE + infoIndex * intsPerInfopoint;
        }

        private void setOopMapPcOffset(int infoIndex, int pcOffset) {
            array[recordStart(infoIndex)] = pcOffset;
        }

        private int getOopMapPcOffset(int infoIndex) {
            return array[recordStart(infoIndex)];
        }

        /** Sets bit {@code bitIndex} in the oop bits of the given infopoint record. */
        private void setOopMapBit(int infoIndex, int bitIndex) {
            int wordIndex = recordStart(infoIndex) + 1 + bitIndex / 32;
            array[wordIndex] |= (1 << (bitIndex % 32));
        }

        /** Returns the {@code intIndex}-th int of oop bits of the given infopoint record. */
        private int getOopMapBitsAsInt(int infoIndex, int intIndex) {
            return array[recordStart(infoIndex) + 1 + intIndex];
        }
    }

    /**
     * Builds the host graph that is run when a kernel work item deoptimized. The graph switches
     * on its first parameter (the deopt id, compared against the code buffer position of each
     * deoptimizing op) and continues in a per-deopt branch; an id matching no deopt falls into
     * a VM-error branch.
     *
     * @param method the method the host graph is created for
     * @param deopts the deoptimizing ops of the compiled kernel; <b>sorted in place</b> by code
     *            buffer position
     * @param providers providers used to create constants and to query the target description
     * @param config VM configuration supplying the HSAIL frame layout
     * @param numSRegs number of $s register slots in the save area
     * @param numDRegs number of $d register slots in the save area
     * @return the host graph, or {@code null} if there are no deoptimizing ops
     */
    private static StructuredGraph prepareHostGraph(ResolvedJavaMethod method, List<DeoptimizingOp> deopts, HotSpotProviders providers, HotSpotVMConfig config, int numSRegs, int numDRegs) {
        if (deopts.isEmpty()) {
            return null;
        }
        StructuredGraph hostGraph = new StructuredGraph(method, -2);
        ParameterNode deoptId = hostGraph.unique(ParameterNode.create(0, StampFactory.intValue()));
        ParameterNode hsailFrame = hostGraph.unique(ParameterNode.create(1, StampFactory.forKind(providers.getCodeCache().getTarget().wordKind)));
        ParameterNode reasonAndAction = hostGraph.unique(ParameterNode.create(2, StampFactory.intValue()));
        ParameterNode speculation = hostGraph.unique(ParameterNode.create(3, StampFactory.object()));

        // one successor per deopt plus a trailing default successor
        final int deoptCount = deopts.size();
        BeginNode[] branches = new BeginNode[deoptCount + 1];
        int[] keys = new int[deoptCount];
        int[] keySuccessors = new int[deoptCount + 1];
        double[] keyProbabilities = new double[deoptCount + 1];

        // Order by code buffer position; Integer.compare avoids the overflow that ordering by
        // subtraction is prone to.
        Collections.sort(deopts, (o1, o2) -> Integer.compare(o1.getCodeBufferPos(), o2.getCodeBufferPos()));

        int i = 0;
        for (DeoptimizingOp deopt : deopts) {
            keySuccessors[i] = i;
            keyProbabilities[i] = 1.0 / deoptCount;
            keys[i] = deopt.getCodeBufferPos();
            assert keys[i] >= 0;
            branches[i] = createHostDeoptBranch(deopt, hsailFrame, reasonAndAction, speculation, providers, config, numSRegs, numDRegs);

            i++;
        }
        // default successor: an unknown deopt id is reported as a VM error
        keyProbabilities[deoptCount] = 0;
        keySuccessors[deoptCount] = deoptCount;
        branches[deoptCount] = createHostCrashBranch(hostGraph, deoptId);
        IntegerSwitchNode switchNode = hostGraph.add(IntegerSwitchNode.create(deoptId, branches, keys, keyProbabilities, keySuccessors));
        StartNode start = hostGraph.start();
        start.setNext(switchNode);
        hostGraph.setGuardsStage(GuardsStage.AFTER_FSA);
        return hostGraph;
    }

    /**
     * Creates the default branch of the host graph's switch: a VM error node reporting the
     * unexpected deopt id, followed by a return of the default value for the method's return
     * kind.
     *
     * @param hostGraph graph the nodes are added to
     * @param deoptId node supplying the deopt id that is formatted into the error message
     * @return the begin node in front of the VM error node
     */
    private static BeginNode createHostCrashBranch(StructuredGraph hostGraph, ValueNode deoptId) {
        VMErrorNode errorNode = hostGraph.add(VMErrorNode.create("Error in HSAIL deopt. DeoptId=%d", deoptId));
        Kind returnKind = hostGraph.method().getSignature().getReturnKind();
        ValueNode defaultResult = ConstantNode.defaultForKind(returnKind, hostGraph);
        errorNode.setNext(hostGraph.add(ReturnNode.create(defaultResult)));
        return BeginNode.begin(errorNode);
    }

    /**
     * Creates the host graph branch for one deoptimizing op: a begin node followed by a dynamic
     * deoptimization whose state-before is rebuilt from the op's top bytecode frame.
     *
     * @param deopt the deoptimizing op whose frame state is reconstructed
     * @param hsailFrame parameter node for the saved HSAIL frame; its graph receives the nodes
     * @param reasonAndAction node supplying the deoptimization reason and action
     * @param speculation node supplying the speculation
     * @return the begin node of the new branch
     */
    private static BeginNode createHostDeoptBranch(DeoptimizingOp deopt, ParameterNode hsailFrame, ValueNode reasonAndAction, ValueNode speculation, HotSpotProviders providers,
                    HotSpotVMConfig config, int numSRegs, int numDRegs) {
        StructuredGraph hostGraph = hsailFrame.graph();
        BeginNode branch = hostGraph.add(BeginNode.create());
        DynamicDeoptimizeNode deoptimization = hostGraph.add(DynamicDeoptimizeNode.create(reasonAndAction, speculation));
        FrameState stateBefore = createFrameState(deopt.getFrameState().topFrame, hsailFrame, providers, config, numSRegs, numDRegs);
        deoptimization.setStateBefore(stateBefore);
        branch.setNext(deoptimization);
        return branch;
    }

    /**
     * Convenience entry point that rebuilds the frame state for {@code lowLevelFrame} starting
     * with an empty virtual object map.
     */
    private static FrameState createFrameState(BytecodeFrame lowLevelFrame, ParameterNode hsailFrame, HotSpotProviders providers, HotSpotVMConfig config, int numSRegs, int numDRegs) {
        Map<VirtualObject, VirtualObjectNode> virtualObjects = new HashMap<>();
        return createFrameState(lowLevelFrame, hsailFrame, providers, config, numSRegs, numDRegs, virtualObjects);
    }

    /**
     * Rebuilds a graph-level {@link FrameState} in the host graph from a low-level bytecode
     * frame. Caller frames are converted first (recursively) and linked in as outer frame
     * states. Every local, stack and lock value is translated to a node through
     * {@code getNodeForValueFromFrame}.
     *
     * @param lowLevelFrame the frame to convert
     * @param hsailFrame parameter node for the saved HSAIL frame; its graph receives all nodes
     * @param virtualObjects map from virtual objects to their nodes; shared with the recursive
     *            calls for caller frames and extended as new virtual objects are encountered
     * @return the frame state added to the host graph
     */
    private static FrameState createFrameState(BytecodeFrame lowLevelFrame, ParameterNode hsailFrame, HotSpotProviders providers, HotSpotVMConfig config, int numSRegs, int numDRegs,
                    Map<VirtualObject, VirtualObjectNode> virtualObjects) {
        // Convert the caller chain first so the outer state exists before this one is created.
        FrameState outterFrameState = null;
        if (lowLevelFrame.caller() != null) {
            outterFrameState = createFrameState(lowLevelFrame.caller(), hsailFrame, providers, config, numSRegs, numDRegs, virtualObjects);
        }
        StructuredGraph hostGraph = hsailFrame.graph();
        // Translation of a single low-level value; it captures virtualObjects, so applying it
        // to a virtual object may add a new entry to that map.
        Function<? super JavaValue, ? extends ValueNode> lirValueToHirNode = v -> getNodeForValueFromFrame(v, hsailFrame, hostGraph, providers, config, numSRegs, numDRegs, virtualObjects);
        ValueNode[] locals = new ValueNode[lowLevelFrame.numLocals];
        for (int i = 0; i < lowLevelFrame.numLocals; i++) {
            locals[i] = lirValueToHirNode.apply(lowLevelFrame.getLocalValue(i));
        }
        List<ValueNode> stack = new ArrayList<>(lowLevelFrame.numStack);
        for (int i = 0; i < lowLevelFrame.numStack; i++) {
            stack.add(lirValueToHirNode.apply(lowLevelFrame.getStackValue(i)));
        }
        ValueNode[] locks = new ValueNode[lowLevelFrame.numLocks];
        MonitorIdNode[] monitorIds = new MonitorIdNode[lowLevelFrame.numLocks];
        for (int i = 0; i < lowLevelFrame.numLocks; i++) {
            // Lock values are expected to be HotSpotMonitorValues; anything else fails the cast.
            HotSpotMonitorValue lockValue = (HotSpotMonitorValue) lowLevelFrame.getLockValue(i);
            locks[i] = lirValueToHirNode.apply(lockValue);
            monitorIds[i] = getMonitorIdForHotSpotMonitorValueFromFrame(lockValue, hsailFrame, hostGraph);
        }
        FrameState frameState = hostGraph.add(FrameState.create(lowLevelFrame.getMethod(), lowLevelFrame.getBCI(), locals, stack, locks, monitorIds, lowLevelFrame.rethrowException, false));
        if (outterFrameState != null) {
            frameState.setOuterFrameState(outterFrameState);
        }
        Map<VirtualObject, VirtualObjectNode> virtualObjectsCopy;
        // TODO this could be implemented more efficiently with a mark into the map
        // unfortunately LinkedHashMap doesn't seem to provide that.
        List<VirtualObjectState> virtualStates = new ArrayList<>(virtualObjects.size());
        do {
            // Iterate over a snapshot: translating field values below can insert new entries
            // into virtualObjects, which must not happen to the map being iterated.
            virtualObjectsCopy = new HashMap<>(virtualObjects);
            // Start over on every pass so that each virtual object gets exactly one state.
            virtualStates.clear();
            for (Entry<VirtualObject, VirtualObjectNode> entry : virtualObjectsCopy.entrySet()) {
                VirtualObject virtualObject = entry.getKey();
                VirtualObjectNode virtualObjectNode = entry.getValue();
                List<ValueNode> fieldValues = Arrays.stream(virtualObject.getValues()).map(lirValueToHirNode).collect(Collectors.toList());
                virtualStates.add(VirtualObjectState.create(virtualObjectNode, fieldValues));
            }
            // New virtual objects may have been discovered while processing the previous set.
            // Wait until a fixed point is reached
        } while (virtualObjectsCopy.size() < virtualObjects.size());
        // Attach a mapping for every virtual object known so far, including those that were
        // registered while converting caller frames (the map is shared with those calls).
        virtualStates.forEach(vos -> frameState.addVirtualObjectMapping(hostGraph.unique(vos)));
        return frameState;
    }

    /**
     * Returns the monitor id node for a lock value of a frame. Only eliminated locks are
     * supported, and they have no monitor id.
     *
     * @return {@code null} for an eliminated lock
     * @throws GraalInternalError (unimplemented) if the lock is not eliminated
     */
    @SuppressWarnings("unused")
    private static MonitorIdNode getMonitorIdForHotSpotMonitorValueFromFrame(HotSpotMonitorValue lockValue, ParameterNode hsailFrame, StructuredGraph hsailGraph) {
        if (!lockValue.isEliminated()) {
            throw GraalInternalError.unimplemented();
        }
        return null;
    }

    /**
     * Translates one low-level frame value into a host graph node. Constants become constant
     * nodes, virtual objects are looked up (or registered) in {@code virtualObjects}, stack
     * slots and registers become reads from the saved HSAIL frame, and a monitor value is
     * translated through its owner.
     *
     * @param localValue the value to translate
     * @return the node for the value, or {@code null} if the value is {@code Value.ILLEGAL}
     * @throws GraalInternalError if the value is of none of the handled kinds
     */
    private static ValueNode getNodeForValueFromFrame(JavaValue localValue, ParameterNode hsailFrame, StructuredGraph hostGraph, HotSpotProviders providers, HotSpotVMConfig config, int numSRegs,
                    int numDRegs, Map<VirtualObject, VirtualObjectNode> virtualObjects) {
        // The order of the type checks below is significant and mirrors the value hierarchy.
        if (localValue instanceof JavaConstant) {
            return ConstantNode.forConstant((JavaConstant) localValue, providers.getMetaAccess(), hostGraph);
        }
        if (localValue instanceof VirtualObject) {
            return getNodeForVirtualObjectFromFrame((VirtualObject) localValue, virtualObjects, hostGraph);
        }
        if (localValue instanceof StackSlot) {
            return getNodeForStackSlotFromFrame((StackSlot) localValue, localValue.getKind(), hsailFrame, hostGraph, providers, config, numSRegs, numDRegs);
        }
        if (localValue instanceof HotSpotMonitorValue) {
            // a monitor is represented by the node of the object that owns it
            return getNodeForValueFromFrame(((HotSpotMonitorValue) localValue).getOwner(), hsailFrame, hostGraph, providers, config, numSRegs, numDRegs, virtualObjects);
        }
        if (localValue instanceof RegisterValue) {
            int regNumber = ((RegisterValue) localValue).getRegister().number;
            return getNodeForRegisterFromFrame(regNumber, localValue.getKind(), hsailFrame, hostGraph, providers, config, numSRegs);
        }
        if (Value.ILLEGAL.equals(localValue)) {
            return null;
        }
        throw GraalInternalError.shouldNotReachHere();
    }

    /**
     * Returns the node registered for {@code virtualObject} in {@code virtualObjects}, creating
     * and registering one in {@code hostGraph} on first use.
     */
    private static ValueNode getNodeForVirtualObjectFromFrame(VirtualObject virtualObject, Map<VirtualObject, VirtualObjectNode> virtualObjects, StructuredGraph hostGraph) {
        return virtualObjects.computeIfAbsent(virtualObject, missing -> createVirtualObjectNode(missing, hostGraph));
    }

    /**
     * Adds a new virtual node for {@code virtualObject} to {@code hostGraph}: a virtual array
     * node (component type and length taken from the object) for array types, a virtual instance
     * node otherwise.
     */
    private static VirtualObjectNode createVirtualObjectNode(VirtualObject virtualObject, StructuredGraph hostGraph) {
        if (virtualObject.getType().isArray()) {
            return hostGraph.add(VirtualArrayNode.create(virtualObject.getType().getComponentType(), virtualObject.getValues().length));
        }
        return hostGraph.add(VirtualInstanceNode.create(virtualObject.getType(), true));
    }

    /**
     * Creates a floating read of a saved register from the HSAIL frame. $s registers (s0..s31)
     * are laid out first after the frame header, one int-sized slot each; $d registers
     * (d0..d15) follow after {@code numSRegs} int-sized slots, one long-sized slot each.
     *
     * @param regNumber register number of the saved register
     * @param valueKind kind of the value to read
     * @param numSRegs number of $s register slots preceding the $d register slots
     * @return the read node
     * @throws GraalInternalError if {@code regNumber} is outside both register ranges
     */
    private static ValueNode getNodeForRegisterFromFrame(int regNumber, Kind valueKind, ParameterNode hsailFrame, StructuredGraph hostGraph, HotSpotProviders providers, HotSpotVMConfig config,
                    int numSRegs) {
        int longSize = providers.getCodeCache().getTarget().arch.getSizeInBytes(Kind.Long);
        int intSize = providers.getCodeCache().getTarget().arch.getSizeInBytes(Kind.Int);
        boolean isSReg = regNumber >= HSAIL.s0.number && regNumber <= HSAIL.s31.number;
        boolean isDReg = regNumber >= HSAIL.d0.number && regNumber <= HSAIL.d15.number;

        final long offset;
        if (isSReg) {
            offset = config.hsailFrameHeaderSize + intSize * (regNumber - HSAIL.s0.number);
        } else if (isDReg) {
            offset = config.hsailFrameHeaderSize + intSize * numSRegs + longSize * (regNumber - HSAIL.d0.number);
        } else {
            throw GraalInternalError.shouldNotReachHere("unknown hsail register: " + regNumber);
        }
        LocationNode location = ConstantLocationNode.create(FINAL_LOCATION, valueKind, offset, hostGraph);
        return hostGraph.unique(FloatingReadNode.create(hsailFrame, location, null, StampFactory.forKind(valueKind)));
    }

    /**
     * Creates a floating read of a saved stack slot from the HSAIL frame. Stack slots are laid
     * out after the frame header, the $s register slots and the $d register slots. Only 32-bit
     * and 64-bit slots are supported; object slots are treated as 64 bits wide.
     *
     * @param slot the stack slot to read
     * @param valueKind kind of the value held in the slot
     * @return the read node
     * @throws GraalInternalError if the slot is neither 32 nor 64 bits wide
     */
    private static ValueNode getNodeForStackSlotFromFrame(StackSlot slot, Kind valueKind, ParameterNode hsailFrame, StructuredGraph hostGraph, HotSpotProviders providers, HotSpotVMConfig config,
                    int numSRegs, int numDRegs) {
        final int slotSizeInBits;
        if (valueKind == Kind.Object) {
            slotSizeInBits = 64;
        } else {
            slotSizeInBits = valueKind.getByteCount() * 8;
        }
        if (slotSizeInBits != 32 && slotSizeInBits != 64) {
            throw GraalInternalError.shouldNotReachHere("unsupported stack slot kind: " + valueKind);
        }

        int longSize = providers.getCodeCache().getTarget().arch.getSizeInBytes(Kind.Long);
        int intSize = providers.getCodeCache().getTarget().arch.getSizeInBytes(Kind.Int);
        // skip the header and both register save areas, then index by the slot's own offset
        long offset = config.hsailFrameHeaderSize + (intSize * numSRegs) + (longSize * numDRegs) + HSAIL.getStackOffsetStart(slot, slotSizeInBits);
        LocationNode location = ConstantLocationNode.create(FINAL_LOCATION, valueKind, offset, hostGraph);
        return hostGraph.unique(FloatingReadNode.create(hsailFrame, location, null, StampFactory.forKind(valueKind)));
    }
}