view graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java @ 15018:db4254246f9a

Remove Constant.forObject and Constant.asObject to improve compiler/VM separation
author Christian Wimmer <christian.wimmer@oracle.com>
date Mon, 07 Apr 2014 16:09:17 -0700
parents c8e575742f36
children 2ee777221036
line wrap: on
line source

/*
 * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package com.oracle.graal.hotspot.hsail;

import static com.oracle.graal.api.code.CallingConvention.Type.*;
import static com.oracle.graal.api.code.CodeUtil.*;
import static com.oracle.graal.api.code.ValueUtil.*;
import static com.oracle.graal.api.meta.LocationIdentity.*;
import static com.oracle.graal.compiler.GraalCompiler.*;

import java.lang.reflect.*;
import java.util.*;

import com.amd.okra.*;
import com.oracle.graal.api.code.*;
import com.oracle.graal.api.code.Assumptions.Assumption;
import com.oracle.graal.api.code.CallingConvention.Type;
import com.oracle.graal.api.code.CompilationResult.Call;
import com.oracle.graal.api.code.CompilationResult.CodeAnnotation;
import com.oracle.graal.api.code.CompilationResult.DataPatch;
import com.oracle.graal.api.code.CompilationResult.ExceptionHandler;
import com.oracle.graal.api.code.CompilationResult.Infopoint;
import com.oracle.graal.api.code.CompilationResult.Mark;
import com.oracle.graal.api.meta.*;
import com.oracle.graal.asm.*;
import com.oracle.graal.asm.hsail.*;
import com.oracle.graal.compiler.gen.*;
import com.oracle.graal.debug.*;
import com.oracle.graal.debug.Debug.Scope;
import com.oracle.graal.gpu.*;
import com.oracle.graal.graph.*;
import com.oracle.graal.hotspot.*;
import com.oracle.graal.hotspot.bridge.CompilerToVM.CodeInstallResult;
import com.oracle.graal.hotspot.meta.*;
import com.oracle.graal.hotspot.nodes.*;
import com.oracle.graal.hsail.*;
import com.oracle.graal.java.*;
import com.oracle.graal.lir.*;
import com.oracle.graal.lir.asm.*;
import com.oracle.graal.lir.hsail.*;
import com.oracle.graal.lir.hsail.HSAILControlFlow.DeoptimizeOp;
import com.oracle.graal.nodes.*;
import com.oracle.graal.nodes.StructuredGraph.GuardsStage;
import com.oracle.graal.nodes.calc.*;
import com.oracle.graal.nodes.extended.*;
import com.oracle.graal.nodes.java.*;
import com.oracle.graal.nodes.type.*;
import com.oracle.graal.phases.*;
import com.oracle.graal.phases.tiers.*;

/**
 * HSAIL specific backend.
 */
public class HSAILHotSpotBackend extends HotSpotBackend {

    // Maps the toString() form of a parameter's JavaType to the HSAIL type suffix used when
    // emitting kernel arguments; emitCode falls back to "u64" for types without an entry.
    private Map<String, String> paramTypeMap = new HashMap<>();
    // Outcome of the device initialization attempted once in the constructor.
    private final boolean deviceInitialized;
    // TODO: get maximum Concurrency from okra
    // Bound checked against the next deopt save slot index in emitCode; also used to size
    // the oops save area allocated in executeKernel.
    private int maxDeoptIndex = 8 * 40 * 64;   // see gpu_hsail.hpp

    public HSAILHotSpotBackend(HotSpotGraalRuntime runtime, HotSpotProviders providers) {
        super(runtime, providers);
        paramTypeMap.put("HotSpotResolvedPrimitiveType<int>", "s32");
        paramTypeMap.put("HotSpotResolvedPrimitiveType<float>", "f32");
        paramTypeMap.put("HotSpotResolvedPrimitiveType<double>", "f64");
        paramTypeMap.put("HotSpotResolvedPrimitiveType<long>", "s64");

        // The order of the conjunction below is important: the OkraUtil
        // call may provision the native library required by the initialize() call
        deviceInitialized = OkraUtil.okraLibExists() && initialize();
    }

    /**
     * Always answers {@code true} for the HSAIL backend.
     */
    @Override
    public boolean shouldAllocateRegisters() {
        return true;
    }

    /**
     * Initializes the GPU device. Implemented natively; the constructor only calls this
     * after {@code OkraUtil.okraLibExists()} has returned true.
     *
     * @return whether or not initialization was successful
     */
    private static native boolean initialize();

    /**
     * Controls how many threads run on the simulator (used only from junit tests). The native
     * request is issued unless the {@code SIMTHREADS} environment variable is set to exactly
     * {@code "1"}.
     */
    public void setSimulatorSingleThreaded() {
        // Null-safe comparison: an unset variable counts as "not already 1".
        boolean simThreadsIsOne = "1".equals(System.getenv("SIMTHREADS"));
        if (simThreadsIsOne) {
            return;
        }
        setSimulatorSingleThreaded0();
    }

    /**
     * Native counterpart of {@link #setSimulatorSingleThreaded()}, which calls it when the
     * {@code SIMTHREADS} environment variable is unset or not equal to {@code "1"}.
     */
    private static native void setSimulatorSingleThreaded0();

    /**
     * Reports whether the GPU device (or simulator) was found and successfully initialized
     * when this backend was constructed.
     *
     * @return the initialization outcome recorded by the constructor
     */
    public boolean isDeviceInitialized() {
        return this.deviceInitialized;
    }

    /**
     * Finishes setting up the HSAIL backend: initializes the lowering provider with this
     * backend's providers and VM configuration, then completes initialization of the HSAIL
     * replacements.
     */
    @Override
    public void completeInitialization() {
        final HotSpotProviders hsailProviders = getProviders();
        HotSpotVMConfig vmConfig = getRuntime().getConfig();

        // Step 1: the lowering provider.
        final HotSpotLoweringProvider loweringProvider = (HotSpotLoweringProvider) hsailProviders.getLowerer();
        loweringProvider.initialize(hsailProviders, vmConfig);

        // Step 2: the replacements used by the HSAIL backend.
        HSAILHotSpotReplacementsImpl hsailReplacements = (HSAILHotSpotReplacementsImpl) hsailProviders.getReplacements();
        hsailReplacements.completeInitialization();
    }

    /**
     * Compiles a reflective method to a GPU binary and installs it.
     *
     * @param method the method to compile
     * @return the value returned by {@link #installKernel} for the compiled kernel
     */
    public HotSpotNmethod compileAndInstallKernel(Method method) {
        MetaAccessProvider metaAccess = getProviders().getMetaAccess();
        ResolvedJavaMethod resolvedMethod = metaAccess.lookupJavaMethod(method);
        // A binary is always requested here because installKernel needs an entry point.
        ExternalCompilationResult kernelCode = compileKernel(resolvedMethod, true);
        return installKernel(resolvedMethod, kernelCode);
    }

    /**
     * Builds a graph for {@code method}, compiles it to HSAIL code and, on request, generates
     * a GPU binary from that code.
     *
     * @param method the method whose bytecode is compiled
     * @param makeBinary specifies whether a GPU binary should also be generated for the HSAIL code.
     *            If true, the returned value is guaranteed to have a non-zero
     *            {@linkplain ExternalCompilationResult#getEntryPoint() entry point}.
     * @return the HSAIL code compiled from {@code method}'s bytecode
     * @throws GraalInternalError if a binary is requested while the device is not initialized,
     *             or if kernel generation yields a zero handle
     */
    public ExternalCompilationResult compileKernel(ResolvedJavaMethod method, boolean makeBinary) {
        StructuredGraph kernelGraph = new StructuredGraph(method);
        HotSpotProviders hsailProviders = getProviders();
        MetaAccessProvider metaAccessProvider = getProviders().getMetaAccess();

        // Changed from the default to help generate deopts when needed.
        // NOTE(review): whatever remove() returns is discarded; if OptimisticOptimizations is
        // immutable this call has no effect on optimisticOpts - confirm.
        OptimisticOptimizations optimisticOpts = OptimisticOptimizations.ALL;
        optimisticOpts.remove(OptimisticOptimizations.Optimization.UseExceptionProbabilityForOperations);
        GraphBuilderPhase.Instance graphBuilder = new GraphBuilderPhase.Instance(metaAccessProvider, GraphBuilderConfiguration.getSnippetDefault(), optimisticOpts);
        graphBuilder.apply(kernelGraph);
        PhaseSuite<HighTierContext> builderSuite = hsailProviders.getSuites().getDefaultGraphBuilderSuite();
        CallingConvention kernelCC = getCallingConvention(hsailProviders.getCodeCache(), Type.JavaCallee, kernelGraph.method(), false);

        // Append the special HSAILNonNullParametersPhase, limited by the argument count.
        // NOTE(review): this appends to the suite object handed out by the providers; if that
        // object is shared between compilations the phase would accumulate - confirm.
        builderSuite.appendPhase(new HSAILNonNullParametersPhase(kernelCC.getArguments().length));

        Suites defaultSuites = hsailProviders.getSuites().getDefaultSuites();
        ExternalCompilationResult hsailCode = compileGraph(kernelGraph, null, kernelCC, method, hsailProviders, this, this.getTarget(), null, builderSuite, optimisticOpts,
                        getProfilingInfo(kernelGraph), null, defaultSuites, new ExternalCompilationResult(), CompilationResultBuilderFactory.Default);

        // Dump the infopoints of the generated code when logging is enabled.
        try (Scope codeGenScope = Debug.scope("CodeGen")) {
            if (Debug.isLogEnabled()) {
                List<Infopoint> infopoints = hsailCode.getInfopoints();
                Debug.log("%d HSAIL infopoints", infopoints.size());
                for (Infopoint infopoint : infopoints) {
                    Debug.log(infopoint.toString());
                    Debug.log(infopoint.debugInfo.frame().toString());
                }
            }
        } catch (Throwable t) {
            throw Debug.handle(t);
        }

        if (!makeBinary) {
            return hsailCode;
        }

        if (!deviceInitialized) {
            throw new GraalInternalError("Cannot generate GPU kernel if device is not initialized");
        }
        try (Scope binaryScope = Debug.scope("GeneratingKernelBinary")) {
            long kernelHandle = generateKernel(hsailCode.getTargetCode(), method.getName());
            // A zero handle is treated as a failed kernel compilation.
            if (kernelHandle == 0) {
                throw new GraalInternalError("Failed to compile HSAIL kernel");
            }
            hsailCode.setEntryPoint(kernelHandle);
        } catch (Throwable t) {
            throw Debug.handle(t);
        }
        return hsailCode;
    }

    /**
     * Phase that re-stamps object-typed parameters of a graph as declared non-null. Only
     * parameters visited before the {@code numArgs}-th one are affected.
     */
    private static class HSAILNonNullParametersPhase extends Phase {
        // Limits the stamping so that the final argument in an obj stream method is excluded.
        private int numArgs;

        public HSAILNonNullParametersPhase(int numArgs) {
            this.numArgs = numArgs;
        }

        @Override
        protected void run(StructuredGraph graph) {
            int visited = 0;
            for (ParameterNode parameter : graph.getNodes(ParameterNode.class)) {
                visited++;
                // Stop stamping once the limit is reached, but keep iterating as before.
                if (visited >= numArgs) {
                    continue;
                }
                if (!(parameter.stamp() instanceof ObjectStamp)) {
                    continue;
                }
                ObjectStamp objectStamp = (ObjectStamp) parameter.stamp();
                parameter.setStamp(StampFactory.declaredNonNull(objectStamp.type()));
            }
        }
    }

    /**
     * Generates a GPU binary from HSAIL code. Implemented natively.
     *
     * @param hsailCode the HSAIL code to turn into a binary
     * @param name the name passed along with the code (compileKernel passes the method name)
     * @return a handle to the generated kernel; compileKernel treats 0 as failure
     */
    private static native long generateKernel(byte[] hsailCode, String name);

    /**
     * Installs the {@linkplain ExternalCompilationResult#getEntryPoint() GPU binary} associated
     * with some given HSAIL code in the code cache and returns a {@link HotSpotNmethod} handle to
     * the installed code. If the HSAIL code carries a host graph, that graph is compiled with the
     * host backend and the result is merged with the HSAIL code before installation.
     *
     * @param method the method the kernel was compiled from; must be a
     *            {@link HotSpotResolvedJavaMethod}
     * @param hsailCode HSAIL compilation result for which a GPU binary has been generated
     * @return a handle to the binary as installed in the HotSpot code cache, or {@code null} if
     *         the VM did not report a successful installation
     */
    public final HotSpotNmethod installKernel(ResolvedJavaMethod method, ExternalCompilationResult hsailCode) {
        assert hsailCode.getEntryPoint() != 0L;
        // code below here lifted from HotSpotCodeCacheProviders.addExternalMethod
        // used to be return getProviders().getCodeCache().addExternalMethod(method, hsailCode);
        HotSpotResolvedJavaMethod javaMethod = (HotSpotResolvedJavaMethod) method;
        // Allocate a compile id only if the result does not have one yet.
        if (hsailCode.getId() == -1) {
            hsailCode.setId(javaMethod.allocateCompileId(hsailCode.getEntryBCI()));
        }
        CompilationResult compilationResult = hsailCode;
        StructuredGraph hostGraph = hsailCode.getHostGraph();
        if (hostGraph != null) {
            // TODO get rid of the unverified entry point in the host code
            try (Scope ds = Debug.scope("GeneratingHostGraph")) {
                HotSpotBackend hostBackend = getRuntime().getHostBackend();
                JavaType[] parameterTypes = new JavaType[hostGraph.getNodes(ParameterNode.class).count()];
                Debug.log("Param count: %d", parameterTypes.length);
                for (int i = 0; i < parameterTypes.length; i++) {
                    ParameterNode parameter = hostGraph.getParameter(i);
                    // The parameter is a node, not a number, so it must be formatted with %s.
                    Debug.log("Param [%d]=%s", i, parameter);
                    parameterTypes[i] = parameter.stamp().javaType(hostBackend.getProviders().getMetaAccess());
                    Debug.log(" %s", parameterTypes[i]);
                }
                CallingConvention cc = hostBackend.getProviders().getCodeCache().getRegisterConfig().getCallingConvention(Type.JavaCallee, method.getSignature().getReturnType(null), parameterTypes,
                                hostBackend.getTarget(), false);
                CompilationResult hostCode = compileGraph(hostGraph, null, cc, method, hostBackend.getProviders(), hostBackend, this.getTarget(), null,
                                hostBackend.getProviders().getSuites().getDefaultGraphBuilderSuite(), OptimisticOptimizations.NONE, null, null,
                                hostBackend.getProviders().getSuites().getDefaultSuites(), new CompilationResult(), CompilationResultBuilderFactory.Default);
                // What gets installed is the host code combined with the kernel's identity.
                compilationResult = merge(hostCode, hsailCode);
            } catch (Throwable e) {
                throw Debug.handle(e);
            }
        }

        HotSpotNmethod code = new HotSpotNmethod(javaMethod, hsailCode.getName(), false, true);
        HotSpotCompiledNmethod compiled = new HotSpotCompiledNmethod(getTarget(), javaMethod, compilationResult);
        CodeInstallResult result = getRuntime().getCompilerToVM().installCode(compiled, code, null);
        // Any result other than OK is reported to the caller as null.
        if (result != CodeInstallResult.OK) {
            return null;
        }
        return code;
    }

    /**
     * Combines a host compilation with an HSAIL compilation into a fresh result. Entry point,
     * id and entry BCI are taken from the HSAIL code; frame layout, target code, annotations,
     * marks, exception handlers, data references and infopoints are taken from the host code;
     * assumptions of both are recorded together.
     *
     * @param hostCode the compilation result of the host graph
     * @param hsailCode the HSAIL compilation result; asserted to have no marks, exception
     *            handlers or data references
     * @return a new result combining both inputs
     */
    private static ExternalCompilationResult merge(CompilationResult hostCode, ExternalCompilationResult hsailCode) {
        ExternalCompilationResult merged = new ExternalCompilationResult();

        // Taken from the HSAIL code.
        merged.setEntryPoint(hsailCode.getEntryPoint());
        merged.setId(hsailCode.getId());
        merged.setEntryBCI(hsailCode.getEntryBCI());
        assert hsailCode.getMarks().isEmpty();
        assert hsailCode.getExceptionHandlers().isEmpty();
        assert hsailCode.getDataReferences().isEmpty();

        // Taken from the host code.
        merged.setFrameSize(hostCode.getFrameSize());
        merged.setCustomStackAreaOffset(hostCode.getCustomStackAreaOffset());
        merged.setRegisterRestoreEpilogueOffset(hostCode.getRegisterRestoreEpilogueOffset());
        merged.setTargetCode(hostCode.getTargetCode(), hostCode.getTargetCodeSize());
        for (CodeAnnotation hostAnnotation : hostCode.getAnnotations()) {
            merged.addAnnotation(hostAnnotation);
        }
        // Marks are re-recorded with an empty reference array.
        CompilationResult.Mark[] emptyReferences = {};
        for (Mark hostMark : hostCode.getMarks()) {
            merged.recordMark(hostMark.pcOffset, hostMark.id, emptyReferences);
        }
        for (ExceptionHandler hostHandler : hostCode.getExceptionHandlers()) {
            merged.recordExceptionHandler(hostHandler.pcOffset, hostHandler.handlerPos);
        }
        for (DataPatch hostPatch : hostCode.getDataReferences()) {
            // Patches without data are not carried over.
            if (hostPatch.data == null) {
                continue;
            }
            if (hostPatch.inline) {
                merged.recordInlineData(hostPatch.pcOffset, hostPatch.data);
            } else {
                merged.recordDataReference(hostPatch.pcOffset, hostPatch.data);
            }
        }
        for (Infopoint hostInfopoint : hostCode.getInfopoints()) {
            if (!(hostInfopoint instanceof Call)) {
                merged.recordInfopoint(hostInfopoint.pcOffset, hostInfopoint.debugInfo, hostInfopoint.reason);
                continue;
            }
            Call hostCall = (Call) hostInfopoint;
            merged.recordCall(hostCall.pcOffset, hostCall.size, hostCall.target, hostCall.debugInfo, hostCall.direct);
        }

        // Assumptions of both compilations, host first.
        Assumptions combined = new Assumptions(true);
        for (CompilationResult source : new CompilationResult[]{hostCode, hsailCode}) {
            Assumptions recorded = source.getAssumptions();
            if (recorded == null) {
                continue;
            }
            for (Assumption assumption : recorded.getAssumptions()) {
                if (assumption != null) {
                    combined.record(assumption);
                }
            }
        }
        // Assumptions are only set on the result when at least one was recorded.
        if (!combined.isEmpty()) {
            merged.setAssumptions(combined);
        }
        return merged;
    }

    /**
     * Runs an installed kernel through the native execution entry point.
     *
     * @param kernel the installed kernel code
     * @param jobSize the job size handed to the native call
     * @param args the kernel arguments handed to the native call
     * @return the value returned by the native call
     * @throws InvalidInstalledCodeException propagated from the native call
     * @throws GraalInternalError if the device is not initialized
     */
    public boolean executeKernel(HotSpotInstalledCode kernel, int jobSize, Object[] args) throws InvalidInstalledCodeException {
        if (!deviceInitialized) {
            throw new GraalInternalError("Cannot execute GPU kernel if device is not initialized");
        }
        // A fresh save area of 16 slots per possible deopt index is allocated per call.
        final int oopsSaveAreaLength = maxDeoptIndex * 16;
        return executeKernel0(kernel, jobSize, args, new Object[oopsSaveAreaLength]);
    }

    /**
     * Native entry point used by {@link #executeKernel}. The {@code oopsSave} array is the
     * save area that {@code executeKernel} allocates for each call.
     */
    private static native boolean executeKernel0(HotSpotInstalledCode kernel, int jobSize, Object[] args, Object[] oopsSave) throws InvalidInstalledCodeException;

    /**
     * Creates the frame map for an HSAIL compilation, so that the HSAIL register set is used
     * when the compilation target is HSAIL.
     */
    @Override
    public FrameMap newFrameMap(RegisterConfig registerConfig) {
        FrameMap hsailFrameMap = new HSAILFrameMap(getCodeCache(), registerConfig);
        return hsailFrameMap;
    }

    /**
     * Creates the HSAIL HotSpot LIR generator from this backend's providers and VM
     * configuration.
     */
    @Override
    public LIRGenerator newLIRGenerator(CallingConvention cc, LIRGenerationResult lirGenRes) {
        LIRGenerator hsailLirGen = new HSAILHotSpotLIRGenerator(getProviders(), getRuntime().getConfig(), cc, lirGenRes);
        return hsailLirGen;
    }

    /**
     * Creates the HSAIL-specific LIR generation result. The {@code stub} argument is not used.
     */
    @Override
    public LIRGenerationResult newLIRGenerationResult(LIR lir, FrameMap frameMap, Object stub) {
        LIRGenerationResult hsailResult = new HSAILHotSpotLIRGenerationResult(lir, frameMap);
        return hsailResult;
    }

    /**
     * Creates the HSAIL HotSpot node-to-LIR builder for the given graph and LIR generator.
     */
    @Override
    public NodeLIRBuilder newNodeLIRGenerator(StructuredGraph graph, LIRGenerator lirGen) {
        NodeLIRBuilder hsailBuilder = new HSAILHotSpotNodeLIRBuilder(graph, lirGen);
        return hsailBuilder;
    }

    /**
     * Frame context used for HSAIL code emission. It always reports a frame, and entering or
     * leaving only writes a log message.
     */
    class HotSpotFrameContext implements FrameContext {

        public boolean hasFrame() {
            return true;
        }

        @Override
        public void enter(CompilationResultBuilder crb) {
            logNothingToDo();
        }

        @Override
        public void leave(CompilationResultBuilder crb) {
            logNothingToDo();
        }

        // Shared by enter and leave: neither emits any code.
        private void logNothingToDo() {
            Debug.log("Nothing to do here");
        }
    }

    /**
     * A compilation result builder that additionally keeps hold of the HSAIL LIR generation
     * result it was created with.
     */
    static class HSAILCompilationResultBuilder extends CompilationResultBuilder {
        public HSAILHotSpotLIRGenerationResult lirGenRes;

        public HSAILCompilationResultBuilder(CodeCacheProvider codeCache, ForeignCallsProvider foreignCalls, FrameMap frameMap, Assembler assembler, FrameContext frameContext,
                        CompilationResult compilationResult, HSAILHotSpotLIRGenerationResult hsailLirGenRes) {
            super(codeCache, foreignCalls, frameMap, assembler, frameContext, compilationResult);
            // Saved so that it can be retrieved from the builder later.
            lirGenRes = hsailLirGenRes;
        }
    }

    /**
     * Creates the HSAIL HotSpot assembler for this backend's target. The frame map is not
     * used.
     */
    @Override
    protected Assembler createAssembler(FrameMap frameMap) {
        Assembler hsailAssembler = new HSAILHotSpotAssembler(getTarget());
        return hsailAssembler;
    }

    /**
     * Creates an {@link HSAILCompilationResultBuilder} for the given LIR generation result and
     * sets its frame size from the frame map. The {@code factory} argument is not used.
     */
    @Override
    public CompilationResultBuilder newCompilationResultBuilder(LIRGenerationResult lirGenRes, CompilationResult compilationResult, CompilationResultBuilderFactory factory) {
        FrameMap resultFrameMap = lirGenRes.getFrameMap();
        Assembler hsailAsm = createAssembler(resultFrameMap);
        HotSpotFrameContext hsailFrameContext = new HotSpotFrameContext();
        // Keep the LIR generation result on the builder for later use by setHostGraph.
        HSAILHotSpotLIRGenerationResult hsailLirGenRes = (HSAILHotSpotLIRGenerationResult) lirGenRes;
        CompilationResultBuilder builder = new HSAILCompilationResultBuilder(getCodeCache(), getForeignCalls(), resultFrameMap, hsailAsm, hsailFrameContext, compilationResult, hsailLirGenRes);
        builder.setFrameSize(resultFrameMap.frameSize());
        return builder;
    }

    @Override
    public void emitCode(CompilationResultBuilder crb, LIR lir, ResolvedJavaMethod method) {
        assert method != null : lir + " is not associated with a method";

        boolean useHSAILDeoptimization = getRuntime().getConfig().useHSAILDeoptimization;

        // Emit the prologue.
        HSAILAssembler asm = (HSAILAssembler) crb.asm;
        asm.emitString0("version 0:95: $full : $large;\n");

        Signature signature = method.getSignature();
        int sigParamCount = signature.getParameterCount(false);
        // We're subtracting 1 because we're not making the final gid as a parameter.

        int nonConstantParamCount = sigParamCount - 1;
        boolean isStatic = (Modifier.isStatic(method.getModifiers()));
        // Determine if this is an object lambda.
        boolean isObjectLambda = true;

        if (signature.getParameterType(nonConstantParamCount, null).getKind() == Kind.Int) {
            isObjectLambda = false;
        } else {
            // Add space for gid int reg.
            nonConstantParamCount++;
        }

        // If this is an instance method, include mappings for the "this" parameter
        // as the first parameter.
        if (!isStatic) {
            nonConstantParamCount++;
        }
        // Add in any "constant" parameters (currently none).
        int totalParamCount = nonConstantParamCount;
        JavaType[] paramtypes = new JavaType[totalParamCount];
        String[] paramNames = new String[totalParamCount];
        int pidx = 0;
        MetaAccessProvider metaAccess = getProviders().getMetaAccess();
        for (int i = 0; i < totalParamCount; i++) {
            if (i == 0 && !isStatic) {
                paramtypes[i] = metaAccess.lookupJavaType(Object.class);
                paramNames[i] = "%_this";
            } else if (i < nonConstantParamCount) {
                if (isObjectLambda && (i == (nonConstantParamCount))) {
                    // Set up the gid register mapping.
                    paramtypes[i] = metaAccess.lookupJavaType(int.class);
                    paramNames[i] = "%_gid";
                } else {
                    paramtypes[i] = signature.getParameterType(pidx++, null);
                    paramNames[i] = "%_arg" + i;
                }
            }
        }

        asm.emitString0("// " + (isStatic ? "static" : "instance") + " method " + method + "\n");
        asm.emitString0("kernel &run ( \n");

        FrameMap frameMap = crb.frameMap;
        RegisterConfig regConfig = frameMap.registerConfig;
        // Build list of param types which does include the gid (for cc register mapping query).
        JavaType[] ccParamTypes = new JavaType[nonConstantParamCount + 1];
        // Include the gid.
        System.arraycopy(paramtypes, 0, ccParamTypes, 0, nonConstantParamCount);

        // Last entry is always int (its register gets used in the workitemabsid instruction)
        // this is true even for object stream lambdas
        if (sigParamCount > 0) {
            ccParamTypes[ccParamTypes.length - 1] = metaAccess.lookupJavaType(int.class);
        }
        CallingConvention cc = regConfig.getCallingConvention(JavaCallee, null, ccParamTypes, getTarget(), false);

        /**
         * Compute the hsail size mappings up to but not including the last non-constant parameter
         * (which is the gid).
         *
         */
        String[] paramHsailSizes = new String[totalParamCount];
        for (int i = 0; i < totalParamCount; i++) {
            String paramtypeStr = paramtypes[i].toString();
            String sizeStr = paramTypeMap.get(paramtypeStr);
            // Catch all for any unmapped paramtype that is u64 (address of an object).
            paramHsailSizes[i] = (sizeStr != null ? sizeStr : "u64");
        }
        // Emit the kernel function parameters.
        for (int i = 0; i < totalParamCount; i++) {
            String str = "align 8 kernarg_" + paramHsailSizes[i] + " " + paramNames[i];

            if (useHSAILDeoptimization || (i != totalParamCount - 1)) {
                str += ",";
            }
            asm.emitString(str);
        }

        if (useHSAILDeoptimization) {
            // add in the deoptInfo parameter
            asm.emitString("kernarg_u64 " + asm.getDeoptInfoName());
        }

        asm.emitString(") {");

        /*
         * End of parameters start of prolog code. Emit the load instructions for loading of the
         * kernel non-constant parameters into registers. The constant class parameters will not be
         * loaded up front but will be loaded as needed.
         */
        for (int i = 0; i < nonConstantParamCount; i++) {
            asm.emitString("ld_kernarg_" + paramHsailSizes[i] + "  " + HSAIL.mapRegister(cc.getArgument(i)) + ", [" + paramNames[i] + "];");
        }

        /*
         * Emit the workitemabsid instruction for loading the hidden gid parameter. This is assigned
         * the register as if it were the last of the nonConstant parameters.
         */
        String workItemReg = "$s" + Integer.toString(asRegister(cc.getArgument(nonConstantParamCount)).encoding());
        asm.emitString("workitemabsid_u32 " + workItemReg + ", 0;");

        final int offsetToDeopt = getRuntime().getConfig().hsailDeoptOffset;
        final String deoptInProgressLabel = "@LHandleDeoptInProgress";

        if (useHSAILDeoptimization) {
            AllocatableValue scratch64 = HSAIL.d16.asValue(Kind.Object);
            AllocatableValue scratch32 = HSAIL.s34.asValue(Kind.Int);
            HSAILAddress deoptInfoAddr = new HSAILAddressValue(Kind.Int, scratch64, offsetToDeopt).toAddress();
            asm.emitLoadKernelArg(scratch64, asm.getDeoptInfoName(), "u64");
            asm.emitComment("// Check if a deopt has occurred and abort if true before doing any work");
            asm.emitLoadAcquire(scratch32, deoptInfoAddr);
            asm.emitCompare(scratch32, Constant.forInt(0), "ne", false, false);
            asm.cbr(deoptInProgressLabel);
        }

        /*
         * Note the logic used for this spillseg size is to leave space and then go back and patch
         * in the correct size once we have generated all the instructions. This should probably be
         * done in a more robust way by implementing something like asm.insertString.
         */
        int spillsegDeclarationPosition = asm.position() + 1;
        String spillsegTemplate = "align 4 spill_u8 %spillseg[123456];";
        asm.emitString(spillsegTemplate);
        // Emit object array load prologue here.
        if (isObjectLambda) {
            boolean useCompressedOops = getRuntime().getConfig().useCompressedOops;
            final int arrayElementsOffset = HotSpotGraalRuntime.getArrayBaseOffset(Kind.Object);
            String iterationObjArgReg = HSAIL.mapRegister(cc.getArgument(nonConstantParamCount - 1));
            // iterationObjArgReg will be the highest $d register in use (it is the last parameter)
            // so tempReg can be the next higher $d register
            String tmpReg = "$d" + (asRegister(cc.getArgument(nonConstantParamCount - 1)).encoding() + 1);
            // Convert gid to long.
            asm.emitString("cvt_u64_s32 " + tmpReg + ", " + workItemReg + "; // Convert gid to long");
            // Adjust index for sizeof ref. Where to pull this size from?
            asm.emitString("mul_u64 " + tmpReg + ", " + tmpReg + ", " + (useCompressedOops ? 4 : 8) + "; // Adjust index for sizeof ref");
            // Adjust for actual data start.
            asm.emitString("add_u64 " + tmpReg + ", " + tmpReg + ", " + arrayElementsOffset + "; // Adjust for actual elements data start");
            // Add to array ref ptr.
            asm.emitString("add_u64 " + tmpReg + ", " + tmpReg + ", " + iterationObjArgReg + "; // Add to array ref ptr");
            // Load the object into the parameter reg.
            if (useCompressedOops) {

                // Load u32 into the d 64 reg since it will become an object address
                asm.emitString("ld_global_u32 " + tmpReg + ", " + "[" + tmpReg + "]" + "; // Load compressed ptr from array");

                long narrowOopBase = getRuntime().getConfig().narrowOopBase;
                long narrowOopShift = getRuntime().getConfig().narrowOopShift;

                if (narrowOopBase == 0 && narrowOopShift == 0) {
                    // No more calculation to do, mov to target register
                    asm.emitString("mov_b64 " + iterationObjArgReg + ", " + tmpReg + "; // no shift or base addition");
                } else {
                    if (narrowOopBase == 0) {
                        asm.emitString("shl_u64 " + iterationObjArgReg + ", " + tmpReg + ", " + narrowOopShift + "; // do narrowOopShift");
                    } else if (narrowOopShift == 0) {
                        // not sure if we ever get add with 0 shift but just in case
                        asm.emitString("cmp_eq_b1_u64  $c0, " + tmpReg + ", 0x0; // avoid add if compressed is null");
                        asm.emitString("add_u64 " + iterationObjArgReg + ", " + tmpReg + ", " + narrowOopBase + "; // add narrowOopBase");
                        asm.emitString("cmov_b64 " + iterationObjArgReg + ", $c0, 0x0, " + iterationObjArgReg + "; // avoid add if compressed is null");
                    } else {
                        asm.emitString("cmp_eq_b1_u64  $c0, " + tmpReg + ", 0x0; // avoid shift-add if compressed is null");
                        asm.emitString("mad_u64 " + iterationObjArgReg + ", " + tmpReg + ", " + (1 << narrowOopShift) + ", " + narrowOopBase + "; // shift and add narrowOopBase");
                        asm.emitString("cmov_b64 " + iterationObjArgReg + ", $c0, 0x0, " + iterationObjArgReg + "; // avoid shift-add if compressed is null");
                    }
                }

            } else {
                asm.emitString("ld_global_u64 " + iterationObjArgReg + ", " + "[" + tmpReg + "]" + "; // Load from array element into parameter reg");
            }
        }
        // Prologue done, Emit code for the LIR.
        crb.emit(lir);
        // Now that code is emitted go back and figure out what the upper Bound stack size was.
        long maxStackSize = ((HSAILAssembler) crb.asm).upperBoundStackSize();
        String spillsegStringFinal;
        if (maxStackSize == 0) {
            // If no spilling, get rid of spillseg declaration.
            char[] array = new char[spillsegTemplate.length()];
            Arrays.fill(array, ' ');
            spillsegStringFinal = new String(array);
        } else {
            spillsegStringFinal = spillsegTemplate.replace("123456", String.format("%6d", maxStackSize));
        }
        asm.emitString(spillsegStringFinal, spillsegDeclarationPosition);
        // Emit the epilogue.

        // TODO: keep track of whether we need it
        if (useHSAILDeoptimization) {
            final int offsetToDeoptSaveStates = getRuntime().getConfig().hsailSaveStatesOffset0;
            final int sizeofKernelDeopt = getRuntime().getConfig().hsailSaveStatesOffset1 - getRuntime().getConfig().hsailSaveStatesOffset0;
            final int offsetToNeverRanArray = getRuntime().getConfig().hsailNeverRanArrayOffset;
            final int offsetToDeoptNextIndex = getRuntime().getConfig().hsailDeoptNextIndexOffset;
            final int offsetToDeoptimizationWorkItem = getRuntime().getConfig().hsailDeoptimizationWorkItem;
            final int offsetToDeoptimizationReason = getRuntime().getConfig().hsailDeoptimizationReason;
            final int offsetToDeoptimizationFrame = getRuntime().getConfig().hsailDeoptimizationFrame;
            final int offsetToFramePc = getRuntime().getConfig().hsailFramePcOffset;
            final int offsetToNumSaves = getRuntime().getConfig().hsailFrameNumSRegOffset;
            final int offsetToSaveArea = getRuntime().getConfig().hsailFrameSaveAreaOffset;

            AllocatableValue scratch64 = HSAIL.d16.asValue(Kind.Object);
            AllocatableValue cuSaveAreaPtr = HSAIL.d17.asValue(Kind.Object);
            AllocatableValue waveMathScratch1 = HSAIL.d18.asValue(Kind.Object);
            AllocatableValue waveMathScratch2 = HSAIL.d19.asValue(Kind.Object);

            AllocatableValue actionAndReasonReg = HSAIL.s32.asValue(Kind.Int);
            AllocatableValue codeBufferOffsetReg = HSAIL.s33.asValue(Kind.Int);
            AllocatableValue scratch32 = HSAIL.s34.asValue(Kind.Int);
            AllocatableValue workidreg = HSAIL.s35.asValue(Kind.Int);
            AllocatableValue dregOopMapReg = HSAIL.s39.asValue(Kind.Int);

            HSAILAddress deoptNextIndexAddr = new HSAILAddressValue(Kind.Int, scratch64, offsetToDeoptNextIndex).toAddress();
            HSAILAddress neverRanArrayAddr = new HSAILAddressValue(Kind.Int, scratch64, offsetToNeverRanArray).toAddress();

            // The just-started lanes that see the deopt flag will jump here
            asm.emitString0(deoptInProgressLabel + ":\n");
            asm.emitLoad(Kind.Object, waveMathScratch1, neverRanArrayAddr);
            asm.emitWorkItemAbsId(workidreg);
            asm.emitConvert(waveMathScratch2, workidreg, Kind.Object, Kind.Int);
            asm.emit("add", waveMathScratch1, waveMathScratch1, waveMathScratch2);
            HSAILAddress neverRanStoreAddr = new HSAILAddressValue(Kind.Byte, waveMathScratch1, 0).toAddress();
            asm.emitStore(Kind.Byte, Constant.forInt(1), neverRanStoreAddr);
            asm.emitString("ret;");

            // The deoptimizing lanes will jump here
            asm.emitString0(asm.getDeoptLabelName() + ":\n");
            String labelExit = asm.getDeoptLabelName() + "_Exit";

            HSAILAddress deoptInfoAddr = new HSAILAddressValue(Kind.Int, scratch64, offsetToDeopt).toAddress();
            asm.emitLoadKernelArg(scratch64, asm.getDeoptInfoName(), "u64");

            // Set deopt occurred flag
            asm.emitMov(Kind.Int, scratch32, Constant.forInt(1));
            asm.emitStoreRelease(scratch32, deoptInfoAddr);

            asm.emitComment("// Determine next deopt save slot");
            asm.emitAtomicAdd(scratch32, deoptNextIndexAddr, Constant.forInt(1));
            // scratch32 now holds next index to use
            // set error condition if no room in save area
            asm.emitComment("// assert room to save deopt");
            asm.emitCompare(scratch32, Constant.forInt(maxDeoptIndex), "lt", false, false);
            asm.cbr("@L_StoreDeopt");
            // if assert fails, store a guaranteed negative workitemid in top level deopt occurred
            // flag
            asm.emitWorkItemAbsId(scratch32);
            asm.emit("mad", scratch32, scratch32, Constant.forInt(-1), Constant.forInt(-1));
            asm.emitStore(scratch32, deoptInfoAddr);
            asm.emitString("ret;");

            asm.emitString0("@L_StoreDeopt" + ":\n");

            // Store deopt for this workitem into its slot in the HSAILComputeUnitSaveStates array

            asm.emitComment("// Convert id's for ptr math");
            asm.emitConvert(cuSaveAreaPtr, scratch32, Kind.Object, Kind.Int);
            asm.emitComment("// multiply by sizeof KernelDeoptArea");
            asm.emit("mul", cuSaveAreaPtr, cuSaveAreaPtr, Constant.forInt(sizeofKernelDeopt));
            asm.emitComment("// Add computed offset to deoptInfoPtr base");
            asm.emit("add", cuSaveAreaPtr, cuSaveAreaPtr, scratch64);
            // Add offset to _deopt_save_states[0]
            asm.emit("add", scratch64, cuSaveAreaPtr, Constant.forInt(offsetToDeoptSaveStates));

            HSAILAddress workItemAddr = new HSAILAddressValue(Kind.Int, scratch64, offsetToDeoptimizationWorkItem).toAddress();
            HSAILAddress actionReasonStoreAddr = new HSAILAddressValue(Kind.Int, scratch64, offsetToDeoptimizationReason).toAddress();

            asm.emitComment("// Get _deopt_info._first_frame");
            asm.emit("add", waveMathScratch1, scratch64, Constant.forInt(offsetToDeoptimizationFrame));
            // Now waveMathScratch1 is the _deopt_info._first_frame
            HSAILAddress pcStoreAddr = new HSAILAddressValue(Kind.Int, waveMathScratch1, offsetToFramePc).toAddress();
            HSAILAddress regCountsAddr = new HSAILAddressValue(Kind.Int, waveMathScratch1, offsetToNumSaves).toAddress();
            HSAILAddress dregOopMapAddr = new HSAILAddressValue(Kind.Int, waveMathScratch1, offsetToNumSaves + 2).toAddress();

            asm.emitComment("// store deopting workitem");
            asm.emitWorkItemAbsId(scratch32);
            asm.emitStore(Kind.Int, scratch32, workItemAddr);
            asm.emitComment("// store actionAndReason");
            asm.emitStore(Kind.Int, actionAndReasonReg, actionReasonStoreAddr);
            asm.emitComment("// store PC");
            asm.emitStore(Kind.Int, codeBufferOffsetReg, pcStoreAddr);
            asm.emitComment("// store regCounts");
            asm.emitStore(Kind.Short, Constant.forInt(32 + (16 << 8) + (0 << 16)), regCountsAddr);
            asm.emitComment("// store dreg ref map bits");
            asm.emitStore(Kind.Short, dregOopMapReg, dregOopMapAddr);

            // get the union of registers needed to be saved at the infopoints
            // usedRegs array assumes d15 has the highest register number we wish to save
            // and initially has all registers as false
            boolean[] infoUsedRegs = new boolean[HSAIL.d15.number + 1];
            List<Infopoint> infoList = crb.compilationResult.getInfopoints();
            for (Infopoint info : infoList) {
                BytecodeFrame frame = info.debugInfo.frame();
                for (int i = 0; i < frame.numLocals + frame.numStack; i++) {
                    Value val = frame.values[i];
                    if (isLegal(val) && isRegister(val)) {
                        Register reg = asRegister(val);
                        infoUsedRegs[reg.number] = true;
                    }
                }
            }

            // loop storing each of the 32 s registers that are used by infopoints
            // we always store in a fixed location, even if some registers are not stored
            asm.emitComment("// store used s regs");
            int ofst = offsetToSaveArea;
            for (Register sreg : HSAIL.sRegisters) {
                if (infoUsedRegs[sreg.number]) {
                    Kind kind = Kind.Int;
                    HSAILAddress addr = new HSAILAddressValue(kind, waveMathScratch1, ofst).toAddress();
                    AllocatableValue sregValue = sreg.asValue(kind);
                    asm.emitStore(kind, sregValue, addr);
                }
                ofst += 4;
            }

            // loop storing each of the 16 d registers that are used by infopoints
            asm.emitComment("// store used d regs");
            for (Register dreg : HSAIL.dRegisters) {
                if (infoUsedRegs[dreg.number]) {
                    Kind kind = Kind.Long;
                    HSAILAddress addr = new HSAILAddressValue(kind, waveMathScratch1, ofst).toAddress();
                    AllocatableValue dregValue = dreg.asValue(kind);
                    asm.emitStore(kind, dregValue, addr);
                }
                ofst += 8;
            }

            // for now, ignore saving the spill variables but that would come here

            asm.emitString0(labelExit + ":\n");

            // and emit the return
            crb.frameContext.leave(crb);
            asm.exit();
        } else {
            // Deoptimization is explicitly off, so emit simple return
            asm.emitString0(asm.getDeoptLabelName() + ":\n");
            asm.emitComment("// No deoptimization");
            asm.emitString("ret;");
        }

        asm.emitString0("}; \n");

        ExternalCompilationResult compilationResult = (ExternalCompilationResult) crb.compilationResult;
        HSAILHotSpotLIRGenerationResult lirGenRes = ((HSAILCompilationResultBuilder) crb).lirGenRes;
        compilationResult.setHostGraph(prepareHostGraph(method, lirGenRes.getDeopts(), getProviders(), getRuntime().getConfig()));
    }

    /**
     * Builds the host graph that dispatches a deoptimization reported by a kernel to the matching
     * host-side deoptimization branch.
     * <p>
     * The graph declares four parameters (index 0: deopt id, 1: HSAIL frame, 2: reason and action,
     * 3: speculation) and consists of an integer switch on the deopt id. The switch keys are the
     * code buffer positions of the given deoptimization ops; each key leads to a branch created by
     * {@link #createHostDeoptBranch}. The default successor is the branch created by
     * {@link #createHostCrashBranch} and is given probability 0.
     *
     * @param method the method the host graph is created for
     * @param deopts the deoptimization ops of the kernel; this list is sorted in place by code
     *            buffer position
     * @param providers providers handed on to the branch construction
     * @param config VM configuration handed on to the branch construction
     * @return the host graph, or {@code null} if {@code deopts} is empty
     */
    private static StructuredGraph prepareHostGraph(ResolvedJavaMethod method, List<DeoptimizeOp> deopts, HotSpotProviders providers, HotSpotVMConfig config) {
        if (deopts.isEmpty()) {
            return null;
        }
        StructuredGraph hostGraph = new StructuredGraph(method, -2);
        ParameterNode deoptId = hostGraph.unique(new ParameterNode(0, StampFactory.intValue()));
        ParameterNode hsailFrame = hostGraph.unique(new ParameterNode(1, StampFactory.forKind(providers.getCodeCache().getTarget().wordKind)));
        ParameterNode reasonAndAction = hostGraph.unique(new ParameterNode(2, StampFactory.intValue()));
        ParameterNode speculation = hostGraph.unique(new ParameterNode(3, StampFactory.object()));

        final int deoptCount = deopts.size();
        // One extra slot in each per-successor array for the default (crash) branch.
        AbstractBeginNode[] branches = new AbstractBeginNode[deoptCount + 1];
        int[] keys = new int[deoptCount];
        int[] keySuccessors = new int[deoptCount + 1];
        double[] keyProbabilities = new double[deoptCount + 1];

        // Order the deopts by code buffer position so that the switch keys come out ascending.
        // Integer.compare is used instead of subtraction, which can overflow and misorder.
        Collections.sort(deopts, new Comparator<DeoptimizeOp>() {
            @Override
            public int compare(DeoptimizeOp o1, DeoptimizeOp o2) {
                return Integer.compare(o1.getCodeBufferPos(), o2.getCodeBufferPos());
            }
        });

        int i = 0;
        for (DeoptimizeOp deopt : deopts) {
            keySuccessors[i] = i;
            // All deopt sites are treated as equally likely.
            keyProbabilities[i] = 1.0 / deoptCount;
            keys[i] = deopt.getCodeBufferPos();
            assert keys[i] >= 0;
            branches[i] = createHostDeoptBranch(deopt, hsailFrame, reasonAndAction, speculation, providers, config);

            i++;
        }

        // Default successor: an unknown deopt id ends up in the crash branch.
        keyProbabilities[deoptCount] = 0;
        keySuccessors[deoptCount] = deoptCount;
        branches[deoptCount] = createHostCrashBranch(hostGraph, deoptId);

        IntegerSwitchNode switchNode = hostGraph.add(new IntegerSwitchNode(deoptId, branches, keys, keyProbabilities, keySuccessors));
        StartNode start = hostGraph.start();
        start.setNext(switchNode);
        hostGraph.setGuardsStage(GuardsStage.AFTER_FSA);
        return hostGraph;
    }

    /**
     * Creates the default branch of the host deopt switch: a VM error node reporting the deopt id,
     * followed by a return of the default value for the return kind of the host graph's method.
     *
     * @param hostGraph the graph the nodes are added to
     * @param deoptId the value passed to the VM error node as the argument for its message
     * @return the begin node heading the new branch
     */
    private static AbstractBeginNode createHostCrashBranch(StructuredGraph hostGraph, ValueNode deoptId) {
        VMErrorNode crash = hostGraph.add(new VMErrorNode("Error in HSAIL deopt. DeoptId=%d", deoptId));

        Kind returnKind = hostGraph.method().getSignature().getReturnKind();
        ConstantNode defaultResult = ConstantNode.defaultForKind(returnKind, hostGraph);
        ReturnNode exit = hostGraph.add(new ReturnNode(defaultResult));

        crash.setNext(exit);
        return BeginNode.begin(crash);
    }

    /**
     * Creates the host branch for a single kernel deoptimization site: a begin node followed by a
     * dynamic deoptimization whose state-before is rebuilt from the top frame of the deopt's frame
     * state.
     *
     * @param deopt the deoptimization op whose frame state is reconstructed
     * @param hsailFrame the frame parameter; its graph receives the new nodes
     * @param reasonAndAction input of the dynamic deoptimization node
     * @param speculation input of the dynamic deoptimization node
     * @return the begin node heading the new branch
     */
    private static AbstractBeginNode createHostDeoptBranch(DeoptimizeOp deopt, ParameterNode hsailFrame, ValueNode reasonAndAction, ValueNode speculation, HotSpotProviders providers,
                    HotSpotVMConfig config) {
        StructuredGraph graph = hsailFrame.graph();
        BeginNode begin = graph.add(new BeginNode());
        DynamicDeoptimizeNode deoptNode = graph.add(new DynamicDeoptimizeNode(reasonAndAction, speculation));

        FrameState stateBefore = createFrameState(deopt.getFrameState().topFrame, hsailFrame, providers, config);
        deoptNode.setStateBefore(stateBefore);

        begin.setNext(deoptNode);
        return begin;
    }

    /**
     * Rebuilds a {@link FrameState} in the graph of {@code hsailFrame} from a low-level bytecode
     * frame. Locals, stack entries and lock owners are translated one by one through
     * {@link #getNodeForValueFromFrame}; if the frame has a caller, the caller is translated
     * recursively and installed as the outer frame state.
     *
     * @param lowLevelFrame the frame to translate
     * @param hsailFrame the frame parameter the translated values are derived from
     * @return the frame state added to the host graph
     */
    private static FrameState createFrameState(BytecodeFrame lowLevelFrame, ParameterNode hsailFrame, HotSpotProviders providers, HotSpotVMConfig config) {
        final StructuredGraph graph = hsailFrame.graph();

        // Local variables.
        final int localCount = lowLevelFrame.numLocals;
        ValueNode[] localNodes = new ValueNode[localCount];
        for (int slot = 0; slot < localCount; slot++) {
            localNodes[slot] = getNodeForValueFromFrame(lowLevelFrame.getLocalValue(slot), hsailFrame, graph, providers, config);
        }

        // Expression stack.
        final int stackCount = lowLevelFrame.numStack;
        List<ValueNode> stackNodes = new ArrayList<>(stackCount);
        for (int slot = 0; slot < stackCount; slot++) {
            stackNodes.add(getNodeForValueFromFrame(lowLevelFrame.getStackValue(slot), hsailFrame, graph, providers, config));
        }

        // Locks: one owner node and one monitor id per lock.
        final int lockCount = lowLevelFrame.numLocks;
        ValueNode[] lockOwners = new ValueNode[lockCount];
        MonitorIdNode[] lockIds = new MonitorIdNode[lockCount];
        for (int lock = 0; lock < lockCount; lock++) {
            HotSpotMonitorValue monitor = (HotSpotMonitorValue) lowLevelFrame.getLockValue(lock);
            lockOwners[lock] = getNodeForValueFromFrame(monitor, hsailFrame, graph, providers, config);
            lockIds[lock] = getMonitorIdForHotSpotMonitorValueFromFrame(monitor, hsailFrame, graph);
        }

        FrameState result = graph.add(new FrameState(lowLevelFrame.getMethod(), lowLevelFrame.getBCI(), localNodes, stackNodes, lockOwners, lockIds, lowLevelFrame.rethrowException, false));

        BytecodeFrame callerFrame = lowLevelFrame.caller();
        if (callerFrame != null) {
            result.setOuterFrameState(createFrameState(callerFrame, hsailFrame, providers, config));
        }
        return result;
    }

    /**
     * Returns the monitor id node for a lock value. Only eliminated locks are supported: they
     * yield {@code null}; any other lock value is reported as unimplemented.
     *
     * @param lockValue the lock to look up
     * @param hsailFrame currently unused
     * @param hsailGraph currently unused
     * @return {@code null} for an eliminated lock
     */
    @SuppressWarnings({"unused"})
    private static MonitorIdNode getMonitorIdForHotSpotMonitorValueFromFrame(HotSpotMonitorValue lockValue, ParameterNode hsailFrame, StructuredGraph hsailGraph) {
        if (!lockValue.isEliminated()) {
            throw GraalInternalError.unimplemented();
        }
        return null;
    }

    /**
     * Translates a single value of a low-level frame into a node of the host graph.
     * <ul>
     * <li>constants become constant nodes,</li>
     * <li>monitor values are translated via their owner,</li>
     * <li>register values become reads from the HSAIL frame (see
     * {@link #getNodeForRegisterFromFrame}),</li>
     * <li>the illegal value becomes {@code null},</li>
     * <li>virtual objects and stack slots are not implemented.</li>
     * </ul>
     *
     * @param localValue the value to translate
     * @param hsailFrame the frame parameter register values are read from
     * @param hostGraph the graph new nodes are created in
     * @return the node for the value, or {@code null} for the illegal value
     */
    private static ValueNode getNodeForValueFromFrame(Value localValue, ParameterNode hsailFrame, StructuredGraph hostGraph, HotSpotProviders providers, HotSpotVMConfig config) {
        if (localValue instanceof Constant) {
            return ConstantNode.forConstant((Constant) localValue, providers.getMetaAccess(), hostGraph);
        }
        if (localValue instanceof VirtualObject) {
            throw GraalInternalError.unimplemented();
        }
        if (localValue instanceof StackSlot) {
            throw GraalInternalError.unimplemented();
        }
        if (localValue instanceof HotSpotMonitorValue) {
            // A monitor is represented by the node of the object that owns it.
            Value owner = ((HotSpotMonitorValue) localValue).getOwner();
            return getNodeForValueFromFrame(owner, hsailFrame, hostGraph, providers, config);
        }
        if (localValue instanceof RegisterValue) {
            Register register = ((RegisterValue) localValue).getRegister();
            return getNodeForRegisterFromFrame(register.number, localValue.getKind(), hsailFrame, hostGraph, providers, config);
        }
        if (Value.ILLEGAL.equals(localValue)) {
            return null;
        }
        throw GraalInternalError.shouldNotReachHere();
    }

    /**
     * Creates a floating read of a saved HSAIL register from the HSAIL frame.
     * <p>
     * For an {@code s} register (s0..s31) the read uses a constant location at
     * {@code hsailFrameSaveAreaOffset + intSize * index}. For a {@code d} register (d0..d15) the
     * location is indexed: the displacement is {@code hsailFrameSaveAreaOffset + longSize * index}
     * and the index is the byte read from {@code hsailFrameNumSRegOffset}, passed with a scaling
     * argument of 4. Any other register number is an internal error.
     *
     * @param regNumber number of the register whose saved value is read
     * @param valueKind kind of the value to read
     * @param hsailFrame base object of the reads
     * @param hostGraph the graph new nodes are created in
     * @return the read node producing the register's saved value
     */
    private static ValueNode getNodeForRegisterFromFrame(int regNumber, Kind valueKind, ParameterNode hsailFrame, StructuredGraph hostGraph, HotSpotProviders providers, HotSpotVMConfig config) {
        final boolean isSRegister = regNumber >= HSAIL.s0.number && regNumber <= HSAIL.s31.number;
        final boolean isDRegister = !isSRegister && regNumber >= HSAIL.d0.number && regNumber <= HSAIL.d15.number;

        LocationNode saveSlot;
        if (isSRegister) {
            int intSize = providers.getCodeCache().getTarget().arch.getSizeInBytes(Kind.Int);
            int sIndex = regNumber - HSAIL.s0.number;
            long displacement = config.hsailFrameSaveAreaOffset + intSize * sIndex;
            saveSlot = ConstantLocationNode.create(FINAL_LOCATION, valueKind, displacement, hostGraph);
        } else if (isDRegister) {
            int longSize = providers.getCodeCache().getTarget().arch.getSizeInBytes(Kind.Long);
            int dIndex = regNumber - HSAIL.d0.number;
            long displacement = config.hsailFrameSaveAreaOffset + longSize * dIndex;

            // Read the saved s-register count from the frame and use it as the index.
            LocationNode sRegCountSlot = ConstantLocationNode.create(FINAL_LOCATION, Kind.Byte, config.hsailFrameNumSRegOffset, hostGraph);
            ValueNode sRegCount = hostGraph.unique(new FloatingReadNode(hsailFrame, sRegCountSlot, null, StampFactory.forInteger(8, false)));
            sRegCount = SignExtendNode.convert(sRegCount, StampFactory.forKind(Kind.Byte));
            saveSlot = IndexedLocationNode.create(FINAL_LOCATION, valueKind, displacement, sRegCount, hostGraph, 4);
        } else {
            throw GraalInternalError.shouldNotReachHere("unknown hsail register: " + regNumber);
        }

        return hostGraph.unique(new FloatingReadNode(hsailFrame, saveSlot, null, StampFactory.forKind(valueKind)));
    }

}