# HG changeset patch # User Doug Simon # Date 1397688272 -7200 # Node ID 66e3af78ea961fb4382200c268b50b3d8e5c489e # Parent 78f1a1a7062871b13a892057565e62a6690d66cc HSAIL: added safepoint support Contributed-by: Eric Caspole diff -r 78f1a1a70628 -r 66e3af78ea96 graal/com.oracle.graal.compiler.hsail/src/com/oracle/graal/compiler/hsail/HSAILNodeLIRBuilder.java --- a/graal/com.oracle.graal.compiler.hsail/src/com/oracle/graal/compiler/hsail/HSAILNodeLIRBuilder.java Wed Apr 16 22:54:48 2014 +0200 +++ b/graal/com.oracle.graal.compiler.hsail/src/com/oracle/graal/compiler/hsail/HSAILNodeLIRBuilder.java Thu Apr 17 00:44:32 2014 +0200 @@ -25,7 +25,6 @@ import com.oracle.graal.api.meta.*; import com.oracle.graal.compiler.gen.*; -import com.oracle.graal.debug.*; import com.oracle.graal.graph.*; import com.oracle.graal.lir.*; import com.oracle.graal.lir.hsail.*; @@ -63,11 +62,6 @@ } @Override - public void visitSafepointNode(SafepointNode i) { - Debug.log("visitSafePointNode unimplemented"); - } - - @Override public void emitNullCheck(ValueNode v, DeoptimizingNode deopting) { assert v.stamp() instanceof ObjectStamp; Variable obj = newVariable(Kind.Object); diff -r 78f1a1a70628 -r 66e3af78ea96 graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java --- a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java Wed Apr 16 22:54:48 2014 +0200 +++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java Thu Apr 17 00:44:32 2014 +0200 @@ -61,7 +61,7 @@ import com.oracle.graal.lir.*; import com.oracle.graal.lir.asm.*; import com.oracle.graal.lir.hsail.*; -import com.oracle.graal.lir.hsail.HSAILControlFlow.DeoptimizeOp; +import com.oracle.graal.lir.hsail.HSAILControlFlow.DeoptimizingOp; import com.oracle.graal.lir.hsail.HSAILMove.AtomicGetAndAddOp; import com.oracle.graal.nodes.*; import com.oracle.graal.nodes.StructuredGraph.GuardsStage; @@ -443,6 +443,7 @@ 
HotSpotVMConfig config = getRuntime().getConfig(); boolean useHSAILDeoptimization = config.useHSAILDeoptimization; + boolean useHSAILSafepoints = config.useHSAILSafepoints; // see what graph nodes we have to see if we are using the thread register // if not, we don't have to emit the code that sets that up @@ -576,13 +577,23 @@ // Aliases for d17 RegisterValue d17_donorThreadIndex = HSAIL.d17.asValue(wordKind); + RegisterValue d17_safepointFlagAddrIndex = d17_donorThreadIndex; // Aliases for s34 RegisterValue s34_deoptOccurred = HSAIL.s34.asValue(Kind.Int); RegisterValue s34_donorThreadIndex = s34_deoptOccurred; asm.emitLoadKernelArg(d16_deoptInfo, asm.getDeoptInfoName(), "u64"); - asm.emitComment("// Check if a deopt has occurred and abort if true before doing any work"); + asm.emitComment("// Check if a deopt or safepoint has occurred and abort if true before doing any work"); + + if (useHSAILSafepoints) { + // Load address of _notice_safepoints field + asm.emitLoad(wordKind, d17_safepointFlagAddrIndex, new HSAILAddressValue(wordKind, d16_deoptInfo, config.hsailNoticeSafepointsOffset).toAddress()); + // Load int value from that field + asm.emitLoadAcquire(s34_deoptOccurred, new HSAILAddressValue(Kind.Int, d17_safepointFlagAddrIndex, 0).toAddress()); + asm.emitCompare(Kind.Int, s34_deoptOccurred, Constant.forInt(0), "ne", false, false); + asm.cbr(deoptInProgressLabel); + } asm.emitLoadAcquire(s34_deoptOccurred, new HSAILAddressValue(Kind.Int, d16_deoptInfo, config.hsailDeoptOccurredOffset).toAddress()); asm.emitCompare(Kind.Int, s34_deoptOccurred, Constant.forInt(0), "ne", false, false); asm.cbr(deoptInProgressLabel); @@ -693,11 +704,11 @@ AllocatableValue waveMathScratch1 = HSAIL.d18.asValue(wordKind); AllocatableValue waveMathScratch2 = HSAIL.d19.asValue(wordKind); - AllocatableValue actionAndReasonReg = HSAIL.s32.asValue(Kind.Int); - AllocatableValue codeBufferOffsetReg = HSAIL.s33.asValue(Kind.Int); + AllocatableValue actionAndReasonReg = 
HSAIL.actionAndReasonReg.asValue(Kind.Int); + AllocatableValue codeBufferOffsetReg = HSAIL.codeBufferOffsetReg.asValue(Kind.Int); AllocatableValue scratch32 = HSAIL.s34.asValue(Kind.Int); AllocatableValue workidreg = HSAIL.s35.asValue(Kind.Int); - AllocatableValue dregOopMapReg = HSAIL.s39.asValue(Kind.Int); + AllocatableValue dregOopMapReg = HSAIL.dregOopMapReg.asValue(Kind.Int); HSAILAddress deoptNextIndexAddr = new HSAILAddressValue(Kind.Int, scratch64, offsetToDeoptNextIndex).toAddress(); HSAILAddress neverRanArrayAddr = new HSAILAddressValue(Kind.Int, scratch64, offsetToNeverRanArray).toAddress(); @@ -833,7 +844,7 @@ compilationResult.setHostGraph(prepareHostGraph(method, lirGenRes.getDeopts(), getProviders(), config)); } - private static StructuredGraph prepareHostGraph(ResolvedJavaMethod method, List<DeoptimizeOp> deopts, HotSpotProviders providers, HotSpotVMConfig config) { + private static StructuredGraph prepareHostGraph(ResolvedJavaMethod method, List<DeoptimizingOp> deopts, HotSpotProviders providers, HotSpotVMConfig config) { if (deopts.isEmpty()) { return null; } @@ -847,12 +858,12 @@ int[] keySuccessors = new int[deopts.size() + 1]; double[] keyProbabilities = new double[deopts.size() + 1]; int i = 0; - Collections.sort(deopts, new Comparator<DeoptimizeOp>() { - public int compare(DeoptimizeOp o1, DeoptimizeOp o2) { + Collections.sort(deopts, new Comparator<DeoptimizingOp>() { + public int compare(DeoptimizingOp o1, DeoptimizingOp o2) { return o1.getCodeBufferPos() - o2.getCodeBufferPos(); } }); - for (DeoptimizeOp deopt : deopts) { + for (DeoptimizingOp deopt : deopts) { keySuccessors[i] = i; keyProbabilities[i] = 1.0 / deopts.size(); keys[i] = deopt.getCodeBufferPos(); @@ -881,7 +892,7 @@ return BeginNode.begin(vmError); } - private static BeginNode createHostDeoptBranch(DeoptimizeOp deopt, ParameterNode hsailFrame, ValueNode reasonAndAction, ValueNode speculation, HotSpotProviders providers, HotSpotVMConfig config) { + private static BeginNode createHostDeoptBranch(DeoptimizingOp deopt, ParameterNode 
hsailFrame, ValueNode reasonAndAction, ValueNode speculation, HotSpotProviders providers, HotSpotVMConfig config) { BeginNode branch = hsailFrame.graph().add(new BeginNode()); DynamicDeoptimizeNode deoptimization = hsailFrame.graph().add(new DynamicDeoptimizeNode(reasonAndAction, speculation)); deoptimization.setStateBefore(createFrameState(deopt.getFrameState().topFrame, hsailFrame, providers, config)); diff -r 78f1a1a70628 -r 66e3af78ea96 graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotLIRGenerationResult.java --- a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotLIRGenerationResult.java Wed Apr 16 22:54:48 2014 +0200 +++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotLIRGenerationResult.java Thu Apr 17 00:44:32 2014 +0200 @@ -26,21 +26,21 @@ import com.oracle.graal.compiler.gen.*; import com.oracle.graal.lir.*; -import com.oracle.graal.lir.hsail.HSAILControlFlow.DeoptimizeOp; +import com.oracle.graal.lir.hsail.HSAILControlFlow.DeoptimizingOp; public class HSAILHotSpotLIRGenerationResult extends LIRGenerationResultBase { - private List<DeoptimizeOp> deopts = new ArrayList<>(); + private List<DeoptimizingOp> deopts = new ArrayList<>(); public HSAILHotSpotLIRGenerationResult(LIR lir, FrameMap frameMap) { super(lir, frameMap); } - public List<DeoptimizeOp> getDeopts() { + public List<DeoptimizingOp> getDeopts() { return deopts; } - public void addDeopt(DeoptimizeOp deopt) { + public void addDeopt(DeoptimizingOp deopt) { deopts.add(deopt); } diff -r 78f1a1a70628 -r 66e3af78ea96 graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotNodeLIRBuilder.java --- a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotNodeLIRBuilder.java Wed Apr 16 22:54:48 2014 +0200 +++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotNodeLIRBuilder.java Thu Apr 17 00:44:32 2014 +0200 @@ -28,6 +28,7 @@ import com.oracle.graal.asm.*; import 
com.oracle.graal.compiler.gen.*; import com.oracle.graal.compiler.hsail.*; +import com.oracle.graal.debug.*; import com.oracle.graal.graph.*; import com.oracle.graal.hotspot.*; import com.oracle.graal.hotspot.HotSpotVMConfig.CompressEncoding; @@ -111,6 +112,19 @@ } @Override + public void visitSafepointNode(SafepointNode i) { + HotSpotVMConfig config = getGen().config; + if (config.useHSAILSafepoints == true) { + LIRFrameState info = gen.state(i); + HSAILHotSpotSafepointOp safepoint = new HSAILHotSpotSafepointOp(info, config, this); + ((HSAILHotSpotLIRGenerationResult) getGen().getResult()).addDeopt(safepoint); + append(safepoint); + } else { + Debug.log("HSAIL safepoints turned off"); + } + } + + @Override public void emitPrefetchAllocate(ValueNode address, ValueNode distance) { // nop } diff -r 78f1a1a70628 -r 66e3af78ea96 graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotSafepointOp.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotSafepointOp.java Thu Apr 17 00:44:32 2014 +0200 @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package com.oracle.graal.hotspot.hsail; + +import static com.oracle.graal.api.code.ValueUtil.*; + +import com.oracle.graal.api.code.*; +import com.oracle.graal.api.meta.*; +import com.oracle.graal.asm.hsail.*; +import com.oracle.graal.hotspot.*; +import com.oracle.graal.hsail.*; +import com.oracle.graal.lir.*; +import com.oracle.graal.lir.asm.*; +import com.oracle.graal.lir.hsail.*; +import com.oracle.graal.nodes.spi.*; + +/** + * Emits a safepoint deoptimization from HSA back to CPU. + */ +@Opcode("SAFEPOINT") +public class HSAILHotSpotSafepointOp extends HSAILLIRInstruction implements HSAILControlFlow.DeoptimizingOp { + private Constant actionAndReason; + @State protected LIRFrameState frameState; + protected int codeBufferPos = -1; + protected int dregOopMap = 0; + final int offsetToNoticeSafepoints; + + public HSAILHotSpotSafepointOp(LIRFrameState state, HotSpotVMConfig config, NodeLIRBuilderTool tool) { + actionAndReason = tool.getLIRGeneratorTool().getMetaAccess().encodeDeoptActionAndReason(DeoptimizationAction.None, DeoptimizationReason.None, 0); + frameState = state; + offsetToNoticeSafepoints = config.hsailNoticeSafepointsOffset; + } + + @Override + public void emitCode(CompilationResultBuilder crb, HSAILAssembler masm) { + + // get a unique codeBuffer position + // when we save our state, we will save this as well (it can be used as a key to get the + // debugInfo) + codeBufferPos = masm.position(); + + masm.emitComment(" /* HSAIL safepoint bci=" + frameState.debugInfo().getBytecodePosition().getBCI() + ", frameState=" + frameState + " */"); + String 
afterSafepointLabel = "@LAfterSafepoint_at_pos_" + codeBufferPos; + + AllocatableValue scratch64 = HSAIL.d16.asValue(Kind.Object); + AllocatableValue spAddrReg = HSAIL.d17.asValue(Kind.Object); + AllocatableValue scratch32 = HSAIL.s34.asValue(Kind.Int); + masm.emitLoadKernelArg(scratch64, masm.getDeoptInfoName(), "u64"); + + // Build address of noticeSafepoints field + HSAILAddress noticeSafepointsAddr = new HSAILAddressValue(Kind.Object, scratch64, offsetToNoticeSafepoints).toAddress(); + masm.emitLoad(Kind.Object, spAddrReg, noticeSafepointsAddr); + + // Load int value from that field + HSAILAddress noticeSafepointsIntAddr = new HSAILAddressValue(Kind.Int, spAddrReg, 0).toAddress(); + masm.emitLoadAcquire(scratch32, noticeSafepointsIntAddr); + masm.emitCompare(Kind.Int, scratch32, Constant.forInt(0), "eq", false, false); + masm.cbr(afterSafepointLabel); + + BytecodeFrame frame = frameState.debugInfo().frame(); + for (int i = 0; i < frame.numLocals + frame.numStack; i++) { + Value val = frame.values[i]; + if (isLegal(val) && isRegister(val)) { + Register reg = asRegister(val); + if (val.getKind() == Kind.Object) { + dregOopMap |= 1 << (reg.encoding()); + } + } + } + + AllocatableValue actionAndReasonReg = HSAIL.actionAndReasonReg.asValue(Kind.Int); + AllocatableValue codeBufferOffsetReg = HSAIL.codeBufferOffsetReg.asValue(Kind.Int); + AllocatableValue dregOopMapReg = HSAIL.dregOopMapReg.asValue(Kind.Int); + masm.emitMov(Kind.Int, actionAndReasonReg, actionAndReason); + masm.emitMov(Kind.Int, codeBufferOffsetReg, Constant.forInt(codeBufferPos)); + masm.emitMov(Kind.Int, dregOopMapReg, Constant.forInt(dregOopMap)); + masm.emitJumpToLabelName(masm.getDeoptLabelName()); + + masm.emitString0(afterSafepointLabel + ":\n"); + + // now record the debuginfo + crb.recordInfopoint(codeBufferPos, frameState, InfopointReason.SAFEPOINT); + } + + public LIRFrameState getFrameState() { + return frameState; + } + + public int getCodeBufferPos() { + return codeBufferPos; + } +} \ No 
newline at end of file diff -r 78f1a1a70628 -r 66e3af78ea96 graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotVMConfig.java --- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotVMConfig.java Wed Apr 16 22:54:48 2014 +0200 +++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotVMConfig.java Thu Apr 17 00:44:32 2014 +0200 @@ -1013,11 +1013,13 @@ @HotSpotVMField(name = "ThreadShadow::_pending_failed_speculation", type = "oop", get = HotSpotVMField.Type.OFFSET) @Stable public int pendingFailedSpeculationOffset; @HotSpotVMFlag(name = "UseHSAILDeoptimization") @Stable public boolean useHSAILDeoptimization; + @HotSpotVMFlag(name = "UseHSAILSafepoints") @Stable public boolean useHSAILSafepoints; /** * Offsets of Hsail deoptimization fields (defined in gpu_hsail.hpp). Used to propagate * exceptions from Hsail back to C++ runtime. */ + @HotSpotVMField(name = "Hsail::HSAILDeoptimizationInfo::_notice_safepoints", type = "jint*", get = HotSpotVMField.Type.OFFSET) @Stable public int hsailNoticeSafepointsOffset; @HotSpotVMField(name = "Hsail::HSAILDeoptimizationInfo::_deopt_save_states[0]", type = "Hsail::HSAILKernelDeoptimization", get = HotSpotVMField.Type.OFFSET) @Stable public int hsailSaveStatesOffset0; @HotSpotVMField(name = "Hsail::HSAILDeoptimizationInfo::_deopt_save_states[1]", type = "Hsail::HSAILKernelDeoptimization", get = HotSpotVMField.Type.OFFSET) @Stable public int hsailSaveStatesOffset1; @HotSpotVMField(name = "Hsail::HSAILDeoptimizationInfo::_deopt_occurred", type = "jint", get = HotSpotVMField.Type.OFFSET) @Stable public int hsailDeoptOccurredOffset; diff -r 78f1a1a70628 -r 66e3af78ea96 graal/com.oracle.graal.hsail/src/com/oracle/graal/hsail/HSAIL.java --- a/graal/com.oracle.graal.hsail/src/com/oracle/graal/hsail/HSAIL.java Wed Apr 16 22:54:48 2014 +0200 +++ b/graal/com.oracle.graal.hsail/src/com/oracle/graal/hsail/HSAIL.java Thu Apr 17 00:44:32 2014 +0200 @@ -132,7 +132,11 @@ public static final Register 
d18 = new Register(82, 18, "d18", CPU); public static final Register d19 = new Register(83, 19, "d19", CPU); public static final Register d20 = new Register(84, 20, "d20", CPU); + public static final Register threadRegister = d20; + public static final Register actionAndReasonReg = s32; + public static final Register codeBufferOffsetReg = s33; + public static final Register dregOopMapReg = s39; // @formatter:off public static final Register[] cRegisters = { diff -r 78f1a1a70628 -r 66e3af78ea96 graal/com.oracle.graal.lir.hsail/src/com/oracle/graal/lir/hsail/HSAILControlFlow.java --- a/graal/com.oracle.graal.lir.hsail/src/com/oracle/graal/lir/hsail/HSAILControlFlow.java Wed Apr 16 22:54:48 2014 +0200 +++ b/graal/com.oracle.graal.lir.hsail/src/com/oracle/graal/lir/hsail/HSAILControlFlow.java Thu Apr 17 00:44:32 2014 +0200 @@ -129,11 +129,17 @@ } } + public interface DeoptimizingOp { + public LIRFrameState getFrameState(); + + public int getCodeBufferPos(); + } + /*** * The ALIVE annotation is so we can get a scratch32 register that does not clobber * actionAndReason. 
*/ - public static class DeoptimizeOp extends ReturnOp { + public static class DeoptimizeOp extends ReturnOp implements DeoptimizingOp { @Alive({REG, CONST}) protected Value actionAndReason; @State protected LIRFrameState frameState; @@ -173,6 +179,8 @@ // debugInfo) codeBufferPos = masm.position(); + masm.emitComment("/* HSAIL Deoptimization pos=" + codeBufferPos + ", bci=" + frameState.debugInfo().getBytecodePosition().getBCI() + ", frameState=" + frameState + " */"); + // get the bitmap of $d regs that contain references ReferenceMap referenceMap = frameState.debugInfo().getReferenceMap(); for (int dreg = HSAIL.d0.number; dreg <= HSAIL.d15.number; dreg++) { @@ -181,14 +189,9 @@ } } - // here we will by convention use some never-allocated registers to pass to the epilogue - // deopt code - // todo: define these in HSAIL.java - // we need to pass the actionAndReason and the codeBufferPos - - AllocatableValue actionAndReasonReg = HSAIL.s32.asValue(Kind.Int); - AllocatableValue codeBufferOffsetReg = HSAIL.s33.asValue(Kind.Int); - AllocatableValue dregOopMapReg = HSAIL.s39.asValue(Kind.Int); + AllocatableValue actionAndReasonReg = HSAIL.actionAndReasonReg.asValue(Kind.Int); + AllocatableValue codeBufferOffsetReg = HSAIL.codeBufferOffsetReg.asValue(Kind.Int); + AllocatableValue dregOopMapReg = HSAIL.dregOopMapReg.asValue(Kind.Int); masm.emitMov(Kind.Int, actionAndReasonReg, actionAndReason); masm.emitMov(Kind.Int, codeBufferOffsetReg, Constant.forInt(codeBufferPos)); masm.emitMov(Kind.Int, dregOopMapReg, Constant.forInt(dregOopMap)); diff -r 78f1a1a70628 -r 66e3af78ea96 make/linux/makefiles/buildtree.make --- a/make/linux/makefiles/buildtree.make Wed Apr 16 22:54:48 2014 +0200 +++ b/make/linux/makefiles/buildtree.make Thu Apr 17 00:44:32 2014 +0200 @@ -258,6 +258,10 @@ echo "$(call gamma-path,altsrc,os/$(OS_FAMILY)/vm) \\"; \ echo "$(call gamma-path,commonsrc,os/$(OS_FAMILY)/vm) \\"; \ echo "$(call gamma-path,commonsrc,os/posix/vm) \\"; \ + echo "$(call 
gamma-path,altsrc,gpu/ptx/vm) \\"; \ + echo "$(call gamma-path,commonsrc,gpu/ptx/vm)" \\; \ + echo "$(call gamma-path,altsrc,gpu/hsail/vm) \\"; \ + echo "$(call gamma-path,commonsrc,gpu/hsail/vm) \\"; \ echo "$(call gamma-path,altsrc,gpu) \\"; \ echo "$(call gamma-path,commonsrc,gpu)"; \ [ -n "$(CFLAGS_BROWSE)" ] && \ diff -r 78f1a1a70628 -r 66e3af78ea96 make/linux/makefiles/vm.make --- a/make/linux/makefiles/vm.make Wed Apr 16 22:54:48 2014 +0200 +++ b/make/linux/makefiles/vm.make Thu Apr 17 00:44:32 2014 +0200 @@ -185,8 +185,10 @@ GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/share/vm/graal) GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/gpu/ptx/vm) +GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/gpu/hsail/vm) GRAAL_PATHS += $(HS_COMMON_SRC)/share/vm/graal GRAAL_PATHS += $(HS_COMMON_SRC)/gpu/ptx/vm +GRAAL_PATHS += $(HS_COMMON_SRC)/gpu/hsail/vm # Include dirs per type. Src_Dirs/CORE := $(CORE_PATHS) diff -r 78f1a1a70628 -r 66e3af78ea96 src/gpu/hsail/vm/gpu_hsail.cpp --- a/src/gpu/hsail/vm/gpu_hsail.cpp Wed Apr 16 22:54:48 2014 +0200 +++ b/src/gpu/hsail/vm/gpu_hsail.cpp Thu Apr 17 00:44:32 2014 +0200 @@ -71,6 +71,7 @@ }; void * Hsail::_device_context = NULL; +jint Hsail::_notice_safepoints = false; Hsail::okra_create_context_func_t Hsail::_okra_create_context; Hsail::okra_create_kernel_func_t Hsail::_okra_create_kernel; @@ -122,6 +123,18 @@ static Stats kernelStats; +//static jint in_kernel = 0; + +void Hsail::notice_safepoints() { + _notice_safepoints = true; +// if (TraceGPUInteraction) { +// tty->print_cr("[HSAIL] Notice safepoint in_kernel=%d", in_kernel); +// } +} + +void Hsail::ignore_safepoints() { + _notice_safepoints = false; +} void Hsail::register_heap() { // After the okra functions are set up and the heap is initialized, register the java heap with HSA @@ -203,8 +216,7 @@ } - -jboolean Hsail::execute_kernel_void_1d_internal(address kernel, int dimX, jobject args, methodHandle& mh, nmethod *nm, jobject oops_save, +jboolean 
Hsail::execute_kernel_void_1d_internal(address kernel, int dimX, jobject args, methodHandle& mh, nmethod *nm, jobject oops_save, jobject donor_threads, int allocBytesPerWorkitem, TRAPS) { ResourceMark rm(THREAD); objArrayOop argsArray = (objArrayOop) JNIHandles::resolve(args); @@ -258,6 +270,9 @@ // This object sets up the kernel arguments HSAILKernelArguments hka((address) kernel, mh->signature(), argsArray, mh->is_static(), e); + if (TraceGPUInteraction) { + tty->print_cr("[HSAIL] range=%d", dimX); + } // if any object passed was null, throw an exception here // doing this means the kernel code can avoid null checks on the object parameters. @@ -273,8 +288,12 @@ // Run the kernel bool success = false; { - TraceTime t1("execute kernel", TraceGPUInteraction); + TraceTime t("execute kernel", TraceGPUInteraction); + + //in_kernel = 1; + // Run the kernel success = _okra_execute_with_range(kernel, dimX); + //in_kernel = 0; } // fix up any tlab tops that overflowed @@ -297,17 +316,23 @@ } if (UseHSAILDeoptimization) { // check if any workitem requested a deopt // currently we only support at most one such workitem int deoptcode = e->deopt_occurred(); - if (deoptcode != 0) { - if (deoptcode != 1) { + if (deoptcode != 1) { + if (deoptcode == 0) { + if (TraceGPUInteraction && _notice_safepoints != 0) { + tty->print_cr("[HSAIL] observed safepoint during kernel"); + } + } else { // error condition detected in deopt code char msg[200]; sprintf(msg, "deopt error detected, slot for workitem %d was not empty", -1 * (deoptcode + 1)); guarantee(deoptcode == 1, msg); } - kernelStats.incDeopts(); + } else { + kernelStats.incDeopts(); + { TraceTime t3("handle deoptimizing workitems", TraceGPUInteraction); if (TraceGPUInteraction) { @@ -402,48 +427,47 @@ tty->print_cr("[HSAIL] Deoptimizing to host completed for %d workitems", count_deoptimized); } } - - { - TraceTime t3("handle never-rans", TraceGPUInteraction); + } - // Handle any never_ran workitems if there were any - int 
count_never_ran = 0; - bool handleNeverRansHere = true; - // turn off verbose trace stuff for javacall arg setup - bool savedTraceGPUInteraction = TraceGPUInteraction; - TraceGPUInteraction = false; - jboolean *never_ran_array = e->never_ran_array(); - if (handleNeverRansHere) { - for (int k = 0; k < dimX; k++) { - if (never_ran_array[k]) { - // run it as a javaCall - KlassHandle methKlass = mh->method_holder(); - Thread* THREAD = Thread::current(); - JavaValue result(T_VOID); - JavaCallArguments javaArgs; - // re-resolve the args_handle here - objArrayOop resolvedArgsArray = (objArrayOop) JNIHandles::resolve(args); - // This object sets up the javaCall arguments - // the way argsArray is set up, this should work for instance methods as well - // (the receiver will be the first oop pushed) - HSAILJavaCallArguments hjca(&javaArgs, k, mh->signature(), resolvedArgsArray, mh->is_static()); - if (mh->is_static()) { - JavaCalls::call_static(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD); - } else { - JavaCalls::call_virtual(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD); - } - count_never_ran++; + // Handle any never_ran workitems if there were any + { + TraceTime t("handle never-rans ", TraceGPUInteraction); + int count_never_ran = 0; + bool handleNeverRansHere = true; + // turn off verbose trace stuff for javacall arg setup + bool savedTraceGPUInteraction = TraceGPUInteraction; + TraceGPUInteraction = false; + jboolean *never_ran_array = e->never_ran_array(); + if (handleNeverRansHere) { + for (int k = 0; k < dimX; k++) { + if (never_ran_array[k]) { + // run it as a javaCall + KlassHandle methKlass = mh->method_holder(); + Thread* THREAD = Thread::current(); + JavaValue result(T_VOID); + JavaCallArguments javaArgs; + // re-resolve the args_handle here + objArrayOop resolvedArgsArray = (objArrayOop) JNIHandles::resolve(args); + // This object sets up the javaCall arguments + // the way argsArray is set up, this should work for 
instance methods as well + // (the receiver will be the first oop pushed) + HSAILJavaCallArguments hjca(&javaArgs, k, mh->signature(), resolvedArgsArray, mh->is_static()); + if (mh->is_static()) { + JavaCalls::call_static(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD); + } else { + JavaCalls::call_virtual(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD); } + count_never_ran++; } - TraceGPUInteraction = savedTraceGPUInteraction; - if (TraceGPUInteraction) { - tty->print_cr("%d workitems never ran, have been run via JavaCall", count_never_ran); - showRanges(never_ran_array, dimX); - } - } // end of never-ran handling - } + } + TraceGPUInteraction = savedTraceGPUInteraction; + if (TraceGPUInteraction) { + tty->print_cr("%d workitems never ran, have been run via JavaCall", count_never_ran); + showRanges(never_ran_array, dimX); + } + } // end of never-ran handling } - + FREE_C_HEAP_ARRAY(jboolean, e->never_ran_array(), mtInternal); delete e; } @@ -547,7 +571,7 @@ LOOKUP_OKRA_FUNCTION(okra_register_heap, okra_register_heap); // if we made it this far, real success - gpu::initialized_gpu("Okra"); + Gpu::initialized_gpu(new Hsail()); return true; GPU_END diff -r 78f1a1a70628 -r 66e3af78ea96 src/gpu/hsail/vm/gpu_hsail.hpp --- a/src/gpu/hsail/vm/gpu_hsail.hpp Wed Apr 16 22:54:48 2014 +0200 +++ b/src/gpu/hsail/vm/gpu_hsail.hpp Thu Apr 17 00:44:32 2014 +0200 @@ -27,11 +27,9 @@ #include "utilities/exceptions.hpp" #include "graal/graalEnv.hpp" -// #include "graal/graalCodeInstaller.hpp" #include "gpu_hsail_Frame.hpp" -class Hsail { - friend class gpu; +class Hsail : public Gpu { public: class HSAILKernelDeoptimization { @@ -58,6 +56,7 @@ class HSAILDeoptimizationInfo : public ResourceObj { friend class VMStructs; private: + jint* _notice_safepoints; jint _deopt_occurred; jint _deopt_next_index; JavaThread** _donor_threads; @@ -67,12 +66,12 @@ HSAILKernelDeoptimization _deopt_save_states[MAX_DEOPT_SAVE_STATES_SIZE]; inline 
HSAILDeoptimizationInfo() { + _notice_safepoints = &Hsail::_notice_safepoints; _deopt_occurred = 0; _deopt_next_index = 0; } inline jint deopt_occurred() { - // Check that hsail did not write in the wrong place return _deopt_occurred; } inline jint num_deopts() { return _deopt_next_index; } @@ -102,8 +101,8 @@ static void getNewTlabForDonorThread(ThreadLocalAllocBuffer* tlab, size_t tlabMinHsail); static jboolean execute_kernel_void_1d_internal(address kernel, int dimX, jobject args, methodHandle& mh, nmethod *nm, jobject oopsSave, - jobject donorThreads, int allocBytesPerWorkitem, TRAPS); - + jobject donor_threads, int allocBytesPerWorkitem, TRAPS); + static void register_heap(); static GraalEnv::CodeInstallResult install_code(Handle& compiled_code, CodeBlob*& cb, Handle installed_code, Handle triggered_deoptimizations); @@ -113,6 +112,11 @@ // Registers the implementations for the native methods in HSAILHotSpotBackend static bool register_natives(JNIEnv* env); + virtual const char* name() { return "HSAIL"; } + + virtual void notice_safepoints(); + virtual void ignore_safepoints(); + #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) typedef unsigned long long CUdeviceptr; #else @@ -149,5 +153,8 @@ protected: static void* _device_context; + + // true if safepoints are activated + static jint _notice_safepoints; }; #endif // GPU_HSAIL_HPP diff -r 78f1a1a70628 -r 66e3af78ea96 src/gpu/hsail/vm/vmStructs_hsail.hpp --- a/src/gpu/hsail/vm/vmStructs_hsail.hpp Wed Apr 16 22:54:48 2014 +0200 +++ b/src/gpu/hsail/vm/vmStructs_hsail.hpp Thu Apr 17 00:44:32 2014 +0200 @@ -41,6 +41,7 @@ nonstatic_field(Hsail::HSAILKernelDeoptimization, _actionAndReason, jint) \ nonstatic_field(Hsail::HSAILKernelDeoptimization, _first_frame, HSAILFrame) \ \ + nonstatic_field(Hsail::HSAILDeoptimizationInfo, _notice_safepoints, jint*) \ nonstatic_field(Hsail::HSAILDeoptimizationInfo, _deopt_occurred, jint) \ nonstatic_field(Hsail::HSAILDeoptimizationInfo, _deopt_next_index, jint) \ 
nonstatic_field(Hsail::HSAILDeoptimizationInfo, _donor_threads, JavaThread**) \ diff -r 78f1a1a70628 -r 66e3af78ea96 src/gpu/ptx/vm/gpu_ptx.cpp --- a/src/gpu/ptx/vm/gpu_ptx.cpp Wed Apr 16 22:54:48 2014 +0200 +++ b/src/gpu/ptx/vm/gpu_ptx.cpp Thu Apr 17 00:44:32 2014 +0200 @@ -271,7 +271,7 @@ tty->print_cr("[CUDA] Success: Created context for device: %d", _cu_device); } - gpu::initialized_gpu(device_name); + Gpu::initialized_gpu(new Ptx()); return true; GPU_END diff -r 78f1a1a70628 -r 66e3af78ea96 src/gpu/ptx/vm/gpu_ptx.hpp --- a/src/gpu/ptx/vm/gpu_ptx.hpp Wed Apr 16 22:54:48 2014 +0200 +++ b/src/gpu/ptx/vm/gpu_ptx.hpp Thu Apr 17 00:44:32 2014 +0200 @@ -93,7 +93,7 @@ #define GRAAL_SUPPORTED_COMPUTE_CAPABILITY_VERSION 3.0 -class Ptx { +class Ptx : public Gpu { friend class PtxCall; private: @@ -120,6 +120,9 @@ static int ncores(int major, int minor); public: + + virtual const char* name() { return "PTX"; } + // Registers the implementations for the native methods in PTXHotSpotBackend static bool register_natives(JNIEnv* env); diff -r 78f1a1a70628 -r 66e3af78ea96 src/os/bsd/vm/gpu_bsd.cpp --- a/src/os/bsd/vm/gpu_bsd.cpp Wed Apr 16 22:54:48 2014 +0200 +++ b/src/os/bsd/vm/gpu_bsd.cpp Thu Apr 17 00:44:32 2014 +0200 @@ -27,7 +27,7 @@ #include "hsail/vm/gpu_hsail.hpp" #include "utilities/ostream.hpp" -jobject gpu::probe_gpus(JNIEnv* env) { +jobject Gpu::probe_gpus(JNIEnv* env) { #ifdef __APPLE__ /* * Let the CUDA driver initialization be the gate to GPU for now, pending diff -r 78f1a1a70628 -r 66e3af78ea96 src/os/linux/vm/gpu_linux.cpp --- a/src/os/linux/vm/gpu_linux.cpp Wed Apr 16 22:54:48 2014 +0200 +++ b/src/os/linux/vm/gpu_linux.cpp Thu Apr 17 00:44:32 2014 +0200 @@ -38,7 +38,7 @@ #define PCI_DRIVER_NAME_START_POS 255 -jobject gpu::probe_gpus(JNIEnv* env) { +jobject Gpu::probe_gpus(JNIEnv* env) { bool hsail = false; bool ptx = false; diff -r 78f1a1a70628 -r 66e3af78ea96 src/os/windows/vm/gpu_windows.cpp --- a/src/os/windows/vm/gpu_windows.cpp Wed Apr 16 22:54:48 2014 
+0200 +++ b/src/os/windows/vm/gpu_windows.cpp Thu Apr 17 00:44:32 2014 +0200 @@ -27,7 +27,7 @@ #include "hsail/vm/gpu_hsail.hpp" #include "utilities/ostream.hpp" -jobject gpu::probe_gpus(JNIEnv* env) { +jobject Gpu::probe_gpus(JNIEnv* env) { // TODO: add detection of PTX/NVidia if (Hsail::register_natives(env)) { return env->NewStringUTF("HSAIL"); diff -r 78f1a1a70628 -r 66e3af78ea96 src/share/vm/graal/graalCompilerToVM.cpp --- a/src/share/vm/graal/graalCompilerToVM.cpp Wed Apr 16 22:54:48 2014 +0200 +++ b/src/share/vm/graal/graalCompilerToVM.cpp Thu Apr 17 00:44:32 2014 +0200 @@ -757,7 +757,7 @@ C2V_ENTRY(jobject, getGPUs, (JNIEnv *env, jobject)) #if defined(TARGET_OS_FAMILY_bsd) || defined(TARGET_OS_FAMILY_linux) || defined(TARGET_OS_FAMILY_windows) - return gpu::probe_gpus(env); + return Gpu::probe_gpus(env); #else return env->NewStringUTF(""); #endif diff -r 78f1a1a70628 -r 66e3af78ea96 src/share/vm/graal/graalGlobals.hpp --- a/src/share/vm/graal/graalGlobals.hpp Wed Apr 16 22:54:48 2014 +0200 +++ b/src/share/vm/graal/graalGlobals.hpp Thu Apr 17 00:44:32 2014 +0200 @@ -88,6 +88,9 @@ product(bool, UseHSAILDeoptimization, true, \ "Code gen and runtime support for deoptimizing HSAIL kernels") \ \ + product(bool, UseHSAILSafepoints, true, \ + "Code gen and runtime support for safepoints in HSAIL kernels") \ + \ product(bool, GPUOffload, false, \ "Offload execution to GPU whenever possible") \ \ diff -r 78f1a1a70628 -r 66e3af78ea96 src/share/vm/runtime/gpu.cpp --- a/src/share/vm/runtime/gpu.cpp Wed Apr 16 22:54:48 2014 +0200 +++ b/src/share/vm/runtime/gpu.cpp Thu Apr 17 00:44:32 2014 +0200 @@ -26,11 +26,24 @@ #include "runtime/gpu.hpp" #include "runtime/handles.hpp" -int gpu::_initialized_gpus = 0; +int Gpu::_initialized_gpus_count = 0; +Gpu* Gpu::_initialized_gpus[MAX_GPUS]; -void gpu::initialized_gpu(const char* name) { - _initialized_gpus++; - if (TraceGPUInteraction) { - tty->print_cr("[GPU] registered initialization of %s (total initialized: %d)", name, 
_initialized_gpus); +void Gpu::initialized_gpu(Gpu* gpu) { + // GPUs are always initialized on the same thread so no need for locking + guarantee(_initialized_gpus_count < MAX_GPUS, "oob"); + if (TraceGPUInteraction) { + tty->print_cr("[GPU] registered initialization of %s (total initialized: %d)", gpu->name(), _initialized_gpus_count + 1); + } + _initialized_gpus[_initialized_gpus_count++] = gpu; +} + +void Gpu::safepoint_event(SafepointEvent event) { + for (int i = 0; i < _initialized_gpus_count; i++) { + if (event == SafepointBegin) { + _initialized_gpus[i]->notice_safepoints(); + } else { + _initialized_gpus[i]->ignore_safepoints(); } + } } diff -r 78f1a1a70628 -r 66e3af78ea96 src/share/vm/runtime/gpu.hpp --- a/src/share/vm/runtime/gpu.hpp Wed Apr 16 22:54:48 2014 +0200 +++ b/src/share/vm/runtime/gpu.hpp Thu Apr 17 00:44:32 2014 +0200 @@ -29,21 +29,41 @@ #include "oops/symbol.hpp" #include "utilities/array.hpp" +#define MAX_GPUS 2 + // Defines the interface to the graphics processor(s). -class gpu : AllStatic { +class Gpu { private: - static int _initialized_gpus; // number of initialize GPU devices + static int _initialized_gpus_count; + static Gpu* _initialized_gpus[MAX_GPUS]; public: // Notification of a GPU device that has been initialized. - static void initialized_gpu(const char* name); + static void initialized_gpu(Gpu* gpu); // Gets a comma separated list of supported GPU architecture names. static jobject probe_gpus(JNIEnv* env); // Gets the number of GPU devices that have been initialized. - static int initialized_gpus() { return _initialized_gpus; } + static int initialized_gpus() { return _initialized_gpus_count; } + + enum SafepointEvent { + SafepointBegin, + SafepointEnd + }; + + // Called when a safepoint has been activated. + static void safepoint_event(SafepointEvent event); + + // Name of this GPU + virtual const char* name() = 0; + + // Called when a safepoint has been activated. 
+ virtual void notice_safepoints() {}; + + // Called when a safepoint has been deactivated. + virtual void ignore_safepoints() {}; }; #endif // SHARE_VM_RUNTIME_GPU_HPP diff -r 78f1a1a70628 -r 66e3af78ea96 src/share/vm/runtime/safepoint.cpp --- a/src/share/vm/runtime/safepoint.cpp Wed Apr 16 22:54:48 2014 +0200 +++ b/src/share/vm/runtime/safepoint.cpp Thu Apr 17 00:44:32 2014 +0200 @@ -39,6 +39,7 @@ #include "runtime/compilationPolicy.hpp" #include "runtime/deoptimization.hpp" #include "runtime/frame.inline.hpp" +#include "runtime/gpu.hpp" #include "runtime/interfaceSupport.hpp" #include "runtime/mutexLocker.hpp" #include "runtime/osThread.hpp" @@ -205,6 +206,12 @@ os::make_polling_page_unreadable(); } +#ifdef GRAAL + if (UseHSAILSafepoints) { + Gpu::safepoint_event(Gpu::SafepointBegin); + } +#endif + // Consider using active_processor_count() ... but that call is expensive. int ncpus = os::processor_count() ; @@ -438,6 +445,12 @@ // Remove safepoint check from interpreter Interpreter::ignore_safepoints(); +#ifdef GRAAL + if (UseHSAILSafepoints) { + Gpu::safepoint_event(Gpu::SafepointEnd); + } +#endif + { MutexLocker mu(Safepoint_lock);