# HG changeset patch # User bharadwaj # Date 1378867190 14400 # Node ID 91e5f927af63a32521c390e1ec28f7263314dacb # Parent 003be97acddad8474a04c41e1698437378afe4c7 Initial implementation of PTXRuntime (RegisterConfig, PTX description etc); guarded with new flag UseGPU. Specify -XX:+UseGPU to exercise this new implementation. diff -r 003be97acdda -r 91e5f927af63 graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/FloatPTXTest.java --- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/FloatPTXTest.java Tue Sep 10 16:33:28 2013 -0700 +++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/FloatPTXTest.java Tue Sep 10 22:39:50 2013 -0400 @@ -37,6 +37,8 @@ if (r.getTargetCode() == null) { printReport("Compilation of testAdd2F FAILED"); } + + /* r = compile("testAdd2D"); if (r.getTargetCode() == null) { printReport("Compilation of testAdd2D FAILED"); @@ -58,6 +60,7 @@ if (r.getTargetCode() == null) { printReport("Compilation of testConstD FAILED"); } + */ } public static float testAdd2F(float a, float b) { @@ -84,6 +87,7 @@ return 32.0 + a; } + @Ignore @Test public void testSub() { CompilationResult r = compile("testSub2F"); @@ -141,6 +145,7 @@ return 32.0 - a; } + @Ignore @Test public void testMul() { CompilationResult r = compile("testMul2F"); @@ -198,6 +203,7 @@ return 32.0 * a; } + @Ignore @Test public void testDiv() { CompilationResult r = compile("testDiv2F"); @@ -255,6 +261,7 @@ return 32.0 / a; } + @Ignore @Test public void testNeg() { CompilationResult r = compile("testNeg2F"); @@ -276,6 +283,7 @@ return -a; } + @Ignore @Test public void testRem() { // need linkage to PTX remainder() diff -r 003be97acdda -r 91e5f927af63 graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/IntegerPTXTest.java --- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/IntegerPTXTest.java Tue Sep 10 16:33:28 2013 -0700 +++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/IntegerPTXTest.java Tue Sep 10 22:39:50 2013 -0400 @@ -32,7 +32,16 @@ @Test public void testAdd() { - Integer r4 = (Integer) invoke(compile("testAdd2I"), 18, 24); + Integer r4 = (Integer) invoke(compile("testAdd2B"), (byte) 6, (byte) 4); + if (r4 == null) { + printReport("testAdd2B FAILED"); + } else if (r4.intValue() == testAdd2B((byte) 6, (byte) 4)) { + printReport("testAdd2B PASSED"); + } else { + printReport("testAdd2B FAILED"); + } + + r4 = (Integer) invoke(compile("testAdd2I"), 18, 24); if (r4 == null) { printReport("testAdd2I FAILED"); } else if (r4.intValue() == testAdd2I(18, 24)) { @@ -50,8 +59,6 @@ printReport("testAdd2L FAILED"); } - //invoke(compile("testAdd2B"), (byte) 6, (byte) 4); - r4 = (Integer) invoke(compile("testAddIConst"), 5); if (r4 == null) { printReport("testAddIConst FAILED"); @@ -69,7 +76,6 @@ } else { printReport("testAddConstI FAILED"); } - } public static int testAdd2I(int a, int b) { @@ -288,7 +294,7 @@ public static long testRem2L(long a, long b) { return a % b; } - @Ignore + @Test public void testIntConversion() { Long r1 = (Long) invoke(compile("testI2L"), 8); diff -r 003be97acdda -r 91e5f927af63 graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java --- a/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java Tue Sep 10 16:33:28 2013 -0700 +++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java Tue Sep 10 22:39:50 2013 -0400 @@ -132,8 +132,6 @@ // Start emiting body of the PTX kernel. codeBuffer.emitString0(") {"); codeBuffer.emitString(""); - - codeBuffer.emitString(".reg .u64" + " %rax;"); } // Emit .reg space declarations @@ -144,6 +142,8 @@ final SortedSet signed32 = new TreeSet<>(); final SortedSet signed64 = new TreeSet<>(); + final SortedSet float32 = new TreeSet<>(); + final SortedSet float64 = new TreeSet<>(); ValueProcedure trackRegisterKind = new ValueProcedure() { @@ -159,6 +159,12 @@ case Long: signed64.add(regVal.getRegister().encoding()); break; + case Float: + float32.add(regVal.getRegister().encoding()); + break; + case Double: + float64.add(regVal.getRegister().encoding()); + break; default : throw GraalInternalError.shouldNotReachHere("unhandled register type " + value.toString()); } @@ -179,6 +185,12 @@ for (Integer i : signed64) { codeBuffer.emitString(".reg .s64 %r" + i.intValue() + ";"); } + for (Integer i : float32) { + codeBuffer.emitString(".reg .f32 %r" + i.intValue() + ";"); + } + for (Integer i : float64) { + codeBuffer.emitString(".reg .f64 %r" + i.intValue() + ";"); + } } @Override diff -r 003be97acdda -r 91e5f927af63 graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXLIRGenerator.java --- a/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXLIRGenerator.java Tue Sep 10 16:33:28 2013 -0700 +++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXLIRGenerator.java Tue Sep 10 22:39:50 2013 -0400 @@ -123,7 +123,7 @@ // Additional argument for return value. Value[] params = new Value[argCount + 1]; for (int i = 0; i < argCount; i++) { - params[i] = toParamKind(incomingArguments.getArgument(i)); + params[i] = incomingArguments.getArgument(i); } // Add the return value as the last parameter. params[argCount] = incomingArguments.getReturn(); @@ -131,7 +131,6 @@ append(new PTXParameterOp(params)); for (LocalNode local : graph.getNodes(LocalNode.class)) { Value param = params[local.index()]; - assert param.getKind() == local.kind().getStackKind(); setResult(local, emitLoadParam(param.getKind(), param, null)); } } diff -r 003be97acdda -r 91e5f927af63 graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotBackend.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotBackend.java Tue Sep 10 22:39:50 2013 -0400 @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package com.oracle.graal.hotspot.ptx; + +import com.oracle.graal.api.code.*; +import com.oracle.graal.api.meta.*; +import com.oracle.graal.asm.*; +import com.oracle.graal.compiler.gen.*; +import com.oracle.graal.hotspot.*; +import com.oracle.graal.hotspot.meta.*; +import com.oracle.graal.lir.*; +import com.oracle.graal.lir.asm.*; +import com.oracle.graal.nodes.*; + +/** + * HotSpot PTX specific backend. + */ +public class PTXHotSpotBackend extends HotSpotBackend { + + public PTXHotSpotBackend(HotSpotRuntime runtime, TargetDescription target) { + super(runtime, target); + } + + @Override + public FrameMap newFrameMap() { + throw new InternalError("NYI"); + } + + @Override + public TargetMethodAssembler newAssembler(LIRGenerator lirGen, CompilationResult compilationResult) { + throw new InternalError("NYI"); + } + + @Override + public void emitCode(TargetMethodAssembler tasm, LIRGenerator lirGen, ResolvedJavaMethod installedCodeOwner) { + throw new InternalError("NYI"); + } + + @Override + protected AbstractAssembler createAssembler(FrameMap frameMap) { + throw new InternalError("NYI"); + } + + @Override + public LIRGenerator newLIRGenerator(StructuredGraph graph, FrameMap frameMap, CallingConvention cc, LIR lir) { + throw new InternalError("NYI"); + } +} diff -r 003be97acdda -r 91e5f927af63 graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotGraalRuntime.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotGraalRuntime.java Tue Sep 10 22:39:50 2013 -0400 @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package com.oracle.graal.hotspot.ptx; + +import com.oracle.graal.ptx.*; +import com.oracle.graal.api.code.*; +import com.oracle.graal.api.meta.*; +import com.oracle.graal.hotspot.*; +import com.oracle.graal.hotspot.meta.*; + +/** + * PTX specific implementation of {@link HotSpotGraalRuntime}. + */ +public class PTXHotSpotGraalRuntime extends HotSpotGraalRuntime { + + protected PTXHotSpotGraalRuntime() { + } + + /** + * Called from C++ code to retrieve the singleton instance, creating it first if necessary. + */ + public static HotSpotGraalRuntime makeInstance() { + HotSpotGraalRuntime graalRuntime = graalRuntime(); + if (graalRuntime == null) { + HotSpotGraalRuntimeFactory factory = findFactory("PTX"); + if (factory != null) { + graalRuntime = factory.createRuntime(); + } else { + graalRuntime = new PTXHotSpotGraalRuntime(); + } + graalRuntime.completeInitialization(); + } + return graalRuntime; + } + + protected Architecture createArchitecture() { + return new PTX(); + } + + @Override + protected TargetDescription createTarget() { + final int stackFrameAlignment = 16; + final int implicitNullCheckLimit = 4096; + final boolean inlineObjects = true; + return new TargetDescription(createArchitecture(), true, stackFrameAlignment, implicitNullCheckLimit, inlineObjects); + } + + @Override + protected HotSpotBackend createBackend() { + return new PTXHotSpotBackend(getRuntime(), getTarget()); + } + + @Override + protected HotSpotRuntime createRuntime() { + return new PTXHotSpotRuntime(config, this); + } + + @Override + protected Value[] getNativeABICallerSaveRegisters() { + throw new InternalError("NYI"); + } +} diff -r 003be97acdda -r 91e5f927af63 graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotRegisterConfig.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotRegisterConfig.java Tue Sep 10 22:39:50 2013 -0400 @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package com.oracle.graal.hotspot.ptx; + +import static com.oracle.graal.ptx.PTX.*; + +import java.util.*; + +import com.oracle.graal.ptx.*; +import com.oracle.graal.api.code.*; +import com.oracle.graal.api.code.CallingConvention.Type; +import com.oracle.graal.api.meta.*; +import com.oracle.graal.graph.*; + +public class PTXHotSpotRegisterConfig implements RegisterConfig { + + private final Architecture architecture; + + private final Register[] allocatable; + + private final HashMap categorized = new HashMap<>(); + + private final RegisterAttributes[] attributesMap; + + @Override + public Register[] getAllocatableRegisters() { + return allocatable.clone(); + } + + public Register[] getAllocatableRegisters(PlatformKind kind) { + if (categorized.containsKey(kind)) { + return categorized.get(kind); + } + + ArrayList list = new ArrayList<>(); + for (Register reg : getAllocatableRegisters()) { + if (architecture.canStoreValue(reg.getRegisterCategory(), kind)) { + list.add(reg); + } + } + + Register[] ret = list.toArray(new Register[0]); + categorized.put(kind, ret); + return ret; + } + + @Override + public RegisterAttributes[] getAttributesMap() { + return attributesMap.clone(); + } + + private final Register[] javaGeneralParameterRegisters; + private final Register[] nativeGeneralParameterRegisters; + + private static Register[] initAllocatable() { + Register[] registers = new Register[] { + param0, param1, param2, param3, + param4, param5, param6, param7, + r0, r1, r2, r3, r4, r5, r6, r7, + r8, r9, r10, r11, r12, r13, r14, r15, + retReg, + }; + + return registers; + } + + public PTXHotSpotRegisterConfig(Architecture architecture) { + this.architecture = architecture; + + javaGeneralParameterRegisters = paramRegisters; + nativeGeneralParameterRegisters = gprRegisters; + + allocatable = initAllocatable(); + attributesMap = RegisterAttributes.createMap(this, PTX.allRegisters); + } + + @Override + public Register[] getCallerSaveRegisters() { + // No caller save registers; return empty array + return new Register[]{}; + } + + @Override + public Register getRegisterForRole(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public CallingConvention getCallingConvention(Type type, JavaType returnType, JavaType[] parameterTypes, TargetDescription target, boolean stackOnly) { + if (type == Type.NativeCall) { + return callingConvention(nativeGeneralParameterRegisters, returnType, parameterTypes, type, target, stackOnly); + } + return callingConvention(javaGeneralParameterRegisters, returnType, parameterTypes, type, target, stackOnly); + } + + public Register[] getCallingConventionRegisters(Type type, Kind kind) { + assert architecture.canStoreValue(REG, kind); + return type == Type.NativeCall ? nativeGeneralParameterRegisters : javaGeneralParameterRegisters; + } + + private CallingConvention callingConvention(Register[] generalParameterRegisters, JavaType returnType, JavaType[] parameterTypes, Type type, TargetDescription target, boolean stackOnly) { + AllocatableValue[] locations = new AllocatableValue[parameterTypes.length]; + + int currentGeneral = 0; + int currentStackOffset = 0; + + for (int i = 0; i < parameterTypes.length; i++) { + final Kind kind = parameterTypes[i].getKind(); + + switch (kind) { + case Byte: + case Boolean: + case Short: + case Char: + case Int: + case Long: + case Float: + case Double: + case Object: + if (!stackOnly && currentGeneral < generalParameterRegisters.length) { + Register register = generalParameterRegisters[currentGeneral++]; + locations[i] = register.asValue(kind); + } + break; + default: + throw GraalInternalError.shouldNotReachHere(); + } + + if (locations[i] == null) { + locations[i] = StackSlot.get(kind.getStackKind(), currentStackOffset, !type.out); + currentStackOffset += Math.max(target.arch.getSizeInBytes(kind), target.wordSize); + } + } + + Kind returnKind = returnType == null ? Kind.Void : returnType.getKind(); + AllocatableValue returnLocation = returnKind == Kind.Void ? Value.ILLEGAL : getReturnRegister(returnKind).asValue(returnKind); + return new CallingConvention(currentStackOffset, returnLocation, locations); + } + + @Override + public Register getReturnRegister(Kind kind) { + switch (kind) { + case Boolean: + case Byte: + case Char: + case Short: + case Int: + case Long: + case Object: + case Float: + case Double: + return retReg; + case Void: + case Illegal: + return null; + default: + throw new UnsupportedOperationException("no return register for type " + kind); + } + } + + @Override + public Register getFrameRegister() { + // No frame register + return null; + } + + public CalleeSaveLayout getCalleeSaveLayout() { + return null; + } + + @Override + public String toString() { + return String.format("Allocatable: " + Arrays.toString(getAllocatableRegisters()) + "%n" + "CallerSave: " + Arrays.toString(getCallerSaveRegisters()) + "%n"); + } +} diff -r 003be97acdda -r 91e5f927af63 graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotRuntime.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotRuntime.java Tue Sep 10 22:39:50 2013 -0400 @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package com.oracle.graal.hotspot.ptx; + +import static com.oracle.graal.ptx.PTX.*; + +import com.oracle.graal.api.code.*; +import com.oracle.graal.hotspot.*; +import com.oracle.graal.hotspot.meta.*; +import com.oracle.graal.nodes.spi.*; + +public class PTXHotSpotRuntime extends HotSpotRuntime { + + public PTXHotSpotRuntime(HotSpotVMConfig config, HotSpotGraalRuntime graalRuntime) { + super(config, graalRuntime); + + } + + @Override + public void registerReplacements(Replacements replacements) { + //TODO: Do we need to implement this functionality for PTX? + } + + // PTX code does not use stack or stack pointer + @Override + public Register stackPointerRegister() { + return Register.None; + } + + // PTX code does not have heap register + @Override + public Register heapBaseRegister() { + return Register.None; + } + + // Thread register is %tid. + @Override + public Register threadRegister() { + return tid; + } + + @Override + protected RegisterConfig createRegisterConfig() { + return new PTXHotSpotRegisterConfig(graalRuntime.getTarget().arch); + } +} diff -r 003be97acdda -r 91e5f927af63 graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMemOp.java --- a/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMemOp.java Tue Sep 10 16:33:28 2013 -0700 +++ b/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMemOp.java Tue Sep 10 22:39:50 2013 -0400 @@ -208,6 +208,12 @@ case Long: masm.ld_return_address("u64", asRegister(result), addr.getBase(), addr.getDisplacement()); break; + case Float: + masm.ld_return_address("f32", asRegister(result), addr.getBase(), addr.getDisplacement()); + break; + case Double: + masm.ld_return_address("f64", asRegister(result), addr.getBase(), addr.getDisplacement()); + break; default: throw GraalInternalError.shouldNotReachHere(); } diff -r 003be97acdda -r 91e5f927af63 graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXParameterOp.java --- a/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXParameterOp.java Tue Sep 10 16:33:28 2013 -0700 +++ b/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXParameterOp.java Tue Sep 10 22:39:50 2013 -0400 @@ -48,6 +48,9 @@ for (int i = 0; i < argCount; i++) { Kind paramKind = params[i].getKind(); switch (paramKind) { + case Byte : + masm.param_8_decl(asRegister(params[i]), (i == (argCount - 1))); + break; case Int : masm.param_32_decl(asIntReg(params[i]), (i == (argCount - 1))); break; diff -r 003be97acdda -r 91e5f927af63 graal/com.oracle.graal.ptx/src/com/oracle/graal/ptx/PTX.java --- a/graal/com.oracle.graal.ptx/src/com/oracle/graal/ptx/PTX.java Tue Sep 10 16:33:28 2013 -0700 +++ b/graal/com.oracle.graal.ptx/src/com/oracle/graal/ptx/PTX.java Tue Sep 10 22:39:50 2013 -0400 @@ -35,83 +35,167 @@ */ public class PTX extends Architecture { - public static final RegisterCategory CPU = new RegisterCategory("CPU"); - public static final RegisterCategory FPU = new RegisterCategory("FPU"); + public static final RegisterCategory REG = new RegisterCategory("REG"); + public static final RegisterCategory SREG = new RegisterCategory("SREG"); + public static final RegisterCategory PARAM = new RegisterCategory("PARAM"); + // @formatter:off + /* Parameter State Space + * + * The parameter (.param) state space is used (1) to pass input + * arguments from the host to the kernel, (2a) to declare formal + * input and return parameters for device functions called from + * within kernel execution, and (2b) to declare locally-scoped + * byte array variables that serve as function call arguments, + * typically for passing large structures by value to a function. + * + * TODO: XXX + * The parameters are virtual symbols - just like registers. Bit, + * Till we figure out how to model a virtual register set in Graal, + * we will pretend that we can use only 8 parameters. + */ + + public static final Register param0 = new Register(0, 0, "param0", PARAM); + public static final Register param1 = new Register(1, 1, "param1", PARAM); + public static final Register param2 = new Register(2, 2, "param2", PARAM); + public static final Register param3 = new Register(3, 3, "param3", PARAM); + public static final Register param4 = new Register(4, 4, "param4", PARAM); + public static final Register param5 = new Register(5, 5, "param5", PARAM); + public static final Register param6 = new Register(6, 6, "param6", PARAM); + public static final Register param7 = new Register(7, 7, "param7", PARAM); + /* * Register State Space * - * Registers (.reg state space) are fast storage locations. The number of - * registers is limited, and will vary from platform to platform. When the - * limit is exceeded, register variables will be spilled to memory, causing - * changes in performance. For each architecture, there is a recommended - * maximum number of registers to use (see the "CUDA Programming Guide" for - * details). + * Registers (.reg state space) are fast storage locations. The + * number of GPU architectural registers is limited, and will vary + * from platform to platform. When the limit is exceeded, register + * variables will be spilled to memory, causing changes in + * performance. For each architecture, there is a recommended + * maximum number of registers to use (see the "CUDA Programming + * Guide" for details). + * + * TODD: XXX + * + * However, PTX supports virtual registers. So, the generated PTX + * code does not need to use a specified number of registers. Till + * we figure out how to model a virtual register set in Graal, we + * will pretend that we can use only 16 registers. */ - // General purpose registers - public static final Register r0 = new Register(0, 0, "r0", CPU); - public static final Register r1 = new Register(1, 1, "r1", CPU); - public static final Register r2 = new Register(2, 2, "r2", CPU); - public static final Register r3 = new Register(3, 3, "r3", CPU); - public static final Register r4 = new Register(4, 4, "r4", CPU); - public static final Register r5 = new Register(5, 5, "r5", CPU); - public static final Register r6 = new Register(6, 6, "r6", CPU); - public static final Register r7 = new Register(7, 7, "r7", CPU); + public static final Register r0 = new Register(8, 8, "r0", REG); + public static final Register r1 = new Register(9, 9, "r1", REG); + public static final Register r2 = new Register(10, 10, "r2", REG); + public static final Register r3 = new Register(11, 11, "r3", REG); + public static final Register r4 = new Register(12, 12, "r4", REG); + public static final Register r5 = new Register(13, 13, "r5", REG); + public static final Register r6 = new Register(14, 14, "r6", REG); + public static final Register r7 = new Register(15, 15, "r7", REG); - public static final Register r8 = new Register(8, 8, "r8", CPU); - public static final Register r9 = new Register(9, 9, "r9", CPU); - public static final Register r10 = new Register(10, 10, "r10", CPU); - public static final Register r11 = new Register(11, 11, "r11", CPU); - public static final Register r12 = new Register(12, 12, "r12", CPU); - public static final Register r13 = new Register(13, 13, "r13", CPU); - public static final Register r14 = new Register(14, 14, "r14", CPU); - public static final Register r15 = new Register(15, 15, "r15", CPU); + public static final Register r8 = new Register(16, 16, "r8", REG); + public static final Register r9 = new Register(17, 17, "r9", REG); + public static final Register r10 = new Register(18, 18, "r10", REG); + public static final Register r11 = new Register(19, 19, "r11", REG); + public static final Register r12 = new Register(20, 20, "r12", REG); + public static final Register r13 = new Register(21, 21, "r13", REG); + public static final Register r14 = new Register(22, 22, "r14", REG); + public static final Register r15 = new Register(23, 23, "r15", REG); + + // Define a virtual register that holds return value + public static final Register retReg = new Register(24, 24, "retReg", REG); public static final Register[] gprRegisters = { r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15 }; - // Floating point registers - public static final Register f0 = new Register(16, 0, "f0", FPU); - public static final Register f1 = new Register(17, 1, "f1", FPU); - public static final Register f2 = new Register(18, 2, "f2", FPU); - public static final Register f3 = new Register(19, 3, "f3", FPU); - public static final Register f4 = new Register(20, 4, "f4", FPU); - public static final Register f5 = new Register(21, 5, "f5", FPU); - public static final Register f6 = new Register(22, 6, "f6", FPU); - public static final Register f7 = new Register(23, 7, "f7", FPU); + public static final Register[] paramRegisters = { + param0, param1, param2, param3, param4, param5, param6, param7 + }; + + // PTX ISA Manual: Section 9:. Special Registers - public static final Register f8 = new Register(24, 8, "f8", FPU); - public static final Register f9 = new Register(25, 9, "f9", FPU); - public static final Register f10 = new Register(26, 10, "f10", FPU); - public static final Register f11 = new Register(27, 11, "f11", FPU); - public static final Register f12 = new Register(28, 12, "f12", FPU); - public static final Register f13 = new Register(29, 13, "f13", FPU); - public static final Register f14 = new Register(30, 14, "f14", FPU); - public static final Register f15 = new Register(31, 15, "f15", FPU); + // PTX includes a number of predefined, read-only variables, which + // are visible as special registers and accessed through mov or + // cvt instructions. + // Thread identifier within a Co-operative Thread Array (CTA) - %tid + public static final Register tid = new Register(100, 100, "tid", SREG); + // Number of thread IDs per CTA - %ntid + public static final Register ntid = new Register(101, 101, "ntid", SREG); + // Lane identifier + public static final Register laneid = new Register(102, 102, "laneid", SREG); + // Warp identifier + public static final Register warpid = new Register(103, 103, "warid", SREG); + // Number of warp IDs + public static final Register nwarpid = new Register(104, 104, "nwarpid", SREG); + // CTA identifier + public static final Register ctaid = new Register(105, 105, "ctaid", SREG); + // Number of CTA IDs per grid + public static final Register nctaid = new Register(106, 106, "nctaid", SREG); + // Single Multiprocessor (SM) ID + public static final Register smid = new Register(107, 107, "smid", SREG); + // Number of SM IDs + public static final Register nsmid = new Register(108, 108, "nsmid", SREG); + // Grid ID + public static final Register gridid = new Register(109, 109, "gridid", SREG); + // 32-bit mask with bit set in position equal to thread's lane number in the warp + public static final Register lanemask_eq = new Register(110, 110, "lanemask_eq", SREG); + // 32-bit mask with bits set in positions less than or equal to thread's lane number in the warp + public static final Register lanemask_le = new Register(111, 111, "lanemask_le", SREG); + // 32-bit mask with bits set in positions less than thread's lane number in the warp + public static final Register lanemask_lt = new Register(112, 112, "lanemask_lt", SREG); + // 32-bit mask with bits set in positions greater than or equal to thread's lane number in the warp + public static final Register lanemask_ge = new Register(113, 113, "lanemask_ge", SREG); + // 32-bit mask with bits set in positions greater than thread's lane number in the warp + public static final Register lanemask_gt = new Register(114, 114, "lanemask_gt", SREG); + // A predefined, read-only 32-bit unsigned 32-bit unsigned cycle counter + public static final Register clock = new Register(114, 114, "clock", SREG); + // A predefined, read-only 64-bit unsigned 32-bit unsigned cycle counter + public static final Register clock64 = new Register(115, 115, "clock64", SREG); + // Performance monitoring registers + public static final Register pm0 = new Register(116, 116, "pm0", SREG); + public static final Register pm1 = new Register(117, 117, "pm1", SREG); + public static final Register pm2 = new Register(118, 118, "pm2", SREG); + public static final Register pm3 = new Register(119, 119, "pm3", SREG); + public static final Register pm4 = new Register(120, 120, "pm4", SREG); + public static final Register pm5 = new Register(121, 121, "pm5", SREG); + public static final Register pm6 = new Register(122, 122, "pm6", SREG); + public static final Register pm7 = new Register(123, 123, "pm7", SREG); + // TODO: Add Driver-defined read-only %envreg<32> + // and %globaltimer, %globaltimer_lo and %globaltimer_hi - public static final Register[] fpuRegisters = { - f0, f1, f2, f3, f4, f5, f6, f7, - f8, f9, f10, f11, f12, f13, f14, f15 + public static final Register[] specialRegisters = { + tid, ntid, laneid, warpid, nwarpid, ctaid, + nctaid, smid, nsmid, gridid, + lanemask_eq, lanemask_le, lanemask_lt, lanemask_ge, lanemask_gt, + clock, clock64, + pm0, pm1, pm2, pm3, pm4, pm5, pm6, pm7 }; public static final Register[] allRegisters = { - // GPR + // Parameter State Space + param0, param1, param2, param3, + param4, param5, param6, param7, + // Register State Space r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15, - // FPU - f0, f1, f2, f3, f4, f5, f6, f7, - f8, f9, f10, f11, f12, f13, f14, f15 + // return register + retReg, + // Special Register State Space - SREG + tid, ntid, laneid, warpid, nwarpid, ctaid, + nctaid, smid, nsmid, gridid, + lanemask_eq, lanemask_le, lanemask_lt, lanemask_ge, lanemask_gt, + clock, clock64, + pm0, pm1, pm2, pm3, pm4, pm5, pm6, pm7 }; // @formatter:on public PTX() { - super("PTX", 8, ByteOrder.LITTLE_ENDIAN, false, allRegisters, LOAD_STORE | STORE_STORE, 0, r15.encoding + 1, 8); + super("PTX", 8, ByteOrder.LITTLE_ENDIAN, false, allRegisters, + LOAD_STORE | STORE_STORE, 0, r15.encoding + 1, 8); } @Override @@ -121,7 +205,7 @@ } Kind kind = (Kind) platformKind; - if (category == CPU) { + if (category == REG) { switch (kind) { case Boolean: case Byte: @@ -130,10 +214,6 @@ case Int: case Long: case Object: - return true; - } - } else if (category == FPU) { - switch (kind) { case Float: case Double: return true; @@ -145,12 +225,12 @@ @Override public PlatformKind getLargestStorableKind(RegisterCategory category) { - if (category == CPU) { - return Kind.Long; - } else if (category == FPU) { + if (category == REG) { return Kind.Double; } else { return Kind.Illegal; } } + + } diff -r 003be97acdda -r 91e5f927af63 mx/projects --- a/mx/projects Tue Sep 10 16:33:28 2013 -0700 +++ b/mx/projects Tue Sep 10 22:39:50 2013 -0400 @@ -28,7 +28,7 @@ library@OKRA@urls=http://cr.openjdk.java.net/~tdeneau/okra-1.2.jar distribution@GRAAL@path=graal.jar -distribution@GRAAL@dependencies=com.oracle.graal.hotspot.amd64,com.oracle.graal.truffle,com.oracle.graal.truffle.printer,com.oracle.graal.hotspot.sparc,com.oracle.graal.hotspot,com.oracle.graal.compiler.hsail +distribution@GRAAL@dependencies=com.oracle.graal.hotspot.amd64,com.oracle.graal.hotspot.ptx,com.oracle.graal.truffle,com.oracle.graal.truffle.printer,com.oracle.graal.hotspot.sparc,com.oracle.graal.hotspot,com.oracle.graal.compiler.hsail # graal.api.runtime project@com.oracle.graal.api.runtime@subDir=graal @@ -134,6 +134,15 @@ project@com.oracle.graal.hotspot.sparc@javaCompliance=1.7 project@com.oracle.graal.hotspot.sparc@workingSets=Graal,HotSpot,SPARC +# graal.hotspot.ptx +project@com.oracle.graal.hotspot.ptx@subDir=graal +project@com.oracle.graal.hotspot.ptx@sourceDirs=src +project@com.oracle.graal.hotspot.ptx@dependencies=com.oracle.graal.hotspot,com.oracle.graal.ptx +project@com.oracle.graal.hotspot.ptx@checkstyle=com.oracle.graal.graph +project@com.oracle.graal.hotspot.ptx@annotationProcessors=com.oracle.graal.service.processor +project@com.oracle.graal.hotspot.ptx@javaCompliance=1.7 +project@com.oracle.graal.hotspot.ptx@workingSets=Graal,HotSpot,PTX + # graal.hotspot.server project@com.oracle.graal.hotspot.server@subDir=graal project@com.oracle.graal.hotspot.server@sourceDirs=src diff -r 003be97acdda -r 91e5f927af63 src/gpu/ptx/vm/gpu_ptx.cpp --- a/src/gpu/ptx/vm/gpu_ptx.cpp Tue Sep 10 16:33:28 2013 -0700 +++ b/src/gpu/ptx/vm/gpu_ptx.cpp Tue Sep 10 22:39:50 2013 -0400 @@ -29,7 +29,7 @@ #include "utilities/ostream.hpp" #include "memory/allocation.hpp" #include "memory/allocation.inline.hpp" -#include "kernelArguments.hpp" +#include "ptxKernelArguments.hpp" void * gpu::Ptx::_device_context; int gpu::Ptx::_cu_device = 0; diff -r 003be97acdda -r 91e5f927af63 src/gpu/ptx/vm/kernelArguments.cpp --- a/src/gpu/ptx/vm/kernelArguments.cpp Tue Sep 10 16:33:28 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "precompiled.hpp" -#include "kernelArguments.hpp" -#include "runtime/javaCalls.hpp" - -gpu::Ptx::cuda_cu_memalloc_func_t gpu::Ptx::_cuda_cu_memalloc; -gpu::Ptx::cuda_cu_memcpy_htod_func_t gpu::Ptx::_cuda_cu_memcpy_htod; - -// Get next java argument -oop PTXKernelArguments::next_arg(BasicType expectedType) { - assert(_index < _args->length(), "out of bounds"); - oop arg=((objArrayOop) (_args))->obj_at(_index++); - assert(expectedType == T_OBJECT || java_lang_boxing_object::is_instance(arg, expectedType), "arg type mismatch"); - return arg; -} - -void PTXKernelArguments::do_int() { - // If the parameter is a return value, - if (is_return_type()) { - // Allocate device memory for T_INT return value pointer on device. Size in bytes - int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_INT_BYTE_SIZE); - if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); - _success = false; - return; - } - // Push _return_value_ptr to _kernelBuffer - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; - _bufferOffset += sizeof(_return_value_ptr); - } - else { - // Get the next java argument and its value which should be a T_INT - oop arg = next_arg(T_INT); - // Copy the java argument value to kernelArgBuffer - jvalue intval; - if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) { - tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT"); - _success = false; - return; - } - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = intval.i; - _bufferOffset += sizeof(intval.i); - } - return; -} - -void PTXKernelArguments::do_long() { - // If the parameter is a return value, - if (is_return_type()) { - // Allocate device memory for T_LONG return value pointer on device. Size in bytes - int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_LONG_BYTE_SIZE); - if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); - _success = false; - return; - } - // Push _return_value_ptr to _kernelBuffer - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; - _bufferOffset += sizeof(_return_value_ptr); - } - else { - // Get the next java argument and its value which should be a T_LONG - oop arg = next_arg(T_LONG); - // Copy the java argument value to kernelArgBuffer - jvalue val; - if (java_lang_boxing_object::get_value(arg, &val) != T_LONG) { - tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_LONG"); - _success = false; - return; - } - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.j; - _bufferOffset += sizeof(val.j); - } - return; -} - -void PTXKernelArguments::do_byte() { - // If the parameter is a return value, - if (is_return_type()) { - // Allocate device memory for T_BYTE return value pointer on device. Size in bytes - int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BYTE_SIZE); - if (status != GRAAL_CUDA_SUCCESS) { - tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); - _success = false; - return; - } - // Push _return_value_ptr to _kernelBuffer - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; - _bufferOffset += sizeof(_return_value_ptr); - } - else { - // Get the next java argument and its value which should be a T_BYTE - oop arg = next_arg(T_BYTE); - // Copy the java argument value to kernelArgBuffer - jvalue val; - if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) { - tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE"); - _success = false; - return; - } - *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b; - _bufferOffset += sizeof(val.b); - } - return; -} - -// TODO implement other do_* diff -r 003be97acdda -r 91e5f927af63 src/gpu/ptx/vm/kernelArguments.hpp --- a/src/gpu/ptx/vm/kernelArguments.hpp Tue Sep 10 16:33:28 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#ifndef KERNEL_ARGUMENTS_PTX_HPP -#define KERNEL_ARGUMENTS_PTX_HPP - -#include "runtime/gpu.hpp" -#include "runtime/signature.hpp" - -#define T_BYTE_SIZE 1 -#define T_INT_BYTE_SIZE 4 -#define T_LONG_BYTE_SIZE 8 - -class PTXKernelArguments : public SignatureIterator { -public: - // Buffer holding CUdeviceptr values that represent the kernel arguments - char _kernelArgBuffer[1024]; - // Current offset into _kernelArgBuffer - size_t _bufferOffset; - gpu::Ptx::CUdeviceptr _return_value_ptr; -private: - // Array of java argument oops - arrayOop _args; - // Current index into _args - int _index; - // Flag to indicate successful creation of kernel argument buffer - bool _success; - // Get next java argument - oop next_arg(BasicType expectedType); - - public: - PTXKernelArguments(Symbol* signature, arrayOop args, bool is_static) : SignatureIterator(signature) { - this->_return_type = T_ILLEGAL; - _index = 0; - _args = args; - _success = true; - _bufferOffset = 0; - _return_value_ptr = 0; - if (!is_static) { - // TODO : Create a device argument for receiver object and add it to _kernelBuffer - tty->print_cr("{CUDA] ****** TODO: Support for execution of non-static java methods not implemented yet."); - } - // Iterate over the entire signature - iterate(); - assert((_success && (_index == args->length())), "arg count mismatch with signature"); - } - - inline char* device_argument_buffer() { - return _kernelArgBuffer; - } - - inline size_t device_argument_buffer_size() { - return _bufferOffset; - } - - // Get the return oop value - oop get_return_oop(); - - // get device return value ptr - gpu::Ptx::CUdeviceptr get_return_value_ptr() { - return _return_value_ptr; - } - - - void do_byte(); - void do_int(); - void do_long(); - - inline void do_bool() { - /* TODO : To be implemented */ - guarantee(false, "NYI"); - } - inline void do_char() { - /* TODO : To be implemented */ - guarantee(false, "NYI"); - } - inline void do_short() { - /* TODO : To be implemented */ - guarantee(false, "NYI"); - } - inline void do_float() { - /* TODO : To be implemented */ - guarantee(false, "NYI"); - } - inline void do_double() { - /* TODO : To be implemented */ - guarantee(false, "NYI"); - } - - inline void do_object() { - /* TODO : To be implemented */ - guarantee(false, "NYI"); - } - inline void do_object(int begin, int end) { - /* TODO : To be implemented */ - guarantee(false, "NYI"); - } - inline void do_array(int begin, int end) { - /* TODO : To be implemented */ - guarantee(false, "NYI"); - } - inline void do_void() { - /* TODO : To be implemented */ - guarantee(false, "NYI"); - } -}; - -#endif // KERNEL_ARGUMENTS_HPP diff -r 003be97acdda -r 91e5f927af63 src/gpu/ptx/vm/ptxKernelArguments.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/gpu/ptx/vm/ptxKernelArguments.cpp Tue Sep 10 22:39:50 2013 -0400 @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "ptxKernelArguments.hpp" +#include "runtime/javaCalls.hpp" + +gpu::Ptx::cuda_cu_memalloc_func_t gpu::Ptx::_cuda_cu_memalloc; +gpu::Ptx::cuda_cu_memcpy_htod_func_t gpu::Ptx::_cuda_cu_memcpy_htod; + +// Get next java argument +oop PTXKernelArguments::next_arg(BasicType expectedType) { + assert(_index < _args->length(), "out of bounds"); + oop arg=((objArrayOop) (_args))->obj_at(_index++); + assert(expectedType == T_OBJECT || java_lang_boxing_object::is_instance(arg, expectedType), "arg type mismatch"); + return arg; +} + +void PTXKernelArguments::do_int() { + // If the parameter is a return value, + if (is_return_type()) { + // Allocate device memory for T_INT return value pointer on device. Size in bytes + int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_INT_BYTE_SIZE); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); + _success = false; + return; + } + // Push _return_value_ptr to _kernelBuffer + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; + _bufferOffset += sizeof(_return_value_ptr); + } + else { + // Get the next java argument and its value which should be a T_INT + oop arg = next_arg(T_INT); + // Copy the java argument value to kernelArgBuffer + jvalue intval; + if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) { + tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT"); + _success = false; + return; + } + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = intval.i; + _bufferOffset += sizeof(intval.i); + } + return; +} + +void PTXKernelArguments::do_long() { + // If the parameter is a return value, + if (is_return_type()) { + // Allocate device memory for T_LONG return value pointer on device. Size in bytes + int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_LONG_BYTE_SIZE); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); + _success = false; + return; + } + // Push _return_value_ptr to _kernelBuffer + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; + _bufferOffset += sizeof(_return_value_ptr); + } + else { + // Get the next java argument and its value which should be a T_LONG + oop arg = next_arg(T_LONG); + // Copy the java argument value to kernelArgBuffer + jvalue val; + if (java_lang_boxing_object::get_value(arg, &val) != T_LONG) { + tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_LONG"); + _success = false; + return; + } + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.j; + _bufferOffset += sizeof(val.j); + } + return; +} + +void PTXKernelArguments::do_byte() { + // If the parameter is a return value, + if (is_return_type()) { + // Allocate device memory for T_BYTE return value pointer on device. Size in bytes + int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BYTE_SIZE); + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status); + _success = false; + return; + } + // Push _return_value_ptr to _kernelBuffer + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr; + _bufferOffset += sizeof(_return_value_ptr); + } + else { + // Get the next java argument and its value which should be a T_BYTE + oop arg = next_arg(T_BYTE); + // Copy the java argument value to kernelArgBuffer + jvalue val; + if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) { + tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE"); + _success = false; + return; + } + *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b; + _bufferOffset += sizeof(val.b); + } + return; +} + +// TODO implement other do_* diff -r 003be97acdda -r 91e5f927af63 src/gpu/ptx/vm/ptxKernelArguments.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/gpu/ptx/vm/ptxKernelArguments.hpp Tue Sep 10 22:39:50 2013 -0400 @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef KERNEL_ARGUMENTS_PTX_HPP +#define KERNEL_ARGUMENTS_PTX_HPP + +#include "runtime/gpu.hpp" +#include "runtime/signature.hpp" + +#define T_BYTE_SIZE 1 +#define T_INT_BYTE_SIZE 4 +#define T_LONG_BYTE_SIZE 8 + +class PTXKernelArguments : public SignatureIterator { +public: + // Buffer holding CUdeviceptr values that represent the kernel arguments + char _kernelArgBuffer[1024]; + // Current offset into _kernelArgBuffer + size_t _bufferOffset; + gpu::Ptx::CUdeviceptr _return_value_ptr; +private: + // Array of java argument oops + arrayOop _args; + // Current index into _args + int _index; + // Flag to indicate successful creation of kernel argument buffer + bool _success; + // Get next java argument + oop next_arg(BasicType expectedType); + + public: + PTXKernelArguments(Symbol* signature, arrayOop args, bool is_static) : SignatureIterator(signature) { + this->_return_type = T_ILLEGAL; + _index = 0; + _args = args; + _success = true; + _bufferOffset = 0; + _return_value_ptr = 0; + if (!is_static) { + // TODO : Create a device argument for receiver object and add it to _kernelBuffer + tty->print_cr("{CUDA] ****** TODO: Support for execution of non-static java methods not implemented yet."); + } + // Iterate over the entire signature + iterate(); + assert((_success && (_index == args->length())), "arg count mismatch with signature"); + } + + inline char* device_argument_buffer() { + return _kernelArgBuffer; + } + + inline size_t device_argument_buffer_size() { + return _bufferOffset; + } + + // Get the return oop value + oop get_return_oop(); + + // get device return value ptr + gpu::Ptx::CUdeviceptr get_return_value_ptr() { + return _return_value_ptr; + } + + + void do_byte(); + void do_int(); + void do_long(); + + inline void do_bool() { + /* TODO : To be implemented */ + guarantee(false, "NYI"); + } + inline void do_char() { + /* TODO : To be implemented */ + guarantee(false, "NYI"); + } + inline void do_short() { + /* TODO : To be implemented */ + guarantee(false, "NYI"); + } + inline void do_float() { + /* TODO : To be implemented */ + guarantee(false, "NYI"); + } + inline void do_double() { + /* TODO : To be implemented */ + guarantee(false, "NYI"); + } + + inline void do_object() { + /* TODO : To be implemented */ + guarantee(false, "NYI"); + } + inline void do_object(int begin, int end) { + /* TODO : To be implemented */ + guarantee(false, "NYI"); + } + inline void do_array(int begin, int end) { + /* TODO : To be implemented */ + guarantee(false, "NYI"); + } + inline void do_void() { + /* TODO : To be implemented */ + guarantee(false, "NYI"); + } +}; + +#endif // KERNEL_ARGUMENTS_HPP diff -r 003be97acdda -r 91e5f927af63 src/share/vm/classfile/vmSymbols.hpp --- a/src/share/vm/classfile/vmSymbols.hpp Tue Sep 10 16:33:28 2013 -0700 +++ b/src/share/vm/classfile/vmSymbols.hpp Tue Sep 10 22:39:50 2013 -0400 @@ -311,6 +311,7 @@ template(com_oracle_graal_hotspot_meta_HotSpotResolvedObjectType, "com/oracle/graal/hotspot/meta/HotSpotResolvedObjectType") \ template(com_oracle_graal_hotspot_meta_HotSpotMonitorValue, "com/oracle/graal/hotspot/meta/HotSpotMonitorValue") \ template(com_oracle_graal_hotspot_debug_LocalImpl, "com/oracle/graal/hotspot/debug/LocalImpl") \ + template(com_oracle_graal_hotspot_ptx_PTXHotSpotGraalRuntime, "com/oracle/graal/hotspot/ptx/PTXHotSpotGraalRuntime")\ AMD64_ONLY(template(com_oracle_graal_hotspot_amd64_AMD64HotSpotGraalRuntime,"com/oracle/graal/hotspot/amd64/AMD64HotSpotGraalRuntime"))\ SPARC_ONLY(template(com_oracle_graal_hotspot_sparc_SPARCHotSpotGraalRuntime,"com/oracle/graal/hotspot/sparc/SPARCHotSpotGraalRuntime"))\ /* graal.api.meta */ \ diff -r 003be97acdda -r 91e5f927af63 src/share/vm/graal/graalCompilerToGPU.cpp --- a/src/share/vm/graal/graalCompilerToGPU.cpp Tue Sep 10 16:33:28 2013 -0700 +++ b/src/share/vm/graal/graalCompilerToGPU.cpp Tue Sep 10 22:39:50 2013 -0400 @@ -28,7 +28,7 @@ #include "graal/graalJavaAccess.hpp" #include "runtime/gpu.hpp" #include "runtime/javaCalls.hpp" -# include "ptx/vm/kernelArguments.hpp" +# include "ptx/vm/ptxKernelArguments.hpp" // Entry to native method implementation that transitions current thread to '_thread_in_vm'. #define C2V_VMENTRY(result_type, name, signature) \ diff -r 003be97acdda -r 91e5f927af63 src/share/vm/graal/graalVMToCompiler.cpp --- a/src/share/vm/graal/graalVMToCompiler.cpp Tue Sep 10 16:33:28 2013 -0700 +++ b/src/share/vm/graal/graalVMToCompiler.cpp Tue Sep 10 22:39:50 2013 -0400 @@ -24,6 +24,7 @@ #include "precompiled.hpp" #include "classfile/systemDictionary.hpp" #include "graal/graalVMToCompiler.hpp" +#include "runtime/gpu.hpp" // this is a *global* handle jobject VMToCompiler::_graalRuntimePermObject = NULL; @@ -60,7 +61,12 @@ Handle VMToCompiler::graalRuntime() { if (JNIHandles::resolve(_graalRuntimePermObject) == NULL) { #ifdef AMD64 - Symbol* name = vmSymbols::com_oracle_graal_hotspot_amd64_AMD64HotSpotGraalRuntime(); + Symbol* name = NULL; + if (UseGPU && gpu::is_available() && gpu::has_gpu_linkage()) { + name = vmSymbols::com_oracle_graal_hotspot_ptx_PTXHotSpotGraalRuntime(); + } else { + name = vmSymbols::com_oracle_graal_hotspot_amd64_AMD64HotSpotGraalRuntime(); + } #endif #ifdef SPARC Symbol* name = vmSymbols::com_oracle_graal_hotspot_sparc_SPARCHotSpotGraalRuntime(); diff -r 003be97acdda -r 91e5f927af63 src/share/vm/runtime/globals.hpp --- a/src/share/vm/runtime/globals.hpp Tue Sep 10 16:33:28 2013 -0700 +++ b/src/share/vm/runtime/globals.hpp Tue Sep 10 22:39:50 2013 -0400 @@ -3717,8 +3717,11 @@ product(bool , AllowNonVirtualCalls, false, \ "Obey the ACC_SUPER flag and allow invokenonvirtual calls") \ \ - product(bool, TraceGPUInteraction, false, \ - "Trace external GPU warp loading") \ + product(bool, TraceGPUInteraction, false, \ + "Trace external GPU Interaction") \ + \ + product(bool, UseGPU, false, \ + "Run code on GPU") \ \ diagnostic(ccstr, SharedArchiveFile, NULL, \ "Override the default location of the CDS archive file") \