# HG changeset patch # User bharadwaj # Date 1378306057 14400 # Node ID c99e6578593623d127588a6f55077c89ee16adf3 # Parent db297343d44e19b7477d6fb5134c7d7769d93d43 Improvements to PTX codegen; allows more PTX tests that run on the device to pass. diff -r db297343d44e -r c99e65785936 graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java --- a/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java Wed Sep 04 14:56:30 2013 +0200 +++ b/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java Wed Sep 04 10:47:37 2013 -0400 @@ -272,69 +272,69 @@ } public final void ld_global_b8(Register d, Register a, long immOff) { - emitString("ld.global.b8" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + ""); + emitString("ld.global.b8" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + ""); } public final void ld_global_b16(Register d, Register a, long immOff) { - emitString("ld.global.b16" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + ""); + emitString("ld.global.b16" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + ""); } public final void ld_global_b32(Register d, Register a, long immOff) { - emitString("ld.global.b32" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + ""); + emitString("ld.global.b32" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + ""); } public final void ld_global_b64(Register d, Register a, long immOff) { - emitString("ld.global.b64" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + ""); + emitString("ld.global.b64" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + ""); } public final void ld_global_u8(Register d, Register a, long immOff) { - emitString("ld.global.u8" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + ""); + 
emitString("ld.global.u8" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + ""); } public final void ld_global_u16(Register d, Register a, long immOff) { - emitString("ld.global.u16" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + ""); + emitString("ld.global.u16" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + ""); } public final void ld_global_u32(Register d, Register a, long immOff) { - emitString("ld.global.u32" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + ""); + emitString("ld.global.u32" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + ""); } public final void ld_global_u64(Register d, Register a, long immOff) { - emitString("ld.global.u64" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + ""); + emitString("ld.global.u64" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + ""); } public final void ld_global_s8(Register d, Register a, long immOff) { - emitString("ld.global.s8" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + ""); + emitString("ld.global.s8" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + ""); } public final void ld_global_s16(Register d, Register a, long immOff) { - emitString("ld.global.s16" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + ""); + emitString("ld.global.s16" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + ""); } public final void ld_global_s32(Register d, Register a, long immOff) { - emitString("ld.global.s32" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + ""); + emitString("ld.global.s32" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + ""); } public final void ld_global_s64(Register d, Register a, long immOff) { - emitString("ld.global.s64" + " " + "%r" + d.encoding() + ", 
[" + a.toString() + " + " + immOff + "]" + ";" + ""); + emitString("ld.global.s64" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + ""); } public final void ld_global_f32(Register d, Register a, long immOff) { - emitString("ld.global.f32" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + ""); + emitString("ld.global.f32" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + ""); } public final void ld_global_f64(Register d, Register a, long immOff) { - emitString("ld.global.f64" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + ""); + emitString("ld.global.f64" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + ""); } // Load from state space to destination register public final void ld_from_state_space(String s, Register d, Register a, long immOff) { - emitString("ld" + s + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + ""); + emitString("ld" + s + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + ""); } // Load return address from return parameter which is in .param state space public final void ld_return_address(String s, Register d, Register a, long immOff) { - emitString("ld.param." + s + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + ""); + emitString("ld.param." 
+ s + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + ""); } public final void mov_b16(Register d, Register a) { @@ -429,68 +429,68 @@ emitString("mov.f64" + " " + "%r" + d.encoding() + ", " + f64 + ";" + ""); } - public final void mul_f32(Register d, Register a, Register b) { - emitString("mul.f32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void mul_lo_f32(Register d, Register a, Register b) { + emitString("mul.f32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); /* PTX .lo/.hi/.wide modifiers exist only for integer mul; floating-point multiply is plain mul.f32 */ } - public final void mul_f64(Register d, Register a, Register b) { - emitString("smul.f64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void mul_lo_f64(Register d, Register a, Register b) { + emitString("mul.f64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); /* same as above: no .lo modifier on floating-point mul */ } - public final void mul_s16(Register d, Register a, Register b) { - emitString("mul.s16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void mul_lo_s16(Register d, Register a, Register b) { + emitString("mul.lo.s16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void mul_s32(Register d, Register a, Register b) { - emitString("mul.s32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void mul_lo_s32(Register d, Register a, Register b) { + emitString("mul.lo.s32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void mul_s64(Register d, Register a, Register b) { - emitString("mul.s64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void mul_lo_s64(Register d, Register a, Register b) { + emitString("mul.lo.s64" + " " + "%r" + d.encoding() + ", %r"
+ a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void mul_s16(Register d, Register a, short s16) { - emitString("mul.s16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + s16 + ";" + ""); + public final void mul_lo_s16(Register d, Register a, short s16) { + emitString("mul.lo.s16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + s16 + ";" + ""); } - public final void mul_s32(Register d, Register a, int s32) { - emitString("mul.s32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + s32 + ";" + ""); + public final void mul_lo_s32(Register d, Register a, int s32) { + emitString("mul.lo.s32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + s32 + ";" + ""); } - public final void mul_s64(Register d, Register a, long s64) { - emitString("mul.s64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + s64 + ";" + ""); + public final void mul_lo_s64(Register d, Register a, long s64) { + emitString("mul.lo.s64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + s64 + ";" + ""); } - public final void mul_f32(Register d, Register a, float f32) { - emitString("mul.f32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + f32 + ";" + ""); + public final void mul_lo_f32(Register d, Register a, float f32) { + emitString("mul.f32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + f32 + ";" + ""); /* PTX .lo/.hi/.wide modifiers exist only for integer mul; floating-point multiply is plain mul.f32 */ } - public final void mul_f64(Register d, Register a, double f64) { - emitString("mul.f64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + f64 + ";" + ""); + public final void mul_lo_f64(Register d, Register a, double f64) { + emitString("mul.f64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + f64 + ";" + ""); /* same as above: no .lo modifier on floating-point mul */ } - public final void mul_u16(Register d, Register a, Register b) { - emitString("mul.u16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void mul_lo_u16(Register d, Register a, Register b) { +
emitString("mul.lo.u16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void mul_u32(Register d, Register a, Register b) { - emitString("mul.u32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void mul_lo_u32(Register d, Register a, Register b) { + emitString("mul.lo.u32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void mul_u64(Register d, Register a, Register b) { - emitString("mul.u64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void mul_lo_u64(Register d, Register a, Register b) { + emitString("mul.lo.u64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void mul_u16(Register d, Register a, short u16) { - emitString("mul.u16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u16 + ";" + ""); + public final void mul_lo_u16(Register d, Register a, short u16) { + emitString("mul.lo.u16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u16 + ";" + ""); } - public final void mul_u32(Register d, Register a, int u32) { - emitString("mul.u32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u32 + ";" + ""); + public final void mul_lo_u32(Register d, Register a, int u32) { + emitString("mul.lo.u32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u32 + ";" + ""); } - public final void mul_u64(Register d, Register a, long u64) { - emitString("mul.u64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u64 + ";" + ""); + public final void mul_lo_u64(Register d, Register a, long u64) { + emitString("mul.lo.u64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u64 + ";" + ""); } public final void neg_f32(Register d, Register a) { @@ -550,15 +550,15 @@ } public final void param_8_decl(Register d, boolean lastParam) { - 
emitString(".param" + " " + ".s8" + " " + d.toString() + (lastParam ? "" : ",")); + emitString(".param" + " " + ".s8" + " " + d + (lastParam ? "" : ",")); } public final void param_32_decl(Register d, boolean lastParam) { - emitString(".param" + " " + ".s32" + " " + d.toString() + (lastParam ? "" : ",")); + emitString(".param" + " " + ".s32" + " " + d + (lastParam ? "" : ",")); } public final void param_64_decl(Register d, boolean lastParam) { - emitString(".param" + " " + ".s64" + " " + d.toString() + (lastParam ? "" : ",")); + emitString(".param" + " " + ".s64" + " " + d + (lastParam ? "" : ",")); } public final void popc_b32(Register d, Register a) { @@ -849,54 +849,32 @@ emitString("setp.ge.u32" + " " + "%p" + ", " + u32 + ", %r" + b.encoding() + ";" + ""); } - public final void shl_s16(Register d, Register a, Register b) { - emitString("shl.s16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); - } - - public final void shl_s32(Register d, Register a, Register b) { - emitString("shl.s32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + // Shift left - only types supported are .b16, .b32 and .b64 + public final void shl_b16(Register d, Register a, Register b) { + emitString("shl.b16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void shl_s64(Register d, Register a, Register b) { - emitString("shl.s64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void shl_b32(Register d, Register a, Register b) { + emitString("shl.b32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void shl_s16(Register d, Register a, int u32) { - emitString("shl.s16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u32 + ";" + ""); - } - - public final void shl_s32(Register d, Register a, int u32) { - emitString("shl.s32" + " " 
+ "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u32 + ";" + ""); - } - - public final void shl_s64(Register d, Register a, int u32) { - emitString("shl.s64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u32 + ";" + ""); + public final void shl_b64(Register d, Register a, Register b) { + emitString("shl.b64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } - public final void shl_u16(Register d, Register a, Register b) { - emitString("shl.u16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); - } - - public final void shl_u32(Register d, Register a, Register b) { - emitString("shl.u32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); - } - - public final void shl_u64(Register d, Register a, Register b) { - emitString("shl.u64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); + public final void shl_b16_const(Register d, Register a, int b) { + emitString("shl.b16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + b + ";" + ""); } - public final void shl_u16(Register d, Register a, int u32) { - emitString("shl.u16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u32 + ";" + ""); + public final void shl_b32_const(Register d, Register a, int b) { + emitString("shl.b32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + b + ";" + ""); } - public final void shl_u32(Register d, Register a, int u32) { - emitString("shl.u32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u32 + ";" + ""); + public final void shl_b64_const(Register d, Register a, int b) { + emitString("shl.b64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + b + ";" + ""); } - public final void shl_u64(Register d, Register a, int u32) { - emitString("shl.u64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u32 + ";" + ""); - } - + // Shift Right instruction 
public final void shr_s16(Register d, Register a, Register b) { emitString("shr.s16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + ""); } diff -r db297343d44e -r c99e65785936 graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/IntegerPTXTest.java --- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/IntegerPTXTest.java Wed Sep 04 14:56:30 2013 +0200 +++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/IntegerPTXTest.java Wed Sep 04 10:47:37 2013 -0400 @@ -22,7 +22,7 @@ */ package com.oracle.graal.compiler.ptx.test; -import org.junit.Test; +import org.junit.*; import java.lang.reflect.Method; @@ -32,10 +32,19 @@ @Test public void testAdd() { + Integer r4 = (Integer) invoke(compile("testAdd2I"), 18, 24); + if (r4 == null) { + printReport("testAdd2I FAILED"); + } else if (r4.intValue() == testAdd2I(18, 24)) { + printReport("testAdd2I PASSED"); + } else { + printReport("testAdd2I FAILED"); + } + Long r2 = (Long) invoke(compile("testAdd2L"), (long) 12, (long) 6); if (r2 == null) { printReport("testAdd2L FAILED"); - } else if (r2.longValue() == 18) { + } else if (r2.longValue() == testAdd2L(12, 6)) { printReport("testAdd2L PASSED"); } else { printReport("testAdd2L FAILED"); @@ -43,10 +52,10 @@ //invoke(compile("testAdd2B"), (byte) 6, (byte) 4); - Integer r4 = (Integer) invoke(compile("testAddIConst"), 5); + r4 = (Integer) invoke(compile("testAddIConst"), 5); if (r4 == null) { printReport("testAddIConst FAILED"); - } else if (r4.intValue() == 37) { + } else if (r4.intValue() == testAddIConst(5)) { printReport("testAddIConst PASSED"); } else { printReport("testAddIConst FAILED"); @@ -55,20 +64,12 @@ r4 = (Integer) invoke(compile("testAddConstI"), 7); if (r4 == null) { printReport("testAddConstI FAILED"); - } else if (r4.intValue() == 39) { + } else if (r4.intValue() == testAddConstI(7)) { printReport("testAddConstI PASSED"); } else { 
printReport("testAddConstI FAILED"); } - r4 = (Integer) invoke(compile("testAdd2I"), 18, 24); - if (r4 == null) { - printReport("testAdd2I FAILED"); - } else if (r4.intValue() == 42) { - printReport("testAdd2I PASSED"); - } else { - printReport("testAdd2I FAILED"); - } } public static int testAdd2I(int a, int b) { @@ -93,20 +94,21 @@ @Test public void testSub() { - Long r2 = (Long) invoke(compile("testSub2L"), (long) 12, (long) 6); - if (r2 == null) { - printReport("testSub2I FAILED (null return value)"); - } else if (r2.longValue() == 6) { + + Integer r1 = (Integer) invoke(compile("testSub2I"), 18, 4); + + if (r1 == null) { + printReport("testSub2I FAILED"); + } else if (r1.intValue() == testSub2I(18, 4)) { printReport("testSub2I PASSED"); } else { printReport("testSub2I FAILED"); } - Integer r1 = (Integer) invoke(compile("testSub2I"), 18, 4); - - if (r1 == null) { - printReport("testSub2I FAILED"); - } else if (r1.intValue() == 14) { + Long r2 = (Long) invoke(compile("testSub2L"), (long) 12, (long) 6); + if (r2 == null) { + printReport("testSub2I FAILED (null return value)"); + } else if (r2.longValue() == testSub2L(12, 6)) { printReport("testSub2I PASSED"); } else { printReport("testSub2I FAILED"); @@ -115,7 +117,7 @@ r1 = (Integer) invoke(compile("testSubIConst"), 35); if (r1 == null) { printReport("testSubIConst FAILED"); - } else if (r1.intValue() == 3) { + } else if (r1.intValue() == testSubIConst(35)) { printReport("testSubIConst PASSED"); } else { printReport("testSubIConst FAILED"); @@ -124,7 +126,7 @@ r1 = (Integer) invoke(compile("testSubConstI"), 12); if (r1 == null) { printReport("testSubConstI FAILED"); - } else if (r1.intValue() == 20) { + } else if (r1.intValue() == testSubConstI(12)) { printReport("testSubConstI PASSED"); } else { printReport("testSubConstI FAILED"); @@ -149,10 +151,42 @@ @Test public void testMul() { - invoke(compile("testMul2I"), 8, 4); - invoke(compile("testMul2L"), (long) 12, (long) 6); - invoke(compile("testMulIConst"), 4); - 
invoke(compile("testMulConstI"), 5); + + Integer r1 = (Integer) invoke(compile("testMul2I"), 8, 4); + if (r1 == null) { + printReport("testMul2I FAILED"); + } else if (r1.intValue() == testMul2I(8, 4)) { + printReport("testMul2I PASSED"); + } else { + printReport("testMul2I FAILED"); + } + + Long r2 = (Long) invoke(compile("testMul2L"), (long) 12, (long) 6); + if (r2 == null) { + printReport("testMul2L FAILED"); + } else if (r2.longValue() == testMul2L(12, 6)) { + printReport("testMul2L PASSED"); + } else { + printReport("testMul2L FAILED"); + } + + r1 = (Integer) invoke(compile("testMulIConst"), 4); + if (r1 == null) { + printReport("testMulIConst FAILED"); + } else if (r1.intValue() == testMulIConst(4)) { + printReport("testMulIConst PASSED"); + } else { + printReport("testMulIConst FAILED"); + } + + r1 = (Integer) invoke(compile("testMulConstI"), 5); + if (r1 == null) { + printReport("testMulConstI FAILED"); + } else if (r1.intValue() == testMulConstI(5)) { + printReport("testMulConstI PASSED"); + } else { + printReport("testMulConstI FAILED"); + } } public static int testMul2I(int a, int b) { @@ -170,12 +204,44 @@ public static int testMulConstI(int a) { return 32 * a; } + @Test public void testDiv() { - invoke(compile("testDiv2I"), 8, 4); - invoke(compile("testDiv2L"), (long) 12, (long) 6); - invoke(compile("testDivIConst"), 64); - invoke(compile("testDivConstI"), 8); + Integer r1 = (Integer) invoke(compile("testDiv2I"), 8, 4); + if (r1 == null) { + printReport("testDiv2I FAILED (null value returned)"); + } else if (r1.intValue() == testDiv2I(8, 4)) { + printReport("testDiv2I PASSED"); + } else { + printReport("testDiv2I FAILED"); + } + + Long r2 = (Long) invoke(compile("testDiv2L"), (long) 12, (long) 6); + if (r2 == null) { + printReport("testDiv2L FAILED (null value returned)"); + } else if (r2.longValue() == testDiv2L(12, 6)) { + printReport("testDiv2L PASSED"); + } else { + printReport("testDiv2L FAILED"); + } + + r1 = (Integer) 
invoke(compile("testDivIConst"), 64); + if (r1 == null) { + printReport("testDivIConst FAILED (null value returned)"); + } else if (r1.intValue() == testDivIConst(64)) { + printReport("testDivIConst PASSED"); + } else { + printReport("testDivIConst FAILED"); + } + + r1 = (Integer) invoke(compile("testDivConstI"), 8); + if (r1 == null) { + printReport("testDivConstI FAILED (null value returned)"); + } else if (r1.intValue() == testDivConstI(8)) { + printReport("testDivConstI PASSED"); + } else { + printReport("testDivConstI FAILED"); + } } public static int testDiv2I(int a, int b) { @@ -196,8 +262,23 @@ @Test public void testRem() { - invoke(compile("testRem2I"), 8, 4); - invoke(compile("testRem2L"), (long) 12, (long) 6); + Integer r1 = (Integer) invoke(compile("testRem2I"), 8, 4); + if (r1 == null) { + printReport("testRem2I FAILED (null value returned)"); + } else if (r1.intValue() == testRem2I(8, 4)) { + printReport("testRem2I PASSED"); + } else { + printReport("testRem2I FAILED"); + } + + Long r2 = (Long) invoke(compile("testRem2L"), (long) 12, (long) 6); + if (r2 == null) { + printReport("testRem2L FAILED (null value returned)"); + } else if (r2.longValue() == testRem2L(12, 6)) { /* check the long kernel result r2, not the earlier int result r1 */ + printReport("testRem2L PASSED"); + } else { + printReport("testRem2L FAILED"); + } } public static int testRem2I(int a, int b) { @@ -207,11 +288,27 @@ public static long testRem2L(long a, long b) { return a % b; } - + @Ignore @Test public void testIntConversion() { - invoke(compile("testI2L"), 8); - invoke(compile("testL2I"), (long) 12); + Long r1 = (Long) invoke(compile("testI2L"), 8); + if (r1 == null) { + printReport("testI2L FAILED (null value returned)"); + } else if (r1.longValue() == testI2L(8)) { + printReport("testI2L PASSED"); + } else { + printReport("testI2L FAILED"); + } + + Integer r2 = (Integer) invoke(compile("testL2I"), (long) 12); + if (r2 == null) { + printReport("testL2I FAILED (null value returned)"); + } else if (r2.intValue() == testL2I(12)) { /* check the int kernel result r2, not the earlier I2L result r1 */ +
printReport("testL2I PASSED"); + } else { + printReport("testL2I FAILED"); + } + // invoke(compile("testI2C"), 65); // invoke(compile("testI2B"), 9); // invoke(compile("testI2F"), 17); diff -r db297343d44e -r c99e65785936 graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java --- a/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java Wed Sep 04 14:56:30 2013 +0200 +++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java Wed Sep 04 10:47:37 2013 -0400 @@ -174,7 +174,7 @@ } for (Integer i : signed32) { - codeBuffer.emitString(" .reg .s32 %r" + i.intValue() + ";"); + codeBuffer.emitString(".reg .s32 %r" + i.intValue() + ";"); } for (Integer i : signed64) { codeBuffer.emitString(".reg .s64 %r" + i.intValue() + ";"); diff -r db297343d44e -r c99e65785936 graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXArithmetic.java --- a/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXArithmetic.java Wed Sep 04 14:56:30 2013 +0200 +++ b/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXArithmetic.java Wed Sep 04 10:47:37 2013 -0400 @@ -316,7 +316,7 @@ masm.cvt_f32_f64(asFloatReg(dst), asDoubleReg(src)); break; case LSHL: - masm.shl_s64(asLongReg(dst), asLongReg(dst), asIntReg(src)); + masm.shl_b64(asLongReg(dst), asLongReg(dst), asIntReg(src)); break; case LSHR: masm.shr_s64(asLongReg(dst), asLongReg(dst), asIntReg(src)); @@ -360,19 +360,19 @@ switch (opcode) { case IADD: masm.add_s32(asIntReg(dst), asIntReg(src1), tasm.asIntConst(src2)); break; case ISUB: masm.sub_s32(asIntReg(dst), asIntReg(src1), tasm.asIntConst(src2)); break; - case IMUL: masm.mul_s32(asIntReg(dst), asIntReg(src1), tasm.asIntConst(src2)); break; + case IMUL: masm.mul_lo_s32(asIntReg(dst), asIntReg(src1), tasm.asIntConst(src2)); break; case IAND: masm.and_b32(asIntReg(dst), asIntReg(src1), tasm.asIntConst(src2)); break; - case ISHL: masm.shl_s32(asIntReg(dst), asIntReg(src1), 
tasm.asIntConst(src2)); break; + case ISHL: masm.shl_b32_const(asIntReg(dst), asIntReg(src1), tasm.asIntConst(src2)); break; case ISHR: masm.shr_s32(asIntReg(dst), asIntReg(src1), tasm.asIntConst(src2)); break; case IUSHR: masm.shr_u32(asIntReg(dst), asIntReg(src1), tasm.asIntConst(src2)); break; case IXOR: masm.xor_b32(asIntReg(dst), asIntReg(src1), tasm.asIntConst(src2)); break; case LXOR: masm.xor_b64(asLongReg(dst), asLongReg(src1), tasm.asLongConst(src2)); break; case LUSHR: masm.shr_u64(asLongReg(dst), asLongReg(src1), tasm.asLongConst(src2)); break; case FADD: masm.add_f32(asFloatReg(dst), asFloatReg(src1), tasm.asFloatConst(src2)); break; - case FMUL: masm.mul_f32(asFloatReg(dst), asFloatReg(src1), tasm.asFloatConst(src2)); break; + case FMUL: masm.mul_lo_f32(asFloatReg(dst), asFloatReg(src1), tasm.asFloatConst(src2)); break; case FDIV: masm.div_f32(asFloatReg(dst), asFloatReg(src1), tasm.asFloatConst(src2)); break; case DADD: masm.add_f64(asDoubleReg(dst), asDoubleReg(src1), tasm.asDoubleConst(src2)); break; - case DMUL: masm.mul_f64(asDoubleReg(dst), asDoubleReg(src1), tasm.asDoubleConst(src2)); break; + case DMUL: masm.mul_lo_f64(asDoubleReg(dst), asDoubleReg(src1), tasm.asDoubleConst(src2)); break; case DDIV: masm.div_f64(asDoubleReg(dst), asDoubleReg(src1), tasm.asDoubleConst(src2)); break; default: throw GraalInternalError.shouldNotReachHere(); @@ -387,34 +387,34 @@ // case D: new Mul(Double, dst, src1, src2); case IADD: masm.add_s32(asIntReg(dst), asIntReg(src1), asIntReg(src2)); break; case ISUB: masm.sub_s32(asIntReg(dst), asIntReg(src1), asIntReg(src2)); break; - case IMUL: masm.mul_s32(asIntReg(dst), asIntReg(src1), asIntReg(src2)); break; + case IMUL: masm.mul_lo_s32(asIntReg(dst), asIntReg(src1), asIntReg(src2)); break; case IDIV: masm.div_s32(asIntReg(dst), asIntReg(src1), asIntReg(src2)); break; case IAND: masm.and_b32(asIntReg(dst), asIntReg(src1), asIntReg(src2)); break; case IOR: masm.or_b32(asIntReg(dst), asIntReg(src1), asIntReg(src2)); 
break; case IXOR: masm.xor_b32(asIntReg(dst), asIntReg(src1), asIntReg(src2)); break; - case ISHL: masm.shl_s32(asIntReg(dst), asIntReg(src1), asIntReg(src2)); break; + case ISHL: masm.shl_b32(asIntReg(dst), asIntReg(src1), asIntReg(src2)); break; case ISHR: masm.shr_s32(asIntReg(dst), asIntReg(src1), asIntReg(src2)); break; case IUSHR: masm.shr_u32(asIntReg(dst), asIntReg(src1), asIntReg(src2)); break; case IREM: masm.rem_s32(asIntReg(dst), asIntReg(src1), asIntReg(src2)); break; case LADD: masm.add_s64(asLongReg(dst), asLongReg(src1), asLongReg(src2)); break; case LSUB: masm.sub_s64(asLongReg(dst), asLongReg(src1), asLongReg(src2)); break; - case LMUL: masm.mul_s64(asLongReg(dst), asLongReg(src1), asLongReg(src2)); break; + case LMUL: masm.mul_lo_s64(asLongReg(dst), asLongReg(src1), asLongReg(src2)); break; case LDIV: masm.div_s64(asLongReg(dst), asLongReg(src1), asLongReg(src2)); break; case LAND: masm.and_b64(asLongReg(dst), asLongReg(src1), asLongReg(src2)); break; - case LOR: masm.or_b64(asLongReg(dst), asLongReg(src1), asLongReg(src2)); break; + case LOR: masm.or_b64(asLongReg(dst), asLongReg(src1), asLongReg(src2)); break; case LXOR: masm.xor_b64(asLongReg(dst), asLongReg(src1), asLongReg(src2)); break; - case LSHL: masm.shl_s64(asLongReg(dst), asLongReg(src1), asLongReg(src2)); break; + case LSHL: masm.shl_b64(asLongReg(dst), asLongReg(src1), asLongReg(src2)); break; case LSHR: masm.shr_s64(asLongReg(dst), asLongReg(src1), asLongReg(src2)); break; case LUSHR: masm.shr_u64(asLongReg(dst), asLongReg(src1), asIntReg(src2)); break; case LREM: masm.rem_s64(asLongReg(dst), asLongReg(src1), asLongReg(src2)); break; case FADD: masm.add_f32(asFloatReg(dst), asFloatReg(src1), asFloatReg(src2)); break; case FSUB: masm.sub_f32(asFloatReg(dst), asFloatReg(src1), asFloatReg(src2)); break; - case FMUL: masm.mul_f32(asFloatReg(dst), asFloatReg(src1), asFloatReg(src2)); break; + case FMUL: masm.mul_lo_f32(asFloatReg(dst), asFloatReg(src1), asFloatReg(src2)); break; case 
FDIV: masm.div_f32(asFloatReg(dst), asFloatReg(src1), asFloatReg(src2)); break; case FREM: masm.div_f32(asFloatReg(dst), asFloatReg(src1), asFloatReg(src2)); break; case DADD: masm.add_f64(asDoubleReg(dst), asDoubleReg(src1), asDoubleReg(src2)); break; case DSUB: masm.sub_f64(asDoubleReg(dst), asDoubleReg(src1), asDoubleReg(src2)); break; - case DMUL: masm.mul_f64(asDoubleReg(dst), asDoubleReg(src1), asDoubleReg(src2)); break; + case DMUL: masm.mul_lo_f64(asDoubleReg(dst), asDoubleReg(src1), asDoubleReg(src2)); break; case DDIV: masm.div_f64(asDoubleReg(dst), asDoubleReg(src1), asDoubleReg(src2)); break; case DREM: masm.div_f64(asDoubleReg(dst), asDoubleReg(src1), asDoubleReg(src2)); break; default: diff -r db297343d44e -r c99e65785936 graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMemOp.java --- a/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMemOp.java Wed Sep 04 14:56:30 2013 +0200 +++ b/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMemOp.java Wed Sep 04 10:47:37 2013 -0400 @@ -158,7 +158,7 @@ masm.ld_from_state_space(".param.s16", asRegister(result), addr.getBase(), addr.getDisplacement()); break; case Char: - masm.ld_from_state_space(".param.s16", asRegister(result), addr.getBase(), addr.getDisplacement()); + masm.ld_from_state_space(".param.u16", asRegister(result), addr.getBase(), addr.getDisplacement()); break; case Int: masm.ld_from_state_space(".param.s32", asRegister(result), addr.getBase(), addr.getDisplacement()); diff -r db297343d44e -r c99e65785936 graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXParameterOp.java --- a/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXParameterOp.java Wed Sep 04 14:56:30 2013 +0200 +++ b/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXParameterOp.java Wed Sep 04 10:47:37 2013 -0400 @@ -42,23 +42,23 @@ @Override public void emitCode(TargetMethodAssembler tasm) { - PTXAssembler ptxasm = (PTXAssembler) tasm.asm; + PTXAssembler 
masm = (PTXAssembler) tasm.asm; // Emit parameter directives for arguments int argCount = params.length; for (int i = 0; i < argCount; i++) { Kind paramKind = params[i].getKind(); switch (paramKind) { case Int : - ptxasm.param_32_decl(asIntReg(params[i]), (i == (argCount - 1))); + masm.param_32_decl(asIntReg(params[i]), (i == (argCount - 1))); break; case Long : - ptxasm.param_64_decl(asLongReg(params[i]), (i == (argCount - 1))); + masm.param_64_decl(asLongReg(params[i]), (i == (argCount - 1))); break; case Float : - ptxasm.param_32_decl(asFloatReg(params[i]), (i == (argCount - 1))); + masm.param_32_decl(asFloatReg(params[i]), (i == (argCount - 1))); break; case Double : - ptxasm.param_64_decl(asDoubleReg(params[i]), (i == (argCount - 1))); + masm.param_64_decl(asDoubleReg(params[i]), (i == (argCount - 1))); break; default : throw GraalInternalError.shouldNotReachHere("unhandled parameter type " + paramKind.toString()); diff -r db297343d44e -r c99e65785936 src/gpu/ptx/vm/gpu_ptx.cpp --- a/src/gpu/ptx/vm/gpu_ptx.cpp Wed Sep 04 14:56:30 2013 +0200 +++ b/src/gpu/ptx/vm/gpu_ptx.cpp Wed Sep 04 10:47:37 2013 -0400 @@ -38,6 +38,7 @@ gpu::Ptx::cuda_cu_ctx_create_func_t gpu::Ptx::_cuda_cu_ctx_create; gpu::Ptx::cuda_cu_ctx_destroy_func_t gpu::Ptx::_cuda_cu_ctx_destroy; gpu::Ptx::cuda_cu_ctx_synchronize_func_t gpu::Ptx::_cuda_cu_ctx_synchronize; +gpu::Ptx::cuda_cu_ctx_set_current_func_t gpu::Ptx::_cuda_cu_ctx_set_current; gpu::Ptx::cuda_cu_device_get_count_func_t gpu::Ptx::_cuda_cu_device_get_count; gpu::Ptx::cuda_cu_device_get_name_func_t gpu::Ptx::_cuda_cu_device_get_name; gpu::Ptx::cuda_cu_device_get_func_t gpu::Ptx::_cuda_cu_device_get; @@ -87,7 +88,7 @@ tty->print_cr("Failed to initialize CUDA device"); return false; } - + if (TraceGPUInteraction) { tty->print_cr("CUDA driver initialization: Success"); } @@ -108,7 +109,7 @@ if (TraceGPUInteraction) { tty->print_cr("[CUDA] Number of compute-capable devices found: %d", device_count); } - + /* Get the handle to the first 
compute device */ int device_id = 0; /* Compute-capable device handle */ @@ -195,12 +196,6 @@ jit_options[2] = GRAAL_CU_JIT_MAX_REGISTERS; jit_option_values[2] = (void *)(size_t)jit_register_count; - if (TraceGPUInteraction) { - tty->print_cr("[CUDA] PTX Kernel\n%s", code); - tty->print_cr("[CUDA] Function name : %s", name); - - } - /* Create CUDA context to compile and execute the kernel */ int status = _cuda_cu_ctx_create(&_device_context, 0, _cu_device); @@ -213,6 +208,23 @@ tty->print_cr("[CUDA] Success: Created context for device: %d", _cu_device); } + status = _cuda_cu_ctx_set_current(_device_context); + + if (status != GRAAL_CUDA_SUCCESS) { + tty->print_cr("[CUDA] Failed to set current context for device: %d", _cu_device); + return NULL; + } + + if (TraceGPUInteraction) { + tty->print_cr("[CUDA] Success: Set current context for device: %d", _cu_device); + } + + if (TraceGPUInteraction) { + tty->print_cr("[CUDA] PTX Kernel\n%s", code); + tty->print_cr("[CUDA] Function name : %s", name); + + } + /* Load module's data with compiler options */ status = _cuda_cu_module_load_data_ex(&cu_module, (void*) code, jit_num_options, jit_options, (void **)jit_option_values); @@ -220,7 +232,7 @@ if (status == GRAAL_CUDA_ERROR_NO_BINARY_FOR_GPU) { tty->print_cr("[CUDA] Check for malformed PTX kernel or incorrect PTX compilation options"); } - tty->print_cr("[CUDA] *** Error (%d) Failed to load module data with online compiler options for method %s", + tty->print_cr("[CUDA] *** Error (%d) Failed to load module data with online compiler options for method %s", status, name); return NULL; } @@ -255,7 +267,7 @@ unsigned int blockX = 1; unsigned int blockY = 1; unsigned int blockZ = 1; - + struct CUfunc_st* cu_function = (struct CUfunc_st*) kernel; void * config[5] = { @@ -366,7 +378,7 @@ if (cuda_library_name != NULL) { char *buffer = (char*)malloc(STD_BUFFER_SIZE); void *handle = os::dll_load(cuda_library_name, buffer, STD_BUFFER_SIZE); - free(buffer); + free(buffer); if 
(handle != NULL) { _cuda_cu_init = CAST_TO_FN_PTR(cuda_cu_init_func_t, os::dll_lookup(handle, "cuInit")); @@ -376,6 +388,8 @@ CAST_TO_FN_PTR(cuda_cu_ctx_destroy_func_t, os::dll_lookup(handle, "cuCtxDestroy")); _cuda_cu_ctx_synchronize = CAST_TO_FN_PTR(cuda_cu_ctx_synchronize_func_t, os::dll_lookup(handle, "cuCtxSynchronize")); + _cuda_cu_ctx_set_current = + CAST_TO_FN_PTR(cuda_cu_ctx_set_current_func_t, os::dll_lookup(handle, "cuCtxSetCurrent")); _cuda_cu_device_get_count = CAST_TO_FN_PTR(cuda_cu_device_get_count_func_t, os::dll_lookup(handle, "cuDeviceGetCount")); _cuda_cu_device_get_name = @@ -416,4 +430,3 @@ tty->print_cr("Failed to find CUDA linkage"); return false; } - diff -r db297343d44e -r c99e65785936 src/gpu/ptx/vm/gpu_ptx.hpp --- a/src/gpu/ptx/vm/gpu_ptx.hpp Wed Sep 04 14:56:30 2013 +0200 +++ b/src/gpu/ptx/vm/gpu_ptx.hpp Wed Sep 04 10:47:37 2013 -0400 @@ -87,6 +87,7 @@ typedef int (*cuda_cu_ctx_create_func_t)(void*, int, int); typedef int (*cuda_cu_ctx_destroy_func_t)(void*); typedef int (*cuda_cu_ctx_synchronize_func_t)(void); + typedef int (*cuda_cu_ctx_set_current_func_t)(void*); typedef int (*cuda_cu_device_get_count_func_t)(int*); typedef int (*cuda_cu_device_get_name_func_t)(char*, int, int); typedef int (*cuda_cu_device_get_func_t)(int*, int); @@ -98,7 +99,7 @@ unsigned int, void*, void**, void**); typedef int (*cuda_cu_module_get_function_func_t)(void*, void*, const char*); typedef int (*cuda_cu_module_load_data_ex_func_t)(void*, void*, unsigned int, void*, void**); - typedef int (*cuda_cu_memalloc_func_t)(void*, unsigned int); + typedef int (*cuda_cu_memalloc_func_t)(void*, size_t); typedef int (*cuda_cu_memfree_func_t)(gpu::Ptx::CUdeviceptr); typedef int (*cuda_cu_memcpy_htod_func_t)(gpu::Ptx::CUdeviceptr, const void*, unsigned int); typedef int (*cuda_cu_memcpy_dtoh_func_t)(const void*, gpu::Ptx::CUdeviceptr, unsigned int); @@ -120,6 +121,7 @@ static cuda_cu_memfree_func_t _cuda_cu_memfree; static cuda_cu_memcpy_htod_func_t 
_cuda_cu_memcpy_htod; static cuda_cu_memcpy_dtoh_func_t _cuda_cu_memcpy_dtoh; + static cuda_cu_ctx_set_current_func_t _cuda_cu_ctx_set_current; protected: static void* _device_context; diff -r db297343d44e -r c99e65785936 src/gpu/ptx/vm/kernelArguments.hpp --- a/src/gpu/ptx/vm/kernelArguments.hpp Wed Sep 04 14:56:30 2013 +0200 +++ b/src/gpu/ptx/vm/kernelArguments.hpp Wed Sep 04 10:47:37 2013 -0400 @@ -56,6 +56,7 @@ _args = args; _success = true; _bufferOffset = 0; + _return_value_ptr = 0; if (!is_static) { // TODO : Create a device argument for receiver object and add it to _kernelBuffer tty->print_cr("{CUDA] ****** TODO: Support for execution of non-static java methods not implemented yet.");