Mercurial > hg > graal-compiler
comparison src/cpu/x86/vm/templateInterpreter_x86_64.cpp @ 11080:b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
Summary: add intrinsics using new instruction to interpreter, C1, C2, for suitable x86; add test
Reviewed-by: kvn, twisti
author | drchase |
---|---|
date | Tue, 02 Jul 2013 20:42:12 -0400 |
parents | 603ca7e51354 |
children | 6b0fd0964b87 ca0165daa6ec afbe18ae0905 |
comparison
equal
deleted
inserted
replaced
11079:738e04fb1232 | 11080:b800986664f4 |
---|---|
838 // If G1 is not enabled then attempt to go through the accessor entry point | 838 // If G1 is not enabled then attempt to go through the accessor entry point |
839 // Reference.get is an accessor | 839 // Reference.get is an accessor |
840 return generate_accessor_entry(); | 840 return generate_accessor_entry(); |
841 } | 841 } |
842 | 842 |
843 /** | |
844 * Method entry for static native methods: | |
845 * int java.util.zip.CRC32.update(int crc, int b) | |
846 */ | |
847 address InterpreterGenerator::generate_CRC32_update_entry() { | |
848 if (UseCRC32Intrinsics) { | |
849 address entry = __ pc(); | |
850 | |
851 // rbx: Method* | |
852 // rsi: senderSP must be preserved for slow path, set SP to it on fast path | |
853 // rdx: scratch | |
854 // rdi: scratch | |
855 | |
856 Label slow_path; | |
857 // If we need a safepoint check, generate full interpreter entry. | |
858 ExternalAddress state(SafepointSynchronize::address_of_state()); | |
859 __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()), | |
860 SafepointSynchronize::_not_synchronized); | |
861 __ jcc(Assembler::notEqual, slow_path); | |
862 | |
863 // We don't generate local frame and don't align stack because | |
864 // we call stub code and there is no safepoint on this path. | |
865 | |
866 // Load parameters | |
867 const Register crc = rax; // crc | |
868 const Register val = rdx; // source java byte value | |
869 const Register tbl = rdi; // scratch | |
870 | |
871 // Arguments are reversed on java expression stack | |
872 __ movl(val, Address(rsp, wordSize)); // byte value | |
873 __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC | |
874 | |
875 __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr())); | |
876 __ notl(crc); // ~crc | |
877 __ update_byte_crc32(crc, val, tbl); | |
878 __ notl(crc); // ~crc | |
879 // result in rax | |
880 | |
881 // _areturn | |
882 __ pop(rdi); // get return address | |
883 __ mov(rsp, rsi); // set sp to sender sp | |
884 __ jmp(rdi); | |
885 | |
886 // generate a vanilla native entry as the slow path | |
887 __ bind(slow_path); | |
888 | |
889 (void) generate_native_entry(false); | |
890 | |
891 return entry; | |
892 } | |
893 return generate_native_entry(false); | |
894 } | |
895 | |
896 /** | |
897 * Method entry for static native methods: | |
898 * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) | |
899 * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) | |
900 */ | |
901 address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { | |
902 if (UseCRC32Intrinsics) { | |
903 address entry = __ pc(); | |
904 | |
905 // rbx: Method* | |
906 // r13: senderSP must be preserved for slow path, set SP to it on fast path | |
907 | |
908 Label slow_path; | |
909 // If we need a safepoint check, generate full interpreter entry. | |
910 ExternalAddress state(SafepointSynchronize::address_of_state()); | |
911 __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()), | |
912 SafepointSynchronize::_not_synchronized); | |
913 __ jcc(Assembler::notEqual, slow_path); | |
914 | |
915 // We don't generate local frame and don't align stack because | |
916 // we call stub code and there is no safepoint on this path. | |
917 | |
918 // Load parameters | |
919 const Register crc = c_rarg0; // crc | |
920 const Register buf = c_rarg1; // source java byte array address | |
921 const Register len = c_rarg2; // length | |
922 | |
923 // Arguments are reversed on java expression stack | |
924 __ movl(len, Address(rsp, wordSize)); // Length | |
925 // Calculate address of start element | |
926 if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { | |
927 __ movptr(buf, Address(rsp, 3*wordSize)); // long buf | |
928 __ addptr(buf, Address(rsp, 2*wordSize)); // + offset | |
929 __ movl(crc, Address(rsp, 5*wordSize)); // Initial CRC | |
930 } else { | |
931 __ movptr(buf, Address(rsp, 3*wordSize)); // byte[] array | |
932 __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size | |
933 __ addptr(buf, Address(rsp, 2*wordSize)); // + offset | |
934 __ movl(crc, Address(rsp, 4*wordSize)); // Initial CRC | |
935 } | |
936 | |
937 __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len); | |
938 // result in rax | |
939 | |
940 // _areturn | |
941 __ pop(rdi); // get return address | |
942 __ mov(rsp, r13); // set sp to sender sp | |
943 __ jmp(rdi); | |
944 | |
945 // generate a vanilla native entry as the slow path | |
946 __ bind(slow_path); | |
947 | |
948 (void) generate_native_entry(false); | |
949 | |
950 return entry; | |
951 } | |
952 return generate_native_entry(false); | |
953 } | |
843 | 954 |
844 // Interpreter stub for calling a native method. (asm interpreter) | 955 // Interpreter stub for calling a native method. (asm interpreter) |
845 // This sets up a somewhat different looking stack for calling the | 956 // This sets up a somewhat different looking stack for calling the |
846 // native method than the typical interpreter frame setup. | 957 // native method than the typical interpreter frame setup. |
847 address InterpreterGenerator::generate_native_entry(bool synchronized) { | 958 address InterpreterGenerator::generate_native_entry(bool synchronized) { |
1508 address AbstractInterpreterGenerator::generate_method_entry( | 1619 address AbstractInterpreterGenerator::generate_method_entry( |
1509 AbstractInterpreter::MethodKind kind) { | 1620 AbstractInterpreter::MethodKind kind) { |
1510 // determine code generation flags | 1621 // determine code generation flags |
1511 bool synchronized = false; | 1622 bool synchronized = false; |
1512 address entry_point = NULL; | 1623 address entry_point = NULL; |
1624 InterpreterGenerator* ig_this = (InterpreterGenerator*)this; | |
1513 | 1625 |
1514 switch (kind) { | 1626 switch (kind) { |
1515 case Interpreter::zerolocals : break; | 1627 case Interpreter::zerolocals : break; |
1516 case Interpreter::zerolocals_synchronized: synchronized = true; break; | 1628 case Interpreter::zerolocals_synchronized: synchronized = true; break; |
1517 case Interpreter::native : entry_point = ((InterpreterGenerator*)this)->generate_native_entry(false); break; | 1629 case Interpreter::native : entry_point = ig_this->generate_native_entry(false); break; |
1518 case Interpreter::native_synchronized : entry_point = ((InterpreterGenerator*)this)->generate_native_entry(true); break; | 1630 case Interpreter::native_synchronized : entry_point = ig_this->generate_native_entry(true); break; |
1519 case Interpreter::empty : entry_point = ((InterpreterGenerator*)this)->generate_empty_entry(); break; | 1631 case Interpreter::empty : entry_point = ig_this->generate_empty_entry(); break; |
1520 case Interpreter::accessor : entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry(); break; | 1632 case Interpreter::accessor : entry_point = ig_this->generate_accessor_entry(); break; |
1521 case Interpreter::abstract : entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry(); break; | 1633 case Interpreter::abstract : entry_point = ig_this->generate_abstract_entry(); break; |
1522 | 1634 |
1523 case Interpreter::java_lang_math_sin : // fall thru | 1635 case Interpreter::java_lang_math_sin : // fall thru |
1524 case Interpreter::java_lang_math_cos : // fall thru | 1636 case Interpreter::java_lang_math_cos : // fall thru |
1525 case Interpreter::java_lang_math_tan : // fall thru | 1637 case Interpreter::java_lang_math_tan : // fall thru |
1526 case Interpreter::java_lang_math_abs : // fall thru | 1638 case Interpreter::java_lang_math_abs : // fall thru |
1527 case Interpreter::java_lang_math_log : // fall thru | 1639 case Interpreter::java_lang_math_log : // fall thru |
1528 case Interpreter::java_lang_math_log10 : // fall thru | 1640 case Interpreter::java_lang_math_log10 : // fall thru |
1529 case Interpreter::java_lang_math_sqrt : // fall thru | 1641 case Interpreter::java_lang_math_sqrt : // fall thru |
1530 case Interpreter::java_lang_math_pow : // fall thru | 1642 case Interpreter::java_lang_math_pow : // fall thru |
1531 case Interpreter::java_lang_math_exp : entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind); break; | 1643 case Interpreter::java_lang_math_exp : entry_point = ig_this->generate_math_entry(kind); break; |
1532 case Interpreter::java_lang_ref_reference_get | 1644 case Interpreter::java_lang_ref_reference_get |
1533 : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break; | 1645 : entry_point = ig_this->generate_Reference_get_entry(); break; |
1646 case Interpreter::java_util_zip_CRC32_update | |
1647 : entry_point = ig_this->generate_CRC32_update_entry(); break; | |
1648 case Interpreter::java_util_zip_CRC32_updateBytes | |
1649 : // fall thru | |
1650 case Interpreter::java_util_zip_CRC32_updateByteBuffer | |
1651 : entry_point = ig_this->generate_CRC32_updateBytes_entry(kind); break; | |
1534 default: | 1652 default: |
1535 fatal(err_msg("unexpected method kind: %d", kind)); | 1653 fatal(err_msg("unexpected method kind: %d", kind)); |
1536 break; | 1654 break; |
1537 } | 1655 } |
1538 | 1656 |
1539 if (entry_point) { | 1657 if (entry_point) { |
1540 return entry_point; | 1658 return entry_point; |
1541 } | 1659 } |
1542 | 1660 |
1543 return ((InterpreterGenerator*) this)-> | 1661 return ig_this->generate_normal_entry(synchronized); |
1544 generate_normal_entry(synchronized); | |
1545 } | 1662 } |
1546 | 1663 |
1547 // These should never be compiled since the interpreter will prefer | 1664 // These should never be compiled since the interpreter will prefer |
1548 // the compiled version to the intrinsic version. | 1665 // the compiled version to the intrinsic version. |
1549 bool AbstractInterpreter::can_be_compiled(methodHandle m) { | 1666 bool AbstractInterpreter::can_be_compiled(methodHandle m) { |