comparison src/share/vm/opto/superword.cpp @ 6614:006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
Summary: Added asm encoding and mach nodes for vector arithmetic instructions on x86.
Reviewed-by: roland
author    kvn
date      Mon, 20 Aug 2012 09:07:21 -0700
parents   6f8f439e247d
children  4b0d6fd74911
--- src/share/vm/opto/superword.cpp  6594:d5ec46c7da5c
+++ src/share/vm/opto/superword.cpp  6614:006050192a5a
@@ -1355,10 +1355,16 @@
         vn = StoreVectorNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen);
       } else if (n->req() == 3) {
         // Promote operands to vector
         Node* in1 = vector_opd(p, 1);
         Node* in2 = vector_opd(p, 2);
+        if (VectorNode::is_invariant_vector(in1) && (n->is_Add() || n->is_Mul())) {
+          // Move invariant vector input into second position to avoid register spilling.
+          Node* tmp = in1;
+          in1 = in2;
+          in2 = tmp;
+        }
         vn = VectorNode::make(_phase->C, opc, in1, in2, vlen, velt_basic_type(n));
       } else {
         ShouldNotReachHere();
       }
       assert(vn != NULL, "sanity");
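The swap is purely an operand-order change, legal because Add and Mul are commutative. The stated motive is register pressure: on two-operand SSE encodings the first vector input doubles as the destination register, so an invariant kept in operand 1 would be clobbered and re-materialized every iteration. A sketch of the loop shape involved, with invented names:

    // Illustrative loop: 'invar' never changes inside the loop, so SuperWord
    // materializes its replicated vector once in the loop pre-header.
    void scale(int* a, const int* b, int invar, int n) {
      for (int i = 0; i < n; i++) {
        a[i] = invar * b[i];  // packs as Mul(Replicate(invar), LoadVector(b))
      }
    }
    // After the swap the pack is Mul(LoadVector(b), Replicate(invar)): the
    // loop-varying load, not the invariant, sits in the destination slot.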
@@ -1397,10 +1403,40 @@
   }

   if (same_opd) {
     if (opd->is_Vector() || opd->is_LoadVector()) {
       return opd; // input is matching vector
+    }
+    if ((opd_idx == 2) && VectorNode::is_shift(p0)) {
+      // No vector is needed for shift count.
+      // Vector instructions do not mask shift count, do it here.
+      Compile* C = _phase->C;
+      Node* cnt = opd;
+      juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);
+      const TypeInt* t = opd->find_int_type();
+      if (t != NULL && t->is_con()) {
+        juint shift = t->get_con();
+        if (shift > mask) { // Unsigned cmp
+          cnt = ConNode::make(C, TypeInt::make(shift & mask));
+        }
+      } else {
+        if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) {
+          cnt = ConNode::make(C, TypeInt::make(mask));
+          _phase->_igvn.register_new_node_with_optimizer(cnt);
+          cnt = new (C, 3) AndINode(opd, cnt);
+          _phase->_igvn.register_new_node_with_optimizer(cnt);
+          _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
+        }
+        assert(opd->bottom_type()->isa_int(), "int type only");
+        // Move non constant shift count into XMM register.
+        cnt = new (_phase->C, 2) MoveI2FNode(cnt);
+      }
+      if (cnt != opd) {
+        _phase->_igvn.register_new_node_with_optimizer(cnt);
+        _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
+      }
+      return cnt;
     }
     assert(!opd->is_StoreVector(), "such vector is not expected here");
     // Convert scalar input to vector with the same number of elements as
     // p0's vector. Use p0's type because size of operand's container in
     // vector should match p0's size regardless operand's size.
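The masking mirrors Java shift semantics. The JLS masks a 32-bit shift count with 31 (63 for long), and x86 scalar shifts do the same in hardware, but SSE vector shifts (PSLLD/PSRLD and friends) consume the full count and zero every lane once the count reaches the lane width. Hence: a constant count is folded with shift & mask, an unknown count gets an explicit AndI, and the result is moved into an XMM register via MoveI2F because the vector shift instructions take their count from a vector register. A minimal standalone illustration of the semantic gap being closed:

    #include <cstdio>

    int main() {
      int x = 1, s = 33;
      // Java / x86 scalar semantics: the count is masked, 1 << 33 == 1 << 1.
      printf("%d\n", x << (s & 31));  // prints 2
      // An SSE vector shift with count 33 would instead zero all 32-bit
      // lanes, which is why the hunk above inserts the AndI mask before
      // vectorizing a shift with a non-constant count.
      return 0;
    }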
@@ -1716,41 +1752,31 @@
   // Propagate narrowed type backwards through operations
   // that don't depend on higher order bits
   for (int i = _block.length() - 1; i >= 0; i--) {
     Node* n = _block.at(i);
     // Only integer types need be examined
-    if (n->bottom_type()->isa_int()) {
+    const Type* vt = velt_type(n);
+    if (vt->basic_type() == T_INT) {
       uint start, end;
       vector_opd_range(n, &start, &end);
       const Type* vt = velt_type(n);

       for (uint j = start; j < end; j++) {
         Node* in = n->in(j);
-        // Don't propagate through a type conversion
-        if (n->bottom_type() != in->bottom_type())
-          continue;
-        switch(in->Opcode()) {
-        case Op_AddI: case Op_AddL:
-        case Op_SubI: case Op_SubL:
-        case Op_MulI: case Op_MulL:
-        case Op_AndI: case Op_AndL:
-        case Op_OrI: case Op_OrL:
-        case Op_XorI: case Op_XorL:
-        case Op_LShiftI: case Op_LShiftL:
-        case Op_CMoveI: case Op_CMoveL:
-          if (in_bb(in)) {
-            bool same_type = true;
-            for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) {
-              Node *use = in->fast_out(k);
-              if (!in_bb(use) || !same_velt_type(use, n)) {
-                same_type = false;
-                break;
-              }
-            }
-            if (same_type) {
-              set_velt_type(in, vt);
-            }
-          }
-        }
+        // Don't propagate through a memory
+        if (!in->is_Mem() && in_bb(in) && velt_type(in)->basic_type() == T_INT &&
+            data_size(n) < data_size(in)) {
+          bool same_type = true;
+          for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) {
+            Node *use = in->fast_out(k);
+            if (!in_bb(use) || !same_velt_type(use, n)) {
+              same_type = false;
+              break;
+            }
+          }
+          if (same_type) {
+            set_velt_type(in, vt);
+          }
+        }
       }
     }
   }
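The rewrite replaces the opcode whitelist with two structural guards: never narrow through a memory node, whose width is fixed by the access, and only narrow an input that is wider than its user (data_size(n) < data_size(in)). A sketch of the loop shape this targets, with invented names:

    // b[i] is widened to a 32-bit int, added, then implicitly truncated by
    // the short store. Walking the block backwards from the StoreC propagates
    // its T_SHORT element type onto the AddI and LoadS, so one 128-bit
    // register carries 8 short lanes instead of 4 int lanes.
    void bump(short* a, const short* b, short c, int n) {
      for (int i = 0; i < n; i++) {
        a[i] = (short)(b[i] + c);
      }
    }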
@@ -1790,14 +1816,12 @@
   if (n->is_Mem()) {
     return Type::get_const_basic_type(n->as_Mem()->memory_type());
   }
   const Type* t = _igvn.type(n);
   if (t->basic_type() == T_INT) {
-    if (t->higher_equal(TypeInt::BOOL)) return TypeInt::BOOL;
-    if (t->higher_equal(TypeInt::BYTE)) return TypeInt::BYTE;
-    if (t->higher_equal(TypeInt::CHAR)) return TypeInt::CHAR;
-    if (t->higher_equal(TypeInt::SHORT)) return TypeInt::SHORT;
+    // A narrow type of arithmetic operations will be determined by
+    // propagating the type of memory operations.
     return TypeInt::INT;
   }
   return t;
 }

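container_type() now leaves every integral node at INT and relies on the backward propagation in the previous hunk for narrowing. One plausible hazard of the removed range-based narrowing, reconstructed here as an assumption rather than taken from the changeset:

    // TypeInt::make(3) satisfies higher_equal(TypeInt::BYTE), so the old code
    // could containerize a small constant as a byte even when it feeds a
    // plain 32-bit add, leaving the pack with mismatched lane sizes.
    int add3(const int* a, int i) { return a[i] + 3; }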
@@ -1938,11 +1962,11 @@

 //   Given:
 //     lim0 == original pre loop limit
 //     V == v_align (power of 2)
 //     invar == extra invariant piece of the address expression
-//     e == k [ +/- invar ]
+//     e == offset [ +/- invar ]
 //
 //   When reassociating expressions involving '%' the basic rules are:
 //     (a - b) % k == 0  =>  a % k == b % k
 //   and:
 //     (a + b) % k == 0  =>  a % k == (k - b) % k
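Both rules are ordinary modular arithmetic; the second assumes 0 <= b < k, so the C-style % agrees with the mathematical residue. A quick derivation, supplied here for clarity:

    \begin{aligned}
    (a-b) \bmod k = 0 &\implies a \equiv b \pmod{k} &&\implies a \bmod k = b \bmod k,\\
    (a+b) \bmod k = 0 &\implies a \equiv -b \equiv k-b \pmod{k} &&\implies a \bmod k = (k-b) \bmod k.
    \end{aligned}

For example, with k = 4, a = 6, b = 2: (6 - 2) % 4 == 0 and 6 % 4 == 2 % 4 == 2; likewise (6 + 2) % 4 == 0 and 6 % 4 == (4 - 2) % 4 == 2.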
@@ -1991,17 +2015,16 @@
   int stride = iv_stride();
   int scale = align_to_ref_p.scale_in_bytes();
   int elt_size = align_to_ref_p.memory_size();
   int v_align = vw / elt_size;
   assert(v_align > 1, "sanity");
-  int k = align_to_ref_p.offset_in_bytes() / elt_size;
-
-  Node *kn = _igvn.intcon(k);
-
-  Node *e = kn;
+  int offset = align_to_ref_p.offset_in_bytes() / elt_size;
+  Node *offsn = _igvn.intcon(offset);
+
+  Node *e = offsn;
   if (align_to_ref_p.invar() != NULL) {
-    // incorporate any extra invariant piece producing k +/- invar >>> log2(elt)
+    // incorporate any extra invariant piece producing (offset +/- invar) >>> log2(elt)
     Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
     Node* aref = new (_phase->C, 3) URShiftINode(align_to_ref_p.invar(), log2_elt);
     _phase->_igvn.register_new_node_with_optimizer(aref);
     _phase->set_ctrl(aref, pre_ctrl);
     if (align_to_ref_p.negate_invar()) {
@@ -2012,19 +2035,19 @@
     _phase->_igvn.register_new_node_with_optimizer(e);
     _phase->set_ctrl(e, pre_ctrl);
   }
   if (vw > ObjectAlignmentInBytes) {
     // incorporate base e +/- base && Mask >>> log2(elt)
-    Node* mask = _igvn.MakeConX(~(-1 << exact_log2(vw)));
     Node* xbase = new(_phase->C, 2) CastP2XNode(NULL, align_to_ref_p.base());
     _phase->_igvn.register_new_node_with_optimizer(xbase);
-    Node* masked_xbase = new (_phase->C, 3) AndXNode(xbase, mask);
+#ifdef _LP64
+    xbase = new (_phase->C, 2) ConvL2INode(xbase);
+    _phase->_igvn.register_new_node_with_optimizer(xbase);
+#endif
+    Node* mask = _igvn.intcon(vw-1);
+    Node* masked_xbase = new (_phase->C, 3) AndINode(xbase, mask);
     _phase->_igvn.register_new_node_with_optimizer(masked_xbase);
-#ifdef _LP64
-    masked_xbase = new (_phase->C, 2) ConvL2INode(masked_xbase);
-    _phase->_igvn.register_new_node_with_optimizer(masked_xbase);
-#endif
     Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
     Node* bref = new (_phase->C, 3) URShiftINode(masked_xbase, log2_elt);
     _phase->_igvn.register_new_node_with_optimizer(bref);
     _phase->set_ctrl(bref, pre_ctrl);
     e = new (_phase->C, 3) AddINode(e, bref);
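Moving the ConvL2I before the AND (and using a 32-bit intcon(vw-1) mask instead of MakeConX) is sound because only the low log2(vw) bits of the base participate in the alignment test. To make the node-building concrete, here is a scalar mirror of the expression the hunk assembles; the invariant term is omitted and every value is invented:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // All values are invented for illustration.
      int vw       = 16;             // vector width in bytes (one XMM register)
      int elt_size = 4;              // sizeof(jint)
      int log2_elt = 2;              // exact_log2(elt_size)
      int v_align  = vw / elt_size;  // 4 elements per vector
      int offset   = 8 / elt_size;   // offset_in_bytes() / elt_size

      uintptr_t base = 0x1000000cUL; // object base address
      int e = offset;
      // Only when vw > ObjectAlignmentInBytes: fold in the base's
      // misalignment, (base & (vw-1)) >>> log2(elt), just as the IR above
      // does. Truncating the base to 32 bits first (ConvL2I on LP64) is
      // lossless here because only the low log2(vw) bits matter.
      e += (int)((base & (uintptr_t)(vw - 1)) >> log2_elt);

      printf("e = %d, e mod v_align = %d\n", e, e % v_align); // e = 5, 5 mod 4 = 1
      // The code following this hunk adjusts the pre-loop limit so that
      // (e + lim) % v_align == 0, making the vector body start aligned.
      return 0;
    }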