Mercurial > hg > graal-jvmci-8
comparison src/share/vm/opto/macro.cpp @ 420:a1980da045cc
6462850: generate biased locking code in C2 ideal graph
Summary: Inline biased locking code in C2 ideal graph during macro nodes expansion
Reviewed-by: never
author | kvn |
---|---|
date | Fri, 07 Nov 2008 09:29:38 -0800 |
parents | f8199438385b |
children | 87559db65269 |
comparison
equal
deleted
inserted
replaced
419:0bf25c4807f9 | 420:a1980da045cc |
---|---|
80 jvms->set_scloff(jvms->scloff()+jvms_adj); | 80 jvms->set_scloff(jvms->scloff()+jvms_adj); |
81 jvms->set_endoff(jvms->endoff()+jvms_adj); | 81 jvms->set_endoff(jvms->endoff()+jvms_adj); |
82 } | 82 } |
83 } | 83 } |
84 | 84 |
85 Node* PhaseMacroExpand::opt_iff(Node* region, Node* iff) { | 85 Node* PhaseMacroExpand::opt_bits_test(Node* ctrl, Node* region, int edge, Node* word, int mask, int bits, bool return_fast_path) { |
86 IfNode *opt_iff = transform_later(iff)->as_If(); | 86 Node* cmp; |
87 | 87 if (mask != 0) { |
88 // Fast path taken; set region slot 2 | 88 Node* and_node = transform_later(new (C, 3) AndXNode(word, MakeConX(mask))); |
89 Node *fast_taken = transform_later( new (C, 1) IfFalseNode(opt_iff) ); | 89 cmp = transform_later(new (C, 3) CmpXNode(and_node, MakeConX(bits))); |
90 region->init_req(2,fast_taken); // Capture fast-control | 90 } else { |
91 cmp = word; | |
92 } | |
93 Node* bol = transform_later(new (C, 2) BoolNode(cmp, BoolTest::ne)); | |
94 IfNode* iff = new (C, 2) IfNode( ctrl, bol, PROB_MIN, COUNT_UNKNOWN ); | |
95 transform_later(iff); | |
96 | |
97 // Fast path taken. | |
98 Node *fast_taken = transform_later( new (C, 1) IfFalseNode(iff) ); | |
91 | 99 |
92 // Fast path not-taken, i.e. slow path | 100 // Fast path not-taken, i.e. slow path |
93 Node *slow_taken = transform_later( new (C, 1) IfTrueNode(opt_iff) ); | 101 Node *slow_taken = transform_later( new (C, 1) IfTrueNode(iff) ); |
94 return slow_taken; | 102 |
103 if (return_fast_path) { | |
104 region->init_req(edge, slow_taken); // Capture slow-control | |
105 return fast_taken; | |
106 } else { | |
107 region->init_req(edge, fast_taken); // Capture fast-control | |
108 return slow_taken; | |
109 } | |
95 } | 110 } |
96 | 111 |
97 //--------------------copy_predefined_input_for_runtime_call-------------------- | 112 //--------------------copy_predefined_input_for_runtime_call-------------------- |
98 void PhaseMacroExpand::copy_predefined_input_for_runtime_call(Node * ctrl, CallNode* oldcall, CallNode* call) { | 113 void PhaseMacroExpand::copy_predefined_input_for_runtime_call(Node * ctrl, CallNode* oldcall, CallNode* call) { |
99 // Set fixed predefined input arguments | 114 // Set fixed predefined input arguments |
852 } | 867 } |
853 | 868 |
854 | 869 |
855 Node* PhaseMacroExpand::make_load(Node* ctl, Node* mem, Node* base, int offset, const Type* value_type, BasicType bt) { | 870 Node* PhaseMacroExpand::make_load(Node* ctl, Node* mem, Node* base, int offset, const Type* value_type, BasicType bt) { |
856 Node* adr = basic_plus_adr(base, offset); | 871 Node* adr = basic_plus_adr(base, offset); |
857 const TypePtr* adr_type = TypeRawPtr::BOTTOM; | 872 const TypePtr* adr_type = adr->bottom_type()->is_ptr(); |
858 Node* value = LoadNode::make(_igvn, ctl, mem, adr, adr_type, value_type, bt); | 873 Node* value = LoadNode::make(_igvn, ctl, mem, adr, adr_type, value_type, bt); |
859 transform_later(value); | 874 transform_later(value); |
860 return value; | 875 return value; |
861 } | 876 } |
862 | 877 |
1581 Node* obj = lock->obj_node(); | 1596 Node* obj = lock->obj_node(); |
1582 Node* box = lock->box_node(); | 1597 Node* box = lock->box_node(); |
1583 Node* flock = lock->fastlock_node(); | 1598 Node* flock = lock->fastlock_node(); |
1584 | 1599 |
1585 // Make the merge point | 1600 // Make the merge point |
1586 Node *region = new (C, 3) RegionNode(3); | 1601 Node *region; |
1587 | 1602 Node *mem_phi; |
1588 Node *bol = transform_later(new (C, 2) BoolNode(flock,BoolTest::ne)); | 1603 Node *slow_path; |
1589 Node *iff = new (C, 2) IfNode( ctrl, bol, PROB_MIN, COUNT_UNKNOWN ); | 1604 |
1590 // Optimize test; set region slot 2 | 1605 if (UseOptoBiasInlining) { |
1591 Node *slow_path = opt_iff(region,iff); | 1606 /* |
1607 * See the full description in MacroAssembler::biased_locking_enter(). | 
1608 * | |
1609 * if( (mark_word & biased_lock_mask) == biased_lock_pattern ) { | |
1610 * // The object is biased. | |
1611 * proto_node = klass->prototype_header; | |
1612 * o_node = thread | proto_node; | |
1613 * x_node = o_node ^ mark_word; | |
1614 * if( (x_node & ~age_mask) == 0 ) { // Biased to the current thread ? | |
1615 * // Done. | |
1616 * } else { | |
1617 * if( (x_node & biased_lock_mask) != 0 ) { | |
1618 * // The klass's prototype header is no longer biased. | |
1619 * cas(&mark_word, mark_word, proto_node) | |
1620 * goto cas_lock; | |
1621 * } else { | |
1622 * // The klass's prototype header is still biased. | |
1623 * if( (x_node & epoch_mask) != 0 ) { // Expired epoch? | |
1624 * old = mark_word; | |
1625 * new = o_node; | |
1626 * } else { | |
1627 * // Different thread or anonymous biased. | |
1628 * old = mark_word & (epoch_mask | age_mask | biased_lock_mask); | |
1629 * new = thread | old; | |
1630 * } | |
1631 * // Try to rebias. | |
1632 * if( cas(&mark_word, old, new) == 0 ) { | |
1633 * // Done. | |
1634 * } else { | |
1635 * goto slow_path; // Failed. | |
1636 * } | |
1637 * } | |
1638 * } | |
1639 * } else { | |
1640 * // The object is not biased. | |
1641 * cas_lock: | |
1642 * if( FastLock(obj) == 0 ) { | |
1643 * // Done. | |
1644 * } else { | |
1645 * slow_path: | |
1646 * OptoRuntime::complete_monitor_locking_Java(obj); | |
1647 * } | |
1648 * } | |
1649 */ | |
1650 | |
1651 region = new (C, 5) RegionNode(5); | |
1652 // create a Phi for the memory state | |
1653 mem_phi = new (C, 5) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM); | |
1654 | |
1655 Node* fast_lock_region = new (C, 3) RegionNode(3); | |
1656 Node* fast_lock_mem_phi = new (C, 3) PhiNode( fast_lock_region, Type::MEMORY, TypeRawPtr::BOTTOM); | |
1657 | |
1658 // First, check mark word for the biased lock pattern. | |
1659 Node* mark_node = make_load(ctrl, mem, obj, oopDesc::mark_offset_in_bytes(), TypeX_X, TypeX_X->basic_type()); | |
1660 | |
1661 // Get fast path - mark word has the biased lock pattern. | |
1662 ctrl = opt_bits_test(ctrl, fast_lock_region, 1, mark_node, | |
1663 markOopDesc::biased_lock_mask_in_place, | |
1664 markOopDesc::biased_lock_pattern, true); | |
1665 // fast_lock_region->in(1) is set to slow path. | |
1666 fast_lock_mem_phi->init_req(1, mem); | |
1667 | |
1668 // Now check that the lock is biased to the current thread and has | |
1669 // the same epoch and bias as Klass::_prototype_header. | |
1670 | |
1671 // Special-case a fresh allocation to avoid building nodes: | |
1672 Node* klass_node = AllocateNode::Ideal_klass(obj, &_igvn); | |
1673 if (klass_node == NULL) { | |
1674 Node* k_adr = basic_plus_adr(obj, oopDesc::klass_offset_in_bytes()); | |
1675 klass_node = transform_later( LoadKlassNode::make(_igvn, mem, k_adr, _igvn.type(k_adr)->is_ptr()) ); | |
1676 klass_node->init_req(0, ctrl); | |
1677 } | |
1678 Node *proto_node = make_load(ctrl, mem, klass_node, Klass::prototype_header_offset_in_bytes() + sizeof(oopDesc), TypeX_X, TypeX_X->basic_type()); | |
1679 | |
1680 Node* thread = transform_later(new (C, 1) ThreadLocalNode()); | |
1681 Node* cast_thread = transform_later(new (C, 2) CastP2XNode(ctrl, thread)); | |
1682 Node* o_node = transform_later(new (C, 3) OrXNode(cast_thread, proto_node)); | |
1683 Node* x_node = transform_later(new (C, 3) XorXNode(o_node, mark_node)); | |
1684 | |
1685 // Get slow path - mark word does NOT match the value. | |
1686 Node* not_biased_ctrl = opt_bits_test(ctrl, region, 3, x_node, | |
1687 (~markOopDesc::age_mask_in_place), 0); | |
1688 // region->in(3) is set to fast path - the object is biased to the current thread. | |
1689 mem_phi->init_req(3, mem); | |
1690 | |
1691 | |
1692 // Mark word does NOT match the value (thread | Klass::_prototype_header). | |
1693 | |
1694 | |
1695 // First, check biased pattern. | |
1696 // Get fast path - _prototype_header has the same biased lock pattern. | |
1697 ctrl = opt_bits_test(not_biased_ctrl, fast_lock_region, 2, x_node, | |
1698 markOopDesc::biased_lock_mask_in_place, 0, true); | |
1699 | |
1700 not_biased_ctrl = fast_lock_region->in(2); // Slow path | |
1701 // fast_lock_region->in(2) - the prototype header is no longer biased | |
1702 // and we have to revoke the bias on this object. | |
1703 // We are going to try to reset the mark of this object to the prototype | |
1704 // value and fall through to the CAS-based locking scheme. | |
1705 Node* adr = basic_plus_adr(obj, oopDesc::mark_offset_in_bytes()); | |
1706 Node* cas = new (C, 5) StoreXConditionalNode(not_biased_ctrl, mem, adr, | |
1707 proto_node, mark_node); | |
1708 transform_later(cas); | |
1709 Node* proj = transform_later( new (C, 1) SCMemProjNode(cas)); | |
1710 fast_lock_mem_phi->init_req(2, proj); | |
1711 | |
1712 | |
1713 // Second, check epoch bits. | |
1714 Node* rebiased_region = new (C, 3) RegionNode(3); | |
1715 Node* old_phi = new (C, 3) PhiNode( rebiased_region, TypeX_X); | |
1716 Node* new_phi = new (C, 3) PhiNode( rebiased_region, TypeX_X); | |
1717 | |
1718 // Get slow path - mark word does NOT match epoch bits. | |
1719 Node* epoch_ctrl = opt_bits_test(ctrl, rebiased_region, 1, x_node, | |
1720 markOopDesc::epoch_mask_in_place, 0); | |
1721 // The epoch of the current bias is not valid, attempt to rebias the object | |
1722 // toward the current thread. | |
1723 rebiased_region->init_req(2, epoch_ctrl); | |
1724 old_phi->init_req(2, mark_node); | |
1725 new_phi->init_req(2, o_node); | |
1726 | |
1727 // rebiased_region->in(1) is set to fast path. | |
1728 // The epoch of the current bias is still valid but we know | |
1729 // nothing about the owner; it might be set or it might be clear. | |
1730 Node* cmask = MakeConX(markOopDesc::biased_lock_mask_in_place | | |
1731 markOopDesc::age_mask_in_place | | |
1732 markOopDesc::epoch_mask_in_place); | |
1733 Node* old = transform_later(new (C, 3) AndXNode(mark_node, cmask)); | |
1734 cast_thread = transform_later(new (C, 2) CastP2XNode(ctrl, thread)); | |
1735 Node* new_mark = transform_later(new (C, 3) OrXNode(cast_thread, old)); | |
1736 old_phi->init_req(1, old); | |
1737 new_phi->init_req(1, new_mark); | |
1738 | |
1739 transform_later(rebiased_region); | |
1740 transform_later(old_phi); | |
1741 transform_later(new_phi); | |
1742 | |
1743 // Try to acquire the bias of the object using an atomic operation. | |
1744 // If this fails we will go in to the runtime to revoke the object's bias. | |
1745 cas = new (C, 5) StoreXConditionalNode(rebiased_region, mem, adr, | |
1746 new_phi, old_phi); | |
1747 transform_later(cas); | |
1748 proj = transform_later( new (C, 1) SCMemProjNode(cas)); | |
1749 | |
1750 // Get slow path - Failed to CAS. | |
1751 not_biased_ctrl = opt_bits_test(rebiased_region, region, 4, cas, 0, 0); | |
1752 mem_phi->init_req(4, proj); | |
1753 // region->in(4) is set to fast path - the object is rebiased to the current thread. | |
1754 | |
1755 // Failed to CAS. | |
1756 slow_path = new (C, 3) RegionNode(3); | |
1757 Node *slow_mem = new (C, 3) PhiNode( slow_path, Type::MEMORY, TypeRawPtr::BOTTOM); | |
1758 | |
1759 slow_path->init_req(1, not_biased_ctrl); // Capture slow-control | |
1760 slow_mem->init_req(1, proj); | |
1761 | |
1762 // Call CAS-based locking scheme (FastLock node). | |
1763 | |
1764 transform_later(fast_lock_region); | |
1765 transform_later(fast_lock_mem_phi); | |
1766 | |
1767 // Get slow path - FastLock failed to lock the object. | |
1768 ctrl = opt_bits_test(fast_lock_region, region, 2, flock, 0, 0); | |
1769 mem_phi->init_req(2, fast_lock_mem_phi); | |
1770 // region->in(2) is set to fast path - the object is locked to the current thread. | |
1771 | |
1772 slow_path->init_req(2, ctrl); // Capture slow-control | |
1773 slow_mem->init_req(2, fast_lock_mem_phi); | |
1774 | |
1775 transform_later(slow_path); | |
1776 transform_later(slow_mem); | |
1777 // Reset lock's memory edge. | |
1778 lock->set_req(TypeFunc::Memory, slow_mem); | |
1779 | |
1780 } else { | |
1781 region = new (C, 3) RegionNode(3); | |
1782 // create a Phi for the memory state | |
1783 mem_phi = new (C, 3) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM); | |
1784 | |
1785 // Optimize test; set region slot 2 | |
1786 slow_path = opt_bits_test(ctrl, region, 2, flock, 0, 0); | |
1787 mem_phi->init_req(2, mem); | |
1788 } | |
1592 | 1789 |
1593 // Make slow path call | 1790 // Make slow path call |
1594 CallNode *call = make_slow_call( (CallNode *) lock, OptoRuntime::complete_monitor_enter_Type(), OptoRuntime::complete_monitor_locking_Java(), NULL, slow_path, obj, box ); | 1791 CallNode *call = make_slow_call( (CallNode *) lock, OptoRuntime::complete_monitor_enter_Type(), OptoRuntime::complete_monitor_locking_Java(), NULL, slow_path, obj, box ); |
1595 | 1792 |
1596 extract_call_projections(call); | 1793 extract_call_projections(call); |
1612 region->init_req(1, slow_ctrl); | 1809 region->init_req(1, slow_ctrl); |
1613 // region inputs are now complete | 1810 // region inputs are now complete |
1614 transform_later(region); | 1811 transform_later(region); |
1615 _igvn.subsume_node(_fallthroughproj, region); | 1812 _igvn.subsume_node(_fallthroughproj, region); |
1616 | 1813 |
1617 // create a Phi for the memory state | 1814 Node *memproj = transform_later( new(C, 1) ProjNode(call, TypeFunc::Memory) ); |
1618 Node *mem_phi = new (C, 3) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM); | |
1619 Node *memproj = transform_later( new (C, 1) ProjNode(call, TypeFunc::Memory) ); | |
1620 mem_phi->init_req(1, memproj ); | 1815 mem_phi->init_req(1, memproj ); |
1621 mem_phi->init_req(2, mem); | |
1622 transform_later(mem_phi); | 1816 transform_later(mem_phi); |
1623 _igvn.hash_delete(_memproj_fallthrough); | 1817 _igvn.hash_delete(_memproj_fallthrough); |
1624 _igvn.subsume_node(_memproj_fallthrough, mem_phi); | 1818 _igvn.subsume_node(_memproj_fallthrough, mem_phi); |
1625 | |
1626 | |
1627 } | 1819 } |
1628 | 1820 |
1629 //------------------------------expand_unlock_node---------------------- | 1821 //------------------------------expand_unlock_node---------------------- |
1630 void PhaseMacroExpand::expand_unlock_node(UnlockNode *unlock) { | 1822 void PhaseMacroExpand::expand_unlock_node(UnlockNode *unlock) { |
1631 | 1823 |
1635 Node* box = unlock->box_node(); | 1827 Node* box = unlock->box_node(); |
1636 | 1828 |
1637 // No need for a null check on unlock | 1829 // No need for a null check on unlock |
1638 | 1830 |
1639 // Make the merge point | 1831 // Make the merge point |
1640 RegionNode *region = new (C, 3) RegionNode(3); | 1832 Node *region; |
1833 Node *mem_phi; | |
1834 | |
1835 if (UseOptoBiasInlining) { | |
1836 // Check for biased locking unlock case, which is a no-op. | |
1837 // See the full description in MacroAssembler::biased_locking_exit(). | 
1838 region = new (C, 4) RegionNode(4); | |
1839 // create a Phi for the memory state | |
1840 mem_phi = new (C, 4) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM); | |
1841 mem_phi->init_req(3, mem); | |
1842 | |
1843 Node* mark_node = make_load(ctrl, mem, obj, oopDesc::mark_offset_in_bytes(), TypeX_X, TypeX_X->basic_type()); | |
1844 ctrl = opt_bits_test(ctrl, region, 3, mark_node, | |
1845 markOopDesc::biased_lock_mask_in_place, | |
1846 markOopDesc::biased_lock_pattern); | |
1847 } else { | |
1848 region = new (C, 3) RegionNode(3); | |
1849 // create a Phi for the memory state | |
1850 mem_phi = new (C, 3) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM); | |
1851 } | |
1641 | 1852 |
1642 FastUnlockNode *funlock = new (C, 3) FastUnlockNode( ctrl, obj, box ); | 1853 FastUnlockNode *funlock = new (C, 3) FastUnlockNode( ctrl, obj, box ); |
1643 funlock = transform_later( funlock )->as_FastUnlock(); | 1854 funlock = transform_later( funlock )->as_FastUnlock(); |
1644 Node *bol = transform_later(new (C, 2) BoolNode(funlock,BoolTest::ne)); | |
1645 Node *iff = new (C, 2) IfNode( ctrl, bol, PROB_MIN, COUNT_UNKNOWN ); | |
1646 // Optimize test; set region slot 2 | 1855 // Optimize test; set region slot 2 |
1647 Node *slow_path = opt_iff(region,iff); | 1856 Node *slow_path = opt_bits_test(ctrl, region, 2, funlock, 0, 0); |
1648 | 1857 |
1649 CallNode *call = make_slow_call( (CallNode *) unlock, OptoRuntime::complete_monitor_exit_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), "complete_monitor_unlocking_C", slow_path, obj, box ); | 1858 CallNode *call = make_slow_call( (CallNode *) unlock, OptoRuntime::complete_monitor_exit_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), "complete_monitor_unlocking_C", slow_path, obj, box ); |
1650 | 1859 |
1651 extract_call_projections(call); | 1860 extract_call_projections(call); |
1652 | 1861 |
1664 region->init_req(1, slow_ctrl); | 1873 region->init_req(1, slow_ctrl); |
1665 // region inputs are now complete | 1874 // region inputs are now complete |
1666 transform_later(region); | 1875 transform_later(region); |
1667 _igvn.subsume_node(_fallthroughproj, region); | 1876 _igvn.subsume_node(_fallthroughproj, region); |
1668 | 1877 |
1669 // create a Phi for the memory state | |
1670 Node *mem_phi = new (C, 3) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM); | |
1671 Node *memproj = transform_later( new(C, 1) ProjNode(call, TypeFunc::Memory) ); | 1878 Node *memproj = transform_later( new(C, 1) ProjNode(call, TypeFunc::Memory) ); |
1672 mem_phi->init_req(1, memproj ); | 1879 mem_phi->init_req(1, memproj ); |
1673 mem_phi->init_req(2, mem); | 1880 mem_phi->init_req(2, mem); |
1674 transform_later(mem_phi); | 1881 transform_later(mem_phi); |
1675 _igvn.hash_delete(_memproj_fallthrough); | 1882 _igvn.hash_delete(_memproj_fallthrough); |
1676 _igvn.subsume_node(_memproj_fallthrough, mem_phi); | 1883 _igvn.subsume_node(_memproj_fallthrough, mem_phi); |
1677 | |
1678 | |
1679 } | 1884 } |
1680 | 1885 |
1681 //------------------------------expand_macro_nodes---------------------- | 1886 //------------------------------expand_macro_nodes---------------------- |
1682 // Returns true if a failure occurred. | 1887 // Returns true if a failure occurred. |
1683 bool PhaseMacroExpand::expand_macro_nodes() { | 1888 bool PhaseMacroExpand::expand_macro_nodes() { |