Mercurial > hg > truffle
comparison src/cpu/ppc/vm/templateTable_ppc_64.cpp @ 20418:b384ba33c9a0
8050942: PPC64: implement template interpreter for ppc64le
Reviewed-by: kvn, goetz
Contributed-by: asmundak@google.com
author | kvn |
---|---|
date | Thu, 17 Jul 2014 15:40:04 -0700 |
parents | 63c5920a038d |
children | f6bde7889409 |
comparison
equal
deleted
inserted
replaced
20416:99f0593d8c9f | 20418:b384ba33c9a0 |
---|---|
186 // calls out to InterpreterRuntime::resolve_get_put to do | 186 // calls out to InterpreterRuntime::resolve_get_put to do |
187 // additional, required work. | 187 // additional, required work. |
188 assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); | 188 assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); |
189 assert(load_bc_into_bc_reg, "we use bc_reg as temp"); | 189 assert(load_bc_into_bc_reg, "we use bc_reg as temp"); |
190 __ get_cache_and_index_at_bcp(Rtemp /* dst = cache */, 1); | 190 __ get_cache_and_index_at_bcp(Rtemp /* dst = cache */, 1); |
191 // Big Endian: ((*(cache+indices))>>((1+byte_no)*8))&0xFF | 191 // ((*(cache+indices))>>((1+byte_no)*8))&0xFF: |
192 #if defined(VM_LITTLE_ENDIAN) | |
193 __ lbz(Rnew_bc, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 1 + byte_no, Rtemp); | |
194 #else | |
192 __ lbz(Rnew_bc, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 7 - (1 + byte_no), Rtemp); | 195 __ lbz(Rnew_bc, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 7 - (1 + byte_no), Rtemp); |
196 #endif | |
193 __ cmpwi(CCR0, Rnew_bc, 0); | 197 __ cmpwi(CCR0, Rnew_bc, 0); |
194 __ li(Rnew_bc, (unsigned int)(unsigned char)new_bc); | 198 __ li(Rnew_bc, (unsigned int)(unsigned char)new_bc); |
195 __ beq(CCR0, L_patch_done); | 199 __ beq(CCR0, L_patch_done); |
196 // __ isync(); // acquire not needed | 200 // __ isync(); // acquire not needed |
197 break; | 201 break; |
1836 // Align bcp. | 1840 // Align bcp. |
1837 __ addi(Rdef_offset_addr, R14_bcp, BytesPerInt); | 1841 __ addi(Rdef_offset_addr, R14_bcp, BytesPerInt); |
1838 __ clrrdi(Rdef_offset_addr, Rdef_offset_addr, log2_long((jlong)BytesPerInt)); | 1842 __ clrrdi(Rdef_offset_addr, Rdef_offset_addr, log2_long((jlong)BytesPerInt)); |
1839 | 1843 |
1840 // Load lo & hi. | 1844 // Load lo & hi. |
1841 __ lwz(Rlow_byte, BytesPerInt, Rdef_offset_addr); | 1845 __ get_u4(Rlow_byte, Rdef_offset_addr, BytesPerInt, InterpreterMacroAssembler::Unsigned); |
1842 __ lwz(Rhigh_byte, BytesPerInt * 2, Rdef_offset_addr); | 1846 __ get_u4(Rhigh_byte, Rdef_offset_addr, 2 *BytesPerInt, InterpreterMacroAssembler::Unsigned); |
1843 | 1847 |
1844 // Check for default case (=index outside [low,high]). | 1848 // Check for default case (=index outside [low,high]). |
1845 __ cmpw(CCR0, R17_tos, Rlow_byte); | 1849 __ cmpw(CCR0, R17_tos, Rlow_byte); |
1846 __ cmpw(CCR1, R17_tos, Rhigh_byte); | 1850 __ cmpw(CCR1, R17_tos, Rhigh_byte); |
1847 __ blt(CCR0, Ldefault_case); | 1851 __ blt(CCR0, Ldefault_case); |
1851 __ sub(Rindex, R17_tos, Rlow_byte); | 1855 __ sub(Rindex, R17_tos, Rlow_byte); |
1852 __ extsw(Rindex, Rindex); | 1856 __ extsw(Rindex, Rindex); |
1853 __ profile_switch_case(Rindex, Rhigh_byte /* scratch */, Rscratch1, Rscratch2); | 1857 __ profile_switch_case(Rindex, Rhigh_byte /* scratch */, Rscratch1, Rscratch2); |
1854 __ sldi(Rindex, Rindex, LogBytesPerInt); | 1858 __ sldi(Rindex, Rindex, LogBytesPerInt); |
1855 __ addi(Rindex, Rindex, 3 * BytesPerInt); | 1859 __ addi(Rindex, Rindex, 3 * BytesPerInt); |
1860 #if defined(VM_LITTLE_ENDIAN) | |
1861 __ lwbrx(Roffset, Rdef_offset_addr, Rindex); | |
1862 __ extsw(Roffset, Roffset); | |
1863 #else | |
1856 __ lwax(Roffset, Rdef_offset_addr, Rindex); | 1864 __ lwax(Roffset, Rdef_offset_addr, Rindex); |
1865 #endif | |
1857 __ b(Ldispatch); | 1866 __ b(Ldispatch); |
1858 | 1867 |
1859 __ bind(Ldefault_case); | 1868 __ bind(Ldefault_case); |
1860 __ profile_switch_default(Rhigh_byte, Rscratch1); | 1869 __ profile_switch_default(Rhigh_byte, Rscratch1); |
1861 __ lwa(Roffset, 0, Rdef_offset_addr); | 1870 __ get_u4(Roffset, Rdef_offset_addr, 0, InterpreterMacroAssembler::Signed); |
1862 | 1871 |
1863 __ bind(Ldispatch); | 1872 __ bind(Ldispatch); |
1864 | 1873 |
1865 __ add(R14_bcp, Roffset, R14_bcp); | 1874 __ add(R14_bcp, Roffset, R14_bcp); |
1866 __ dispatch_next(vtos); | 1875 __ dispatch_next(vtos); |
1872 } | 1881 } |
1873 | 1882 |
1874 // Table switch using linear search through cases. | 1883 // Table switch using linear search through cases. |
1875 // Bytecode stream format: | 1884 // Bytecode stream format: |
1876 // Bytecode (1) | 4-byte padding | default offset (4) | count (4) | value/offset pair1 (8) | value/offset pair2 (8) | ... | 1885 // Bytecode (1) | 4-byte padding | default offset (4) | count (4) | value/offset pair1 (8) | value/offset pair2 (8) | ... |
1877 // Note: Everything is big-endian format here. So on little endian machines, we have to reverse offset and count and cmp value. | 1886 // Note: Everything is big-endian format here. |
1878 void TemplateTable::fast_linearswitch() { | 1887 void TemplateTable::fast_linearswitch() { |
1879 transition(itos, vtos); | 1888 transition(itos, vtos); |
1880 | 1889 |
1881 Label Lloop_entry, Lsearch_loop, Lfound, Lcontinue_execution, Ldefault_case; | 1890 Label Lloop_entry, Lsearch_loop, Lcontinue_execution, Ldefault_case; |
1882 | |
1883 Register Rcount = R3_ARG1, | 1891 Register Rcount = R3_ARG1, |
1884 Rcurrent_pair = R4_ARG2, | 1892 Rcurrent_pair = R4_ARG2, |
1885 Rdef_offset_addr = R5_ARG3, // Is going to contain address of default offset. | 1893 Rdef_offset_addr = R5_ARG3, // Is going to contain address of default offset. |
1886 Roffset = R31, // Might need to survive C call. | 1894 Roffset = R31, // Might need to survive C call. |
1887 Rvalue = R12_scratch2, | 1895 Rvalue = R12_scratch2, |
1891 // Align bcp. | 1899 // Align bcp. |
1892 __ addi(Rdef_offset_addr, R14_bcp, BytesPerInt); | 1900 __ addi(Rdef_offset_addr, R14_bcp, BytesPerInt); |
1893 __ clrrdi(Rdef_offset_addr, Rdef_offset_addr, log2_long((jlong)BytesPerInt)); | 1901 __ clrrdi(Rdef_offset_addr, Rdef_offset_addr, log2_long((jlong)BytesPerInt)); |
1894 | 1902 |
1895 // Setup loop counter and limit. | 1903 // Setup loop counter and limit. |
1896 __ lwz(Rcount, BytesPerInt, Rdef_offset_addr); // Load count. | 1904 __ get_u4(Rcount, Rdef_offset_addr, BytesPerInt, InterpreterMacroAssembler::Unsigned); |
1897 __ addi(Rcurrent_pair, Rdef_offset_addr, 2 * BytesPerInt); // Rcurrent_pair now points to first pair. | 1905 __ addi(Rcurrent_pair, Rdef_offset_addr, 2 * BytesPerInt); // Rcurrent_pair now points to first pair. |
1898 | 1906 |
1899 // Set up search loop. | 1907 __ mtctr(Rcount); |
1900 __ cmpwi(CCR0, Rcount, 0); | 1908 __ cmpwi(CCR0, Rcount, 0); |
1901 __ beq(CCR0, Ldefault_case); | 1909 __ bne(CCR0, Lloop_entry); |
1902 | 1910 |
1903 __ mtctr(Rcount); | 1911 // Default case |
1904 | |
1905 // linear table search | |
1906 __ bind(Lsearch_loop); | |
1907 | |
1908 __ lwz(Rvalue, 0, Rcurrent_pair); | |
1909 __ lwa(Roffset, 1 * BytesPerInt, Rcurrent_pair); | |
1910 | |
1911 __ cmpw(CCR0, Rvalue, Rcmp_value); | |
1912 __ beq(CCR0, Lfound); | |
1913 | |
1914 __ addi(Rcurrent_pair, Rcurrent_pair, 2 * BytesPerInt); | |
1915 __ bdnz(Lsearch_loop); | |
1916 | |
1917 // default case | |
1918 __ bind(Ldefault_case); | 1912 __ bind(Ldefault_case); |
1919 | 1913 __ get_u4(Roffset, Rdef_offset_addr, 0, InterpreterMacroAssembler::Signed); |
1920 __ lwa(Roffset, 0, Rdef_offset_addr); | |
1921 if (ProfileInterpreter) { | 1914 if (ProfileInterpreter) { |
1922 __ profile_switch_default(Rdef_offset_addr, Rcount/* scratch */); | 1915 __ profile_switch_default(Rdef_offset_addr, Rcount/* scratch */); |
1923 __ b(Lcontinue_execution); | 1916 } |
1924 } | 1917 __ b(Lcontinue_execution); |
1925 | 1918 |
1926 // Entry found, skip Roffset bytecodes and continue. | 1919 // Next iteration |
1927 __ bind(Lfound); | 1920 __ bind(Lsearch_loop); |
1921 __ bdz(Ldefault_case); | |
1922 __ addi(Rcurrent_pair, Rcurrent_pair, 2 * BytesPerInt); | |
1923 __ bind(Lloop_entry); | |
1924 __ get_u4(Rvalue, Rcurrent_pair, 0, InterpreterMacroAssembler::Unsigned); | |
1925 __ cmpw(CCR0, Rvalue, Rcmp_value); | |
1926 __ bne(CCR0, Lsearch_loop); | |
1927 | |
1928 // Found, load offset. | |
1929 __ get_u4(Roffset, Rcurrent_pair, BytesPerInt, InterpreterMacroAssembler::Signed); | |
1930 // Calculate case index and profile | |
1931 __ mfctr(Rcurrent_pair); | |
1928 if (ProfileInterpreter) { | 1932 if (ProfileInterpreter) { |
1929 // Calc the num of the pair we hit. Careful, Rcurrent_pair points 2 ints | 1933 __ sub(Rcurrent_pair, Rcount, Rcurrent_pair); |
1930 // beyond the actual current pair due to the auto update load above! | |
1931 __ sub(Rcurrent_pair, Rcurrent_pair, Rdef_offset_addr); | |
1932 __ addi(Rcurrent_pair, Rcurrent_pair, - 2 * BytesPerInt); | |
1933 __ srdi(Rcurrent_pair, Rcurrent_pair, LogBytesPerInt + 1); | |
1934 __ profile_switch_case(Rcurrent_pair, Rcount /*scratch*/, Rdef_offset_addr/*scratch*/, Rscratch); | 1934 __ profile_switch_case(Rcurrent_pair, Rcount /*scratch*/, Rdef_offset_addr/*scratch*/, Rscratch); |
1935 __ bind(Lcontinue_execution); | 1935 } |
1936 } | 1936 |
1937 __ bind(Lcontinue_execution); | |
1937 __ add(R14_bcp, Roffset, R14_bcp); | 1938 __ add(R14_bcp, Roffset, R14_bcp); |
1938 __ dispatch_next(vtos); | 1939 __ dispatch_next(vtos); |
1939 } | 1940 } |
1940 | 1941 |
1941 // Table switch using binary search (value/offset pairs are ordered). | 1942 // Table switch using binary search (value/offset pairs are ordered). |
1987 __ addi(Rarray, R14_bcp, 3 * BytesPerInt); | 1988 __ addi(Rarray, R14_bcp, 3 * BytesPerInt); |
1988 __ clrrdi(Rarray, Rarray, log2_long((jlong)BytesPerInt)); | 1989 __ clrrdi(Rarray, Rarray, log2_long((jlong)BytesPerInt)); |
1989 | 1990 |
1990 // initialize i & j | 1991 // initialize i & j |
1991 __ li(Ri,0); | 1992 __ li(Ri,0); |
1992 __ lwz(Rj, -BytesPerInt, Rarray); | 1993 __ get_u4(Rj, Rarray, -BytesPerInt, InterpreterMacroAssembler::Unsigned); |
1993 | 1994 |
1994 // and start. | 1995 // and start. |
1995 Label entry; | 1996 Label entry; |
1996 __ b(entry); | 1997 __ b(entry); |
1997 | 1998 |
2004 // j = h; | 2005 // j = h; |
2005 // } else { | 2006 // } else { |
2006 // i = h; | 2007 // i = h; |
2007 // } | 2008 // } |
2008 __ sldi(Rscratch, Rh, log_entry_size); | 2009 __ sldi(Rscratch, Rh, log_entry_size); |
2010 #if defined(VM_LITTLE_ENDIAN) | |
2011 __ lwbrx(Rscratch, Rscratch, Rarray); | |
2012 #else | |
2009 __ lwzx(Rscratch, Rscratch, Rarray); | 2013 __ lwzx(Rscratch, Rscratch, Rarray); |
2014 #endif | |
2010 | 2015 |
2011 // if (key < current value) | 2016 // if (key < current value) |
2012 // Rh = Rj | 2017 // Rh = Rj |
2013 // else | 2018 // else |
2014 // Rh = Ri | 2019 // Rh = Ri |
2036 __ mr(Rh, Ri); // Save index in i for profiling. | 2041 __ mr(Rh, Ri); // Save index in i for profiling. |
2037 } | 2042 } |
2038 // Ri = value offset | 2043 // Ri = value offset |
2039 __ sldi(Ri, Ri, log_entry_size); | 2044 __ sldi(Ri, Ri, log_entry_size); |
2040 __ add(Ri, Ri, Rarray); | 2045 __ add(Ri, Ri, Rarray); |
2041 __ lwz(Rscratch, 0, Ri); | 2046 __ get_u4(Rscratch, Ri, 0, InterpreterMacroAssembler::Unsigned); |
2042 | 2047 |
2043 Label not_found; | 2048 Label not_found; |
2044 // Ri = offset offset | 2049 // Ri = offset offset |
2045 __ cmpw(CCR0, Rkey, Rscratch); | 2050 __ cmpw(CCR0, Rkey, Rscratch); |
2046 __ beq(CCR0, not_found); | 2051 __ beq(CCR0, not_found); |
2047 // entry not found -> j = default offset | 2052 // entry not found -> j = default offset |
2048 __ lwz(Rj, -2 * BytesPerInt, Rarray); | 2053 __ get_u4(Rj, Rarray, -2 * BytesPerInt, InterpreterMacroAssembler::Unsigned); |
2049 __ b(default_case); | 2054 __ b(default_case); |
2050 | 2055 |
2051 __ bind(not_found); | 2056 __ bind(not_found); |
2052 // entry found -> j = offset | 2057 // entry found -> j = offset |
2053 __ profile_switch_case(Rh, Rj, Rscratch, Rkey); | 2058 __ profile_switch_case(Rh, Rj, Rscratch, Rkey); |
2054 __ lwz(Rj, BytesPerInt, Ri); | 2059 __ get_u4(Rj, Ri, BytesPerInt, InterpreterMacroAssembler::Unsigned); |
2055 | 2060 |
2056 if (ProfileInterpreter) { | 2061 if (ProfileInterpreter) { |
2057 __ b(continue_execution); | 2062 __ b(continue_execution); |
2058 } | 2063 } |
2059 | 2064 |
2144 __ get_cache_and_index_at_bcp(Rcache, 1, index_size); | 2149 __ get_cache_and_index_at_bcp(Rcache, 1, index_size); |
2145 Label Lresolved, Ldone; | 2150 Label Lresolved, Ldone; |
2146 | 2151 |
2147 assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); | 2152 assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); |
2148 // We are resolved if the indices offset contains the current bytecode. | 2153 // We are resolved if the indices offset contains the current bytecode. |
2149 // Big Endian: | 2154 #if defined(VM_LITTLE_ENDIAN) |
2155 __ lbz(Rscratch, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + byte_no + 1, Rcache); | |
2156 #else | |
2150 __ lbz(Rscratch, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 7 - (byte_no + 1), Rcache); | 2157 __ lbz(Rscratch, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 7 - (byte_no + 1), Rcache); |
2158 #endif | |
2151 // Acquire by cmp-br-isync (see below). | 2159 // Acquire by cmp-br-isync (see below). |
2152 __ cmpdi(CCR0, Rscratch, (int)bytecode()); | 2160 __ cmpdi(CCR0, Rscratch, (int)bytecode()); |
2153 __ beq(CCR0, Lresolved); | 2161 __ beq(CCR0, Lresolved); |
2154 | 2162 |
2155 address entry = NULL; | 2163 address entry = NULL; |