comparison src/cpu/ppc/vm/templateTable_ppc_64.cpp @ 20418:b384ba33c9a0

8050942: PPC64: implement template interpreter for ppc64le Reviewed-by: kvn, goetz Contributed-by: asmundak@google.com
author kvn
date Thu, 17 Jul 2014 15:40:04 -0700
parents 63c5920a038d
children f6bde7889409
comparison
equal deleted inserted replaced
20416:99f0593d8c9f 20418:b384ba33c9a0
186 // calls out to InterpreterRuntime::resolve_get_put to do 186 // calls out to InterpreterRuntime::resolve_get_put to do
187 // additional, required work. 187 // additional, required work.
188 assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); 188 assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
189 assert(load_bc_into_bc_reg, "we use bc_reg as temp"); 189 assert(load_bc_into_bc_reg, "we use bc_reg as temp");
190 __ get_cache_and_index_at_bcp(Rtemp /* dst = cache */, 1); 190 __ get_cache_and_index_at_bcp(Rtemp /* dst = cache */, 1);
191 // Big Endian: ((*(cache+indices))>>((1+byte_no)*8))&0xFF 191 // ((*(cache+indices))>>((1+byte_no)*8))&0xFF:
192 #if defined(VM_LITTLE_ENDIAN)
193 __ lbz(Rnew_bc, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 1 + byte_no, Rtemp);
194 #else
192 __ lbz(Rnew_bc, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 7 - (1 + byte_no), Rtemp); 195 __ lbz(Rnew_bc, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 7 - (1 + byte_no), Rtemp);
196 #endif
193 __ cmpwi(CCR0, Rnew_bc, 0); 197 __ cmpwi(CCR0, Rnew_bc, 0);
194 __ li(Rnew_bc, (unsigned int)(unsigned char)new_bc); 198 __ li(Rnew_bc, (unsigned int)(unsigned char)new_bc);
195 __ beq(CCR0, L_patch_done); 199 __ beq(CCR0, L_patch_done);
196 // __ isync(); // acquire not needed 200 // __ isync(); // acquire not needed
197 break; 201 break;
1836 // Align bcp. 1840 // Align bcp.
1837 __ addi(Rdef_offset_addr, R14_bcp, BytesPerInt); 1841 __ addi(Rdef_offset_addr, R14_bcp, BytesPerInt);
1838 __ clrrdi(Rdef_offset_addr, Rdef_offset_addr, log2_long((jlong)BytesPerInt)); 1842 __ clrrdi(Rdef_offset_addr, Rdef_offset_addr, log2_long((jlong)BytesPerInt));
1839 1843
1840 // Load lo & hi. 1844 // Load lo & hi.
1841 __ lwz(Rlow_byte, BytesPerInt, Rdef_offset_addr); 1845 __ get_u4(Rlow_byte, Rdef_offset_addr, BytesPerInt, InterpreterMacroAssembler::Unsigned);
1842 __ lwz(Rhigh_byte, BytesPerInt * 2, Rdef_offset_addr); 1846 __ get_u4(Rhigh_byte, Rdef_offset_addr, 2 *BytesPerInt, InterpreterMacroAssembler::Unsigned);
1843 1847
1844 // Check for default case (=index outside [low,high]). 1848 // Check for default case (=index outside [low,high]).
1845 __ cmpw(CCR0, R17_tos, Rlow_byte); 1849 __ cmpw(CCR0, R17_tos, Rlow_byte);
1846 __ cmpw(CCR1, R17_tos, Rhigh_byte); 1850 __ cmpw(CCR1, R17_tos, Rhigh_byte);
1847 __ blt(CCR0, Ldefault_case); 1851 __ blt(CCR0, Ldefault_case);
1851 __ sub(Rindex, R17_tos, Rlow_byte); 1855 __ sub(Rindex, R17_tos, Rlow_byte);
1852 __ extsw(Rindex, Rindex); 1856 __ extsw(Rindex, Rindex);
1853 __ profile_switch_case(Rindex, Rhigh_byte /* scratch */, Rscratch1, Rscratch2); 1857 __ profile_switch_case(Rindex, Rhigh_byte /* scratch */, Rscratch1, Rscratch2);
1854 __ sldi(Rindex, Rindex, LogBytesPerInt); 1858 __ sldi(Rindex, Rindex, LogBytesPerInt);
1855 __ addi(Rindex, Rindex, 3 * BytesPerInt); 1859 __ addi(Rindex, Rindex, 3 * BytesPerInt);
1860 #if defined(VM_LITTLE_ENDIAN)
1861 __ lwbrx(Roffset, Rdef_offset_addr, Rindex);
1862 __ extsw(Roffset, Roffset);
1863 #else
1856 __ lwax(Roffset, Rdef_offset_addr, Rindex); 1864 __ lwax(Roffset, Rdef_offset_addr, Rindex);
1865 #endif
1857 __ b(Ldispatch); 1866 __ b(Ldispatch);
1858 1867
1859 __ bind(Ldefault_case); 1868 __ bind(Ldefault_case);
1860 __ profile_switch_default(Rhigh_byte, Rscratch1); 1869 __ profile_switch_default(Rhigh_byte, Rscratch1);
1861 __ lwa(Roffset, 0, Rdef_offset_addr); 1870 __ get_u4(Roffset, Rdef_offset_addr, 0, InterpreterMacroAssembler::Signed);
1862 1871
1863 __ bind(Ldispatch); 1872 __ bind(Ldispatch);
1864 1873
1865 __ add(R14_bcp, Roffset, R14_bcp); 1874 __ add(R14_bcp, Roffset, R14_bcp);
1866 __ dispatch_next(vtos); 1875 __ dispatch_next(vtos);
1872 } 1881 }
1873 1882
1874 // Table switch using linear search through cases. 1883 // Table switch using linear search through cases.
1875 // Bytecode stream format: 1884 // Bytecode stream format:
1876 // Bytecode (1) | 4-byte padding | default offset (4) | count (4) | value/offset pair1 (8) | value/offset pair2 (8) | ... 1885 // Bytecode (1) | 4-byte padding | default offset (4) | count (4) | value/offset pair1 (8) | value/offset pair2 (8) | ...
1877 // Note: Everything is big-endian format here. So on little endian machines, we have to reverse offset and count and cmp value. 1886 // Note: Everything is big-endian format here.
1878 void TemplateTable::fast_linearswitch() { 1887 void TemplateTable::fast_linearswitch() {
1879 transition(itos, vtos); 1888 transition(itos, vtos);
1880 1889
1881 Label Lloop_entry, Lsearch_loop, Lfound, Lcontinue_execution, Ldefault_case; 1890 Label Lloop_entry, Lsearch_loop, Lcontinue_execution, Ldefault_case;
1882
1883 Register Rcount = R3_ARG1, 1891 Register Rcount = R3_ARG1,
1884 Rcurrent_pair = R4_ARG2, 1892 Rcurrent_pair = R4_ARG2,
1885 Rdef_offset_addr = R5_ARG3, // Is going to contain address of default offset. 1893 Rdef_offset_addr = R5_ARG3, // Is going to contain address of default offset.
1886 Roffset = R31, // Might need to survive C call. 1894 Roffset = R31, // Might need to survive C call.
1887 Rvalue = R12_scratch2, 1895 Rvalue = R12_scratch2,
1891 // Align bcp. 1899 // Align bcp.
1892 __ addi(Rdef_offset_addr, R14_bcp, BytesPerInt); 1900 __ addi(Rdef_offset_addr, R14_bcp, BytesPerInt);
1893 __ clrrdi(Rdef_offset_addr, Rdef_offset_addr, log2_long((jlong)BytesPerInt)); 1901 __ clrrdi(Rdef_offset_addr, Rdef_offset_addr, log2_long((jlong)BytesPerInt));
1894 1902
1895 // Setup loop counter and limit. 1903 // Setup loop counter and limit.
1896 __ lwz(Rcount, BytesPerInt, Rdef_offset_addr); // Load count. 1904 __ get_u4(Rcount, Rdef_offset_addr, BytesPerInt, InterpreterMacroAssembler::Unsigned);
1897 __ addi(Rcurrent_pair, Rdef_offset_addr, 2 * BytesPerInt); // Rcurrent_pair now points to first pair. 1905 __ addi(Rcurrent_pair, Rdef_offset_addr, 2 * BytesPerInt); // Rcurrent_pair now points to first pair.
1898 1906
1899 // Set up search loop. 1907 __ mtctr(Rcount);
1900 __ cmpwi(CCR0, Rcount, 0); 1908 __ cmpwi(CCR0, Rcount, 0);
1901 __ beq(CCR0, Ldefault_case); 1909 __ bne(CCR0, Lloop_entry);
1902 1910
1903 __ mtctr(Rcount); 1911 // Default case
1904
1905 // linear table search
1906 __ bind(Lsearch_loop);
1907
1908 __ lwz(Rvalue, 0, Rcurrent_pair);
1909 __ lwa(Roffset, 1 * BytesPerInt, Rcurrent_pair);
1910
1911 __ cmpw(CCR0, Rvalue, Rcmp_value);
1912 __ beq(CCR0, Lfound);
1913
1914 __ addi(Rcurrent_pair, Rcurrent_pair, 2 * BytesPerInt);
1915 __ bdnz(Lsearch_loop);
1916
1917 // default case
1918 __ bind(Ldefault_case); 1912 __ bind(Ldefault_case);
1919 1913 __ get_u4(Roffset, Rdef_offset_addr, 0, InterpreterMacroAssembler::Signed);
1920 __ lwa(Roffset, 0, Rdef_offset_addr);
1921 if (ProfileInterpreter) { 1914 if (ProfileInterpreter) {
1922 __ profile_switch_default(Rdef_offset_addr, Rcount/* scratch */); 1915 __ profile_switch_default(Rdef_offset_addr, Rcount/* scratch */);
1923 __ b(Lcontinue_execution); 1916 }
1924 } 1917 __ b(Lcontinue_execution);
1925 1918
1926 // Entry found, skip Roffset bytecodes and continue. 1919 // Next iteration
1927 __ bind(Lfound); 1920 __ bind(Lsearch_loop);
1921 __ bdz(Ldefault_case);
1922 __ addi(Rcurrent_pair, Rcurrent_pair, 2 * BytesPerInt);
1923 __ bind(Lloop_entry);
1924 __ get_u4(Rvalue, Rcurrent_pair, 0, InterpreterMacroAssembler::Unsigned);
1925 __ cmpw(CCR0, Rvalue, Rcmp_value);
1926 __ bne(CCR0, Lsearch_loop);
1927
1928 // Found, load offset.
1929 __ get_u4(Roffset, Rcurrent_pair, BytesPerInt, InterpreterMacroAssembler::Signed);
1930 // Calculate case index and profile
1931 __ mfctr(Rcurrent_pair);
1928 if (ProfileInterpreter) { 1932 if (ProfileInterpreter) {
1929 // Calc the num of the pair we hit. Careful, Rcurrent_pair points 2 ints 1933 __ sub(Rcurrent_pair, Rcount, Rcurrent_pair);
1930 // beyond the actual current pair due to the auto update load above!
1931 __ sub(Rcurrent_pair, Rcurrent_pair, Rdef_offset_addr);
1932 __ addi(Rcurrent_pair, Rcurrent_pair, - 2 * BytesPerInt);
1933 __ srdi(Rcurrent_pair, Rcurrent_pair, LogBytesPerInt + 1);
1934 __ profile_switch_case(Rcurrent_pair, Rcount /*scratch*/, Rdef_offset_addr/*scratch*/, Rscratch); 1934 __ profile_switch_case(Rcurrent_pair, Rcount /*scratch*/, Rdef_offset_addr/*scratch*/, Rscratch);
1935 __ bind(Lcontinue_execution); 1935 }
1936 } 1936
1937 __ bind(Lcontinue_execution);
1937 __ add(R14_bcp, Roffset, R14_bcp); 1938 __ add(R14_bcp, Roffset, R14_bcp);
1938 __ dispatch_next(vtos); 1939 __ dispatch_next(vtos);
1939 } 1940 }
1940 1941
1941 // Table switch using binary search (value/offset pairs are ordered). 1942 // Table switch using binary search (value/offset pairs are ordered).
1987 __ addi(Rarray, R14_bcp, 3 * BytesPerInt); 1988 __ addi(Rarray, R14_bcp, 3 * BytesPerInt);
1988 __ clrrdi(Rarray, Rarray, log2_long((jlong)BytesPerInt)); 1989 __ clrrdi(Rarray, Rarray, log2_long((jlong)BytesPerInt));
1989 1990
1990 // initialize i & j 1991 // initialize i & j
1991 __ li(Ri,0); 1992 __ li(Ri,0);
1992 __ lwz(Rj, -BytesPerInt, Rarray); 1993 __ get_u4(Rj, Rarray, -BytesPerInt, InterpreterMacroAssembler::Unsigned);
1993 1994
1994 // and start. 1995 // and start.
1995 Label entry; 1996 Label entry;
1996 __ b(entry); 1997 __ b(entry);
1997 1998
2004 // j = h; 2005 // j = h;
2005 // } else { 2006 // } else {
2006 // i = h; 2007 // i = h;
2007 // } 2008 // }
2008 __ sldi(Rscratch, Rh, log_entry_size); 2009 __ sldi(Rscratch, Rh, log_entry_size);
2010 #if defined(VM_LITTLE_ENDIAN)
2011 __ lwbrx(Rscratch, Rscratch, Rarray);
2012 #else
2009 __ lwzx(Rscratch, Rscratch, Rarray); 2013 __ lwzx(Rscratch, Rscratch, Rarray);
2014 #endif
2010 2015
2011 // if (key < current value) 2016 // if (key < current value)
2012 // Rh = Rj 2017 // Rh = Rj
2013 // else 2018 // else
2014 // Rh = Ri 2019 // Rh = Ri
2036 __ mr(Rh, Ri); // Save index in i for profiling. 2041 __ mr(Rh, Ri); // Save index in i for profiling.
2037 } 2042 }
2038 // Ri = value offset 2043 // Ri = value offset
2039 __ sldi(Ri, Ri, log_entry_size); 2044 __ sldi(Ri, Ri, log_entry_size);
2040 __ add(Ri, Ri, Rarray); 2045 __ add(Ri, Ri, Rarray);
2041 __ lwz(Rscratch, 0, Ri); 2046 __ get_u4(Rscratch, Ri, 0, InterpreterMacroAssembler::Unsigned);
2042 2047
2043 Label not_found; 2048 Label not_found;
2044 // Ri = offset offset 2049 // Ri = offset offset
2045 __ cmpw(CCR0, Rkey, Rscratch); 2050 __ cmpw(CCR0, Rkey, Rscratch);
2046 __ beq(CCR0, not_found); 2051 __ beq(CCR0, not_found);
2047 // entry not found -> j = default offset 2052 // entry not found -> j = default offset
2048 __ lwz(Rj, -2 * BytesPerInt, Rarray); 2053 __ get_u4(Rj, Rarray, -2 * BytesPerInt, InterpreterMacroAssembler::Unsigned);
2049 __ b(default_case); 2054 __ b(default_case);
2050 2055
2051 __ bind(not_found); 2056 __ bind(not_found);
2052 // entry found -> j = offset 2057 // entry found -> j = offset
2053 __ profile_switch_case(Rh, Rj, Rscratch, Rkey); 2058 __ profile_switch_case(Rh, Rj, Rscratch, Rkey);
2054 __ lwz(Rj, BytesPerInt, Ri); 2059 __ get_u4(Rj, Ri, BytesPerInt, InterpreterMacroAssembler::Unsigned);
2055 2060
2056 if (ProfileInterpreter) { 2061 if (ProfileInterpreter) {
2057 __ b(continue_execution); 2062 __ b(continue_execution);
2058 } 2063 }
2059 2064
2144 __ get_cache_and_index_at_bcp(Rcache, 1, index_size); 2149 __ get_cache_and_index_at_bcp(Rcache, 1, index_size);
2145 Label Lresolved, Ldone; 2150 Label Lresolved, Ldone;
2146 2151
2147 assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); 2152 assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
2148 // We are resolved if the indices offset contains the current bytecode. 2153 // We are resolved if the indices offset contains the current bytecode.
2149 // Big Endian: 2154 #if defined(VM_LITTLE_ENDIAN)
2155 __ lbz(Rscratch, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + byte_no + 1, Rcache);
2156 #else
2150 __ lbz(Rscratch, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 7 - (byte_no + 1), Rcache); 2157 __ lbz(Rscratch, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 7 - (byte_no + 1), Rcache);
2158 #endif
2151 // Acquire by cmp-br-isync (see below). 2159 // Acquire by cmp-br-isync (see below).
2152 __ cmpdi(CCR0, Rscratch, (int)bytecode()); 2160 __ cmpdi(CCR0, Rscratch, (int)bytecode());
2153 __ beq(CCR0, Lresolved); 2161 __ beq(CCR0, Lresolved);
2154 2162
2155 address entry = NULL; 2163 address entry = NULL;