Mercurial > hg > graal-jvmci-8
comparison src/cpu/x86/vm/stubGenerator_x86_64.cpp @ 405:2649e5276dd7
6532536: Optimize arraycopy stubs for Intel cpus
Summary: Use SSE2 movdqu in arraycopy stubs on the newest Intel CPUs
Reviewed-by: rasbold
author | kvn |
---|---|
date | Tue, 14 Oct 2008 15:10:26 -0700 |
parents | f8199438385b |
children | db4caa99ef11 |
comparison
equal
deleted
inserted
replaced
404:78c058bc5cdc | 405:2649e5276dd7 |
---|---|
1249 ShouldNotReachHere(); | 1249 ShouldNotReachHere(); |
1250 | 1250 |
1251 } | 1251 } |
1252 } | 1252 } |
1253 | 1253 |
1254 | |
1254 // Copy big chunks forward | 1255 // Copy big chunks forward |
1255 // | 1256 // |
1256 // Inputs: | 1257 // Inputs: |
1257 // end_from - source array end address | 1258 // end_from - source array end address |
1258 // end_to - destination array end address | 1259 // end_to - destination array end address |
1266 Label& L_copy_32_bytes, Label& L_copy_8_bytes) { | 1267 Label& L_copy_32_bytes, Label& L_copy_8_bytes) { |
1267 DEBUG_ONLY(__ stop("enter at entry label, not here")); | 1268 DEBUG_ONLY(__ stop("enter at entry label, not here")); |
1268 Label L_loop; | 1269 Label L_loop; |
1269 __ align(16); | 1270 __ align(16); |
1270 __ BIND(L_loop); | 1271 __ BIND(L_loop); |
1271 __ movq(to, Address(end_from, qword_count, Address::times_8, -24)); | 1272 if(UseUnalignedLoadStores) { |
1272 __ movq(Address(end_to, qword_count, Address::times_8, -24), to); | 1273 __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24)); |
1273 __ movq(to, Address(end_from, qword_count, Address::times_8, -16)); | 1274 __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0); |
1274 __ movq(Address(end_to, qword_count, Address::times_8, -16), to); | 1275 __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8)); |
1275 __ movq(to, Address(end_from, qword_count, Address::times_8, - 8)); | 1276 __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1); |
1276 __ movq(Address(end_to, qword_count, Address::times_8, - 8), to); | 1277 |
1277 __ movq(to, Address(end_from, qword_count, Address::times_8, - 0)); | 1278 } else { |
1278 __ movq(Address(end_to, qword_count, Address::times_8, - 0), to); | 1279 __ movq(to, Address(end_from, qword_count, Address::times_8, -24)); |
1280 __ movq(Address(end_to, qword_count, Address::times_8, -24), to); | |
1281 __ movq(to, Address(end_from, qword_count, Address::times_8, -16)); | |
1282 __ movq(Address(end_to, qword_count, Address::times_8, -16), to); | |
1283 __ movq(to, Address(end_from, qword_count, Address::times_8, - 8)); | |
1284 __ movq(Address(end_to, qword_count, Address::times_8, - 8), to); | |
1285 __ movq(to, Address(end_from, qword_count, Address::times_8, - 0)); | |
1286 __ movq(Address(end_to, qword_count, Address::times_8, - 0), to); | |
1287 } | |
1279 __ BIND(L_copy_32_bytes); | 1288 __ BIND(L_copy_32_bytes); |
1280 __ addptr(qword_count, 4); | 1289 __ addptr(qword_count, 4); |
1281 __ jcc(Assembler::lessEqual, L_loop); | 1290 __ jcc(Assembler::lessEqual, L_loop); |
1282 __ subptr(qword_count, 4); | 1291 __ subptr(qword_count, 4); |
1283 __ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords | 1292 __ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords |
1299 Label& L_copy_32_bytes, Label& L_copy_8_bytes) { | 1308 Label& L_copy_32_bytes, Label& L_copy_8_bytes) { |
1300 DEBUG_ONLY(__ stop("enter at entry label, not here")); | 1309 DEBUG_ONLY(__ stop("enter at entry label, not here")); |
1301 Label L_loop; | 1310 Label L_loop; |
1302 __ align(16); | 1311 __ align(16); |
1303 __ BIND(L_loop); | 1312 __ BIND(L_loop); |
1304 __ movq(to, Address(from, qword_count, Address::times_8, 24)); | 1313 if(UseUnalignedLoadStores) { |
1305 __ movq(Address(dest, qword_count, Address::times_8, 24), to); | 1314 __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16)); |
1306 __ movq(to, Address(from, qword_count, Address::times_8, 16)); | 1315 __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0); |
1307 __ movq(Address(dest, qword_count, Address::times_8, 16), to); | 1316 __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 0)); |
1308 __ movq(to, Address(from, qword_count, Address::times_8, 8)); | 1317 __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm1); |
1309 __ movq(Address(dest, qword_count, Address::times_8, 8), to); | 1318 |
1310 __ movq(to, Address(from, qword_count, Address::times_8, 0)); | 1319 } else { |
1311 __ movq(Address(dest, qword_count, Address::times_8, 0), to); | 1320 __ movq(to, Address(from, qword_count, Address::times_8, 24)); |
1321 __ movq(Address(dest, qword_count, Address::times_8, 24), to); | |
1322 __ movq(to, Address(from, qword_count, Address::times_8, 16)); | |
1323 __ movq(Address(dest, qword_count, Address::times_8, 16), to); | |
1324 __ movq(to, Address(from, qword_count, Address::times_8, 8)); | |
1325 __ movq(Address(dest, qword_count, Address::times_8, 8), to); | |
1326 __ movq(to, Address(from, qword_count, Address::times_8, 0)); | |
1327 __ movq(Address(dest, qword_count, Address::times_8, 0), to); | |
1328 } | |
1312 __ BIND(L_copy_32_bytes); | 1329 __ BIND(L_copy_32_bytes); |
1313 __ subptr(qword_count, 4); | 1330 __ subptr(qword_count, 4); |
1314 __ jcc(Assembler::greaterEqual, L_loop); | 1331 __ jcc(Assembler::greaterEqual, L_loop); |
1315 __ addptr(qword_count, 4); | 1332 __ addptr(qword_count, 4); |
1316 __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords | 1333 __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords |