comparison src/cpu/x86/vm/stubGenerator_x86_64.cpp @ 405:2649e5276dd7

6532536: Optimize arraycopy stubs for Intel CPUs. Summary: Use SSE2 movdqu in arraycopy stubs on the newest Intel CPUs. Reviewed-by: rasbold
author kvn
date Tue, 14 Oct 2008 15:10:26 -0700
parents f8199438385b
children db4caa99ef11
comparison
equal deleted inserted replaced
404:78c058bc5cdc 405:2649e5276dd7
1249 ShouldNotReachHere(); 1249 ShouldNotReachHere();
1250 1250
1251 } 1251 }
1252 } 1252 }
1253 1253
1254
1254 // Copy big chunks forward 1255 // Copy big chunks forward
1255 // 1256 //
1256 // Inputs: 1257 // Inputs:
1257 // end_from - source array end address 1258 // end_from - source array end address
1258 // end_to - destination array end address 1259 // end_to - destination array end address
1266 Label& L_copy_32_bytes, Label& L_copy_8_bytes) { 1267 Label& L_copy_32_bytes, Label& L_copy_8_bytes) {
1267 DEBUG_ONLY(__ stop("enter at entry label, not here")); 1268 DEBUG_ONLY(__ stop("enter at entry label, not here"));
1268 Label L_loop; 1269 Label L_loop;
1269 __ align(16); 1270 __ align(16);
1270 __ BIND(L_loop); 1271 __ BIND(L_loop);
1271 __ movq(to, Address(end_from, qword_count, Address::times_8, -24)); 1272 if(UseUnalignedLoadStores) {
1272 __ movq(Address(end_to, qword_count, Address::times_8, -24), to); 1273 __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
1273 __ movq(to, Address(end_from, qword_count, Address::times_8, -16)); 1274 __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
1274 __ movq(Address(end_to, qword_count, Address::times_8, -16), to); 1275 __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
1275 __ movq(to, Address(end_from, qword_count, Address::times_8, - 8)); 1276 __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
1276 __ movq(Address(end_to, qword_count, Address::times_8, - 8), to); 1277
1277 __ movq(to, Address(end_from, qword_count, Address::times_8, - 0)); 1278 } else {
1278 __ movq(Address(end_to, qword_count, Address::times_8, - 0), to); 1279 __ movq(to, Address(end_from, qword_count, Address::times_8, -24));
1280 __ movq(Address(end_to, qword_count, Address::times_8, -24), to);
1281 __ movq(to, Address(end_from, qword_count, Address::times_8, -16));
1282 __ movq(Address(end_to, qword_count, Address::times_8, -16), to);
1283 __ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
1284 __ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
1285 __ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
1286 __ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
1287 }
1279 __ BIND(L_copy_32_bytes); 1288 __ BIND(L_copy_32_bytes);
1280 __ addptr(qword_count, 4); 1289 __ addptr(qword_count, 4);
1281 __ jcc(Assembler::lessEqual, L_loop); 1290 __ jcc(Assembler::lessEqual, L_loop);
1282 __ subptr(qword_count, 4); 1291 __ subptr(qword_count, 4);
1283 __ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords 1292 __ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords
1299 Label& L_copy_32_bytes, Label& L_copy_8_bytes) { 1308 Label& L_copy_32_bytes, Label& L_copy_8_bytes) {
1300 DEBUG_ONLY(__ stop("enter at entry label, not here")); 1309 DEBUG_ONLY(__ stop("enter at entry label, not here"));
1301 Label L_loop; 1310 Label L_loop;
1302 __ align(16); 1311 __ align(16);
1303 __ BIND(L_loop); 1312 __ BIND(L_loop);
1304 __ movq(to, Address(from, qword_count, Address::times_8, 24)); 1313 if(UseUnalignedLoadStores) {
1305 __ movq(Address(dest, qword_count, Address::times_8, 24), to); 1314 __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
1306 __ movq(to, Address(from, qword_count, Address::times_8, 16)); 1315 __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
1307 __ movq(Address(dest, qword_count, Address::times_8, 16), to); 1316 __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
1308 __ movq(to, Address(from, qword_count, Address::times_8, 8)); 1317 __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
1309 __ movq(Address(dest, qword_count, Address::times_8, 8), to); 1318
1310 __ movq(to, Address(from, qword_count, Address::times_8, 0)); 1319 } else {
1311 __ movq(Address(dest, qword_count, Address::times_8, 0), to); 1320 __ movq(to, Address(from, qword_count, Address::times_8, 24));
1321 __ movq(Address(dest, qword_count, Address::times_8, 24), to);
1322 __ movq(to, Address(from, qword_count, Address::times_8, 16));
1323 __ movq(Address(dest, qword_count, Address::times_8, 16), to);
1324 __ movq(to, Address(from, qword_count, Address::times_8, 8));
1325 __ movq(Address(dest, qword_count, Address::times_8, 8), to);
1326 __ movq(to, Address(from, qword_count, Address::times_8, 0));
1327 __ movq(Address(dest, qword_count, Address::times_8, 0), to);
1328 }
1312 __ BIND(L_copy_32_bytes); 1329 __ BIND(L_copy_32_bytes);
1313 __ subptr(qword_count, 4); 1330 __ subptr(qword_count, 4);
1314 __ jcc(Assembler::greaterEqual, L_loop); 1331 __ jcc(Assembler::greaterEqual, L_loop);
1315 __ addptr(qword_count, 4); 1332 __ addptr(qword_count, 4);
1316 __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords 1333 __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords