Mercurial > hg > truffle
comparison src/cpu/x86/vm/stubGenerator_x86_64.cpp @ 7475:e2e6bf86682c
8005544: Use 256bit YMM registers in arraycopy stubs on x86
Summary: Use YMM registers in arraycopy and array_fill stubs.
Reviewed-by: roland, twisti
field | value |
---|---|
author | kvn |
date | Thu, 03 Jan 2013 16:30:47 -0800 |
parents | 2c7f594145dc |
children | 989155e2d07a e961c11b85fe |
comparison legend: equal | deleted | inserted | replaced
7474:00af3a3a8df4 | 7475:e2e6bf86682c |
---|---|
1284 // Inputs: | 1284 // Inputs: |
1285 // end_from - source arrays end address | 1285 // end_from - source arrays end address |
1286 // end_to - destination array end address | 1286 // end_to - destination array end address |
1287 // qword_count - 64-bits element count, negative | 1287 // qword_count - 64-bits element count, negative |
1288 // to - scratch | 1288 // to - scratch |
1289 // L_copy_32_bytes - entry label | 1289 // L_copy_bytes - entry label |
1290 // L_copy_8_bytes - exit label | 1290 // L_copy_8_bytes - exit label |
1291 // | 1291 // |
1292 void copy_32_bytes_forward(Register end_from, Register end_to, | 1292 void copy_bytes_forward(Register end_from, Register end_to, |
1293 Register qword_count, Register to, | 1293 Register qword_count, Register to, |
1294 Label& L_copy_32_bytes, Label& L_copy_8_bytes) { | 1294 Label& L_copy_bytes, Label& L_copy_8_bytes) { |
1295 DEBUG_ONLY(__ stop("enter at entry label, not here")); | 1295 DEBUG_ONLY(__ stop("enter at entry label, not here")); |
1296 Label L_loop; | 1296 Label L_loop; |
1297 __ align(OptoLoopAlignment); | 1297 __ align(OptoLoopAlignment); |
1298 __ BIND(L_loop); | 1298 if (UseUnalignedLoadStores) { |
1299 if(UseUnalignedLoadStores) { | 1299 Label L_end; |
1300 __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24)); | 1300 // Copy 64-bytes per iteration |
1301 __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0); | 1301 __ BIND(L_loop); |
1302 __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8)); | 1302 if (UseAVX >= 2) { |
1303 __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1); | 1303 __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56)); |
1304 | 1304 __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0); |
1305 __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24)); | |
1306 __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1); | |
1307 } else { | |
1308 __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56)); | |
1309 __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0); | |
1310 __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40)); | |
1311 __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1); | |
1312 __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24)); | |
1313 __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2); | |
1314 __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8)); | |
1315 __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3); | |
1316 } | |
1317 __ BIND(L_copy_bytes); | |
1318 __ addptr(qword_count, 8); | |
1319 __ jcc(Assembler::lessEqual, L_loop); | |
1320 __ subptr(qword_count, 4); // sub(8) and add(4) | |
1321 __ jccb(Assembler::greater, L_end); | |
1322 // Copy trailing 32 bytes | |
1323 if (UseAVX >= 2) { | |
1324 __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24)); | |
1325 __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0); | |
1326 } else { | |
1327 __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24)); | |
1328 __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0); | |
1329 __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8)); | |
1330 __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1); | |
1331 } | |
1332 __ addptr(qword_count, 4); | |
1333 __ BIND(L_end); | |
1305 } else { | 1334 } else { |
1335 // Copy 32-bytes per iteration | |
1336 __ BIND(L_loop); | |
1306 __ movq(to, Address(end_from, qword_count, Address::times_8, -24)); | 1337 __ movq(to, Address(end_from, qword_count, Address::times_8, -24)); |
1307 __ movq(Address(end_to, qword_count, Address::times_8, -24), to); | 1338 __ movq(Address(end_to, qword_count, Address::times_8, -24), to); |
1308 __ movq(to, Address(end_from, qword_count, Address::times_8, -16)); | 1339 __ movq(to, Address(end_from, qword_count, Address::times_8, -16)); |
1309 __ movq(Address(end_to, qword_count, Address::times_8, -16), to); | 1340 __ movq(Address(end_to, qword_count, Address::times_8, -16), to); |
1310 __ movq(to, Address(end_from, qword_count, Address::times_8, - 8)); | 1341 __ movq(to, Address(end_from, qword_count, Address::times_8, - 8)); |
1311 __ movq(Address(end_to, qword_count, Address::times_8, - 8), to); | 1342 __ movq(Address(end_to, qword_count, Address::times_8, - 8), to); |
1312 __ movq(to, Address(end_from, qword_count, Address::times_8, - 0)); | 1343 __ movq(to, Address(end_from, qword_count, Address::times_8, - 0)); |
1313 __ movq(Address(end_to, qword_count, Address::times_8, - 0), to); | 1344 __ movq(Address(end_to, qword_count, Address::times_8, - 0), to); |
1314 } | 1345 |
1315 __ BIND(L_copy_32_bytes); | 1346 __ BIND(L_copy_bytes); |
1316 __ addptr(qword_count, 4); | 1347 __ addptr(qword_count, 4); |
1317 __ jcc(Assembler::lessEqual, L_loop); | 1348 __ jcc(Assembler::lessEqual, L_loop); |
1349 } | |
1318 __ subptr(qword_count, 4); | 1350 __ subptr(qword_count, 4); |
1319 __ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords | 1351 __ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords |
1320 } | 1352 } |
1321 | |
1322 | 1353 |
1323 // Copy big chunks backward | 1354 // Copy big chunks backward |
1324 // | 1355 // |
1325 // Inputs: | 1356 // Inputs: |
1326 // from - source arrays address | 1357 // from - source arrays address |
1327 // dest - destination array address | 1358 // dest - destination array address |
1328 // qword_count - 64-bits element count | 1359 // qword_count - 64-bits element count |
1329 // to - scratch | 1360 // to - scratch |
1330 // L_copy_32_bytes - entry label | 1361 // L_copy_bytes - entry label |
1331 // L_copy_8_bytes - exit label | 1362 // L_copy_8_bytes - exit label |
1332 // | 1363 // |
1333 void copy_32_bytes_backward(Register from, Register dest, | 1364 void copy_bytes_backward(Register from, Register dest, |
1334 Register qword_count, Register to, | 1365 Register qword_count, Register to, |
1335 Label& L_copy_32_bytes, Label& L_copy_8_bytes) { | 1366 Label& L_copy_bytes, Label& L_copy_8_bytes) { |
1336 DEBUG_ONLY(__ stop("enter at entry label, not here")); | 1367 DEBUG_ONLY(__ stop("enter at entry label, not here")); |
1337 Label L_loop; | 1368 Label L_loop; |
1338 __ align(OptoLoopAlignment); | 1369 __ align(OptoLoopAlignment); |
1339 __ BIND(L_loop); | 1370 if (UseUnalignedLoadStores) { |
1340 if(UseUnalignedLoadStores) { | 1371 Label L_end; |
1341 __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16)); | 1372 // Copy 64-bytes per iteration |
1342 __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0); | 1373 __ BIND(L_loop); |
1343 __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 0)); | 1374 if (UseAVX >= 2) { |
1344 __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm1); | 1375 __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32)); |
1345 | 1376 __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0); |
1377 __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8, 0)); | |
1378 __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1); | |
1379 } else { | |
1380 __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48)); | |
1381 __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0); | |
1382 __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32)); | |
1383 __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1); | |
1384 __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16)); | |
1385 __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2); | |
1386 __ movdqu(xmm3, Address(from, qword_count, Address::times_8, 0)); | |
1387 __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm3); | |
1388 } | |
1389 __ BIND(L_copy_bytes); | |
1390 __ subptr(qword_count, 8); | |
1391 __ jcc(Assembler::greaterEqual, L_loop); | |
1392 | |
1393 __ addptr(qword_count, 4); // add(8) and sub(4) | |
1394 __ jccb(Assembler::less, L_end); | |
1395 // Copy trailing 32 bytes | |
1396 if (UseAVX >= 2) { | |
1397 __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0)); | |
1398 __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm0); | |
1399 } else { | |
1400 __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16)); | |
1401 __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0); | |
1402 __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 0)); | |
1403 __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm1); | |
1404 } | |
1405 __ subptr(qword_count, 4); | |
1406 __ BIND(L_end); | |
1346 } else { | 1407 } else { |
1408 // Copy 32-bytes per iteration | |
1409 __ BIND(L_loop); | |
1347 __ movq(to, Address(from, qword_count, Address::times_8, 24)); | 1410 __ movq(to, Address(from, qword_count, Address::times_8, 24)); |
1348 __ movq(Address(dest, qword_count, Address::times_8, 24), to); | 1411 __ movq(Address(dest, qword_count, Address::times_8, 24), to); |
1349 __ movq(to, Address(from, qword_count, Address::times_8, 16)); | 1412 __ movq(to, Address(from, qword_count, Address::times_8, 16)); |
1350 __ movq(Address(dest, qword_count, Address::times_8, 16), to); | 1413 __ movq(Address(dest, qword_count, Address::times_8, 16), to); |
1351 __ movq(to, Address(from, qword_count, Address::times_8, 8)); | 1414 __ movq(to, Address(from, qword_count, Address::times_8, 8)); |
1352 __ movq(Address(dest, qword_count, Address::times_8, 8), to); | 1415 __ movq(Address(dest, qword_count, Address::times_8, 8), to); |
1353 __ movq(to, Address(from, qword_count, Address::times_8, 0)); | 1416 __ movq(to, Address(from, qword_count, Address::times_8, 0)); |
1354 __ movq(Address(dest, qword_count, Address::times_8, 0), to); | 1417 __ movq(Address(dest, qword_count, Address::times_8, 0), to); |
1355 } | 1418 |
1356 __ BIND(L_copy_32_bytes); | 1419 __ BIND(L_copy_bytes); |
1357 __ subptr(qword_count, 4); | 1420 __ subptr(qword_count, 4); |
1358 __ jcc(Assembler::greaterEqual, L_loop); | 1421 __ jcc(Assembler::greaterEqual, L_loop); |
1422 } | |
1359 __ addptr(qword_count, 4); | 1423 __ addptr(qword_count, 4); |
1360 __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords | 1424 __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords |
1361 } | 1425 } |
1362 | 1426 |
1363 | 1427 |
1383 address generate_disjoint_byte_copy(bool aligned, address* entry, const char *name) { | 1447 address generate_disjoint_byte_copy(bool aligned, address* entry, const char *name) { |
1384 __ align(CodeEntryAlignment); | 1448 __ align(CodeEntryAlignment); |
1385 StubCodeMark mark(this, "StubRoutines", name); | 1449 StubCodeMark mark(this, "StubRoutines", name); |
1386 address start = __ pc(); | 1450 address start = __ pc(); |
1387 | 1451 |
1388 Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes; | 1452 Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes; |
1389 Label L_copy_byte, L_exit; | 1453 Label L_copy_byte, L_exit; |
1390 const Register from = rdi; // source array address | 1454 const Register from = rdi; // source array address |
1391 const Register to = rsi; // destination array address | 1455 const Register to = rsi; // destination array address |
1392 const Register count = rdx; // elements count | 1456 const Register count = rdx; // elements count |
1393 const Register byte_count = rcx; | 1457 const Register byte_count = rcx; |
1415 | 1479 |
1416 // Copy from low to high addresses. Use 'to' as scratch. | 1480 // Copy from low to high addresses. Use 'to' as scratch. |
1417 __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); | 1481 __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); |
1418 __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); | 1482 __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); |
1419 __ negptr(qword_count); // make the count negative | 1483 __ negptr(qword_count); // make the count negative |
1420 __ jmp(L_copy_32_bytes); | 1484 __ jmp(L_copy_bytes); |
1421 | 1485 |
1422 // Copy trailing qwords | 1486 // Copy trailing qwords |
1423 __ BIND(L_copy_8_bytes); | 1487 __ BIND(L_copy_8_bytes); |
1424 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); | 1488 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); |
1425 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); | 1489 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); |
1458 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free | 1522 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free |
1459 __ xorptr(rax, rax); // return 0 | 1523 __ xorptr(rax, rax); // return 0 |
1460 __ leave(); // required for proper stackwalking of RuntimeStub frame | 1524 __ leave(); // required for proper stackwalking of RuntimeStub frame |
1461 __ ret(0); | 1525 __ ret(0); |
1462 | 1526 |
1463 // Copy in 32-bytes chunks | 1527 // Copy in multi-bytes chunks |
1464 copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); | 1528 copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); |
1465 __ jmp(L_copy_4_bytes); | 1529 __ jmp(L_copy_4_bytes); |
1466 | 1530 |
1467 return start; | 1531 return start; |
1468 } | 1532 } |
1469 | 1533 |
1486 address* entry, const char *name) { | 1550 address* entry, const char *name) { |
1487 __ align(CodeEntryAlignment); | 1551 __ align(CodeEntryAlignment); |
1488 StubCodeMark mark(this, "StubRoutines", name); | 1552 StubCodeMark mark(this, "StubRoutines", name); |
1489 address start = __ pc(); | 1553 address start = __ pc(); |
1490 | 1554 |
1491 Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes; | 1555 Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes; |
1492 const Register from = rdi; // source array address | 1556 const Register from = rdi; // source array address |
1493 const Register to = rsi; // destination array address | 1557 const Register to = rsi; // destination array address |
1494 const Register count = rdx; // elements count | 1558 const Register count = rdx; // elements count |
1495 const Register byte_count = rcx; | 1559 const Register byte_count = rcx; |
1496 const Register qword_count = count; | 1560 const Register qword_count = count; |
1529 __ movw(Address(to, byte_count, Address::times_1, -2), rax); | 1593 __ movw(Address(to, byte_count, Address::times_1, -2), rax); |
1530 | 1594 |
1531 // Check for and copy trailing dword | 1595 // Check for and copy trailing dword |
1532 __ BIND(L_copy_4_bytes); | 1596 __ BIND(L_copy_4_bytes); |
1533 __ testl(byte_count, 4); | 1597 __ testl(byte_count, 4); |
1534 __ jcc(Assembler::zero, L_copy_32_bytes); | 1598 __ jcc(Assembler::zero, L_copy_bytes); |
1535 __ movl(rax, Address(from, qword_count, Address::times_8)); | 1599 __ movl(rax, Address(from, qword_count, Address::times_8)); |
1536 __ movl(Address(to, qword_count, Address::times_8), rax); | 1600 __ movl(Address(to, qword_count, Address::times_8), rax); |
1537 __ jmp(L_copy_32_bytes); | 1601 __ jmp(L_copy_bytes); |
1538 | 1602 |
1539 // Copy trailing qwords | 1603 // Copy trailing qwords |
1540 __ BIND(L_copy_8_bytes); | 1604 __ BIND(L_copy_8_bytes); |
1541 __ movq(rax, Address(from, qword_count, Address::times_8, -8)); | 1605 __ movq(rax, Address(from, qword_count, Address::times_8, -8)); |
1542 __ movq(Address(to, qword_count, Address::times_8, -8), rax); | 1606 __ movq(Address(to, qword_count, Address::times_8, -8), rax); |
1547 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free | 1611 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free |
1548 __ xorptr(rax, rax); // return 0 | 1612 __ xorptr(rax, rax); // return 0 |
1549 __ leave(); // required for proper stackwalking of RuntimeStub frame | 1613 __ leave(); // required for proper stackwalking of RuntimeStub frame |
1550 __ ret(0); | 1614 __ ret(0); |
1551 | 1615 |
1552 // Copy in 32-bytes chunks | 1616 // Copy in multi-bytes chunks |
1553 copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); | 1617 copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); |
1554 | 1618 |
1555 restore_arg_regs(); | 1619 restore_arg_regs(); |
1556 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free | 1620 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free |
1557 __ xorptr(rax, rax); // return 0 | 1621 __ xorptr(rax, rax); // return 0 |
1558 __ leave(); // required for proper stackwalking of RuntimeStub frame | 1622 __ leave(); // required for proper stackwalking of RuntimeStub frame |
1583 address generate_disjoint_short_copy(bool aligned, address *entry, const char *name) { | 1647 address generate_disjoint_short_copy(bool aligned, address *entry, const char *name) { |
1584 __ align(CodeEntryAlignment); | 1648 __ align(CodeEntryAlignment); |
1585 StubCodeMark mark(this, "StubRoutines", name); | 1649 StubCodeMark mark(this, "StubRoutines", name); |
1586 address start = __ pc(); | 1650 address start = __ pc(); |
1587 | 1651 |
1588 Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes,L_copy_2_bytes,L_exit; | 1652 Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes,L_copy_2_bytes,L_exit; |
1589 const Register from = rdi; // source array address | 1653 const Register from = rdi; // source array address |
1590 const Register to = rsi; // destination array address | 1654 const Register to = rsi; // destination array address |
1591 const Register count = rdx; // elements count | 1655 const Register count = rdx; // elements count |
1592 const Register word_count = rcx; | 1656 const Register word_count = rcx; |
1593 const Register qword_count = count; | 1657 const Register qword_count = count; |
1614 | 1678 |
1615 // Copy from low to high addresses. Use 'to' as scratch. | 1679 // Copy from low to high addresses. Use 'to' as scratch. |
1616 __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); | 1680 __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); |
1617 __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); | 1681 __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); |
1618 __ negptr(qword_count); | 1682 __ negptr(qword_count); |
1619 __ jmp(L_copy_32_bytes); | 1683 __ jmp(L_copy_bytes); |
1620 | 1684 |
1621 // Copy trailing qwords | 1685 // Copy trailing qwords |
1622 __ BIND(L_copy_8_bytes); | 1686 __ BIND(L_copy_8_bytes); |
1623 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); | 1687 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); |
1624 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); | 1688 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); |
1650 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free | 1714 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free |
1651 __ xorptr(rax, rax); // return 0 | 1715 __ xorptr(rax, rax); // return 0 |
1652 __ leave(); // required for proper stackwalking of RuntimeStub frame | 1716 __ leave(); // required for proper stackwalking of RuntimeStub frame |
1653 __ ret(0); | 1717 __ ret(0); |
1654 | 1718 |
1655 // Copy in 32-bytes chunks | 1719 // Copy in multi-bytes chunks |
1656 copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); | 1720 copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); |
1657 __ jmp(L_copy_4_bytes); | 1721 __ jmp(L_copy_4_bytes); |
1658 | 1722 |
1659 return start; | 1723 return start; |
1660 } | 1724 } |
1661 | 1725 |
1698 address *entry, const char *name) { | 1762 address *entry, const char *name) { |
1699 __ align(CodeEntryAlignment); | 1763 __ align(CodeEntryAlignment); |
1700 StubCodeMark mark(this, "StubRoutines", name); | 1764 StubCodeMark mark(this, "StubRoutines", name); |
1701 address start = __ pc(); | 1765 address start = __ pc(); |
1702 | 1766 |
1703 Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes; | 1767 Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes; |
1704 const Register from = rdi; // source array address | 1768 const Register from = rdi; // source array address |
1705 const Register to = rsi; // destination array address | 1769 const Register to = rsi; // destination array address |
1706 const Register count = rdx; // elements count | 1770 const Register count = rdx; // elements count |
1707 const Register word_count = rcx; | 1771 const Register word_count = rcx; |
1708 const Register qword_count = count; | 1772 const Register qword_count = count; |
1733 __ movw(Address(to, word_count, Address::times_2, -2), rax); | 1797 __ movw(Address(to, word_count, Address::times_2, -2), rax); |
1734 | 1798 |
1735 // Check for and copy trailing dword | 1799 // Check for and copy trailing dword |
1736 __ BIND(L_copy_4_bytes); | 1800 __ BIND(L_copy_4_bytes); |
1737 __ testl(word_count, 2); | 1801 __ testl(word_count, 2); |
1738 __ jcc(Assembler::zero, L_copy_32_bytes); | 1802 __ jcc(Assembler::zero, L_copy_bytes); |
1739 __ movl(rax, Address(from, qword_count, Address::times_8)); | 1803 __ movl(rax, Address(from, qword_count, Address::times_8)); |
1740 __ movl(Address(to, qword_count, Address::times_8), rax); | 1804 __ movl(Address(to, qword_count, Address::times_8), rax); |
1741 __ jmp(L_copy_32_bytes); | 1805 __ jmp(L_copy_bytes); |
1742 | 1806 |
1743 // Copy trailing qwords | 1807 // Copy trailing qwords |
1744 __ BIND(L_copy_8_bytes); | 1808 __ BIND(L_copy_8_bytes); |
1745 __ movq(rax, Address(from, qword_count, Address::times_8, -8)); | 1809 __ movq(rax, Address(from, qword_count, Address::times_8, -8)); |
1746 __ movq(Address(to, qword_count, Address::times_8, -8), rax); | 1810 __ movq(Address(to, qword_count, Address::times_8, -8), rax); |
1751 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free | 1815 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free |
1752 __ xorptr(rax, rax); // return 0 | 1816 __ xorptr(rax, rax); // return 0 |
1753 __ leave(); // required for proper stackwalking of RuntimeStub frame | 1817 __ leave(); // required for proper stackwalking of RuntimeStub frame |
1754 __ ret(0); | 1818 __ ret(0); |
1755 | 1819 |
1756 // Copy in 32-bytes chunks | 1820 // Copy in multi-bytes chunks |
1757 copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); | 1821 copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); |
1758 | 1822 |
1759 restore_arg_regs(); | 1823 restore_arg_regs(); |
1760 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free | 1824 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free |
1761 __ xorptr(rax, rax); // return 0 | 1825 __ xorptr(rax, rax); // return 0 |
1762 __ leave(); // required for proper stackwalking of RuntimeStub frame | 1826 __ leave(); // required for proper stackwalking of RuntimeStub frame |
1788 const char *name, bool dest_uninitialized = false) { | 1852 const char *name, bool dest_uninitialized = false) { |
1789 __ align(CodeEntryAlignment); | 1853 __ align(CodeEntryAlignment); |
1790 StubCodeMark mark(this, "StubRoutines", name); | 1854 StubCodeMark mark(this, "StubRoutines", name); |
1791 address start = __ pc(); | 1855 address start = __ pc(); |
1792 | 1856 |
1793 Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit; | 1857 Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit; |
1794 const Register from = rdi; // source array address | 1858 const Register from = rdi; // source array address |
1795 const Register to = rsi; // destination array address | 1859 const Register to = rsi; // destination array address |
1796 const Register count = rdx; // elements count | 1860 const Register count = rdx; // elements count |
1797 const Register dword_count = rcx; | 1861 const Register dword_count = rcx; |
1798 const Register qword_count = count; | 1862 const Register qword_count = count; |
1824 | 1888 |
1825 // Copy from low to high addresses. Use 'to' as scratch. | 1889 // Copy from low to high addresses. Use 'to' as scratch. |
1826 __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); | 1890 __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); |
1827 __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); | 1891 __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); |
1828 __ negptr(qword_count); | 1892 __ negptr(qword_count); |
1829 __ jmp(L_copy_32_bytes); | 1893 __ jmp(L_copy_bytes); |
1830 | 1894 |
1831 // Copy trailing qwords | 1895 // Copy trailing qwords |
1832 __ BIND(L_copy_8_bytes); | 1896 __ BIND(L_copy_8_bytes); |
1833 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); | 1897 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); |
1834 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); | 1898 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); |
1851 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free | 1915 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free |
1852 __ xorptr(rax, rax); // return 0 | 1916 __ xorptr(rax, rax); // return 0 |
1853 __ leave(); // required for proper stackwalking of RuntimeStub frame | 1917 __ leave(); // required for proper stackwalking of RuntimeStub frame |
1854 __ ret(0); | 1918 __ ret(0); |
1855 | 1919 |
1856 // Copy 32-bytes chunks | 1920 // Copy in multi-bytes chunks |
1857 copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); | 1921 copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); |
1858 __ jmp(L_copy_4_bytes); | 1922 __ jmp(L_copy_4_bytes); |
1859 | 1923 |
1860 return start; | 1924 return start; |
1861 } | 1925 } |
1862 | 1926 |
1880 bool dest_uninitialized = false) { | 1944 bool dest_uninitialized = false) { |
1881 __ align(CodeEntryAlignment); | 1945 __ align(CodeEntryAlignment); |
1882 StubCodeMark mark(this, "StubRoutines", name); | 1946 StubCodeMark mark(this, "StubRoutines", name); |
1883 address start = __ pc(); | 1947 address start = __ pc(); |
1884 | 1948 |
1885 Label L_copy_32_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit; | 1949 Label L_copy_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit; |
1886 const Register from = rdi; // source array address | 1950 const Register from = rdi; // source array address |
1887 const Register to = rsi; // destination array address | 1951 const Register to = rsi; // destination array address |
1888 const Register count = rdx; // elements count | 1952 const Register count = rdx; // elements count |
1889 const Register dword_count = rcx; | 1953 const Register dword_count = rcx; |
1890 const Register qword_count = count; | 1954 const Register qword_count = count; |
1914 | 1978 |
1915 // Copy from high to low addresses. Use 'to' as scratch. | 1979 // Copy from high to low addresses. Use 'to' as scratch. |
1916 | 1980 |
1917 // Check for and copy trailing dword | 1981 // Check for and copy trailing dword |
1918 __ testl(dword_count, 1); | 1982 __ testl(dword_count, 1); |
1919 __ jcc(Assembler::zero, L_copy_32_bytes); | 1983 __ jcc(Assembler::zero, L_copy_bytes); |
1920 __ movl(rax, Address(from, dword_count, Address::times_4, -4)); | 1984 __ movl(rax, Address(from, dword_count, Address::times_4, -4)); |
1921 __ movl(Address(to, dword_count, Address::times_4, -4), rax); | 1985 __ movl(Address(to, dword_count, Address::times_4, -4), rax); |
1922 __ jmp(L_copy_32_bytes); | 1986 __ jmp(L_copy_bytes); |
1923 | 1987 |
1924 // Copy trailing qwords | 1988 // Copy trailing qwords |
1925 __ BIND(L_copy_8_bytes); | 1989 __ BIND(L_copy_8_bytes); |
1926 __ movq(rax, Address(from, qword_count, Address::times_8, -8)); | 1990 __ movq(rax, Address(from, qword_count, Address::times_8, -8)); |
1927 __ movq(Address(to, qword_count, Address::times_8, -8), rax); | 1991 __ movq(Address(to, qword_count, Address::times_8, -8), rax); |
1935 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free | 1999 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free |
1936 __ xorptr(rax, rax); // return 0 | 2000 __ xorptr(rax, rax); // return 0 |
1937 __ leave(); // required for proper stackwalking of RuntimeStub frame | 2001 __ leave(); // required for proper stackwalking of RuntimeStub frame |
1938 __ ret(0); | 2002 __ ret(0); |
1939 | 2003 |
1940 // Copy in 32-bytes chunks | 2004 // Copy in multi-bytes chunks |
1941 copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); | 2005 copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); |
1942 | 2006 |
1943 __ bind(L_exit); | 2007 __ bind(L_exit); |
1944 if (is_oop) { | 2008 if (is_oop) { |
1945 Register end_to = rdx; | 2009 Register end_to = rdx; |
1946 __ leaq(end_to, Address(to, dword_count, Address::times_4, -4)); | 2010 __ leaq(end_to, Address(to, dword_count, Address::times_4, -4)); |
1974 const char *name, bool dest_uninitialized = false) { | 2038 const char *name, bool dest_uninitialized = false) { |
1975 __ align(CodeEntryAlignment); | 2039 __ align(CodeEntryAlignment); |
1976 StubCodeMark mark(this, "StubRoutines", name); | 2040 StubCodeMark mark(this, "StubRoutines", name); |
1977 address start = __ pc(); | 2041 address start = __ pc(); |
1978 | 2042 |
1979 Label L_copy_32_bytes, L_copy_8_bytes, L_exit; | 2043 Label L_copy_bytes, L_copy_8_bytes, L_exit; |
1980 const Register from = rdi; // source array address | 2044 const Register from = rdi; // source array address |
1981 const Register to = rsi; // destination array address | 2045 const Register to = rsi; // destination array address |
1982 const Register qword_count = rdx; // elements count | 2046 const Register qword_count = rdx; // elements count |
1983 const Register end_from = from; // source array end address | 2047 const Register end_from = from; // source array end address |
1984 const Register end_to = rcx; // destination array end address | 2048 const Register end_to = rcx; // destination array end address |
2006 | 2070 |
2007 // Copy from low to high addresses. Use 'to' as scratch. | 2071 // Copy from low to high addresses. Use 'to' as scratch. |
2008 __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); | 2072 __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); |
2009 __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); | 2073 __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); |
2010 __ negptr(qword_count); | 2074 __ negptr(qword_count); |
2011 __ jmp(L_copy_32_bytes); | 2075 __ jmp(L_copy_bytes); |
2012 | 2076 |
2013 // Copy trailing qwords | 2077 // Copy trailing qwords |
2014 __ BIND(L_copy_8_bytes); | 2078 __ BIND(L_copy_8_bytes); |
2015 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); | 2079 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); |
2016 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); | 2080 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); |
2025 __ xorptr(rax, rax); // return 0 | 2089 __ xorptr(rax, rax); // return 0 |
2026 __ leave(); // required for proper stackwalking of RuntimeStub frame | 2090 __ leave(); // required for proper stackwalking of RuntimeStub frame |
2027 __ ret(0); | 2091 __ ret(0); |
2028 } | 2092 } |
2029 | 2093 |
2030 // Copy 64-byte chunks | 2094 // Copy in multi-bytes chunks |
2031 copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); | 2095 copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); |
2032 | 2096 |
2033 if (is_oop) { | 2097 if (is_oop) { |
2034 __ BIND(L_exit); | 2098 __ BIND(L_exit); |
2035 gen_write_ref_array_post_barrier(saved_to, end_to, rax); | 2099 gen_write_ref_array_post_barrier(saved_to, end_to, rax); |
2036 } | 2100 } |
2063 const char *name, bool dest_uninitialized = false) { | 2127 const char *name, bool dest_uninitialized = false) { |
2064 __ align(CodeEntryAlignment); | 2128 __ align(CodeEntryAlignment); |
2065 StubCodeMark mark(this, "StubRoutines", name); | 2129 StubCodeMark mark(this, "StubRoutines", name); |
2066 address start = __ pc(); | 2130 address start = __ pc(); |
2067 | 2131 |
2068 Label L_copy_32_bytes, L_copy_8_bytes, L_exit; | 2132 Label L_copy_bytes, L_copy_8_bytes, L_exit; |
2069 const Register from = rdi; // source array address | 2133 const Register from = rdi; // source array address |
2070 const Register to = rsi; // destination array address | 2134 const Register to = rsi; // destination array address |
2071 const Register qword_count = rdx; // elements count | 2135 const Register qword_count = rdx; // elements count |
2072 const Register saved_count = rcx; | 2136 const Register saved_count = rcx; |
2073 | 2137 |
2089 __ movptr(saved_count, qword_count); | 2153 __ movptr(saved_count, qword_count); |
2090 // No registers are destroyed by this call | 2154 // No registers are destroyed by this call |
2091 gen_write_ref_array_pre_barrier(to, saved_count, dest_uninitialized); | 2155 gen_write_ref_array_pre_barrier(to, saved_count, dest_uninitialized); |
2092 } | 2156 } |
2093 | 2157 |
2094 __ jmp(L_copy_32_bytes); | 2158 __ jmp(L_copy_bytes); |
2095 | 2159 |
2096 // Copy trailing qwords | 2160 // Copy trailing qwords |
2097 __ BIND(L_copy_8_bytes); | 2161 __ BIND(L_copy_8_bytes); |
2098 __ movq(rax, Address(from, qword_count, Address::times_8, -8)); | 2162 __ movq(rax, Address(from, qword_count, Address::times_8, -8)); |
2099 __ movq(Address(to, qword_count, Address::times_8, -8), rax); | 2163 __ movq(Address(to, qword_count, Address::times_8, -8), rax); |
2108 __ xorptr(rax, rax); // return 0 | 2172 __ xorptr(rax, rax); // return 0 |
2109 __ leave(); // required for proper stackwalking of RuntimeStub frame | 2173 __ leave(); // required for proper stackwalking of RuntimeStub frame |
2110 __ ret(0); | 2174 __ ret(0); |
2111 } | 2175 } |
2112 | 2176 |
2113 // Copy in 32-bytes chunks | 2177 // Copy in multi-bytes chunks |
2114 copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); | 2178 copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); |
2115 | 2179 |
2116 if (is_oop) { | 2180 if (is_oop) { |
2117 __ BIND(L_exit); | 2181 __ BIND(L_exit); |
2118 __ lea(rcx, Address(to, saved_count, Address::times_8, -8)); | 2182 __ lea(rcx, Address(to, saved_count, Address::times_8, -8)); |
2119 gen_write_ref_array_post_barrier(to, rcx, rax); | 2183 gen_write_ref_array_post_barrier(to, rcx, rax); |