graal-jvmci-8: comparison of src/cpu/x86/vm/macroAssembler_x86.cpp @ 17780:606acabe7b5c
8031320: Use Intel RTM instructions for locks
Summary: Use RTM for inflated locks and stack locks.
Reviewed-by: iveresov, twisti, roland, dcubed
author: kvn
date: Thu, 20 Mar 2014 17:49:27 -0700
parents: 4d4ea046d32a
children: 526acaf3626f
left column: 17778:a48e16541e6b (parent) | right column: 17780:606acabe7b5c (this changeset)
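Editor's note: this changeset layers Intel TSX/RTM lock elision onto HotSpot's fast_lock/fast_unlock paths. As orientation only, and not HotSpot code, here is a minimal sketch of the same try-the-transaction-then-fall-back pattern using the RTM compiler intrinsics from <immintrin.h>; the spinlock fallback, the retry budget of 5, and the function names are illustrative assumptions. Compile with -mrtm on RTM-capable hardware.

```cpp
#include <immintrin.h>   // _xbegin / _xend / _xabort and the _XABORT_* masks
#include <atomic>

static std::atomic<int> fallback_lock{0};          // 0 = free, 1 = held

static void take_fallback() {                      // plain spinlock slow path
  while (fallback_lock.exchange(1, std::memory_order_acquire) != 0)
    _mm_pause();
}
static void drop_fallback() {
  fallback_lock.store(0, std::memory_order_release);
}

void locked_increment(long& counter) {
  for (int retries = 5; retries > 0; --retries) {  // cf. RTMRetryCount
    unsigned status = _xbegin();
    if (status == _XBEGIN_STARTED) {
      // Reading the lock word subscribes it to the transaction's read set:
      // a concurrent real acquisition aborts us instead of racing us.
      if (fallback_lock.load(std::memory_order_relaxed) != 0)
        _xabort(0xff);                             // cannot elide a held lock
      ++counter;                                   // critical section
      _xend();                                     // commit atomically
      return;
    }
    if (!(status & (_XABORT_RETRY | _XABORT_CONFLICT)))
      break;                                       // non-transient abort cause
  }
  take_fallback();                                 // real lock as fallback
  ++counter;
  drop_fallback();
}
```

The HotSpot version below differs in one key respect: it keeps the transaction open across the whole Java critical section and commits it in fast_unlock, rather than inside a single helper as shown here.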
299 | 299 |
300 void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { | 300 void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { |
301 mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); | 301 mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); |
302 } | 302 } |
303 | 303 |
304 void MacroAssembler::movptr(Register dst, AddressLiteral src) { | 304 void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) { |
305 // scratch register is not used, | |
306 // it is defined to match the parameters of the 64-bit version of this method. |
305 if (src.is_lval()) { | 307 if (src.is_lval()) { |
306 mov_literal32(dst, (intptr_t)src.target(), src.rspec()); | 308 mov_literal32(dst, (intptr_t)src.target(), src.rspec()); |
307 } else { | 309 } else { |
308 movl(dst, as_Address(src)); | 310 movl(dst, as_Address(src)); |
309 } | 311 } |
611 if (value == 0) { ; return; } | 613 if (value == 0) { ; return; } |
612 if (value == 1 && UseIncDec) { decq(dst) ; return; } | 614 if (value == 1 && UseIncDec) { decq(dst) ; return; } |
613 /* else */ { subq(dst, value) ; return; } | 615 /* else */ { subq(dst, value) ; return; } |
614 } | 616 } |
615 | 617 |
618 void MacroAssembler::incrementq(AddressLiteral dst) { | |
619 if (reachable(dst)) { | |
620 incrementq(as_Address(dst)); | |
621 } else { | |
622 lea(rscratch1, dst); | |
623 incrementq(Address(rscratch1, 0)); | |
624 } | |
625 } | |
626 | |
616 void MacroAssembler::incrementq(Register reg, int value) { | 627 void MacroAssembler::incrementq(Register reg, int value) { |
617 if (value == min_jint) { addq(reg, value); return; } | 628 if (value == min_jint) { addq(reg, value); return; } |
618 if (value < 0) { decrementq(reg, -value); return; } | 629 if (value < 0) { decrementq(reg, -value); return; } |
619 if (value == 0) { ; return; } | 630 if (value == 0) { ; return; } |
620 if (value == 1 && UseIncDec) { incq(reg) ; return; } | 631 if (value == 1 && UseIncDec) { incq(reg) ; return; } |
679 void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { | 690 void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { |
680 mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate()); | 691 mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate()); |
681 movq(dst, rscratch1); | 692 movq(dst, rscratch1); |
682 } | 693 } |
683 | 694 |
684 void MacroAssembler::movptr(Register dst, AddressLiteral src) { | 695 void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) { |
685 if (src.is_lval()) { | 696 if (src.is_lval()) { |
686 mov_literal64(dst, (intptr_t)src.target(), src.rspec()); | 697 mov_literal64(dst, (intptr_t)src.target(), src.rspec()); |
687 } else { | 698 } else { |
688 if (reachable(src)) { | 699 if (reachable(src)) { |
689 movq(dst, as_Address(src)); | 700 movq(dst, as_Address(src)); |
690 } else { | 701 } else { |
691 lea(rscratch1, src); | 702 lea(scratch, src); |
692 movq(dst, Address(rscratch1,0)); | 703 movq(dst, Address(scratch, 0)); |
693 } | 704 } |
694 } | 705 } |
695 } | 706 } |
696 | 707 |
697 void MacroAssembler::movptr(ArrayAddress dst, Register src) { | 708 void MacroAssembler::movptr(ArrayAddress dst, Register src) { |
986 | 997 |
987 void MacroAssembler::andptr(Register dst, int32_t imm32) { | 998 void MacroAssembler::andptr(Register dst, int32_t imm32) { |
988 LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32)); | 999 LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32)); |
989 } | 1000 } |
990 | 1001 |
991 void MacroAssembler::atomic_incl(AddressLiteral counter_addr) { | 1002 void MacroAssembler::atomic_incl(Address counter_addr) { |
992 pushf(); | 1003 if (os::is_MP()) |
1004 lock(); | |
1005 incrementl(counter_addr); | |
1006 } | |
1007 | |
1008 void MacroAssembler::atomic_incl(AddressLiteral counter_addr, Register scr) { | |
993 if (reachable(counter_addr)) { | 1009 if (reachable(counter_addr)) { |
994 if (os::is_MP()) | 1010 atomic_incl(as_Address(counter_addr)); |
995 lock(); | 1011 } else { |
996 incrementl(as_Address(counter_addr)); | 1012 lea(scr, counter_addr); |
997 } else { | 1013 atomic_incl(Address(scr, 0)); |
998 lea(rscratch1, counter_addr); | 1014 } |
999 if (os::is_MP()) | 1015 } |
1000 lock(); | 1016 |
1001 incrementl(Address(rscratch1, 0)); | 1017 #ifdef _LP64 |
1002 } | 1018 void MacroAssembler::atomic_incq(Address counter_addr) { |
1003 popf(); | 1019 if (os::is_MP()) |
1004 } | 1020 lock(); |
1021 incrementq(counter_addr); | |
1022 } | |
1023 | |
1024 void MacroAssembler::atomic_incq(AddressLiteral counter_addr, Register scr) { | |
1025 if (reachable(counter_addr)) { | |
1026 atomic_incq(as_Address(counter_addr)); | |
1027 } else { | |
1028 lea(scr, counter_addr); | |
1029 atomic_incq(Address(scr, 0)); | |
1030 } | |
1031 } | |
1032 #endif | |
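Editor's note: the refactoring above splits the counter bump into a plain-Address form that emits just a lock-prefixed increment, plus AddressLiteral wrappers that route far addresses through a caller-chosen scratch register instead of hard-coding rscratch1; the old pushf/popf pair moves out to cond_inc32 near the end of this comparison. A minimal sketch of what the emitted instruction does, assuming GCC-style inline asm:

```cpp
#include <cstdint>

// Equivalent of "lock; incl [counter]": an atomic read-modify-write
// increment; the LOCK prefix makes it atomic across CPUs (os::is_MP()).
// Note that it clobbers EFLAGS, which is why cond_inc32 now saves flags.
static inline void atomic_incl(int32_t* counter) {
  __asm__ __volatile__("lock; incl %0" : "+m"(*counter) : : "cc");
}
```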
1005 | 1033 |
1006 // Writes to stack successive pages until offset reached to check for | 1034 // Writes to stack successive pages until offset reached to check for |
1007 // stack overflow + shadow pages. This clobbers tmp. | 1035 // stack overflow + shadow pages. This clobbers tmp. |
1008 void MacroAssembler::bang_stack_size(Register size, Register tmp) { | 1036 void MacroAssembler::bang_stack_size(Register size, Register tmp) { |
1009 movptr(tmp, rsp); | 1037 movptr(tmp, rsp); |
1272 cmpptr(temp_reg, markOopDesc::biased_lock_pattern); | 1300 cmpptr(temp_reg, markOopDesc::biased_lock_pattern); |
1273 jcc(Assembler::equal, done); | 1301 jcc(Assembler::equal, done); |
1274 } | 1302 } |
1275 | 1303 |
1276 #ifdef COMPILER2 | 1304 #ifdef COMPILER2 |
1305 | |
1306 #if INCLUDE_RTM_OPT | |
1307 | |
1308 // Update rtm_counters based on abort status | |
1309 // input: abort_status | |
1310 // rtm_counters (RTMLockingCounters*) | |
1311 // flags are killed | |
1312 void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters) { | |
1313 | |
1314 atomic_incptr(Address(rtm_counters, RTMLockingCounters::abort_count_offset())); | |
1315 if (PrintPreciseRTMLockingStatistics) { | |
1316 for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) { | |
1317 Label check_abort; | |
1318 testl(abort_status, (1<<i)); | |
1319 jccb(Assembler::equal, check_abort); | |
1320 atomic_incptr(Address(rtm_counters, RTMLockingCounters::abortX_count_offset() + (i * sizeof(uintx)))); | |
1321 bind(check_abort); | |
1322 } | |
1323 } | |
1324 } | |
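Editor's note: rtm_counters_update bumps one aggregate abort counter and, under -XX:+PrintPreciseRTMLockingStatistics, one counter per abort-cause bit of the RTM status word. The same bookkeeping in plain C++, as a sketch (the limit of 6 mirrors RTMLockingCounters::ABORT_STATUS_LIMIT; the names are illustrative):

```cpp
#include <atomic>

constexpr int kAbortStatusLimit = 6;          // bits 0..5 of the RTM status
static std::atomic<unsigned long> abort_count{0};
static std::atomic<unsigned long> abortX_count[kAbortStatusLimit];

void rtm_counters_update(unsigned abort_status) {
  abort_count.fetch_add(1, std::memory_order_relaxed);
  for (int i = 0; i < kAbortStatusLimit; i++) {
    if (abort_status & (1u << i))             // testl(abort_status, 1<<i)
      abortX_count[i].fetch_add(1, std::memory_order_relaxed);
  }
}
```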
1325 | |
1326 // Branch if ((random & (count-1)) != 0), count is 2^n |
1327 // tmp, scr and flags are killed | |
1328 void MacroAssembler::branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel) { | |
1329 assert(tmp == rax, ""); | |
1330 assert(scr == rdx, ""); | |
1331 rdtsc(); // modifies EDX:EAX | |
1332 andptr(tmp, count-1); | |
1333 jccb(Assembler::notZero, brLabel); | |
1334 } | |
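Editor's note: branch_on_random_using_rdtsc treats the low bits of the timestamp counter as a cheap pseudo-random source, so only about one in `count` executions falls through to the expensive counter update; requiring a power of two turns the modulo into a single `and`. A sketch with the `__rdtsc` intrinsic (GCC/Clang, <x86intrin.h>), under the same power-of-two assumption:

```cpp
#include <x86intrin.h>   // __rdtsc()

// Returns true roughly (count-1)/count of the time; count must be 2^n
// so that (x % count) can be computed as (x & (count-1)).
inline bool skip_this_sample(int count) {
  return (__rdtsc() & (count - 1)) != 0;     // cf. RTMTotalCountIncrRate
}
```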
1335 | |
1336 // Perform abort ratio calculation, set no_rtm bit if high ratio | |
1337 // input: rtm_counters_Reg (RTMLockingCounters* address) | |
1338 // tmpReg, rtm_counters_Reg and flags are killed | |
1339 void MacroAssembler::rtm_abort_ratio_calculation(Register tmpReg, | |
1340 Register rtm_counters_Reg, | |
1341 RTMLockingCounters* rtm_counters, | |
1342 Metadata* method_data) { | |
1343 Label L_done, L_check_always_rtm1, L_check_always_rtm2; | |
1344 | |
1345 if (RTMLockingCalculationDelay > 0) { | |
1346 // Delay calculation | |
1347 movptr(tmpReg, ExternalAddress((address) RTMLockingCounters::rtm_calculation_flag_addr()), tmpReg); | |
1348 testptr(tmpReg, tmpReg); | |
1349 jccb(Assembler::equal, L_done); | |
1350 } | |
1351 // Abort ratio calculation only if abort_count > RTMAbortThreshold | |
1352 // Aborted transactions = abort_count * 100 | |
1353 // All transactions = total_count * RTMTotalCountIncrRate | |
1354 // Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio) | |
1355 | |
1356 movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::abort_count_offset())); | |
1357 cmpptr(tmpReg, RTMAbortThreshold); | |
1358 jccb(Assembler::below, L_check_always_rtm2); | |
1359 imulptr(tmpReg, tmpReg, 100); | |
1360 | |
1361 Register scrReg = rtm_counters_Reg; | |
1362 movptr(scrReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset())); | |
1363 imulptr(scrReg, scrReg, RTMTotalCountIncrRate); | |
1364 imulptr(scrReg, scrReg, RTMAbortRatio); | |
1365 cmpptr(tmpReg, scrReg); | |
1366 jccb(Assembler::below, L_check_always_rtm1); | |
1367 if (method_data != NULL) { | |
1368 // set rtm_state to "no rtm" in MDO | |
1369 mov_metadata(tmpReg, method_data); | |
1370 if (os::is_MP()) { | |
1371 lock(); | |
1372 } | |
1373 orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), NoRTM); | |
1374 } | |
1375 jmpb(L_done); | |
1376 bind(L_check_always_rtm1); | |
1377 // Reload RTMLockingCounters* address | |
1378 lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters)); | |
1379 bind(L_check_always_rtm2); | |
1380 movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset())); | |
1381 cmpptr(tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate); | |
1382 jccb(Assembler::below, L_done); | |
1383 if (method_data != NULL) { | |
1384 // set rtm_state to "always rtm" in MDO | |
1385 mov_metadata(tmpReg, method_data); | |
1386 if (os::is_MP()) { | |
1387 lock(); | |
1388 } | |
1389 orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), UseRTM); | |
1390 } | |
1391 bind(L_done); | |
1392 } | |
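Editor's note: the routine above avoids division by cross-multiplying: it flags NoRTM when `abort_count * 100 >= total_count * RTMTotalCountIncrRate * RTMAbortRatio`, and flips to always-RTM once enough sampled transactions have accumulated. A C++ sketch of the decision; the flag values shown are illustrative stand-ins, not authoritative defaults:

```cpp
enum RtmState { NoRTM, UseRTM, ProfileRTM };

// Mirrors the control flow of rtm_abort_ratio_calculation above.
// All flag values below are illustrative assumptions.
RtmState next_rtm_state(unsigned long abort_count, unsigned long total_count) {
  const unsigned long RTMAbortThreshold     = 1000;  // min aborts before judging
  const unsigned long RTMTotalCountIncrRate = 64;    // sampling rate of total_count
  const unsigned long RTMAbortRatio         = 50;    // percent
  const unsigned long RTMLockingThreshold   = 10000; // txns before "always RTM"

  if (abort_count >= RTMAbortThreshold &&
      abort_count * 100 >= total_count * RTMTotalCountIncrRate * RTMAbortRatio)
    return NoRTM;                         // abort ratio too high: stop eliding
  if (total_count >= RTMLockingThreshold / RTMTotalCountIncrRate)
    return UseRTM;                        // enough clean history: always elide
  return ProfileRTM;                      // keep collecting data
}
```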
1393 | |
1394 // Update counters and perform abort ratio calculation | |
1395 // input: abort_status_Reg | |
1396 // rtm_counters_Reg, flags are killed | |
1397 void MacroAssembler::rtm_profiling(Register abort_status_Reg, | |
1398 Register rtm_counters_Reg, | |
1399 RTMLockingCounters* rtm_counters, | |
1400 Metadata* method_data, | |
1401 bool profile_rtm) { | |
1402 | |
1403 assert(rtm_counters != NULL, "should not be NULL when profiling RTM"); | |
1404 // update rtm counters based on rax value at abort | |
1405 // reads abort_status_Reg, updates flags | |
1406 lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters)); | |
1407 rtm_counters_update(abort_status_Reg, rtm_counters_Reg); | |
1408 if (profile_rtm) { | |
1409 // Save abort status because abort_status_Reg is used by following code. | |
1410 if (RTMRetryCount > 0) { | |
1411 push(abort_status_Reg); | |
1412 } | |
1413 assert(rtm_counters != NULL, "should not be NULL when profiling RTM"); | |
1414 rtm_abort_ratio_calculation(abort_status_Reg, rtm_counters_Reg, rtm_counters, method_data); | |
1415 // restore abort status | |
1416 if (RTMRetryCount > 0) { | |
1417 pop(abort_status_Reg); | |
1418 } | |
1419 } | |
1420 } | |
1421 | |
1422 // Retry on abort if abort's status is 0x6: can retry (0x2) | memory conflict (0x4) | |
1423 // inputs: retry_count_Reg | |
1424 // : abort_status_Reg | |
1425 // output: retry_count_Reg decremented by 1 | |
1426 // flags are killed | |
1427 void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg, Label& retryLabel) { | |
1428 Label doneRetry; | |
1429 assert(abort_status_Reg == rax, ""); | |
1430 // The abort reason bits are in eax (see all states in rtmLocking.hpp) | |
1431 // 0x6 = conflict on which we can retry (0x2) | memory conflict (0x4) | |
1432 // if reason is in 0x6 and retry count != 0 then retry | |
1433 andptr(abort_status_Reg, 0x6); | |
1434 jccb(Assembler::zero, doneRetry); | |
1435 testl(retry_count_Reg, retry_count_Reg); | |
1436 jccb(Assembler::zero, doneRetry); | |
1437 pause(); | |
1438 decrementl(retry_count_Reg); | |
1439 jmp(retryLabel); | |
1440 bind(doneRetry); | |
1441 } | |
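Editor's note: the 0x6 mask tested above is exactly `_XABORT_RETRY` (0x2) | `_XABORT_CONFLICT` (0x4) in <immintrin.h> terms; explicit aborts and capacity overflows are treated as non-transient. A sketch of the same retry predicate (the pause mirrors the stub's `pause()`):

```cpp
#include <immintrin.h>

// Retry only while the abort looks transient and budget remains.
inline bool should_retry(unsigned abort_status, int& retry_count) {
  if ((abort_status & (_XABORT_RETRY | _XABORT_CONFLICT)) == 0) return false;
  if (retry_count == 0) return false;
  _mm_pause();          // be polite to the hyperthread sibling
  --retry_count;
  return true;
}
```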
1442 | |
1443 // Spin and retry if lock is busy, | |
1444 // inputs: box_Reg (monitor address) | |
1445 // : retry_count_Reg | |
1446 // output: retry_count_Reg decremented by 1 | |
1447 // : clear z flag if retry count exceeded | |
1448 // tmp_Reg, scr_Reg, flags are killed | |
1449 void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register box_Reg, | |
1450 Register tmp_Reg, Register scr_Reg, Label& retryLabel) { | |
1451 Label SpinLoop, SpinExit, doneRetry; | |
1452 // Clean monitor_value bit to get valid pointer | |
1453 int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value; | |
1454 | |
1455 testl(retry_count_Reg, retry_count_Reg); | |
1456 jccb(Assembler::zero, doneRetry); | |
1457 decrementl(retry_count_Reg); | |
1458 movptr(scr_Reg, RTMSpinLoopCount); | |
1459 | |
1460 bind(SpinLoop); | |
1461 pause(); | |
1462 decrementl(scr_Reg); | |
1463 jccb(Assembler::lessEqual, SpinExit); | |
1464 movptr(tmp_Reg, Address(box_Reg, owner_offset)); | |
1465 testptr(tmp_Reg, tmp_Reg); | |
1466 jccb(Assembler::notZero, SpinLoop); | |
1467 | |
1468 bind(SpinExit); | |
1469 jmp(retryLabel); | |
1470 bind(doneRetry); | |
1471 incrementl(retry_count_Reg); // clear z flag | |
1472 } | |
1473 | |
1474 // Use RTM for normal stack locks | |
1475 // Input: objReg (object to lock) | |
1476 void MacroAssembler::rtm_stack_locking(Register objReg, Register tmpReg, Register scrReg, | |
1477 Register retry_on_abort_count_Reg, | |
1478 RTMLockingCounters* stack_rtm_counters, | |
1479 Metadata* method_data, bool profile_rtm, | |
1480 Label& DONE_LABEL, Label& IsInflated) { | |
1481 assert(UseRTMForStackLocks, "why call this otherwise?"); | |
1482 assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking"); | |
1483 assert(tmpReg == rax, ""); | |
1484 assert(scrReg == rdx, ""); | |
1485 Label L_rtm_retry, L_decrement_retry, L_on_abort; | |
1486 | |
1487 if (RTMRetryCount > 0) { | |
1488 movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort | |
1489 bind(L_rtm_retry); | |
1490 } | |
1491 if (!UseRTMXendForLockBusy) { | |
1492 movptr(tmpReg, Address(objReg, 0)); | |
1493 testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased | |
1494 jcc(Assembler::notZero, IsInflated); | |
1495 } | |
1496 if (PrintPreciseRTMLockingStatistics || profile_rtm) { | |
1497 Label L_noincrement; | |
1498 if (RTMTotalCountIncrRate > 1) { | |
1499 // tmpReg, scrReg and flags are killed | |
1500 branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement); | |
1501 } | |
1502 assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM"); | |
1503 atomic_incptr(ExternalAddress((address)stack_rtm_counters->total_count_addr()), scrReg); | |
1504 bind(L_noincrement); | |
1505 } | |
1506 xbegin(L_on_abort); | |
1507 movptr(tmpReg, Address(objReg, 0)); // fetch markword | |
1508 andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits | |
1509 cmpptr(tmpReg, markOopDesc::unlocked_value); // bits = 001 unlocked | |
1510 jcc(Assembler::equal, DONE_LABEL); // all done if unlocked | |
1511 | |
1512 Register abort_status_Reg = tmpReg; // status of abort is stored in RAX | |
1513 if (UseRTMXendForLockBusy) { | |
1514 xend(); | |
1515 movptr(tmpReg, Address(objReg, 0)); | |
1516 testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased | |
1517 jcc(Assembler::notZero, IsInflated); | |
1518 movptr(abort_status_Reg, 0x1); // Set the abort status to 1 (as xabort does) | |
1519 jmp(L_decrement_retry); | |
1520 } | |
1521 else { | |
1522 xabort(0); | |
1523 } | |
1524 bind(L_on_abort); | |
1525 if (PrintPreciseRTMLockingStatistics || profile_rtm) { | |
1526 rtm_profiling(abort_status_Reg, scrReg, stack_rtm_counters, method_data, profile_rtm); | |
1527 } | |
1528 bind(L_decrement_retry); | |
1529 if (RTMRetryCount > 0) { | |
1530 // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4) | |
1531 rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry); | |
1532 } | |
1533 } | |
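Editor's note: the crucial point in rtm_stack_locking is that inside the transaction the mark word is only read, never CASed: if the low lock bits still say unlocked (001), the method reaches DONE_LABEL with the transaction open, and any competing real lock acquisition writes the mark word and aborts us. A sketch of the check, with bit values as defined by markOopDesc (hedged, since the mark-word layout is version-specific):

```cpp
#include <cstdint>

// Mark-word low bits (markOopDesc): 01 = unlocked/neutral,
// 10 = inflated monitor; biased patterns also use bit 2.
constexpr uintptr_t biased_lock_mask = 0x7;   // low 3 lock bits
constexpr uintptr_t unlocked_value   = 0x1;   // "001"

// Transactional body of the stack-lock fast path: succeed only if the
// object still looks unlocked; any other state aborts to the slow path.
inline bool elided_stack_lock_ok(uintptr_t mark_word) {
  return (mark_word & biased_lock_mask) == unlocked_value;
}
```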
1534 | |
1535 // Use RTM for inflated locks |
1536 // inputs: objReg (object to lock) | |
1537 // boxReg (on-stack box address (displaced header location) - KILLED) | |
1538 // tmpReg (ObjectMonitor address + 2(monitor_value)) | |
1539 void MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, Register tmpReg, | |
1540 Register scrReg, Register retry_on_busy_count_Reg, | |
1541 Register retry_on_abort_count_Reg, | |
1542 RTMLockingCounters* rtm_counters, | |
1543 Metadata* method_data, bool profile_rtm, | |
1544 Label& DONE_LABEL) { | |
1545 assert(UseRTMLocking, "why call this otherwise?"); | |
1546 assert(tmpReg == rax, ""); | |
1547 assert(scrReg == rdx, ""); | |
1548 Label L_rtm_retry, L_decrement_retry, L_on_abort; | |
1549 // Clean monitor_value bit to get valid pointer | |
1550 int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value; | |
1551 | |
1552 // Without cast to int32_t a movptr will destroy r10 which is typically obj | |
1553 movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())); | |
1554 movptr(boxReg, tmpReg); // Save ObjectMonitor address | |
1555 | |
1556 if (RTMRetryCount > 0) { | |
1557 movl(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy | |
1558 movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort | |
1559 bind(L_rtm_retry); | |
1560 } | |
1561 if (PrintPreciseRTMLockingStatistics || profile_rtm) { | |
1562 Label L_noincrement; | |
1563 if (RTMTotalCountIncrRate > 1) { | |
1564 // tmpReg, scrReg and flags are killed | |
1565 branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement); | |
1566 } | |
1567 assert(rtm_counters != NULL, "should not be NULL when profiling RTM"); | |
1568 atomic_incptr(ExternalAddress((address)rtm_counters->total_count_addr()), scrReg); | |
1569 bind(L_noincrement); | |
1570 } | |
1571 xbegin(L_on_abort); | |
1572 movptr(tmpReg, Address(objReg, 0)); | |
1573 movptr(tmpReg, Address(tmpReg, owner_offset)); | |
1574 testptr(tmpReg, tmpReg); | |
1575 jcc(Assembler::zero, DONE_LABEL); | |
1576 if (UseRTMXendForLockBusy) { | |
1577 xend(); | |
1578 jmp(L_decrement_retry); | |
1579 } | |
1580 else { | |
1581 xabort(0); | |
1582 } | |
1583 bind(L_on_abort); | |
1584 Register abort_status_Reg = tmpReg; // status of abort is stored in RAX | |
1585 if (PrintPreciseRTMLockingStatistics || profile_rtm) { | |
1586 rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm); | |
1587 } | |
1588 if (RTMRetryCount > 0) { | |
1589 // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4) | |
1590 rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry); | |
1591 } | |
1592 | |
1593 movptr(tmpReg, Address(boxReg, owner_offset)) ; | |
1594 testptr(tmpReg, tmpReg) ; | |
1595 jccb(Assembler::notZero, L_decrement_retry) ; | |
1596 | |
1597 // Appears unlocked - try to swing _owner from null to non-null. | |
1598 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand. | |
1599 #ifdef _LP64 | |
1600 Register threadReg = r15_thread; | |
1601 #else | |
1602 get_thread(scrReg); | |
1603 Register threadReg = scrReg; | |
1604 #endif | |
1605 if (os::is_MP()) { | |
1606 lock(); | |
1607 } | |
1608 cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg | |
1609 | |
1610 if (RTMRetryCount > 0) { | |
1611 // success done else retry | |
1612 jccb(Assembler::equal, DONE_LABEL) ; | |
1613 bind(L_decrement_retry); | |
1614 // Spin and retry if lock is busy. | |
1615 rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry); | |
1616 } | |
1617 else { | |
1618 bind(L_decrement_retry); | |
1619 } | |
1620 } | |
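Editor's note: the inflated path similarly only reads ObjectMonitor::_owner inside the transaction; since nothing is written, several threads can hold the elided monitor simultaneously, and a real acquisition (a store to _owner) aborts them all. A rough pseudo-C++ of the two halves, with simplified field layout and names as assumptions; as in the stack-lock case, HotSpot keeps the transaction open across the whole critical section:

```cpp
#include <immintrin.h>
#include <atomic>

struct MonitorSketch { std::atomic<void*> owner{nullptr}; };   // illustrative

// fast_lock side: enter a transaction and *read* _owner. If it is NULL the
// stub jumps to DONE with the transaction still open; the critical section
// runs transactionally and commits at unlock. No write to _owner occurs.
bool rtm_inflated_lock(MonitorSketch* mon) {
  if (_xbegin() == _XBEGIN_STARTED) {
    if (mon->owner.load(std::memory_order_relaxed) == nullptr)
      return true;                 // still transactional: committed at unlock
    _xabort(0);                    // owner busy (or xend()+retry, per flags)
  }
  return false;                    // aborted: fall back to cmpxchg(_owner)
}

// fast_unlock side (see further down this diff): if _owner is still NULL we
// are inside an elided transaction, so xend() commits the whole region.
void rtm_inflated_unlock(MonitorSketch* mon) {
  if (mon->owner.load(std::memory_order_relaxed) == nullptr)
    _xend();
  // else: regular inflated-monitor exit path
}
```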
1621 | |
1622 #endif // INCLUDE_RTM_OPT | |
1623 | |
1277 // Fast_Lock and Fast_Unlock used by C2 | 1624 // Fast_Lock and Fast_Unlock used by C2 |
1278 | 1625 |
1279 // Because the transitions from emitted code to the runtime | 1626 // Because the transitions from emitted code to the runtime |
1280 // monitorenter/exit helper stubs are so slow it's critical that | 1627 // monitorenter/exit helper stubs are so slow it's critical that |
1281 // we inline both the stack-locking fast-path and the inflated fast path. | 1628 // we inline both the stack-locking fast-path and the inflated fast path. |
1348 | 1695 |
1349 // obj: object to lock | 1696 // obj: object to lock |
1350 // box: on-stack box address (displaced header location) - KILLED | 1697 // box: on-stack box address (displaced header location) - KILLED |
1351 // rax,: tmp -- KILLED | 1698 // rax,: tmp -- KILLED |
1352 // scr: tmp -- KILLED | 1699 // scr: tmp -- KILLED |
1353 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg, BiasedLockingCounters* counters) { | 1700 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, |
1701 Register scrReg, Register cx1Reg, Register cx2Reg, | |
1702 BiasedLockingCounters* counters, | |
1703 RTMLockingCounters* rtm_counters, | |
1704 RTMLockingCounters* stack_rtm_counters, | |
1705 Metadata* method_data, | |
1706 bool use_rtm, bool profile_rtm) { | |
1354 // Ensure the register assignments are disjoint | 1707 // Ensure the register assignments are disjoint |
1355 guarantee (objReg != boxReg, ""); | 1708 assert(tmpReg == rax, ""); |
1356 guarantee (objReg != tmpReg, ""); | 1709 |
1357 guarantee (objReg != scrReg, ""); | 1710 if (use_rtm) { |
1358 guarantee (boxReg != tmpReg, ""); | 1711 assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg); |
1359 guarantee (boxReg != scrReg, ""); | 1712 } else { |
1360 guarantee (tmpReg == rax, ""); | 1713 assert(cx1Reg == noreg, ""); |
1714 assert(cx2Reg == noreg, ""); | |
1715 assert_different_registers(objReg, boxReg, tmpReg, scrReg); | |
1716 } | |
1361 | 1717 |
1362 if (counters != NULL) { | 1718 if (counters != NULL) { |
1363 atomic_incl(ExternalAddress((address)counters->total_entry_count_addr())); | 1719 atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg); |
1364 } | 1720 } |
1365 if (EmitSync & 1) { | 1721 if (EmitSync & 1) { |
1366 // set box->dhw = unused_mark (3) | 1722 // set box->dhw = unused_mark (3) |
1367 // Force all sync thru slow-path: slow_enter() and slow_exit() | 1723 // Force all sync thru slow-path: slow_enter() and slow_exit() |
1368 movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())); | 1724 movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())); |
1417 // If this invariant is not held we risk exclusion (safety) failure. | 1773 // If this invariant is not held we risk exclusion (safety) failure. |
1418 if (UseBiasedLocking && !UseOptoBiasInlining) { | 1774 if (UseBiasedLocking && !UseOptoBiasInlining) { |
1419 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, counters); | 1775 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, counters); |
1420 } | 1776 } |
1421 | 1777 |
1778 #if INCLUDE_RTM_OPT | |
1779 if (UseRTMForStackLocks && use_rtm) { | |
1780 rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg, | |
1781 stack_rtm_counters, method_data, profile_rtm, | |
1782 DONE_LABEL, IsInflated); | |
1783 } | |
1784 #endif // INCLUDE_RTM_OPT | |
1785 | |
1422 movptr(tmpReg, Address(objReg, 0)); // [FETCH] | 1786 movptr(tmpReg, Address(objReg, 0)); // [FETCH] |
1423 testl (tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased | 1787 testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased |
1424 jccb (Assembler::notZero, IsInflated); | 1788 jccb(Assembler::notZero, IsInflated); |
1425 | 1789 |
1426 // Attempt stack-locking ... | 1790 // Attempt stack-locking ... |
1427 orptr (tmpReg, 0x1); | 1791 orptr (tmpReg, markOopDesc::unlocked_value); |
1428 movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS | 1792 movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS |
1429 if (os::is_MP()) { | 1793 if (os::is_MP()) { |
1430 lock(); | 1794 lock(); |
1431 } | 1795 } |
1432 cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg | 1796 cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg |
1433 if (counters != NULL) { | 1797 if (counters != NULL) { |
1434 cond_inc32(Assembler::equal, | 1798 cond_inc32(Assembler::equal, |
1435 ExternalAddress((address)counters->fast_path_entry_count_addr())); | 1799 ExternalAddress((address)counters->fast_path_entry_count_addr())); |
1436 } | 1800 } |
1437 jccb(Assembler::equal, DONE_LABEL); | 1801 jcc(Assembler::equal, DONE_LABEL); // Success |
1438 | 1802 |
1439 // Recursive locking | 1803 // Recursive locking. |
1804 // The object is stack-locked: markword contains stack pointer to BasicLock. | |
1805 // Locked by current thread if difference with current SP is less than one page. | |
1440 subptr(tmpReg, rsp); | 1806 subptr(tmpReg, rsp); |
1807 // Next instruction sets ZFlag == 1 (Success) if the difference is less than one page. |
1441 andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) ); | 1808 andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) ); |
1442 movptr(Address(boxReg, 0), tmpReg); | 1809 movptr(Address(boxReg, 0), tmpReg); |
1443 if (counters != NULL) { | 1810 if (counters != NULL) { |
1444 cond_inc32(Assembler::equal, | 1811 cond_inc32(Assembler::equal, |
1445 ExternalAddress((address)counters->fast_path_entry_count_addr())); | 1812 ExternalAddress((address)counters->fast_path_entry_count_addr())); |
1446 } | 1813 } |
1447 jmpb(DONE_LABEL); | 1814 jmp(DONE_LABEL); |
1448 | 1815 |
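Editor's note: after the failed CAS, tmpReg holds the current mark word, which for a stack-locked object is the address of a BasicLock in some frame. The mask `7 - os::vm_page_size()` (0xFFFFF007 for 4K pages, in the LP64 variant) zeroes `mark - rsp` exactly when that address lies within one page above the current SP and is 8-byte aligned, i.e. when the lock owner must be the current thread. A sketch of the predicate:

```cpp
#include <cstdint>

// True if `mark` points into the current thread's stack, close enough to
// `sp` that the existing lock must belong to this thread (recursive lock).
// For power-of-two page sizes, 7 - page_size == (~(page_size - 1)) | 7, so
// the mask clears in-page offset bits but keeps the low 3 alignment bits.
inline bool is_recursive_stack_lock(uintptr_t mark, uintptr_t sp,
                                    uintptr_t page_size /* power of two */) {
  return ((mark - sp) & (7 - page_size)) == 0;  // 0 stored as displaced header
}
```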
1449 bind(IsInflated); | 1816 bind(IsInflated); |
1817 // The object is inflated. tmpReg contains pointer to ObjectMonitor* + 2(monitor_value) | |
1818 | |
1819 #if INCLUDE_RTM_OPT | |
1820 // Use the same RTM locking code in 32- and 64-bit VM. | |
1821 if (use_rtm) { | |
1822 rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg, | |
1823 rtm_counters, method_data, profile_rtm, DONE_LABEL); | |
1824 } else { | |
1825 #endif // INCLUDE_RTM_OPT | |
1826 | |
1450 #ifndef _LP64 | 1827 #ifndef _LP64 |
1451 // The object is inflated. | 1828 // The object is inflated. |
1452 // | 1829 // |
1453 // TODO-FIXME: eliminate the ugly use of manifest constants: | 1830 // TODO-FIXME: eliminate the ugly use of manifest constants: |
1454 // Use markOopDesc::monitor_value instead of "2". | 1831 // Use markOopDesc::monitor_value instead of "2". |
1574 // avoid an RTO->RTS upgrade on the $line. | 1951 // avoid an RTO->RTS upgrade on the $line. |
1575 | 1952 |
1576 // Without cast to int32_t a movptr will destroy r10 which is typically obj | 1953 // Without cast to int32_t a movptr will destroy r10 which is typically obj |
1577 movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())); | 1954 movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())); |
1578 | 1955 |
1579 mov (boxReg, tmpReg); | 1956 movptr (boxReg, tmpReg); |
1580 movptr (tmpReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)); | 1957 movptr (tmpReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)); |
1581 testptr(tmpReg, tmpReg); | 1958 testptr(tmpReg, tmpReg); |
1582 jccb (Assembler::notZero, DONE_LABEL); | 1959 jccb (Assembler::notZero, DONE_LABEL); |
1583 | 1960 |
1584 // It's inflated and appears unlocked | 1961 // It's inflated and appears unlocked |
1585 if (os::is_MP()) { | 1962 if (os::is_MP()) { |
1586 lock(); | 1963 lock(); |
1587 } | 1964 } |
1588 cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)); | 1965 cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)); |
1589 // Intentional fall-through into DONE_LABEL ... | 1966 // Intentional fall-through into DONE_LABEL ... |
1590 | 1967 #endif // _LP64 |
1968 | |
1969 #if INCLUDE_RTM_OPT | |
1970 } // use_rtm() | |
1591 #endif | 1971 #endif |
1592 | |
1593 // DONE_LABEL is a hot target - we'd really like to place it at the | 1972 // DONE_LABEL is a hot target - we'd really like to place it at the |
1594 // start of cache line by padding with NOPs. | 1973 // start of cache line by padding with NOPs. |
1595 // See the AMD and Intel software optimization manuals for the | 1974 // See the AMD and Intel software optimization manuals for the |
1596 // most efficient "long" NOP encodings. | 1975 // most efficient "long" NOP encodings. |
1597 // Unfortunately none of our alignment mechanisms suffice. | 1976 // Unfortunately none of our alignment mechanisms suffice. |
1629 // The only other source of unbalanced locking would be JNI. The "Java Native Interface: | 2008 // The only other source of unbalanced locking would be JNI. The "Java Native Interface: |
1630 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter | 2009 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter |
1631 // should not be unlocked by "normal" java-level locking and vice-versa. The specification | 2010 // should not be unlocked by "normal" java-level locking and vice-versa. The specification |
1632 // doesn't specify what will occur if a program engages in such mixed-mode locking, however. | 2011 // doesn't specify what will occur if a program engages in such mixed-mode locking, however. |
1633 | 2012 |
1634 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) { | 2013 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) { |
1635 guarantee (objReg != boxReg, ""); | 2014 assert(boxReg == rax, ""); |
1636 guarantee (objReg != tmpReg, ""); | 2015 assert_different_registers(objReg, boxReg, tmpReg); |
1637 guarantee (boxReg != tmpReg, ""); | |
1638 guarantee (boxReg == rax, ""); | |
1639 | 2016 |
1640 if (EmitSync & 4) { | 2017 if (EmitSync & 4) { |
1641 // Disable - inhibit all inlining. Force control through the slow-path | 2018 // Disable - inhibit all inlining. Force control through the slow-path |
1642 cmpptr (rsp, 0); | 2019 cmpptr (rsp, 0); |
1643 } else | 2020 } else |
1665 // and appear before the (box->dhw == 0) recursive stack-lock test. | 2042 // and appear before the (box->dhw == 0) recursive stack-lock test. |
1666 if (UseBiasedLocking && !UseOptoBiasInlining) { | 2043 if (UseBiasedLocking && !UseOptoBiasInlining) { |
1667 biased_locking_exit(objReg, tmpReg, DONE_LABEL); | 2044 biased_locking_exit(objReg, tmpReg, DONE_LABEL); |
1668 } | 2045 } |
1669 | 2046 |
2047 #if INCLUDE_RTM_OPT | |
2048 if (UseRTMForStackLocks && use_rtm) { | |
2049 assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking"); | |
2050 Label L_regular_unlock; | |
2051 movptr(tmpReg, Address(objReg, 0)); // fetch markword | |
2052 andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits | |
2053 cmpptr(tmpReg, markOopDesc::unlocked_value); // bits = 001 unlocked | |
2054 jccb(Assembler::notEqual, L_regular_unlock); // if !HLE RegularLock | |
2055 xend(); // otherwise end... | |
2056 jmp(DONE_LABEL); // ... and we're done | |
2057 bind(L_regular_unlock); | |
2058 } | |
2059 #endif | |
2060 | |
1670 cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header | 2061 cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header |
2062 jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock | |
1671 movptr(tmpReg, Address(objReg, 0)); // Examine the object's markword | 2063 movptr(tmpReg, Address(objReg, 0)); // Examine the object's markword |
1672 jccb (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock | 2064 testptr(tmpReg, markOopDesc::monitor_value); // Inflated? |
1673 | |
1674 testptr(tmpReg, 0x02); // Inflated? | |
1675 jccb (Assembler::zero, Stacked); | 2065 jccb (Assembler::zero, Stacked); |
1676 | 2066 |
1677 // It's inflated. | 2067 // It's inflated. |
2068 #if INCLUDE_RTM_OPT | |
2069 if (use_rtm) { | |
2070 Label L_regular_inflated_unlock; | |
2071 // Clean monitor_value bit to get valid pointer | |
2072 int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value; | |
2073 movptr(boxReg, Address(tmpReg, owner_offset)); | |
2074 testptr(boxReg, boxReg); | |
2075 jccb(Assembler::notZero, L_regular_inflated_unlock); | |
2076 xend(); | |
2077 jmpb(DONE_LABEL); | |
2078 bind(L_regular_inflated_unlock); | |
2079 } | |
2080 #endif | |
2081 | |
1678 // Despite our balanced locking property we still check that m->_owner == Self | 2082 // Despite our balanced locking property we still check that m->_owner == Self |
1679 // as java routines or native JNI code called by this thread might | 2083 // as java routines or native JNI code called by this thread might |
1680 // have released the lock. | 2084 // have released the lock. |
1681 // Refer to the comments in synchronizer.cpp for how we might encode extra | 2085 // Refer to the comments in synchronizer.cpp for how we might encode extra |
1682 // state in _succ so we can avoid fetching EntryList|cxq. | 2086 // state in _succ so we can avoid fetching EntryList|cxq. |
2446 | 2850 |
2447 void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) { | 2851 void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) { |
2448 Condition negated_cond = negate_condition(cond); | 2852 Condition negated_cond = negate_condition(cond); |
2449 Label L; | 2853 Label L; |
2450 jcc(negated_cond, L); | 2854 jcc(negated_cond, L); |
2855 pushf(); // Preserve flags | |
2451 atomic_incl(counter_addr); | 2856 atomic_incl(counter_addr); |
2857 popf(); | |
2452 bind(L); | 2858 bind(L); |
2453 } | 2859 } |
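Editor's note: the pushf/popf pair migrated here from atomic_incl because the lock-prefixed increment clobbers EFLAGS while cond_inc32's callers still branch on the original condition afterwards (see the `cond_inc32` ... `jcc(Assembler::equal, DONE_LABEL)` sequence in fast_lock above). A C++ rendering of the contract, with the flag save modeled by simply not disturbing the condition:

```cpp
#include <atomic>

// Sketch of cond_inc32's contract: bump a counter only when `cond` held,
// while leaving `cond` (the CPU flags, in the real stub) intact for the
// caller's subsequent branch.
inline bool cond_inc(bool cond, std::atomic<long>& counter) {
  if (cond)
    counter.fetch_add(1, std::memory_order_relaxed);  // lock incl + flag save
  return cond;   // callers like fast_lock still branch on the condition
}
```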
2454 | 2860 |
2455 int MacroAssembler::corrected_idivl(Register reg) { | 2861 int MacroAssembler::corrected_idivl(Register reg) { |
2456 // Full implementation of Java idiv and irem; checks for | 2862 // Full implementation of Java idiv and irem; checks for |