comparison src/cpu/x86/vm/macroAssembler_x86.cpp @ 17780:606acabe7b5c

8031320: Use Intel RTM instructions for locks
Summary: Use RTM for inflated locks and stack locks.
Reviewed-by: iveresov, twisti, roland, dcubed
author kvn
date Thu, 20 Mar 2014 17:49:27 -0700
parents 4d4ea046d32a
children 526acaf3626f
17778:a48e16541e6b 17780:606acabe7b5c
299 299
300 void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { 300 void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
301 mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); 301 mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
302 } 302 }
303 303
304 void MacroAssembler::movptr(Register dst, AddressLiteral src) { 304 void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) {
305 // scratch register is not used,
306 // it is defined to match parameters of 64-bit version of this method.
305 if (src.is_lval()) { 307 if (src.is_lval()) {
306 mov_literal32(dst, (intptr_t)src.target(), src.rspec()); 308 mov_literal32(dst, (intptr_t)src.target(), src.rspec());
307 } else { 309 } else {
308 movl(dst, as_Address(src)); 310 movl(dst, as_Address(src));
309 } 311 }
611 if (value == 0) { ; return; } 613 if (value == 0) { ; return; }
612 if (value == 1 && UseIncDec) { decq(dst) ; return; } 614 if (value == 1 && UseIncDec) { decq(dst) ; return; }
613 /* else */ { subq(dst, value) ; return; } 615 /* else */ { subq(dst, value) ; return; }
614 } 616 }
615 617
618 void MacroAssembler::incrementq(AddressLiteral dst) {
619 if (reachable(dst)) {
620 incrementq(as_Address(dst));
621 } else {
622 lea(rscratch1, dst);
623 incrementq(Address(rscratch1, 0));
624 }
625 }
626
616 void MacroAssembler::incrementq(Register reg, int value) { 627 void MacroAssembler::incrementq(Register reg, int value) {
617 if (value == min_jint) { addq(reg, value); return; } 628 if (value == min_jint) { addq(reg, value); return; }
618 if (value < 0) { decrementq(reg, -value); return; } 629 if (value < 0) { decrementq(reg, -value); return; }
619 if (value == 0) { ; return; } 630 if (value == 0) { ; return; }
620 if (value == 1 && UseIncDec) { incq(reg) ; return; } 631 if (value == 1 && UseIncDec) { incq(reg) ; return; }
679 void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { 690 void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
680 mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate()); 691 mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
681 movq(dst, rscratch1); 692 movq(dst, rscratch1);
682 } 693 }
683 694
684 void MacroAssembler::movptr(Register dst, AddressLiteral src) { 695 void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) {
685 if (src.is_lval()) { 696 if (src.is_lval()) {
686 mov_literal64(dst, (intptr_t)src.target(), src.rspec()); 697 mov_literal64(dst, (intptr_t)src.target(), src.rspec());
687 } else { 698 } else {
688 if (reachable(src)) { 699 if (reachable(src)) {
689 movq(dst, as_Address(src)); 700 movq(dst, as_Address(src));
690 } else { 701 } else {
691 lea(rscratch1, src); 702 lea(scratch, src);
692 movq(dst, Address(rscratch1,0)); 703 movq(dst, Address(scratch, 0));
693 } 704 }
694 } 705 }
695 } 706 }
696 707
697 void MacroAssembler::movptr(ArrayAddress dst, Register src) { 708 void MacroAssembler::movptr(ArrayAddress dst, Register src) {
986 997
987 void MacroAssembler::andptr(Register dst, int32_t imm32) { 998 void MacroAssembler::andptr(Register dst, int32_t imm32) {
988 LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32)); 999 LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
989 } 1000 }
990 1001
991 void MacroAssembler::atomic_incl(AddressLiteral counter_addr) { 1002 void MacroAssembler::atomic_incl(Address counter_addr) {
992 pushf(); 1003 if (os::is_MP())
1004 lock();
1005 incrementl(counter_addr);
1006 }
1007
1008 void MacroAssembler::atomic_incl(AddressLiteral counter_addr, Register scr) {
993 if (reachable(counter_addr)) { 1009 if (reachable(counter_addr)) {
994 if (os::is_MP()) 1010 atomic_incl(as_Address(counter_addr));
995 lock(); 1011 } else {
996 incrementl(as_Address(counter_addr)); 1012 lea(scr, counter_addr);
997 } else { 1013 atomic_incl(Address(scr, 0));
998 lea(rscratch1, counter_addr); 1014 }
999 if (os::is_MP()) 1015 }
1000 lock(); 1016
1001 incrementl(Address(rscratch1, 0)); 1017 #ifdef _LP64
1002 } 1018 void MacroAssembler::atomic_incq(Address counter_addr) {
1003 popf(); 1019 if (os::is_MP())
1004 } 1020 lock();
1021 incrementq(counter_addr);
1022 }
1023
1024 void MacroAssembler::atomic_incq(AddressLiteral counter_addr, Register scr) {
1025 if (reachable(counter_addr)) {
1026 atomic_incq(as_Address(counter_addr));
1027 } else {
1028 lea(scr, counter_addr);
1029 atomic_incq(Address(scr, 0));
1030 }
1031 }
1032 #endif
1005 1033
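A side note on the helpers above: the flag save/restore (pushf/popf) no longer lives inside atomic_incl itself; it reappears around the call site in cond_inc32 near the end of this change, so the helpers now emit only the locked increment. As a rough sketch of the memory semantics the emitted lock-prefixed increment provides, in plain C++ (function names here are illustrative, not part of the changeset):

    #include <atomic>
    #include <cstdint>

    // "lock; incl [counter]" behaves like a full-barrier atomic add of 1.
    static inline void atomic_incl_equiv(std::atomic<int32_t>& counter) {
      counter.fetch_add(1, std::memory_order_seq_cst);   // x86: lock-prefixed read-modify-write
    }

    // 64-bit variant, mirroring the new _LP64-only atomic_incq.
    static inline void atomic_incq_equiv(std::atomic<int64_t>& counter) {
      counter.fetch_add(1, std::memory_order_seq_cst);
    }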
1006 // Writes to stack successive pages until offset reached to check for 1034 // Writes to stack successive pages until offset reached to check for
1007 // stack overflow + shadow pages. This clobbers tmp. 1035 // stack overflow + shadow pages. This clobbers tmp.
1008 void MacroAssembler::bang_stack_size(Register size, Register tmp) { 1036 void MacroAssembler::bang_stack_size(Register size, Register tmp) {
1009 movptr(tmp, rsp); 1037 movptr(tmp, rsp);
1272 cmpptr(temp_reg, markOopDesc::biased_lock_pattern); 1300 cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
1273 jcc(Assembler::equal, done); 1301 jcc(Assembler::equal, done);
1274 } 1302 }
1275 1303
1276 #ifdef COMPILER2 1304 #ifdef COMPILER2
1305
1306 #if INCLUDE_RTM_OPT
1307
1308 // Update rtm_counters based on abort status
1309 // input: abort_status
1310 // rtm_counters (RTMLockingCounters*)
1311 // flags are killed
1312 void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters) {
1313
1314 atomic_incptr(Address(rtm_counters, RTMLockingCounters::abort_count_offset()));
1315 if (PrintPreciseRTMLockingStatistics) {
1316 for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
1317 Label check_abort;
1318 testl(abort_status, (1<<i));
1319 jccb(Assembler::equal, check_abort);
1320 atomic_incptr(Address(rtm_counters, RTMLockingCounters::abortX_count_offset() + (i * sizeof(uintx))));
1321 bind(check_abort);
1322 }
1323 }
1324 }
1325
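Restated in plain C++, the counter update above bumps the global abort counter and, when PrintPreciseRTMLockingStatistics is on, one per-reason counter for every abort-status bit that is set. The struct below is only an illustrative stand-in for RTMLockingCounters, and the number of reason bits is whatever ABORT_STATUS_LIMIT bounds the loop to:

    #include <atomic>
    #include <cstdint>

    struct RtmCountersSketch {                  // illustrative stand-in, not the real class
      std::atomic<uintptr_t> abort_count{0};
      std::atomic<uintptr_t> abortX_count[8];   // one slot per abort-reason bit
    };

    static void rtm_counters_update_sketch(uint32_t abort_status, RtmCountersSketch* c,
                                            bool precise_stats, int abort_status_limit) {
      c->abort_count.fetch_add(1);
      if (precise_stats) {
        for (int i = 0; i < abort_status_limit; i++) {
          if (abort_status & (1u << i)) {
            c->abortX_count[i].fetch_add(1);    // same per-bit increment the loop above emits
          }
        }
      }
    }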
1326 // Branch if (random & (count-1) != 0), count is 2^n
1327 // tmp, scr and flags are killed
1328 void MacroAssembler::branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel) {
1329 assert(tmp == rax, "");
1330 assert(scr == rdx, "");
1331 rdtsc(); // modifies EDX:EAX
1332 andptr(tmp, count-1);
1333 jccb(Assembler::notZero, brLabel);
1334 }
1335
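The branch helper above uses the low bits of the time-stamp counter as a cheap pseudo-random value, so only roughly one in RTMTotalCountIncrRate executions falls through to the counter update. A minimal C++ restatement, assuming the rate is a power of two as the comment requires (intrinsic usage and naming are illustrative):

    #include <cstdint>
    #include <x86intrin.h>   // __rdtsc()

    // True when this execution should branch past the (sampled) counter increment.
    static inline bool skip_sampled_increment(uint64_t rate) {  // rate must be 2^n
      return (__rdtsc() & (rate - 1)) != 0;  // nonzero about (rate-1)/rate of the time
    }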
1336 // Perform abort ratio calculation, set no_rtm bit if high ratio
1337 // input: rtm_counters_Reg (RTMLockingCounters* address)
1338 // tmpReg, rtm_counters_Reg and flags are killed
1339 void MacroAssembler::rtm_abort_ratio_calculation(Register tmpReg,
1340 Register rtm_counters_Reg,
1341 RTMLockingCounters* rtm_counters,
1342 Metadata* method_data) {
1343 Label L_done, L_check_always_rtm1, L_check_always_rtm2;
1344
1345 if (RTMLockingCalculationDelay > 0) {
1346 // Delay calculation
1347 movptr(tmpReg, ExternalAddress((address) RTMLockingCounters::rtm_calculation_flag_addr()), tmpReg);
1348 testptr(tmpReg, tmpReg);
1349 jccb(Assembler::equal, L_done);
1350 }
1351 // Abort ratio calculation only if abort_count > RTMAbortThreshold
1352 // Aborted transactions = abort_count * 100
1353 // All transactions = total_count * RTMTotalCountIncrRate
1354 // Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio)
1355
1356 movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::abort_count_offset()));
1357 cmpptr(tmpReg, RTMAbortThreshold);
1358 jccb(Assembler::below, L_check_always_rtm2);
1359 imulptr(tmpReg, tmpReg, 100);
1360
1361 Register scrReg = rtm_counters_Reg;
1362 movptr(scrReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset()));
1363 imulptr(scrReg, scrReg, RTMTotalCountIncrRate);
1364 imulptr(scrReg, scrReg, RTMAbortRatio);
1365 cmpptr(tmpReg, scrReg);
1366 jccb(Assembler::below, L_check_always_rtm1);
1367 if (method_data != NULL) {
1368 // set rtm_state to "no rtm" in MDO
1369 mov_metadata(tmpReg, method_data);
1370 if (os::is_MP()) {
1371 lock();
1372 }
1373 orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), NoRTM);
1374 }
1375 jmpb(L_done);
1376 bind(L_check_always_rtm1);
1377 // Reload RTMLockingCounters* address
1378 lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters));
1379 bind(L_check_always_rtm2);
1380 movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset()));
1381 cmpptr(tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate);
1382 jccb(Assembler::below, L_done);
1383 if (method_data != NULL) {
1384 // set rtm_state to "always rtm" in MDO
1385 mov_metadata(tmpReg, method_data);
1386 if (os::is_MP()) {
1387 lock();
1388 }
1389 orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), UseRTM);
1390 }
1391 bind(L_done);
1392 }
1393
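The test emitted above follows directly from the comments in the function; restated as ordinary arithmetic (the helper name and the sample numbers are illustrative only):

    #include <cstdint>

    // The "no RTM" bit is set in the MDO when
    //   abort_count * 100  >=  total_count * RTMTotalCountIncrRate * RTMAbortRatio
    // and this calculation only runs once abort_count has reached RTMAbortThreshold.
    static bool abort_ratio_too_high(uint64_t abort_count, uint64_t total_count,
                                     uint64_t total_count_incr_rate,
                                     uint64_t abort_ratio_percent) {
      return abort_count * 100 >= total_count * total_count_incr_rate * abort_ratio_percent;
    }
    // Example: abort_ratio_too_high(1000, 10, 64, 50) compares 100000 >= 32000,
    // so RTM would be disabled for that lock site.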
1394 // Update counters and perform abort ratio calculation
1395 // input: abort_status_Reg
1396 // rtm_counters_Reg, flags are killed
1397 void MacroAssembler::rtm_profiling(Register abort_status_Reg,
1398 Register rtm_counters_Reg,
1399 RTMLockingCounters* rtm_counters,
1400 Metadata* method_data,
1401 bool profile_rtm) {
1402
1403 assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
1404 // update rtm counters based on rax value at abort
1405 // reads abort_status_Reg, updates flags
1406 lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters));
1407 rtm_counters_update(abort_status_Reg, rtm_counters_Reg);
1408 if (profile_rtm) {
1409 // Save abort status because abort_status_Reg is used by following code.
1410 if (RTMRetryCount > 0) {
1411 push(abort_status_Reg);
1412 }
1413 assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
1414 rtm_abort_ratio_calculation(abort_status_Reg, rtm_counters_Reg, rtm_counters, method_data);
1415 // restore abort status
1416 if (RTMRetryCount > 0) {
1417 pop(abort_status_Reg);
1418 }
1419 }
1420 }
1421
1422 // Retry on abort if abort's status is 0x6: can retry (0x2) | memory conflict (0x4)
1423 // inputs: retry_count_Reg
1424 // : abort_status_Reg
1425 // output: retry_count_Reg decremented by 1
1426 // flags are killed
1427 void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg, Label& retryLabel) {
1428 Label doneRetry;
1429 assert(abort_status_Reg == rax, "");
1430 // The abort reason bits are in eax (see all states in rtmLocking.hpp)
1431 // 0x6 = conflict on which we can retry (0x2) | memory conflict (0x4)
1432 // if reason is in 0x6 and retry count != 0 then retry
1433 andptr(abort_status_Reg, 0x6);
1434 jccb(Assembler::zero, doneRetry);
1435 testl(retry_count_Reg, retry_count_Reg);
1436 jccb(Assembler::zero, doneRetry);
1437 pause();
1438 decrementl(retry_count_Reg);
1439 jmp(retryLabel);
1440 bind(doneRetry);
1441 }
1442
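In C-level terms the retry decision above is a small predicate on the xbegin status word; the bit meanings follow the comment (and rtmLocking.hpp), and the function name is illustrative:

    #include <cstdint>
    #include <immintrin.h>   // _mm_pause()

    // Retry only transient aborts: "can retry" (0x2) or "memory conflict" (0x4),
    // and only while the retry budget lasts.
    static bool retry_lock_on_abort(uint32_t abort_status, int& retry_count) {
      if ((abort_status & 0x6) == 0 || retry_count == 0) {
        return false;               // falls through to doneRetry
      }
      _mm_pause();                  // same hint as the emitted pause instruction
      --retry_count;
      return true;                  // i.e. jmp retryLabel
    }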
1443 // Spin and retry if lock is busy,
1444 // inputs: box_Reg (monitor address)
1445 // : retry_count_Reg
1446 // output: retry_count_Reg decremented by 1
1447 // : clear z flag if retry count exceeded
1448 // tmp_Reg, scr_Reg, flags are killed
1449 void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register box_Reg,
1450 Register tmp_Reg, Register scr_Reg, Label& retryLabel) {
1451 Label SpinLoop, SpinExit, doneRetry;
1452 // Clean monitor_value bit to get valid pointer
1453 int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
1454
1455 testl(retry_count_Reg, retry_count_Reg);
1456 jccb(Assembler::zero, doneRetry);
1457 decrementl(retry_count_Reg);
1458 movptr(scr_Reg, RTMSpinLoopCount);
1459
1460 bind(SpinLoop);
1461 pause();
1462 decrementl(scr_Reg);
1463 jccb(Assembler::lessEqual, SpinExit);
1464 movptr(tmp_Reg, Address(box_Reg, owner_offset));
1465 testptr(tmp_Reg, tmp_Reg);
1466 jccb(Assembler::notZero, SpinLoop);
1467
1468 bind(SpinExit);
1469 jmp(retryLabel);
1470 bind(doneRetry);
1471 incrementl(retry_count_Reg); // clear z flag
1472 }
1473
1474 // Use RTM for normal stack locks
1475 // Input: objReg (object to lock)
1476 void MacroAssembler::rtm_stack_locking(Register objReg, Register tmpReg, Register scrReg,
1477 Register retry_on_abort_count_Reg,
1478 RTMLockingCounters* stack_rtm_counters,
1479 Metadata* method_data, bool profile_rtm,
1480 Label& DONE_LABEL, Label& IsInflated) {
1481 assert(UseRTMForStackLocks, "why call this otherwise?");
1482 assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
1483 assert(tmpReg == rax, "");
1484 assert(scrReg == rdx, "");
1485 Label L_rtm_retry, L_decrement_retry, L_on_abort;
1486
1487 if (RTMRetryCount > 0) {
1488 movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
1489 bind(L_rtm_retry);
1490 }
1491 if (!UseRTMXendForLockBusy) {
1492 movptr(tmpReg, Address(objReg, 0));
1493 testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
1494 jcc(Assembler::notZero, IsInflated);
1495 }
1496 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1497 Label L_noincrement;
1498 if (RTMTotalCountIncrRate > 1) {
1499 // tmpReg, scrReg and flags are killed
1500 branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement);
1501 }
1502 assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM");
1503 atomic_incptr(ExternalAddress((address)stack_rtm_counters->total_count_addr()), scrReg);
1504 bind(L_noincrement);
1505 }
1506 xbegin(L_on_abort);
1507 movptr(tmpReg, Address(objReg, 0)); // fetch markword
1508 andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
1509 cmpptr(tmpReg, markOopDesc::unlocked_value); // bits = 001 unlocked
1510 jcc(Assembler::equal, DONE_LABEL); // all done if unlocked
1511
1512 Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
1513 if (UseRTMXendForLockBusy) {
1514 xend();
1515 movptr(tmpReg, Address(objReg, 0));
1516 testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
1517 jcc(Assembler::notZero, IsInflated);
1518 movptr(abort_status_Reg, 0x1); // Set the abort status to 1 (as xabort does)
1519 jmp(L_decrement_retry);
1520 }
1521 else {
1522 xabort(0);
1523 }
1524 bind(L_on_abort);
1525 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1526 rtm_profiling(abort_status_Reg, scrReg, stack_rtm_counters, method_data, profile_rtm);
1527 }
1528 bind(L_decrement_retry);
1529 if (RTMRetryCount > 0) {
1530 // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
1531 rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
1532 }
1533 }
1534
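A rough C++ outline of the stack-lock elision path above, using the RTM intrinsics (compile with -mrtm). Markword decoding, the sampled profiling and the retry loops are all elided here, and the matching xend is issued later by fast_unlock rather than in this function; names are illustrative:

    #include <immintrin.h>   // _xbegin/_xabort/_XBEGIN_STARTED
    #include <cstdint>

    // Returns true if the lock was elided inside a still-open RTM transaction.
    static bool rtm_stack_lock_sketch(const volatile intptr_t* mark_addr,
                                      intptr_t lock_mask,        // biased_lock_mask_in_place
                                      intptr_t unlocked_value) { // markOopDesc::unlocked_value
      unsigned status = _xbegin();
      if (status == _XBEGIN_STARTED) {
        if ((*mark_addr & lock_mask) == unlocked_value) {
          return true;   // DONE_LABEL: header is unlocked, keep the transaction open
        }
        _xabort(0);      // locked or inflated: abort and take the regular path
      }
      // "status" now carries the abort reason; the real code profiles it and may retry.
      return false;
    }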
1535 // Use RTM for inflated locks
1536 // inputs: objReg (object to lock)
1537 // boxReg (on-stack box address (displaced header location) - KILLED)
1538 // tmpReg (ObjectMonitor address + 2(monitor_value))
1539 void MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, Register tmpReg,
1540 Register scrReg, Register retry_on_busy_count_Reg,
1541 Register retry_on_abort_count_Reg,
1542 RTMLockingCounters* rtm_counters,
1543 Metadata* method_data, bool profile_rtm,
1544 Label& DONE_LABEL) {
1545 assert(UseRTMLocking, "why call this otherwise?");
1546 assert(tmpReg == rax, "");
1547 assert(scrReg == rdx, "");
1548 Label L_rtm_retry, L_decrement_retry, L_on_abort;
1549 // Clean monitor_value bit to get valid pointer
1550 int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
1551
1552 // Without cast to int32_t a movptr will destroy r10 which is typically obj
1553 movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
1554 movptr(boxReg, tmpReg); // Save ObjectMonitor address
1555
1556 if (RTMRetryCount > 0) {
1557 movl(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy
1558 movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
1559 bind(L_rtm_retry);
1560 }
1561 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1562 Label L_noincrement;
1563 if (RTMTotalCountIncrRate > 1) {
1564 // tmpReg, scrReg and flags are killed
1565 branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement);
1566 }
1567 assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
1568 atomic_incptr(ExternalAddress((address)rtm_counters->total_count_addr()), scrReg);
1569 bind(L_noincrement);
1570 }
1571 xbegin(L_on_abort);
1572 movptr(tmpReg, Address(objReg, 0));
1573 movptr(tmpReg, Address(tmpReg, owner_offset));
1574 testptr(tmpReg, tmpReg);
1575 jcc(Assembler::zero, DONE_LABEL);
1576 if (UseRTMXendForLockBusy) {
1577 xend();
1578 jmp(L_decrement_retry);
1579 }
1580 else {
1581 xabort(0);
1582 }
1583 bind(L_on_abort);
1584 Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
1585 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1586 rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm);
1587 }
1588 if (RTMRetryCount > 0) {
1589 // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
1590 rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
1591 }
1592
1593 movptr(tmpReg, Address(boxReg, owner_offset)) ;
1594 testptr(tmpReg, tmpReg) ;
1595 jccb(Assembler::notZero, L_decrement_retry) ;
1596
1597 // Appears unlocked - try to swing _owner from null to non-null.
1598 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
1599 #ifdef _LP64
1600 Register threadReg = r15_thread;
1601 #else
1602 get_thread(scrReg);
1603 Register threadReg = scrReg;
1604 #endif
1605 if (os::is_MP()) {
1606 lock();
1607 }
1608 cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg
1609
1610 if (RTMRetryCount > 0) {
1611 // success: done, else retry
1612 jccb(Assembler::equal, DONE_LABEL) ;
1613 bind(L_decrement_retry);
1614 // Spin and retry if lock is busy.
1615 rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry);
1616 }
1617 else {
1618 bind(L_decrement_retry);
1619 }
1620 }
1621
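And a similarly simplified view of the inflated path above: elide when the monitor's _owner is NULL, otherwise abort and fall back to the usual lock cmpxchg of the current thread into _owner. The busy-spin and abort-retry loops are omitted and the names are illustrative:

    #include <immintrin.h>
    #include <atomic>

    static bool rtm_inflated_lock_sketch(std::atomic<void*>* owner, void* self_thread) {
      if (_xbegin() == _XBEGIN_STARTED) {
        if (owner->load(std::memory_order_relaxed) == nullptr) {
          return true;   // lock elided; _owner stays NULL inside the transaction
        }
        _xabort(0);      // monitor is owned: abort so the fallback path runs
      }
      void* expected = nullptr;   // mirrors "Invariant: tmpReg == 0" before the cmpxchg
      return owner->compare_exchange_strong(expected, self_thread);
    }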
1622 #endif // INCLUDE_RTM_OPT
1623
1277 // Fast_Lock and Fast_Unlock used by C2 1624 // Fast_Lock and Fast_Unlock used by C2
1278 1625
1279 // Because the transitions from emitted code to the runtime 1626 // Because the transitions from emitted code to the runtime
1280 // monitorenter/exit helper stubs are so slow it's critical that 1627 // monitorenter/exit helper stubs are so slow it's critical that
1281 // we inline both the stack-locking fast-path and the inflated fast path. 1628 // we inline both the stack-locking fast-path and the inflated fast path.
1348 1695
1349 // obj: object to lock 1696 // obj: object to lock
1350 // box: on-stack box address (displaced header location) - KILLED 1697 // box: on-stack box address (displaced header location) - KILLED
1351 // rax,: tmp -- KILLED 1698 // rax,: tmp -- KILLED
1352 // scr: tmp -- KILLED 1699 // scr: tmp -- KILLED
1353 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg, BiasedLockingCounters* counters) { 1700 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
1701 Register scrReg, Register cx1Reg, Register cx2Reg,
1702 BiasedLockingCounters* counters,
1703 RTMLockingCounters* rtm_counters,
1704 RTMLockingCounters* stack_rtm_counters,
1705 Metadata* method_data,
1706 bool use_rtm, bool profile_rtm) {
1354 // Ensure the register assignments are disjoint 1707 // Ensure the register assignments are disjoint
1355 guarantee (objReg != boxReg, ""); 1708 assert(tmpReg == rax, "");
1356 guarantee (objReg != tmpReg, ""); 1709
1357 guarantee (objReg != scrReg, ""); 1710 if (use_rtm) {
1358 guarantee (boxReg != tmpReg, ""); 1711 assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
1359 guarantee (boxReg != scrReg, ""); 1712 } else {
1360 guarantee (tmpReg == rax, ""); 1713 assert(cx1Reg == noreg, "");
1714 assert(cx2Reg == noreg, "");
1715 assert_different_registers(objReg, boxReg, tmpReg, scrReg);
1716 }
1361 1717
1362 if (counters != NULL) { 1718 if (counters != NULL) {
1363 atomic_incl(ExternalAddress((address)counters->total_entry_count_addr())); 1719 atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
1364 } 1720 }
1365 if (EmitSync & 1) { 1721 if (EmitSync & 1) {
1366 // set box->dhw = unused_mark (3) 1722 // set box->dhw = unused_mark (3)
1367 // Force all sync thru slow-path: slow_enter() and slow_exit() 1723 // Force all sync thru slow-path: slow_enter() and slow_exit()
1368 movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())); 1724 movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
1417 // If this invariant is not held we risk exclusion (safety) failure. 1773 // If this invariant is not held we risk exclusion (safety) failure.
1418 if (UseBiasedLocking && !UseOptoBiasInlining) { 1774 if (UseBiasedLocking && !UseOptoBiasInlining) {
1419 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, counters); 1775 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, counters);
1420 } 1776 }
1421 1777
1778 #if INCLUDE_RTM_OPT
1779 if (UseRTMForStackLocks && use_rtm) {
1780 rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
1781 stack_rtm_counters, method_data, profile_rtm,
1782 DONE_LABEL, IsInflated);
1783 }
1784 #endif // INCLUDE_RTM_OPT
1785
1422 movptr(tmpReg, Address(objReg, 0)); // [FETCH] 1786 movptr(tmpReg, Address(objReg, 0)); // [FETCH]
1423 testl (tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased 1787 testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
1424 jccb (Assembler::notZero, IsInflated); 1788 jccb(Assembler::notZero, IsInflated);
1425 1789
1426 // Attempt stack-locking ... 1790 // Attempt stack-locking ...
1427 orptr (tmpReg, 0x1); 1791 orptr (tmpReg, markOopDesc::unlocked_value);
1428 movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS 1792 movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
1429 if (os::is_MP()) { 1793 if (os::is_MP()) {
1430 lock(); 1794 lock();
1431 } 1795 }
1432 cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg 1796 cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
1433 if (counters != NULL) { 1797 if (counters != NULL) {
1434 cond_inc32(Assembler::equal, 1798 cond_inc32(Assembler::equal,
1435 ExternalAddress((address)counters->fast_path_entry_count_addr())); 1799 ExternalAddress((address)counters->fast_path_entry_count_addr()));
1436 } 1800 }
1437 jccb(Assembler::equal, DONE_LABEL); 1801 jcc(Assembler::equal, DONE_LABEL); // Success
1438 1802
1439 // Recursive locking 1803 // Recursive locking.
1804 // The object is stack-locked: markword contains stack pointer to BasicLock.
1805 // Locked by current thread if difference with current SP is less than one page.
1440 subptr(tmpReg, rsp); 1806 subptr(tmpReg, rsp);
1807 // Next instruction sets ZFlag == 1 (Success) if the difference is less than one page.
1441 andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) ); 1808 andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
1442 movptr(Address(boxReg, 0), tmpReg); 1809 movptr(Address(boxReg, 0), tmpReg);
1443 if (counters != NULL) { 1810 if (counters != NULL) {
1444 cond_inc32(Assembler::equal, 1811 cond_inc32(Assembler::equal,
1445 ExternalAddress((address)counters->fast_path_entry_count_addr())); 1812 ExternalAddress((address)counters->fast_path_entry_count_addr()));
1446 } 1813 }
1447 jmpb(DONE_LABEL); 1814 jmp(DONE_LABEL);
1448 1815
1449 bind(IsInflated); 1816 bind(IsInflated);
1817 // The object is inflated. tmpReg contains pointer to ObjectMonitor* + 2(monitor_value)
1818
1819 #if INCLUDE_RTM_OPT
1820 // Use the same RTM locking code in 32- and 64-bit VM.
1821 if (use_rtm) {
1822 rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
1823 rtm_counters, method_data, profile_rtm, DONE_LABEL);
1824 } else {
1825 #endif // INCLUDE_RTM_OPT
1826
1450 #ifndef _LP64 1827 #ifndef _LP64
1451 // The object is inflated. 1828 // The object is inflated.
1452 // 1829 //
1453 // TODO-FIXME: eliminate the ugly use of manifest constants: 1830 // TODO-FIXME: eliminate the ugly use of manifest constants:
1454 // Use markOopDesc::monitor_value instead of "2". 1831 // Use markOopDesc::monitor_value instead of "2".
1574 // avoid an RTO->RTS upgrade on the $line. 1951 // avoid an RTO->RTS upgrade on the $line.
1575 1952
1576 // Without cast to int32_t a movptr will destroy r10 which is typically obj 1953 // Without cast to int32_t a movptr will destroy r10 which is typically obj
1577 movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())); 1954 movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
1578 1955
1579 mov (boxReg, tmpReg); 1956 movptr (boxReg, tmpReg);
1580 movptr (tmpReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)); 1957 movptr (tmpReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
1581 testptr(tmpReg, tmpReg); 1958 testptr(tmpReg, tmpReg);
1582 jccb (Assembler::notZero, DONE_LABEL); 1959 jccb (Assembler::notZero, DONE_LABEL);
1583 1960
1584 // It's inflated and appears unlocked 1961 // It's inflated and appears unlocked
1585 if (os::is_MP()) { 1962 if (os::is_MP()) {
1586 lock(); 1963 lock();
1587 } 1964 }
1588 cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)); 1965 cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
1589 // Intentional fall-through into DONE_LABEL ... 1966 // Intentional fall-through into DONE_LABEL ...
1590 1967 #endif // _LP64
1968
1969 #if INCLUDE_RTM_OPT
1970 } // use_rtm()
1591 #endif 1971 #endif
1592
1593 // DONE_LABEL is a hot target - we'd really like to place it at the 1972 // DONE_LABEL is a hot target - we'd really like to place it at the
1594 // start of cache line by padding with NOPs. 1973 // start of cache line by padding with NOPs.
1595 // See the AMD and Intel software optimization manuals for the 1974 // See the AMD and Intel software optimization manuals for the
1596 // most efficient "long" NOP encodings. 1975 // most efficient "long" NOP encodings.
1597 // Unfortunately none of our alignment mechanisms suffice. 1976 // Unfortunately none of our alignment mechanisms suffice.
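For reference, the stack-locking attempt shown earlier in fast_lock (the [FETCH], cmpxchg and recursion-check sequence) amounts to the following, again as an illustrative C++ sketch rather than the emitted code; the masked subtraction is written out as explicit range and alignment checks:

    #include <atomic>
    #include <cstdint>

    // "box" is the on-stack BasicLock; the object's first word is its mark word.
    static bool stack_lock_sketch(std::atomic<intptr_t>* mark_addr, intptr_t* box,
                                  intptr_t current_sp, intptr_t page_size) {
      intptr_t mark = mark_addr->load() | 1;     // assume an unlocked header (...001)
      *box = mark;                               // displaced header, anticipating CAS success
      if (mark_addr->compare_exchange_strong(mark, reinterpret_cast<intptr_t>(box))) {
        return true;                             // object is now stack-locked by this thread
      }
      // CAS failed; "mark" now holds the current header. If it points into our own stack
      // (within one page of SP), this is a recursive lock and the box gets a zero header.
      intptr_t diff = mark - current_sp;
      bool recursive = diff >= 0 && diff < page_size && (diff & 7) == 0;
      *box = recursive ? 0 : diff;               // zero in the box flags the recursive case
      return recursive;
    }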
1629 // The only other source of unbalanced locking would be JNI. The "Java Native Interface: 2008 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
1630 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter 2009 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
1631 // should not be unlocked by "normal" java-level locking and vice-versa. The specification 2010 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
1632 // doesn't specify what will occur if a program engages in such mixed-mode locking, however. 2011 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
1633 2012
1634 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) { 2013 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
1635 guarantee (objReg != boxReg, ""); 2014 assert(boxReg == rax, "");
1636 guarantee (objReg != tmpReg, ""); 2015 assert_different_registers(objReg, boxReg, tmpReg);
1637 guarantee (boxReg != tmpReg, "");
1638 guarantee (boxReg == rax, "");
1639 2016
1640 if (EmitSync & 4) { 2017 if (EmitSync & 4) {
1641 // Disable - inhibit all inlining. Force control through the slow-path 2018 // Disable - inhibit all inlining. Force control through the slow-path
1642 cmpptr (rsp, 0); 2019 cmpptr (rsp, 0);
1643 } else 2020 } else
1665 // and appear before the (box->dhw == 0) recursive stack-lock test. 2042 // and appear before the (box->dhw == 0) recursive stack-lock test.
1666 if (UseBiasedLocking && !UseOptoBiasInlining) { 2043 if (UseBiasedLocking && !UseOptoBiasInlining) {
1667 biased_locking_exit(objReg, tmpReg, DONE_LABEL); 2044 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
1668 } 2045 }
1669 2046
2047 #if INCLUDE_RTM_OPT
2048 if (UseRTMForStackLocks && use_rtm) {
2049 assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
2050 Label L_regular_unlock;
2051 movptr(tmpReg, Address(objReg, 0)); // fetch markword
2052 andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
2053 cmpptr(tmpReg, markOopDesc::unlocked_value); // bits = 001 unlocked
2054 jccb(Assembler::notEqual, L_regular_unlock); // if !HLE RegularLock
2055 xend(); // otherwise end...
2056 jmp(DONE_LABEL); // ... and we're done
2057 bind(L_regular_unlock);
2058 }
2059 #endif
2060
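The xend above is the commit point for the transaction opened by rtm_stack_locking at lock time: if the mark word still shows the unlocked pattern, the lock was elided and the unlock is just an xend. A minimal sketch of that test, using the same illustrative naming as the locking sketch earlier:

    #include <immintrin.h>
    #include <cstdint>

    static bool rtm_stack_unlock_sketch(const volatile intptr_t* mark_addr,
                                        intptr_t lock_mask, intptr_t unlocked_value) {
      if ((*mark_addr & lock_mask) == unlocked_value) {  // header untouched: lock was elided
        _xend();                                         // commit the elided critical section
        return true;                                     // DONE_LABEL
      }
      return false;                                      // L_regular_unlock path
    }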
1670 cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header 2061 cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
2062 jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock
1671 movptr(tmpReg, Address(objReg, 0)); // Examine the object's markword 2063 movptr(tmpReg, Address(objReg, 0)); // Examine the object's markword
1672 jccb (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock 2064 testptr(tmpReg, markOopDesc::monitor_value); // Inflated?
1673
1674 testptr(tmpReg, 0x02); // Inflated?
1675 jccb (Assembler::zero, Stacked); 2065 jccb (Assembler::zero, Stacked);
1676 2066
1677 // It's inflated. 2067 // It's inflated.
2068 #if INCLUDE_RTM_OPT
2069 if (use_rtm) {
2070 Label L_regular_inflated_unlock;
2071 // Clean monitor_value bit to get valid pointer
2072 int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
2073 movptr(boxReg, Address(tmpReg, owner_offset));
2074 testptr(boxReg, boxReg);
2075 jccb(Assembler::notZero, L_regular_inflated_unlock);
2076 xend();
2077 jmpb(DONE_LABEL);
2078 bind(L_regular_inflated_unlock);
2079 }
2080 #endif
2081
1678 // Despite our balanced locking property we still check that m->_owner == Self 2082 // Despite our balanced locking property we still check that m->_owner == Self
1679 // as java routines or native JNI code called by this thread might 2083 // as java routines or native JNI code called by this thread might
1680 // have released the lock. 2084 // have released the lock.
1681 // Refer to the comments in synchronizer.cpp for how we might encode extra 2085 // Refer to the comments in synchronizer.cpp for how we might encode extra
1682 // state in _succ so we can avoid fetching EntryList|cxq. 2086 // state in _succ so we can avoid fetching EntryList|cxq.
2446 2850
2447 void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) { 2851 void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
2448 Condition negated_cond = negate_condition(cond); 2852 Condition negated_cond = negate_condition(cond);
2449 Label L; 2853 Label L;
2450 jcc(negated_cond, L); 2854 jcc(negated_cond, L);
2855 pushf(); // Preserve flags
2451 atomic_incl(counter_addr); 2856 atomic_incl(counter_addr);
2857 popf();
2452 bind(L); 2858 bind(L);
2453 } 2859 }
2454 2860
2455 int MacroAssembler::corrected_idivl(Register reg) { 2861 int MacroAssembler::corrected_idivl(Register reg) {
2456 // Full implementation of Java idiv and irem; checks for 2862 // Full implementation of Java idiv and irem; checks for