comparison src/cpu/x86/vm/x86_32.ad @ 18041:52b4284cb496

Merge with jdk8u20-b26
author Gilles Duboscq <duboscq@ssw.jku.at>
date Wed, 15 Oct 2014 16:02:50 +0200
parents 89152779163c 0bf37f737702
485 485
486 int Compile::ConstantTable::calculate_table_base_offset() const { 486 int Compile::ConstantTable::calculate_table_base_offset() const {
487 return 0; // absolute addressing, no offset 487 return 0; // absolute addressing, no offset
488 } 488 }
489 489
490 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
491 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
492 ShouldNotReachHere();
493 }
494
490 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { 495 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
491 // Empty encoding 496 // Empty encoding
492 } 497 }
493 498
494 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { 499 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
505 //============================================================================= 510 //=============================================================================
506 #ifndef PRODUCT 511 #ifndef PRODUCT
507 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { 512 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
508 Compile* C = ra_->C; 513 Compile* C = ra_->C;
509 514
510 int framesize = C->frame_slots() << LogBytesPerInt; 515 int framesize = C->frame_size_in_bytes();
516 int bangsize = C->bang_size_in_bytes();
511 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 517 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
512 // Remove wordSize for return addr which is already pushed. 518 // Remove wordSize for return addr which is already pushed.
513 framesize -= wordSize; 519 framesize -= wordSize;
514 520
515 if (C->need_stack_bang(framesize)) { 521 if (C->need_stack_bang(bangsize)) {
516 framesize -= wordSize; 522 framesize -= wordSize;
517 st->print("# stack bang"); 523 st->print("# stack bang (%d bytes)", bangsize);
518 st->print("\n\t"); 524 st->print("\n\t");
519 st->print("PUSH EBP\t# Save EBP"); 525 st->print("PUSH EBP\t# Save EBP");
520 if (framesize) { 526 if (framesize) {
521 st->print("\n\t"); 527 st->print("\n\t");
522 st->print("SUB ESP, #%d\t# Create frame",framesize); 528 st->print("SUB ESP, #%d\t# Create frame",framesize);
556 562
557 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 563 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
558 Compile* C = ra_->C; 564 Compile* C = ra_->C;
559 MacroAssembler _masm(&cbuf); 565 MacroAssembler _masm(&cbuf);
560 566
561 int framesize = C->frame_slots() << LogBytesPerInt; 567 int framesize = C->frame_size_in_bytes();
562 568 int bangsize = C->bang_size_in_bytes();
563 __ verified_entry(framesize, C->need_stack_bang(framesize), C->in_24_bit_fp_mode()); 569
570 __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
564 571
565 C->set_frame_complete(cbuf.insts_size()); 572 C->set_frame_complete(cbuf.insts_size());
566 573
567 if (C->has_mach_constant_base_node()) { 574 if (C->has_mach_constant_base_node()) {
568 // NOTE: We set the table base offset here because users might be 575 // NOTE: We set the table base offset here because users might be
582 589
583 //============================================================================= 590 //=============================================================================
584 #ifndef PRODUCT 591 #ifndef PRODUCT
585 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 592 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
586 Compile *C = ra_->C; 593 Compile *C = ra_->C;
587 int framesize = C->frame_slots() << LogBytesPerInt; 594 int framesize = C->frame_size_in_bytes();
588 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 595 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
589 // Remove two words for return addr and rbp, 596 // Remove two words for return addr and rbp,
590 framesize -= 2*wordSize; 597 framesize -= 2*wordSize;
591 598
592 if (C->max_vector_size() > 16) { 599 if (C->max_vector_size() > 16) {
622 if (C->in_24_bit_fp_mode()) { 629 if (C->in_24_bit_fp_mode()) {
623 MacroAssembler masm(&cbuf); 630 MacroAssembler masm(&cbuf);
624 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 631 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
625 } 632 }
626 633
627 int framesize = C->frame_slots() << LogBytesPerInt; 634 int framesize = C->frame_size_in_bytes();
628 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 635 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
629 // Remove two words for return addr and rbp, 636 // Remove two words for return addr and rbp,
630 framesize -= 2*wordSize; 637 framesize -= 2*wordSize;
631 638
632 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here 639 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
656 // If method set FPU control word, restore to standard control word 663 // If method set FPU control word, restore to standard control word
657 int size = C->in_24_bit_fp_mode() ? 6 : 0; 664 int size = C->in_24_bit_fp_mode() ? 6 : 0;
658 if (C->max_vector_size() > 16) size += 3; // vzeroupper 665 if (C->max_vector_size() > 16) size += 3; // vzeroupper
659 if (do_polling() && C->is_method_compilation()) size += 6; 666 if (do_polling() && C->is_method_compilation()) size += 6;
660 667
661 int framesize = C->frame_slots() << LogBytesPerInt; 668 int framesize = C->frame_size_in_bytes();
662 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 669 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
663 // Remove two words for return addr and rbp, 670 // Remove two words for return addr and rbp,
664 framesize -= 2*wordSize; 671 framesize -= 2*wordSize;
665 672
666 size++; // popl rbp, 673 size++; // popl rbp,
1290 return OptoBreakpoint ? 11 : 12; 1297 return OptoBreakpoint ? 11 : 12;
1291 } 1298 }
1292 1299
1293 1300
1294 //============================================================================= 1301 //=============================================================================
1295 uint size_exception_handler() {
1296 // NativeCall instruction size is the same as NativeJump.
1297 // exception handler starts out as a jump and can be patched to
1298 // a call by deoptimization. (4932387)
1299 // Note that this value is also credited (in output.cpp) to
1300 // the size of the code section.
1301 return NativeJump::instruction_size;
1302 }
1303
1304 // Emit exception handler code. Stuff framesize into a register
1305 // and call a VM stub routine.
1306 int emit_exception_handler(CodeBuffer& cbuf) {
1307
1308 // Note that the code buffer's insts_mark is always relative to insts.
1309 // That's why we must use the macroassembler to generate a handler.
1310 MacroAssembler _masm(&cbuf);
1311 address base =
1312 __ start_a_stub(size_exception_handler());
1313 if (base == NULL) return 0; // CodeBuffer::expand failed
1314 int offset = __ offset();
1315 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1316 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1317 __ end_a_stub();
1318 return offset;
1319 }
1320
1321 uint size_deopt_handler() {
1322 // NativeCall instruction size is the same as NativeJump.
1323 // deopt handler starts out as a jump and can be patched to
1324 // a call by deoptimization. (4932387)
1325 // Note that this value is also credited (in output.cpp) to
1326 // the size of the code section.
1327 return 5 + NativeJump::instruction_size; // pushl(); jmp;
1328 }
1329
1330 // Emit deopt handler code.
1331 int emit_deopt_handler(CodeBuffer& cbuf) {
1332
1333 // Note that the code buffer's insts_mark is always relative to insts.
1334 // That's why we must use the macroassembler to generate a handler.
1335 MacroAssembler _masm(&cbuf);
1336 address base =
1337 __ start_a_stub(size_deopt_handler());
1338 if (base == NULL) return 0; // CodeBuffer::expand failed
1339 int offset = __ offset();
1340 InternalAddress here(__ pc());
1341 __ pushptr(here.addr());
1342
1343 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1344 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1345 __ end_a_stub();
1346 return offset;
1347 }
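The stub emitted above has the following shape (my annotation; the pushed address serves as a synthetic return address from which the deopt blob recovers the deopt pc):

    here: PUSH here                                 ; 5 bytes: push the stub's own address
          JMP  SharedRuntime::deopt_blob()->unpack() ; NativeJump::instruction_size bytes

which is where the 5 + NativeJump::instruction_size in size_deopt_handler() comes from.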
1348 1302
1349 int Matcher::regnum_to_fpu_offset(int regnum) { 1303 int Matcher::regnum_to_fpu_offset(int regnum) {
1350 return regnum - 32; // The FP registers are in the second chunk 1304 return regnum - 32; // The FP registers are in the second chunk
1351 } 1305 }
1352 1306
1386 // Needs 2 CMOV's for longs. 1340 // Needs 2 CMOV's for longs.
1387 const int Matcher::long_cmove_cost() { return 1; } 1341 const int Matcher::long_cmove_cost() { return 1; }
1388 1342
1389 // No CMOVF/CMOVD with SSE/SSE2 1343 // No CMOVF/CMOVD with SSE/SSE2
1390 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; } 1344 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1345
1346 // Does the CPU require late expand (see block.cpp for description of late expand)?
1347 const bool Matcher::require_postalloc_expand = false;
1391 1348
1392 // Should the Matcher clone shifts on addressing modes, expecting them to 1349 // Should the Matcher clone shifts on addressing modes, expecting them to
1393 // be subsumed into complex addressing expressions or compute them into 1350 // be subsumed into complex addressing expressions or compute them into
1394 // registers? True for Intel but false for most RISCs 1351 // registers? True for Intel but false for most RISCs
1395 const bool Matcher::clone_shift_expressions = true; 1352 const bool Matcher::clone_shift_expressions = true;
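For illustration (a hypothetical snippet, not from the source), cloning pays off on x86 because scaled-index addressing absorbs the shift at each use site:

    // p + (i << 2) folds into a single instruction:
    //   MOV EAX, [ESI + EBX*4]
    // whereas a typical RISC must first materialize (i << 2) in a register.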
1530 return RegMask(); 1487 return RegMask();
1531 } 1488 }
1532 1489
1533 const RegMask Matcher::method_handle_invoke_SP_save_mask() { 1490 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1534 return EBP_REG_mask(); 1491 return EBP_REG_mask();
1535 }
1536
1537 const RegMask Matcher::mathExactI_result_proj_mask() {
1538 return EAX_REG_mask();
1539 }
1540
1541 const RegMask Matcher::mathExactL_result_proj_mask() {
1542 ShouldNotReachHere();
1543 return RegMask();
1544 }
1545
1546 const RegMask Matcher::mathExactI_flags_proj_mask() {
1547 return INT_FLAGS_mask();
1548 } 1492 }
1549 1493
1550 // Returns true if the high 32 bits of the value is known to be zero. 1494 // Returns true if the high 32 bits of the value is known to be zero.
1551 bool is_operand_hi32_zero(Node* n) { 1495 bool is_operand_hi32_zero(Node* n) {
1552 int opc = n->Opcode(); 1496 int opc = n->Opcode();
2907 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); 2851 emit_rm (cbuf,0x3, 0x3, $dst$$reg );
2908 emit_opcode(cbuf,0x83); // SBB hi,0 2852 emit_opcode(cbuf,0x83); // SBB hi,0
2909 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2853 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2910 emit_d8 (cbuf,0 ); 2854 emit_d8 (cbuf,0 );
2911 %} 2855 %}
2912
2913
2914 // Because the transitions from emitted code to the runtime
2915 // monitorenter/exit helper stubs are so slow it's critical that
2916 // we inline both the stack-locking fast-path and the inflated fast path.
2917 //
2918 // See also: cmpFastLock and cmpFastUnlock.
2919 //
2920 // What follows is a specialized inline transliteration of the code
2921 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
2922 // another option would be to emit TrySlowEnter and TrySlowExit methods
2923 // at startup-time. These methods would accept arguments as
2924 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
2925 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
2926 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
2927 // In practice, however, the # of lock sites is bounded and is usually small.
2928 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
2929 // if the processor uses simple bimodal branch predictors keyed by EIP,
2930 // since the helper routines would be called from multiple synchronization
2931 // sites.
2932 //
2933 // An even better approach would be to write "MonitorEnter()" and "MonitorExit()"
2934 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
2935 // to those specialized methods. That'd give us a mostly platform-independent
2936 // implementation that the JITs could optimize and inline at their pleasure.
2937 // Done correctly, the only time we'd need to cross to native code would be
2938 // to park() or unpark() threads. We'd also need a few more unsafe operators
2939 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
2940 // (b) provide explicit barriers or fence operations.
2941 //
2942 // TODO:
2943 //
2944 // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
2945 // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
2946 // Given TLAB allocation, Self is usually manifested in a register, so passing it into
2947 // the lock operators would typically be faster than reifying Self.
2948 //
2949 // * Ideally I'd define the primitives as:
2950 // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
2951 // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
2952 // Unfortunately ADLC bugs prevent us from expressing the ideal form.
2953 // Instead, we're stuck with the rather awkward and brittle register assignments below.
2954 // Furthermore the register assignments are overconstrained, possibly resulting in
2955 // sub-optimal code near the synchronization site.
2956 //
2957 // * Eliminate the sp-proximity tests and just use "== Self" tests instead.
2958 // Alternately, use a better sp-proximity test.
2959 //
2960 // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
2961 // Either one is sufficient to uniquely identify a thread.
2962 // TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
2963 //
2964 // * Intrinsify notify() and notifyAll() for the common cases where the
2965 // object is locked by the calling thread but the waitlist is empty,
2966 // to avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
2967 //
2968 // * use jccb and jmpb instead of jcc and jmp to improve code density.
2969 // But beware of excessive branch density on AMD Opterons.
2970 //
2971 // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
2972 // or failure of the fast-path. If the fast-path fails then we pass
2973 // control to the slow-path, typically in C. In Fast_Lock and
2974 // Fast_Unlock we often branch to DONE_LABEL, just to find that C2
2975 // will emit a conditional branch immediately after the node.
2976 // So we have branches to branches and lots of ICC.ZF games.
2977 // Instead, it might be better to have C2 pass a "FailureLabel"
2978 // into Fast_Lock and Fast_Unlock. In the case of success, control
2979 // will drop through the node. ICC.ZF is undefined at exit.
2980 // In the case of failure, the node will branch directly to the
2981 // FailureLabel
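// A sketch of today's contract, paraphrasing the paragraph above (this is
// not emitted code):
//   Fast_Lock(obj, box, tmp, scr)    ; sets ICC.ZF
//   JNE slow_path                    ; ZF == 0 -> fast path failed
//   ...                              ; ZF == 1 -> lock acquired, fall through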
2982
2983
2984 // obj: object to lock
2985 // box: on-stack box address (displaced header location) - KILLED
2986 // rax,: tmp -- KILLED
2987 // scr: tmp -- KILLED
2988 enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{
2989
2990 Register objReg = as_Register($obj$$reg);
2991 Register boxReg = as_Register($box$$reg);
2992 Register tmpReg = as_Register($tmp$$reg);
2993 Register scrReg = as_Register($scr$$reg);
2994
2995 // Ensure the register assignments are disjoint
2996 guarantee (objReg != boxReg, "") ;
2997 guarantee (objReg != tmpReg, "") ;
2998 guarantee (objReg != scrReg, "") ;
2999 guarantee (boxReg != tmpReg, "") ;
3000 guarantee (boxReg != scrReg, "") ;
3001 guarantee (tmpReg == as_Register(EAX_enc), "") ;
3002
3003 MacroAssembler masm(&cbuf);
3004
3005 if (_counters != NULL) {
3006 masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
3007 }
3008 if (EmitSync & 1) {
3009 // set box->dhw = unused_mark (3)
3010 // Force all sync thru slow-path: slow_enter() and slow_exit()
3011 masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ;
3012 masm.cmpptr (rsp, (int32_t)0) ;
3013 } else
3014 if (EmitSync & 2) {
3015 Label DONE_LABEL ;
3016 if (UseBiasedLocking) {
3017 // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3018 masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3019 }
3020
3021 masm.movptr(tmpReg, Address(objReg, 0)) ; // fetch markword
3022 masm.orptr (tmpReg, 0x1);
3023 masm.movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
3024 if (os::is_MP()) { masm.lock(); }
3025 masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3026 masm.jcc(Assembler::equal, DONE_LABEL);
3027 // Recursive locking
3028 masm.subptr(tmpReg, rsp);
3029 masm.andptr(tmpReg, (int32_t) 0xFFFFF003 );
3030 masm.movptr(Address(boxReg, 0), tmpReg);
3031 masm.bind(DONE_LABEL) ;
3032 } else {
3033 // Possible cases that we'll encounter in fast_lock
3034 // ------------------------------------------------
3035 // * Inflated
3036 // -- unlocked
3037 // -- Locked
3038 // = by self
3039 // = by other
3040 // * biased
3041 // -- by Self
3042 // -- by other
3043 // * neutral
3044 // * stack-locked
3045 // -- by self
3046 // = sp-proximity test hits
3047 // = sp-proximity test generates false-negative
3048 // -- by other
3049 //
3050
3051 Label IsInflated, DONE_LABEL, PopDone ;
3052
3053 // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
3054 // order to reduce the number of conditional branches in the most common cases.
3055 // Beware -- there's a subtle invariant that fetch of the markword
3056 // at [FETCH], below, will never observe a biased encoding (*101b).
3057 // If this invariant is not held we risk exclusion (safety) failure.
3058 if (UseBiasedLocking && !UseOptoBiasInlining) {
3059 masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3060 }
3061
3062 masm.movptr(tmpReg, Address(objReg, 0)) ; // [FETCH]
3063 masm.testptr(tmpReg, 0x02) ; // Inflated v (Stack-locked or neutral)
3064 masm.jccb (Assembler::notZero, IsInflated) ;
3065
3066 // Attempt stack-locking ...
3067 masm.orptr (tmpReg, 0x1);
3068 masm.movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
3069 if (os::is_MP()) { masm.lock(); }
3070 masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3071 if (_counters != NULL) {
3072 masm.cond_inc32(Assembler::equal,
3073 ExternalAddress((address)_counters->fast_path_entry_count_addr()));
3074 }
3075 masm.jccb (Assembler::equal, DONE_LABEL);
3076
3077 // Recursive locking
3078 masm.subptr(tmpReg, rsp);
3079 masm.andptr(tmpReg, 0xFFFFF003 );
3080 masm.movptr(Address(boxReg, 0), tmpReg);
3081 if (_counters != NULL) {
3082 masm.cond_inc32(Assembler::equal,
3083 ExternalAddress((address)_counters->fast_path_entry_count_addr()));
3084 }
3085 masm.jmp (DONE_LABEL) ;
3086
3087 masm.bind (IsInflated) ;
3088
3089 // The object is inflated.
3090 //
3091 // TODO-FIXME: eliminate the ugly use of manifest constants:
3092 // Use markOopDesc::monitor_value instead of "2".
3093 // use markOop::unused_mark() instead of "3".
3094 // The tmpReg value is an objectMonitor reference ORed with
3095 // markOopDesc::monitor_value (2). We can either convert tmpReg to an
3096 // objectmonitor pointer by masking off the "2" bit or we can just
3097 // use tmpReg as an objectmonitor pointer but bias the objectmonitor
3098 // field offsets with "-2" to compensate for and annul the low-order tag bit.
3099 //
3100 // I use the latter as it avoids AGI stalls.
3101 // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
3102 // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
3103 //
3104 #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)
3105
3106 // boxReg refers to the on-stack BasicLock in the current frame.
3107 // We'd like to write:
3108 // set box->_displaced_header = markOop::unused_mark(). Any non-0 value suffices.
3109 // This is convenient but results in a ST-before-CAS penalty. The following CAS suffers
3110 // additional latency as we have another ST in the store buffer that must drain.
3111
3112 if (EmitSync & 8192) {
3113 masm.movptr(Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty
3114 masm.get_thread (scrReg) ;
3115 masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
3116 masm.movptr(tmpReg, NULL_WORD); // consider: xor vs mov
3117 if (os::is_MP()) { masm.lock(); }
3118 masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3119 } else
3120 if ((EmitSync & 128) == 0) { // avoid ST-before-CAS
3121 masm.movptr(scrReg, boxReg) ;
3122 masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
3123
3124 // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
3125 if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
3126 // prefetchw [eax + Offset(_owner)-2]
3127 masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
3128 }
3129
3130 if ((EmitSync & 64) == 0) {
3131 // Optimistic form: consider XORL tmpReg,tmpReg
3132 masm.movptr(tmpReg, NULL_WORD) ;
3133 } else {
3134 // Can suffer RTS->RTO upgrades on shared or cold $ lines
3135 // Test-And-CAS instead of CAS
3136 masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner
3137 masm.testptr(tmpReg, tmpReg) ; // Locked ?
3138 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3139 }
3140
3141 // Appears unlocked - try to swing _owner from null to non-null.
3142 // Ideally, I'd manifest "Self" with get_thread and then attempt
3143 // to CAS the register containing Self into m->Owner.
3144 // But we don't have enough registers, so instead we can either try to CAS
3145 // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
3146 // we later store "Self" into m->Owner. Transiently storing a stack address
3147 // (rsp or the address of the box) into m->owner is harmless.
3148 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
3149 if (os::is_MP()) { masm.lock(); }
3150 masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3151 masm.movptr(Address(scrReg, 0), 3) ; // box->_displaced_header = 3
3152 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3153 masm.get_thread (scrReg) ; // beware: clobbers ICCs
3154 masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ;
3155 masm.xorptr(boxReg, boxReg) ; // set icc.ZFlag = 1 to indicate success
3156
3157 // If the CAS fails we can either retry or pass control to the slow-path.
3158 // We use the latter tactic.
3159 // Pass the CAS result in the icc.ZFlag into DONE_LABEL
3160 // If the CAS was successful ...
3161 // Self has acquired the lock
3162 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
3163 // Intentional fall-through into DONE_LABEL ...
3164 } else {
3165 masm.movptr(Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty
3166 masm.movptr(boxReg, tmpReg) ;
3167
3168 // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
3169 if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
3170 // prefetchw [eax + Offset(_owner)-2]
3171 masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
3172 }
3173
3174 if ((EmitSync & 64) == 0) {
3175 // Optimistic form
3176 masm.xorptr (tmpReg, tmpReg) ;
3177 } else {
3178 // Can suffer RTS->RTO upgrades on shared or cold $ lines
3179 masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner
3180 masm.testptr(tmpReg, tmpReg) ; // Locked ?
3181 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3182 }
3183
3184 // Appears unlocked - try to swing _owner from null to non-null.
3185 // Use either "Self" (in scr) or rsp as thread identity in _owner.
3186 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
3187 masm.get_thread (scrReg) ;
3188 if (os::is_MP()) { masm.lock(); }
3189 masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3190
3191 // If the CAS fails we can either retry or pass control to the slow-path.
3192 // We use the latter tactic.
3193 // Pass the CAS result in the icc.ZFlag into DONE_LABEL
3194 // If the CAS was successful ...
3195 // Self has acquired the lock
3196 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
3197 // Intentional fall-through into DONE_LABEL ...
3198 }
3199
3200 // DONE_LABEL is a hot target - we'd really like to place it at the
3201 // start of cache line by padding with NOPs.
3202 // See the AMD and Intel software optimization manuals for the
3203 // most efficient "long" NOP encodings.
3204 // Unfortunately none of our alignment mechanisms suffice.
3205 masm.bind(DONE_LABEL);
3206
3207 // Avoid branch-to-branch on AMD processors
3208 // This appears to be superstition.
3209 if (EmitSync & 32) masm.nop() ;
3210
3211
3212 // At DONE_LABEL the icc ZFlag is set as follows ...
3213 // Fast_Unlock uses the same protocol.
3214 // ZFlag == 1 -> Success
3215 // ZFlag == 0 -> Failure - force control through the slow-path
3216 }
3217 %}
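// Condensed pseudocode of the stack-locking path emitted above (my
// paraphrase; assumes the usual markword tags, low bits 01 = neutral,
// 10 = inflated):
//   mark = obj->mark;
//   if (mark & 2) goto IsInflated;                 // monitor tag
//   box->dhw = mark | 1;                           // anticipate success
//   if (CAS(&obj->mark, mark|1, box) succeeds) ZF = 1;                   // stack-locked
//   else { box->dhw = (mark - ESP) & 0xFFFFF003; ZF = (box->dhw == 0); } // recursive?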
3218
3219 // obj: object to unlock
3220 // box: box address (displaced header location), killed. Must be EAX.
3221 // rbx,: killed tmp; cannot be obj nor box.
3222 //
3223 // Some commentary on balanced locking:
3224 //
3225 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
3226 // Methods that don't have provably balanced locking are forced to run in the
3227 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
3228 // The interpreter provides two properties:
3229 // I1: At return-time the interpreter automatically and quietly unlocks any
3230 // objects acquired by the current activation (frame). Recall that the
3231 // interpreter maintains an on-stack list of locks currently held by
3232 // a frame.
3233 // I2: If a method attempts to unlock an object that is not held by the
3234 // frame, the interpreter throws IMSX.
3235 //
3236 // Let's say A(), which has provably balanced locking, acquires O and then calls B().
3237 // B() doesn't have provably balanced locking so it runs in the interpreter.
3238 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
3239 // is still locked by A().
3240 //
3241 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
3242 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
3243 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
3244 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
3245
3246 enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{
3247
3248 Register objReg = as_Register($obj$$reg);
3249 Register boxReg = as_Register($box$$reg);
3250 Register tmpReg = as_Register($tmp$$reg);
3251
3252 guarantee (objReg != boxReg, "") ;
3253 guarantee (objReg != tmpReg, "") ;
3254 guarantee (boxReg != tmpReg, "") ;
3255 guarantee (boxReg == as_Register(EAX_enc), "") ;
3256 MacroAssembler masm(&cbuf);
3257
3258 if (EmitSync & 4) {
3259 // Disable - inhibit all inlining. Force control through the slow-path
3260 masm.cmpptr (rsp, 0) ;
3261 } else
3262 if (EmitSync & 8) {
3263 Label DONE_LABEL ;
3264 if (UseBiasedLocking) {
3265 masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3266 }
3267 // classic stack-locking code ...
3268 masm.movptr(tmpReg, Address(boxReg, 0)) ;
3269 masm.testptr(tmpReg, tmpReg) ;
3270 masm.jcc (Assembler::zero, DONE_LABEL) ;
3271 if (os::is_MP()) { masm.lock(); }
3272 masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
3273 masm.bind(DONE_LABEL);
3274 } else {
3275 Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
3276
3277 // Critically, the biased locking test must have precedence over
3278 // and appear before the (box->dhw == 0) recursive stack-lock test.
3279 if (UseBiasedLocking && !UseOptoBiasInlining) {
3280 masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3281 }
3282
3283 masm.cmpptr(Address(boxReg, 0), 0) ; // Examine the displaced header
3284 masm.movptr(tmpReg, Address(objReg, 0)) ; // Examine the object's markword
3285 masm.jccb (Assembler::zero, DONE_LABEL) ; // 0 indicates recursive stack-lock
3286
3287 masm.testptr(tmpReg, 0x02) ; // Inflated?
3288 masm.jccb (Assembler::zero, Stacked) ;
3289
3290 masm.bind (Inflated) ;
3291 // It's inflated.
3292 // Despite our balanced locking property we still check that m->_owner == Self
3293 // as java routines or native JNI code called by this thread might
3294 // have released the lock.
3295 // Refer to the comments in synchronizer.cpp for how we might encode extra
3296 // state in _succ so we can avoid fetching EntryList|cxq.
3297 //
3298 // I'd like to add more cases in fast_lock() and fast_unlock() --
3299 // such as recursive enter and exit -- but we have to be wary of
3300 // I$ bloat, T$ effects and BP$ effects.
3301 //
3302 // If there's no contention try a 1-0 exit. That is, exit without
3303 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
3304 // we detect and recover from the race that the 1-0 exit admits.
3305 //
3306 // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
3307 // before it STs null into _owner, releasing the lock. Updates
3308 // to data protected by the critical section must be visible before
3309 // we drop the lock (and thus before any other thread could acquire
3310 // the lock and observe the fields protected by the lock).
3311 // IA32's memory-model is SPO, so STs are ordered with respect to
3312 // each other and there's no need for an explicit barrier (fence).
3313 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
3314
3315 masm.get_thread (boxReg) ;
3316 if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
3317 // prefetchw [ebx + Offset(_owner)-2]
3318 masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2));
3319 }
3320
3321 // Note that we could employ various encoding schemes to reduce
3322 // the number of loads below (currently 4) to just 2 or 3.
3323 // Refer to the comments in synchronizer.cpp.
3324 // In practice the chain of fetches doesn't seem to impact performance, however.
3325 if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
3326 // Attempt to reduce branch density - AMD's branch predictor.
3327 masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3328 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3329 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3330 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3331 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3332 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
3333 masm.jmpb (DONE_LABEL) ;
3334 } else {
3335 masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3336 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3337 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3338 masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3339 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3340 masm.jccb (Assembler::notZero, CheckSucc) ;
3341 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
3342 masm.jmpb (DONE_LABEL) ;
3343 }
3344
3345 // The following code fragment (EmitSync & 65536) improves the performance of
3346 // contended applications and contended synchronization microbenchmarks.
3347 // Unfortunately the emission of the code - even though not executed - causes regressions
3348 // in scimark and jetstream, evidently because of $ effects. Replacing the code
3349 // with an equal number of never-executed NOPs results in the same regression.
3350 // We leave it off by default.
3351
3352 if ((EmitSync & 65536) != 0) {
3353 Label LSuccess, LGoSlowPath ;
3354
3355 masm.bind (CheckSucc) ;
3356
3357 // Optional pre-test ... it's safe to elide this
3358 if ((EmitSync & 16) == 0) {
3359 masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
3360 masm.jccb (Assembler::zero, LGoSlowPath) ;
3361 }
3362
3363 // We have a classic Dekker-style idiom:
3364 // ST m->_owner = 0 ; MEMBAR; LD m->_succ
3365 // There are a number of ways to implement the barrier:
3366 // (1) lock:andl &m->_owner, 0
3367 // is fast, but masm doesn't currently support the "ANDL M,IMM32" form.
3368 // LOCK: ANDL [ebx+Offset(_Owner)-2], 0
3369 // Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
3370 // (2) If supported, an explicit MFENCE is appealing.
3371 // In older IA32 processors MFENCE is slower than lock:add or xchg
3372 // particularly if the write-buffer is full, as might be the case
3373 // if stores closely precede the fence or fence-equivalent instruction.
3374 // In more modern implementations MFENCE appears faster, however.
3375 // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
3376 // The $lines underlying the top-of-stack should be in M-state.
3377 // The locked add instruction is serializing, of course.
3378 // (4) Use xchg, which is serializing
3379 // mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
3380 // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
3381 // The integer condition codes will tell us if succ was 0.
3382 // Since _succ and _owner should reside in the same $line and
3383 // we just stored into _owner, it's likely that the $line
3384 // remains in M-state for the lock:orl.
3385 //
3386 // We currently use (3), although it's likely that switching to (2)
3387 // is correct for the future.
3388
3389 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
3390 if (os::is_MP()) {
3391 if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
3392 masm.mfence();
3393 } else {
3394 masm.lock () ; masm.addptr(Address(rsp, 0), 0) ;
3395 }
3396 }
3397 // Ratify _succ remains non-null
3398 masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
3399 masm.jccb (Assembler::notZero, LSuccess) ;
3400
3401 masm.xorptr(boxReg, boxReg) ; // box is really EAX
3402 if (os::is_MP()) { masm.lock(); }
3403 masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3404 masm.jccb (Assembler::notEqual, LSuccess) ;
3405 // Since we're low on registers we installed rsp as a placeholder in _owner.
3406 // Now install Self over rsp. This is safe as we're transitioning from
3407 // non-null to non-null.
3408 masm.get_thread (boxReg) ;
3409 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ;
3410 // Intentional fall-through into LGoSlowPath ...
3411
3412 masm.bind (LGoSlowPath) ;
3413 masm.orptr(boxReg, 1) ; // set ICC.ZF=0 to indicate failure
3414 masm.jmpb (DONE_LABEL) ;
3415
3416 masm.bind (LSuccess) ;
3417 masm.xorptr(boxReg, boxReg) ; // set ICC.ZF=1 to indicate success
3418 masm.jmpb (DONE_LABEL) ;
3419 }
3420
3421 masm.bind (Stacked) ;
3422 // It's not inflated and it's not recursively stack-locked and it's not biased.
3423 // It must be stack-locked.
3424 // Try to reset the header to displaced header.
3425 // The "box" value on the stack is stable, so we can reload
3426 // and be assured we observe the same value as above.
3427 masm.movptr(tmpReg, Address(boxReg, 0)) ;
3428 if (os::is_MP()) { masm.lock(); }
3429 masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
3430 // Intentional fall-through into DONE_LABEL
3431
3432
3433 // DONE_LABEL is a hot target - we'd really like to place it at the
3434 // start of cache line by padding with NOPs.
3435 // See the AMD and Intel software optimization manuals for the
3436 // most efficient "long" NOP encodings.
3437 // Unfortunately none of our alignment mechanisms suffice.
3438 if ((EmitSync & 65536) == 0) {
3439 masm.bind (CheckSucc) ;
3440 }
3441 masm.bind(DONE_LABEL);
3442
3443 // Avoid branch to branch on AMD processors
3444 if (EmitSync & 32768) { masm.nop() ; }
3445 }
3446 %}
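// Condensed pseudocode of the optional (EmitSync & 65536) 1-0 exit above
// (my paraphrase of the emitted sequence):
//   m->_owner = NULL;                          // ST: release the lock
//   LOCK ADD [ESP], 0;                         // StoreLoad barrier, option (3)
//   if (m->_succ != 0)                 ZF = 1; // a successor will re-acquire
//   else if (!CAS(&m->_owner, 0, ESP)) ZF = 1; // raced: another thread owns it
//   else { m->_owner = Self; ZF = 0; }         // re-owned: go slow and wake a successor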
3447
3448 2856
3449 enc_class enc_pop_rdx() %{ 2857 enc_class enc_pop_rdx() %{
3450 emit_opcode(cbuf,0x5A); 2858 emit_opcode(cbuf,0x5A);
3451 %} 2859 %}
3452 2860
3766 // offsets are based on outgoing arguments, i.e. a CALLER setting up 3174 // offsets are based on outgoing arguments, i.e. a CALLER setting up
3767 // arguments for a CALLEE. Incoming stack arguments are 3175 // arguments for a CALLEE. Incoming stack arguments are
3768 // automatically biased by the preserve_stack_slots field above. 3176 // automatically biased by the preserve_stack_slots field above.
3769 c_calling_convention %{ 3177 c_calling_convention %{
3770 // This is obviously always outgoing 3178 // This is obviously always outgoing
3771 (void) SharedRuntime::c_calling_convention(sig_bt, regs, length); 3179 (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3772 %} 3180 %}
3773 3181
3774 // Location of C & interpreter return values 3182 // Location of C & interpreter return values
3775 c_return_value %{ 3183 c_return_value %{
3776 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 3184 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
5702 %} 5110 %}
5703 ins_pipe(ialu_reg); 5111 ins_pipe(ialu_reg);
5704 %} 5112 %}
5705 5113
5706 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5114 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5115 predicate(UseCountTrailingZerosInstruction);
5116 match(Set dst (CountTrailingZerosI src));
5117 effect(KILL cr);
5118
5119 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
5120 ins_encode %{
5121 __ tzcntl($dst$$Register, $src$$Register);
5122 %}
5123 ins_pipe(ialu_reg);
5124 %}
5125
5126 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5127 predicate(!UseCountTrailingZerosInstruction);
5707 match(Set dst (CountTrailingZerosI src)); 5128 match(Set dst (CountTrailingZerosI src));
5708 effect(KILL cr); 5129 effect(KILL cr);
5709 5130
5710 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5131 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
5711 "JNZ done\n\t" 5132 "JNZ done\n\t"
5721 %} 5142 %}
5722 ins_pipe(ialu_reg); 5143 ins_pipe(ialu_reg);
5723 %} 5144 %}
5724 5145
5725 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5146 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5147 predicate(UseCountTrailingZerosInstruction);
5148 match(Set dst (CountTrailingZerosL src));
5149 effect(TEMP dst, KILL cr);
5150
5151 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
5152 "JNC done\n\t"
5153 "TZCNT $dst, $src.hi\n\t"
5154 "ADD $dst, 32\n"
5155 "done:" %}
5156 ins_encode %{
5157 Register Rdst = $dst$$Register;
5158 Register Rsrc = $src$$Register;
5159 Label done;
5160 __ tzcntl(Rdst, Rsrc);
5161 __ jccb(Assembler::carryClear, done);
5162 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5163 __ addl(Rdst, BitsPerInt);
5164 __ bind(done);
5165 %}
5166 ins_pipe(ialu_reg);
5167 %}
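A portable reference model for the two-step long count above, offered as a hedged sketch (TZCNT of a zero 32-bit input yields 32 and sets CF, which the JNC exploits):

    #include <cstdint>

    // Model of 32-bit TZCNT: returns 32 for a zero input.
    static int tzcnt32_ref(uint32_t x) {
      int n = 0;
      while (n < 32 && ((x >> n) & 1u) == 0) n++;
      return n;
    }

    // Mirrors the emitted sequence: count the low word; only when it was all
    // zeros (carry set) count the high word and add BitsPerInt (32).
    static int tzcnt64_ref(uint32_t lo, uint32_t hi) {
      int n = tzcnt32_ref(lo);
      return (n < 32) ? n : 32 + tzcnt32_ref(hi);
    }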
5168
5169 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5170 predicate(!UseCountTrailingZerosInstruction);
5726 match(Set dst (CountTrailingZerosL src)); 5171 match(Set dst (CountTrailingZerosL src));
5727 effect(TEMP dst, KILL cr); 5172 effect(TEMP dst, KILL cr);
5728 5173
5729 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5174 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
5730 "JNZ done\n\t" 5175 "JNZ done\n\t"
7097 //----------MemBar Instructions----------------------------------------------- 6542 //----------MemBar Instructions-----------------------------------------------
7098 // Memory barrier flavors 6543 // Memory barrier flavors
7099 6544
7100 instruct membar_acquire() %{ 6545 instruct membar_acquire() %{
7101 match(MemBarAcquire); 6546 match(MemBarAcquire);
6547 match(LoadFence);
7102 ins_cost(400); 6548 ins_cost(400);
7103 6549
7104 size(0); 6550 size(0);
7105 format %{ "MEMBAR-acquire ! (empty encoding)" %} 6551 format %{ "MEMBAR-acquire ! (empty encoding)" %}
7106 ins_encode(); 6552 ins_encode();
7117 ins_pipe(empty); 6563 ins_pipe(empty);
7118 %} 6564 %}
7119 6565
7120 instruct membar_release() %{ 6566 instruct membar_release() %{
7121 match(MemBarRelease); 6567 match(MemBarRelease);
6568 match(StoreFence);
7122 ins_cost(400); 6569 ins_cost(400);
7123 6570
7124 size(0); 6571 size(0);
7125 format %{ "MEMBAR-release ! (empty encoding)" %} 6572 format %{ "MEMBAR-release ! (empty encoding)" %}
7126 ins_encode( ); 6573 ins_encode( );
7533 %} 6980 %}
7534 6981
7535 //----------Arithmetic Instructions-------------------------------------------- 6982 //----------Arithmetic Instructions--------------------------------------------
7536 //----------Addition Instructions---------------------------------------------- 6983 //----------Addition Instructions----------------------------------------------
7537 6984
7538 instruct addExactI_eReg(eAXRegI dst, rRegI src, eFlagsReg cr)
7539 %{
7540 match(AddExactI dst src);
7541 effect(DEF cr);
7542
7543 format %{ "ADD $dst, $src\t# addExact int" %}
7544 ins_encode %{
7545 __ addl($dst$$Register, $src$$Register);
7546 %}
7547 ins_pipe(ialu_reg_reg);
7548 %}
7549
7550 instruct addExactI_eReg_imm(eAXRegI dst, immI src, eFlagsReg cr)
7551 %{
7552 match(AddExactI dst src);
7553 effect(DEF cr);
7554
7555 format %{ "ADD $dst, $src\t# addExact int" %}
7556 ins_encode %{
7557 __ addl($dst$$Register, $src$$constant);
7558 %}
7559 ins_pipe(ialu_reg_reg);
7560 %}
7561
7562 instruct addExactI_eReg_mem(eAXRegI dst, memory src, eFlagsReg cr)
7563 %{
7564 match(AddExactI dst (LoadI src));
7565 effect(DEF cr);
7566
7567 ins_cost(125);
7568 format %{ "ADD $dst,$src\t# addExact int" %}
7569 ins_encode %{
7570 __ addl($dst$$Register, $src$$Address);
7571 %}
7572 ins_pipe( ialu_reg_mem );
7573 %}
7574
7575
7576 // Integer Addition Instructions 6985 // Integer Addition Instructions
7577 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 6986 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7578 match(Set dst (AddI dst src)); 6987 match(Set dst (AddI dst src));
7579 effect(KILL cr); 6988 effect(KILL cr);
7580 6989
7880 ins_pipe( pipe_cmpxchg ); 7289 ins_pipe( pipe_cmpxchg );
7881 %} 7290 %}
7882 7291
7883 //----------Subtraction Instructions------------------------------------------- 7292 //----------Subtraction Instructions-------------------------------------------
7884 7293
7885 instruct subExactI_eReg(eAXRegI dst, rRegI src, eFlagsReg cr)
7886 %{
7887 match(SubExactI dst src);
7888 effect(DEF cr);
7889
7890 format %{ "SUB $dst, $src\t# subExact int" %}
7891 ins_encode %{
7892 __ subl($dst$$Register, $src$$Register);
7893 %}
7894 ins_pipe(ialu_reg_reg);
7895 %}
7896
7897 instruct subExactI_eReg_imm(eAXRegI dst, immI src, eFlagsReg cr)
7898 %{
7899 match(SubExactI dst src);
7900 effect(DEF cr);
7901
7902 format %{ "SUB $dst, $src\t# subExact int" %}
7903 ins_encode %{
7904 __ subl($dst$$Register, $src$$constant);
7905 %}
7906 ins_pipe(ialu_reg_reg);
7907 %}
7908
7909 instruct subExactI_eReg_mem(eAXRegI dst, memory src, eFlagsReg cr)
7910 %{
7911 match(SubExactI dst (LoadI src));
7912 effect(DEF cr);
7913
7914 ins_cost(125);
7915 format %{ "SUB $dst,$src\t# subExact int" %}
7916 ins_encode %{
7917 __ subl($dst$$Register, $src$$Address);
7918 %}
7919 ins_pipe( ialu_reg_mem );
7920 %}
7921
7922 // Integer Subtraction Instructions 7294 // Integer Subtraction Instructions
7923 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7295 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7924 match(Set dst (SubI dst src)); 7296 match(Set dst (SubI dst src));
7925 effect(KILL cr); 7297 effect(KILL cr);
7926 7298
7983 size(2); 7355 size(2);
7984 format %{ "NEG $dst" %} 7356 format %{ "NEG $dst" %}
7985 opcode(0xF7,0x03); // Opcode F7 /3 7357 opcode(0xF7,0x03); // Opcode F7 /3
7986 ins_encode( OpcP, RegOpc( dst ) ); 7358 ins_encode( OpcP, RegOpc( dst ) );
7987 ins_pipe( ialu_reg ); 7359 ins_pipe( ialu_reg );
7988 %}
7989
7990 instruct negExactI_eReg(eAXRegI dst, eFlagsReg cr) %{
7991 match(NegExactI dst);
7992 effect(DEF cr);
7993
7994 format %{ "NEG $dst\t# negExact int"%}
7995 ins_encode %{
7996 __ negl($dst$$Register);
7997 %}
7998 ins_pipe(ialu_reg);
7999 %} 7360 %}
8000 7361
8001 //----------Multiplication/Division Instructions------------------------------- 7362 //----------Multiplication/Division Instructions-------------------------------
8002 // Integer Multiplication Instructions 7363 // Integer Multiplication Instructions
8003 // Multiply Register 7364 // Multiply Register
8206 "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t" 7567 "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
8207 "ADD EDX,$tmp" %} 7568 "ADD EDX,$tmp" %}
8208 ins_encode( long_multiply_con( dst, src, tmp ) ); 7569 ins_encode( long_multiply_con( dst, src, tmp ) );
8209 ins_pipe( pipe_slow ); 7570 ins_pipe( pipe_slow );
8210 %} 7571 %}
8211
8212 instruct mulExactI_eReg(eAXRegI dst, rRegI src, eFlagsReg cr)
8213 %{
8214 match(MulExactI dst src);
8215 effect(DEF cr);
8216
8217 ins_cost(300);
8218 format %{ "IMUL $dst, $src\t# mulExact int" %}
8219 ins_encode %{
8220 __ imull($dst$$Register, $src$$Register);
8221 %}
8222 ins_pipe(ialu_reg_reg_alu0);
8223 %}
8224
8225 instruct mulExactI_eReg_imm(eAXRegI dst, rRegI src, immI imm, eFlagsReg cr)
8226 %{
8227 match(MulExactI src imm);
8228 effect(DEF cr);
8229
8230 ins_cost(300);
8231 format %{ "IMUL $dst, $src, $imm\t# mulExact int" %}
8232 ins_encode %{
8233 __ imull($dst$$Register, $src$$Register, $imm$$constant);
8234 %}
8235 ins_pipe(ialu_reg_reg_alu0);
8236 %}
8237
8238 instruct mulExactI_eReg_mem(eAXRegI dst, memory src, eFlagsReg cr)
8239 %{
8240 match(MulExactI dst (LoadI src));
8241 effect(DEF cr);
8242
8243 ins_cost(350);
8244 format %{ "IMUL $dst, $src\t# mulExact int" %}
8245 ins_encode %{
8246 __ imull($dst$$Register, $src$$Address);
8247 %}
8248 ins_pipe(ialu_reg_mem_alu0);
8249 %}
8250
8251 7572
8252 // Integer DIV with Register 7573 // Integer DIV with Register
8253 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7574 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
8254 match(Set rax (DivI rax div)); 7575 match(Set rax (DivI rax div));
8255 effect(KILL rdx, KILL cr); 7576 effect(KILL rdx, KILL cr);
8690 // ins_encode( MemImm( dst, src) ); 8011 // ins_encode( MemImm( dst, src) );
8691 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8012 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8692 ins_pipe( ialu_mem_imm ); 8013 ins_pipe( ialu_mem_imm );
8693 %} 8014 %}
8694 8015
8016 // BMI1 instructions
8017 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8018 match(Set dst (AndI (XorI src1 minus_1) src2));
8019 predicate(UseBMI1Instructions);
8020 effect(KILL cr);
8021
8022 format %{ "ANDNL $dst, $src1, $src2" %}
8023
8024 ins_encode %{
8025 __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8026 %}
8027 ins_pipe(ialu_reg);
8028 %}
8029
8030 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8031 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8032 predicate(UseBMI1Instructions);
8033 effect(KILL cr);
8034
8035 ins_cost(125);
8036 format %{ "ANDNL $dst, $src1, $src2" %}
8037
8038 ins_encode %{
8039 __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8040 %}
8041 ins_pipe(ialu_reg_mem);
8042 %}
8043
8044 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8045 match(Set dst (AndI (SubI imm_zero src) src));
8046 predicate(UseBMI1Instructions);
8047 effect(KILL cr);
8048
8049 format %{ "BLSIL $dst, $src" %}
8050
8051 ins_encode %{
8052 __ blsil($dst$$Register, $src$$Register);
8053 %}
8054 ins_pipe(ialu_reg);
8055 %}
8056
8057 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8058 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8059 predicate(UseBMI1Instructions);
8060 effect(KILL cr);
8061
8062 ins_cost(125);
8063 format %{ "BLSIL $dst, $src" %}
8064
8065 ins_encode %{
8066 __ blsil($dst$$Register, $src$$Address);
8067 %}
8068 ins_pipe(ialu_reg_mem);
8069 %}
8070
8071 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8072 %{
8073 match(Set dst (XorI (AddI src minus_1) src));
8074 predicate(UseBMI1Instructions);
8075 effect(KILL cr);
8076
8077 format %{ "BLSMSKL $dst, $src" %}
8078
8079 ins_encode %{
8080 __ blsmskl($dst$$Register, $src$$Register);
8081 %}
8082
8083 ins_pipe(ialu_reg);
8084 %}
8085
8086 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8087 %{
8088 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8089 predicate(UseBMI1Instructions);
8090 effect(KILL cr);
8091
8092 ins_cost(125);
8093 format %{ "BLSMSKL $dst, $src" %}
8094
8095 ins_encode %{
8096 __ blsmskl($dst$$Register, $src$$Address);
8097 %}
8098
8099 ins_pipe(ialu_reg_mem);
8100 %}
8101
8102 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8103 %{
8104 match(Set dst (AndI (AddI src minus_1) src) );
8105 predicate(UseBMI1Instructions);
8106 effect(KILL cr);
8107
8108 format %{ "BLSRL $dst, $src" %}
8109
8110 ins_encode %{
8111 __ blsrl($dst$$Register, $src$$Register);
8112 %}
8113
8114 ins_pipe(ialu_reg);
8115 %}
8116
8117 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8118 %{
8119 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8120 predicate(UseBMI1Instructions);
8121 effect(KILL cr);
8122
8123 ins_cost(125);
8124 format %{ "BLSRL $dst, $src" %}
8125
8126 ins_encode %{
8127 __ blsrl($dst$$Register, $src$$Address);
8128 %}
8129
8130 ins_pipe(ialu_reg_mem);
8131 %}
8132
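For orientation, the bit identities these BMI1 patterns match, as a small self-checking sketch (standard identities; the ADL match rules above encode exactly these DAG shapes):

    #include <cstdint>
    #include <cassert>

    static uint32_t andn_ref  (uint32_t x, uint32_t y) { return ~x & y; } // AndI(XorI(x,-1), y)
    static uint32_t blsi_ref  (uint32_t x) { return x & (0u - x); }       // AndI(SubI(0,x), x)
    static uint32_t blsmsk_ref(uint32_t x) { return x ^ (x - 1u); }       // XorI(AddI(x,-1), x)
    static uint32_t blsr_ref  (uint32_t x) { return x & (x - 1u); }       // AndI(AddI(x,-1), x)

    int main() {
      assert(blsi_ref(0x28u)   == 0x08u);      // isolate lowest set bit
      assert(blsr_ref(0x28u)   == 0x20u);      // clear lowest set bit
      assert(blsmsk_ref(0x28u) == 0x0Fu);      // mask through lowest set bit
      assert(andn_ref(0x0Fu, 0xFFu) == 0xF0u);
      return 0;
    }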
8695 // Or Instructions 8133 // Or Instructions
8696 // Or Register with Register 8134 // Or Register with Register
8697 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8135 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8698 match(Set dst (OrI dst src)); 8136 match(Set dst (OrI dst src));
8699 effect(KILL cr); 8137 effect(KILL cr);
9112 8550
9113 /* If I enable this, I encourage spilling in the inner loop of compress. 8551 /* If I enable this, I encourage spilling in the inner loop of compress.
9114 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{ 8552 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
9115 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q))); 8553 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
9116 */ 8554 */
8555 //----------Overflow Math Instructions-----------------------------------------
8556
8557 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8558 %{
8559 match(Set cr (OverflowAddI op1 op2));
8560 effect(DEF cr, USE_KILL op1, USE op2);
8561
8562 format %{ "ADD $op1, $op2\t# overflow check int" %}
8563
8564 ins_encode %{
8565 __ addl($op1$$Register, $op2$$Register);
8566 %}
8567 ins_pipe(ialu_reg_reg);
8568 %}
8569
8570 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8571 %{
8572 match(Set cr (OverflowAddI op1 op2));
8573 effect(DEF cr, USE_KILL op1, USE op2);
8574
8575 format %{ "ADD $op1, $op2\t# overflow check int" %}
8576
8577 ins_encode %{
8578 __ addl($op1$$Register, $op2$$constant);
8579 %}
8580 ins_pipe(ialu_reg_reg);
8581 %}
8582
8583 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8584 %{
8585 match(Set cr (OverflowSubI op1 op2));
8586
8587 format %{ "CMP $op1, $op2\t# overflow check int" %}
8588 ins_encode %{
8589 __ cmpl($op1$$Register, $op2$$Register);
8590 %}
8591 ins_pipe(ialu_reg_reg);
8592 %}
8593
8594 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8595 %{
8596 match(Set cr (OverflowSubI op1 op2));
8597
8598 format %{ "CMP $op1, $op2\t# overflow check int" %}
8599 ins_encode %{
8600 __ cmpl($op1$$Register, $op2$$constant);
8601 %}
8602 ins_pipe(ialu_reg_reg);
8603 %}
8604
8605 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8606 %{
8607 match(Set cr (OverflowSubI zero op2));
8608 effect(DEF cr, USE_KILL op2);
8609
8610 format %{ "NEG $op2\t# overflow check int" %}
8611 ins_encode %{
8612 __ negl($op2$$Register);
8613 %}
8614 ins_pipe(ialu_reg_reg);
8615 %}
8616
8617 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8618 %{
8619 match(Set cr (OverflowMulI op1 op2));
8620 effect(DEF cr, USE_KILL op1, USE op2);
8621
8622 format %{ "IMUL $op1, $op2\t# overflow check int" %}
8623 ins_encode %{
8624 __ imull($op1$$Register, $op2$$Register);
8625 %}
8626 ins_pipe(ialu_reg_reg_alu0);
8627 %}
8628
8629 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8630 %{
8631 match(Set cr (OverflowMulI op1 op2));
8632 effect(DEF cr, TEMP tmp, USE op1, USE op2);
8633
8634 format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
8635 ins_encode %{
8636 __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8637 %}
8638 ins_pipe(ialu_reg_reg_alu0);
8639 %}
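In portable terms, what the flag-producing nodes above compute (an illustrative model, not HotSpot code): x86 sets OF exactly when the signed result wraps, and OverflowSubI can use CMP, which evaluates the subtraction for flags without writing op1, hence no USE_KILL there:

    #include <cstdint>
    #include <climits>

    // OF after 'ADD op1, op2' (OverflowAddI)
    static bool add_overflows(int32_t a, int32_t b) {
      int64_t wide = (int64_t)a + (int64_t)b;
      return wide < INT32_MIN || wide > INT32_MAX;
    }

    // OF after 'CMP op1, op2' (OverflowSubI); op1 is left unmodified
    static bool sub_overflows(int32_t a, int32_t b) {
      int64_t wide = (int64_t)a - (int64_t)b;
      return wide < INT32_MIN || wide > INT32_MAX;
    }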
9117 8640
9118 //----------Long Instructions------------------------------------------------ 8641 //----------Long Instructions------------------------------------------------
9119 // Add Long Register with Register 8642 // Add Long Register with Register
9120 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8643 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9121 match(Set dst (AddL dst src)); 8644 match(Set dst (AddL dst src));
9225 format %{ "AND $dst.lo,$mem\n\t" 8748 format %{ "AND $dst.lo,$mem\n\t"
9226 "AND $dst.hi,$mem+4" %} 8749 "AND $dst.hi,$mem+4" %}
9227 opcode(0x23, 0x23); 8750 opcode(0x23, 0x23);
9228 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8751 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9229 ins_pipe( ialu_reg_long_mem ); 8752 ins_pipe( ialu_reg_long_mem );
8753 %}
8754
8755 // BMI1 instructions
8756 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8757 match(Set dst (AndL (XorL src1 minus_1) src2));
8758 predicate(UseBMI1Instructions);
8759 effect(KILL cr, TEMP dst);
8760
8761 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
8762 "ANDNL $dst.hi, $src1.hi, $src2.hi"
8763 %}
8764
8765 ins_encode %{
8766 Register Rdst = $dst$$Register;
8767 Register Rsrc1 = $src1$$Register;
8768 Register Rsrc2 = $src2$$Register;
8769 __ andnl(Rdst, Rsrc1, Rsrc2);
8770 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8771 %}
8772 ins_pipe(ialu_reg_reg_long);
8773 %}
8774
8775 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8776 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8777 predicate(UseBMI1Instructions);
8778 effect(KILL cr, TEMP dst);
8779
8780 ins_cost(125);
8781 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
8782 "ANDNL $dst.hi, $src1.hi, $src2+4"
8783 %}
8784
8785 ins_encode %{
8786 Register Rdst = $dst$$Register;
8787 Register Rsrc1 = $src1$$Register;
8788 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
8789
8790 __ andnl(Rdst, Rsrc1, $src2$$Address);
8791 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
8792 %}
8793 ins_pipe(ialu_reg_mem);
8794 %}
8795
8796 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
8797 match(Set dst (AndL (SubL imm_zero src) src));
8798 predicate(UseBMI1Instructions);
8799 effect(KILL cr, TEMP dst);
8800
8801 format %{ "MOVL $dst.hi, 0\n\t"
8802 "BLSIL $dst.lo, $src.lo\n\t"
8803 "JNZ done\n\t"
8804 "BLSIL $dst.hi, $src.hi\n"
8805 "done:"
8806 %}
8807
8808 ins_encode %{
8809 Label done;
8810 Register Rdst = $dst$$Register;
8811 Register Rsrc = $src$$Register;
8812 __ movl(HIGH_FROM_LOW(Rdst), 0);
8813 __ blsil(Rdst, Rsrc);
8814 __ jccb(Assembler::notZero, done);
8815 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8816 __ bind(done);
8817 %}
8818 ins_pipe(ialu_reg);
8819 %}
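BLSI extracts the lowest set bit (x & -x), and only one half of a 64-bit value can contribute it: if BLSIL on the low word leaves ZF clear, the preloaded zero in dst.hi is already correct and the JNZ skips; only when the low word is all zero does the lowest set bit come from the high word. A hedged C++ sketch of the split (blsi64_split is an illustrative name):

#include <cstdint>

// Mirrors the instruct: low half first, high half only if low is 0.
static uint64_t blsi64_split(uint64_t x) {
  uint32_t lo = static_cast<uint32_t>(x);
  uint32_t hi = static_cast<uint32_t>(x >> 32);
  if (lo != 0)                          // BLSIL lo left ZF clear
    return lo & (0u - lo);              // dst.hi stays at the preloaded 0
  uint64_t h = hi & (0u - hi);          // BLSIL on the high word
  return h << 32;                       // equals x & (0 - x) overall
}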
8820
8821 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
8822 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
8823 predicate(UseBMI1Instructions);
8824 effect(KILL cr, TEMP dst);
8825
8826 ins_cost(125);
8827 format %{ "MOVL $dst.hi, 0\n\t"
8828 "BLSIL $dst.lo, $src\n\t"
8829 "JNZ done\n\t"
8830 "BLSIL $dst.hi, $src+4\n"
8831 "done:"
8832 %}
8833
8834 ins_encode %{
8835 Label done;
8836 Register Rdst = $dst$$Register;
8837 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8838
8839 __ movl(HIGH_FROM_LOW(Rdst), 0);
8840 __ blsil(Rdst, $src$$Address);
8841 __ jccb(Assembler::notZero, done);
8842 __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
8843 __ bind(done);
8844 %}
8845 ins_pipe(ialu_reg_mem);
8846 %}
8847
8848 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8849 %{
8850 match(Set dst (XorL (AddL src minus_1) src));
8851 predicate(UseBMI1Instructions);
8852 effect(KILL cr, TEMP dst);
8853
8854 format %{ "MOVL $dst.hi, 0\n\t"
8855 "BLSMSKL $dst.lo, $src.lo\n\t"
8856 "JNC done\n\t"
8857 "BLSMSKL $dst.hi, $src.hi\n"
8858 "done:"
8859 %}
8860
8861 ins_encode %{
8862 Label done;
8863 Register Rdst = $dst$$Register;
8864 Register Rsrc = $src$$Register;
8865 __ movl(HIGH_FROM_LOW(Rdst), 0);
8866 __ blsmskl(Rdst, Rsrc);
8867 __ jccb(Assembler::carryClear, done);
8868 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8869 __ bind(done);
8870 %}
8871
8872 ins_pipe(ialu_reg);
8873 %}
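BLSMSK computes x ^ (x - 1), the mask up to and including the lowest set bit, and sets CF exactly when its source is zero. Running BLSMSKL on the low word therefore reveals whether the decrement borrows into the high word: CF clear means the mask ends in the low half and the preloaded zero in dst.hi stands; CF set means the low half is all ones and the mask continues from the high word. A hedged C++ sketch (blsmsk64_split is an illustrative name):

#include <cstdint>

static uint64_t blsmsk64_split(uint64_t x) {
  uint32_t lo = static_cast<uint32_t>(x);
  uint32_t hi = static_cast<uint32_t>(x >> 32);
  uint32_t mlo = lo ^ (lo - 1);         // BLSMSKL lo; CF set iff lo == 0
  if (lo != 0)                          // JNC done
    return mlo;                         // dst.hi keeps the preloaded 0
  uint32_t mhi = hi ^ (hi - 1);         // borrow reached the high word
  return (static_cast<uint64_t>(mhi) << 32) | mlo;  // mlo is all ones here
}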
8874
8875 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8876 %{
8877 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
8878 predicate(UseBMI1Instructions);
8879 effect(KILL cr, TEMP dst);
8880
8881 ins_cost(125);
8882 format %{ "MOVL $dst.hi, 0\n\t"
8883 "BLSMSKL $dst.lo, $src\n\t"
8884 "JNC done\n\t"
8885 "BLSMSKL $dst.hi, $src+4\n"
8886 "done:"
8887 %}
8888
8889 ins_encode %{
8890 Label done;
8891 Register Rdst = $dst$$Register;
8892 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8893
8894 __ movl(HIGH_FROM_LOW(Rdst), 0);
8895 __ blsmskl(Rdst, $src$$Address);
8896 __ jccb(Assembler::carryClear, done);
8897 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
8898 __ bind(done);
8899 %}
8900
8901 ins_pipe(ialu_reg_mem);
8902 %}
8903
8904 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8905 %{
8906 match(Set dst (AndL (AddL src minus_1) src) );
8907 predicate(UseBMI1Instructions);
8908 effect(KILL cr, TEMP dst);
8909
8910 format %{ "MOVL $dst.hi, $src.hi\n\t"
8911 "BLSRL $dst.lo, $src.lo\n\t"
8912 "JNC done\n\t"
8913 "BLSRL $dst.hi, $src.hi\n"
8914 "done:"
8915 %}
8916
8917 ins_encode %{
8918 Label done;
8919 Register Rdst = $dst$$Register;
8920 Register Rsrc = $src$$Register;
8921 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8922 __ blsrl(Rdst, Rsrc);
8923 __ jccb(Assembler::carryClear, done);
8924 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8925 __ bind(done);
8926 %}
8927
8928 ins_pipe(ialu_reg);
8929 %}
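BLSR computes x & (x - 1), clearing the lowest set bit, and like BLSMSK it sets CF exactly when its source is zero. Here dst.hi is preloaded with src.hi because the high word is untouched unless the decrement borrows out of the low word; the JNC skips the high-word BLSRL in the common case. A hedged C++ sketch (blsr64_split is an illustrative name):

#include <cstdint>

static uint64_t blsr64_split(uint64_t x) {
  uint32_t lo = static_cast<uint32_t>(x);
  uint32_t hi = static_cast<uint32_t>(x >> 32);  // MOVL dst.hi, src.hi
  uint32_t rlo = lo & (lo - 1);         // BLSRL lo; CF set iff lo == 0
  if (lo != 0)                          // JNC done
    return (static_cast<uint64_t>(hi) << 32) | rlo;
  uint32_t rhi = hi & (hi - 1);         // the borrow reached the high word
  return static_cast<uint64_t>(rhi) << 32;       // rlo is 0 here
}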
8930
8931 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8932 %{
8933 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
8934 predicate(UseBMI1Instructions);
8935 effect(KILL cr, TEMP dst);
8936
8937 ins_cost(125);
8938 format %{ "MOVL $dst.hi, $src+4\n\t"
8939 "BLSRL $dst.lo, $src\n\t"
8940 "JNC done\n\t"
8941 "BLSRL $dst.hi, $src+4\n"
8942 "done:"
8943 %}
8944
8945 ins_encode %{
8946 Label done;
8947 Register Rdst = $dst$$Register;
8948 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8949 __ movl(HIGH_FROM_LOW(Rdst), src_hi);
8950 __ blsrl(Rdst, $src$$Address);
8951 __ jccb(Assembler::carryClear, done);
8952 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
8953 __ bind(done);
8954 %}
8955
8956 ins_pipe(ialu_reg_mem);
9230 %} 8957 %}
9231 8958
9232 // Or Long Register with Register 8959 // Or Long Register with Register
9233 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8960 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9234 match(Set dst (OrL dst src)); 8961 match(Set dst (OrL dst src));
13145 ins_pipe( pipe_jmp ); 12872 ins_pipe( pipe_jmp );
13146 %} 12873 %}
13147 12874
13148 // inlined locking and unlocking 12875 // inlined locking and unlocking
13149 12876
13150 12877 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13151 instruct cmpFastLock( eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{ 12878 predicate(Compile::current()->use_rtm());
13152 match( Set cr (FastLock object box) ); 12879 match(Set cr (FastLock object box));
13153 effect( TEMP tmp, TEMP scr, USE_KILL box ); 12880 effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
12881 ins_cost(300);
12882 format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
12883 ins_encode %{
12884 __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12885 $scr$$Register, $cx1$$Register, $cx2$$Register,
12886 _counters, _rtm_counters, _stack_rtm_counters,
12887 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
12888 true, ra_->C->profile_rtm());
12889 %}
12890 ins_pipe(pipe_slow);
12891 %}
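The two FastLock instructs are kept mutually exclusive by their predicates, so for any given compilation the matcher sees exactly one candidate: the RTM form pins two extra scratch registers (cx1, cx2) and threads the RTM counters and MethodData into MacroAssembler::fast_lock, while the plain form below passes noreg/NULL placeholders. A hedged C++ sketch of that selection (pick_fast_lock is an illustrative name, not matcher code):

// Exactly one predicate holds per compile, so instruction selection
// between cmpFastLockRTM and cmpFastLock is unambiguous.
static const char* pick_fast_lock(bool compile_uses_rtm) {
  return compile_uses_rtm ? "cmpFastLockRTM" : "cmpFastLock";
}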
12892
12893 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
12894 predicate(!Compile::current()->use_rtm());
12895 match(Set cr (FastLock object box));
12896 effect(TEMP tmp, TEMP scr, USE_KILL box);
13154 ins_cost(300); 12897 ins_cost(300);
13155 format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %} 12898 format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13156 ins_encode( Fast_Lock(object,box,tmp,scr) ); 12899 ins_encode %{
13157 ins_pipe( pipe_slow ); 12900 __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13158 %} 12901 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13159 12902 %}
13160 instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{ 12903 ins_pipe(pipe_slow);
13161 match( Set cr (FastUnlock object box) ); 12904 %}
13162 effect( TEMP tmp, USE_KILL box ); 12905
12906 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
12907 match(Set cr (FastUnlock object box));
12908 effect(TEMP tmp, USE_KILL box);
13163 ins_cost(300); 12909 ins_cost(300);
13164 format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %} 12910 format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13165 ins_encode( Fast_Unlock(object,box,tmp) ); 12911 ins_encode %{
13166 ins_pipe( pipe_slow ); 12912 __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
12913 %}
12914 ins_pipe(pipe_slow);
13167 %} 12915 %}
13168 12916
13169 12917
13170 12918
13171 // ============================================================================ 12919 // ============================================================================