Mercurial > hg > truffle
diff src/cpu/x86/vm/assembler_x86.cpp @ 1930:2d26b0046e0d
Merge.
author | Thomas Wuerthinger <wuerthinger@ssw.jku.at> |
---|---|
date | Tue, 30 Nov 2010 14:53:30 +0100 |
parents | 3483ec571caf 2fe998383789 |
children | 06f017f7daa7 |
line wrap: on
line diff
--- a/src/cpu/x86/vm/assembler_x86.cpp Mon Nov 29 18:32:30 2010 +0100 +++ b/src/cpu/x86/vm/assembler_x86.cpp Tue Nov 30 14:53:30 2010 +0100 @@ -1,5 +1,5 @@ /* - * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -16,9 +16,9 @@ * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. * */ @@ -1275,6 +1275,12 @@ emit_byte(0xF8 | encode); } +void Assembler::divl(Register src) { // Unsigned + int encode = prefix_and_encode(src->encoding()); + emit_byte(0xF7); + emit_byte(0xF0 | encode); +} + void Assembler::imull(Register dst, Register src) { int encode = prefix_and_encode(dst->encoding(), src->encoding()); emit_byte(0x0F); @@ -1288,7 +1294,7 @@ if (is8bit(value)) { emit_byte(0x6B); emit_byte(0xC0 | encode); - emit_byte(value); + emit_byte(value & 0xFF); } else { emit_byte(0x69); emit_byte(0xC0 | encode); @@ -3903,7 +3909,7 @@ if (is8bit(value)) { emit_byte(0x6B); emit_byte(0xC0 | encode); - emit_byte(value); + emit_byte(value & 0xFF); } else { emit_byte(0x69); emit_byte(0xC0 | encode); @@ -4993,19 +4999,22 @@ ttyLocker ttyl; tty->print_cr("eip = 0x%08x", eip); #ifndef PRODUCT - tty->cr(); - findpc(eip); - tty->cr(); + if ((WizardMode || Verbose) && PrintMiscellaneous) { + tty->cr(); + findpc(eip); + tty->cr(); + } #endif - tty->print_cr("rax, = 0x%08x", rax); - tty->print_cr("rbx, = 0x%08x", rbx); + tty->print_cr("rax = 0x%08x", rax); + tty->print_cr("rbx = 0x%08x", rbx); tty->print_cr("rcx = 0x%08x", rcx); tty->print_cr("rdx = 0x%08x", rdx); tty->print_cr("rdi = 0x%08x", rdi); tty->print_cr("rsi = 0x%08x", rsi); - tty->print_cr("rbp, = 0x%08x", rbp); + tty->print_cr("rbp = 0x%08x", rbp); tty->print_cr("rsp = 0x%08x", rsp); BREAKPOINT; + assert(false, "start up GDB"); } } else { ttyLocker ttyl; @@ -6494,24 +6503,19 @@ } void MacroAssembler::load_sized_value(Register dst, Address src, - int size_in_bytes, bool is_signed) { - switch (size_in_bytes ^ (is_signed ? -1 : 0)) { + size_t size_in_bytes, bool is_signed) { + switch (size_in_bytes) { #ifndef _LP64 // For case 8, caller is responsible for manually loading // the second word into another register. - case ~8: // fall through: - case 8: movl( dst, src ); break; + case 8: movl(dst, src); break; #else - case ~8: // fall through: - case 8: movq( dst, src ); break; + case 8: movq(dst, src); break; #endif - case ~4: // fall through: - case 4: movl( dst, src ); break; - case ~2: load_signed_short( dst, src ); break; - case 2: load_unsigned_short( dst, src ); break; - case ~1: load_signed_byte( dst, src ); break; - case 1: load_unsigned_byte( dst, src ); break; - default: ShouldNotReachHere(); + case 4: movl(dst, src); break; + case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; + case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; + default: ShouldNotReachHere(); } } @@ -7158,7 +7162,7 @@ subptr(t1, typeArrayOopDesc::header_size(T_INT)); addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve()); shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint))); - movptr(Address(top, arrayOopDesc::length_offset_in_bytes()), t1); + movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1); // set klass to intArrayKlass // dubious reloc why not an oop reloc? movptr(t1, ExternalAddress((address) Universe::intArrayKlassObj_addr())); @@ -7575,21 +7579,27 @@ // Scan RCX words at [RDI] for an occurrence of RAX. // Set NZ/Z based on last compare. + // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does + // not change flags (only scas instruction which is repeated sets flags). + // Set Z = 0 (not equal) before 'repne' to indicate that class was not found. #ifdef _LP64 // This part is tricky, as values in supers array could be 32 or 64 bit wide // and we store values in objArrays always encoded, thus we need to encode // the value of rax before repne. Note that rax is dead after the repne. if (UseCompressedOops) { - encode_heap_oop_not_null(rax); + encode_heap_oop_not_null(rax); // Changes flags. // The superclass is never null; it would be a basic system error if a null // pointer were to sneak in here. Note that we have already loaded the // Klass::super_check_offset from the super_klass in the fast path, // so if there is a null in that register, we are already in the afterlife. + testl(rax,rax); // Set Z = 0 repne_scanl(); } else #endif // _LP64 + { + testptr(rax,rax); // Set Z = 0 repne_scan(); - + } // Unspill the temp. registers: if (pushed_rdi) pop(rdi); if (pushed_rcx) pop(rcx); @@ -7650,6 +7660,9 @@ // Pass register number to verify_oop_subroutine char* b = new char[strlen(s) + 50]; sprintf(b, "verify_oop: %s: %s", reg->name(), s); +#ifdef _LP64 + push(rscratch1); // save r10, trashed by movptr() +#endif push(rax); // save rax, push(reg); // pass register argument ExternalAddress buffer((address) b); @@ -7660,6 +7673,7 @@ // call indirectly to solve generation ordering problem movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); call(rax); + // Caller pops the arguments (oop, message) and restores rax, r10 } @@ -7674,11 +7688,19 @@ movptr(tmp, ExternalAddress((address) delayed_value_addr)); #ifdef ASSERT - Label L; - testptr(tmp, tmp); - jccb(Assembler::notZero, L); - hlt(); - bind(L); + { Label L; + testptr(tmp, tmp); + if (WizardMode) { + jcc(Assembler::notZero, L); + char* buf = new char[40]; + sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]); + stop(buf); + } else { + jccb(Assembler::notZero, L); + hlt(); + } + bind(L); + } #endif if (offset != 0) @@ -7695,9 +7717,14 @@ void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg, Register temp_reg, Label& wrong_method_type) { - if (UseCompressedOops) unimplemented(); // field accesses must decode + Address type_addr(mh_reg, delayed_value(java_dyn_MethodHandle::type_offset_in_bytes, temp_reg)); // compare method type against that of the receiver - cmpptr(mtype_reg, Address(mh_reg, delayed_value(java_dyn_MethodHandle::type_offset_in_bytes, temp_reg))); + if (UseCompressedOops) { + load_heap_oop(temp_reg, type_addr); + cmpptr(mtype_reg, temp_reg); + } else { + cmpptr(mtype_reg, type_addr); + } jcc(Assembler::notEqual, wrong_method_type); } @@ -7708,15 +7735,15 @@ // method handle's MethodType. This macro hides the distinction. void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg, Register temp_reg) { - if (UseCompressedOops) unimplemented(); // field accesses must decode + assert_different_registers(vmslots_reg, mh_reg, temp_reg); // load mh.type.form.vmslots if (java_dyn_MethodHandle::vmslots_offset_in_bytes() != 0) { // hoist vmslots into every mh to avoid dependent load chain movl(vmslots_reg, Address(mh_reg, delayed_value(java_dyn_MethodHandle::vmslots_offset_in_bytes, temp_reg))); } else { Register temp2_reg = vmslots_reg; - movptr(temp2_reg, Address(mh_reg, delayed_value(java_dyn_MethodHandle::type_offset_in_bytes, temp_reg))); - movptr(temp2_reg, Address(temp2_reg, delayed_value(java_dyn_MethodType::form_offset_in_bytes, temp_reg))); + load_heap_oop(temp2_reg, Address(mh_reg, delayed_value(java_dyn_MethodHandle::type_offset_in_bytes, temp_reg))); + load_heap_oop(temp2_reg, Address(temp2_reg, delayed_value(java_dyn_MethodType::form_offset_in_bytes, temp_reg))); movl(vmslots_reg, Address(temp2_reg, delayed_value(java_dyn_MethodTypeForm::vmslots_offset_in_bytes, temp_reg))); } } @@ -7730,9 +7757,8 @@ assert(mh_reg == rcx, "caller must put MH object in rcx"); assert_different_registers(mh_reg, temp_reg); - if (UseCompressedOops) unimplemented(); // field accesses must decode - // pick out the interpreted side of the handler + // NOTE: vmentry is not an oop! movptr(temp_reg, Address(mh_reg, delayed_value(java_dyn_MethodHandle::vmentry_offset_in_bytes, temp_reg))); // off we go... @@ -7746,7 +7772,7 @@ Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, int extra_slot_offset) { // cf. TemplateTable::prepare_invoke(), if (load_receiver). - int stackElementSize = Interpreter::stackElementSize(); + int stackElementSize = Interpreter::stackElementSize; int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); #ifdef ASSERT int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); @@ -7773,6 +7799,9 @@ char* b = new char[strlen(s) + 50]; sprintf(b, "verify_oop_addr: %s", s); +#ifdef _LP64 + push(rscratch1); // save r10, trashed by movptr() +#endif push(rax); // save rax, // addr may contain rsp so we will have to adjust it based on the push // we just did @@ -7795,7 +7824,7 @@ // call indirectly to solve generation ordering problem movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); call(rax); - // Caller pops the arguments and restores rax, from the stack + // Caller pops the arguments (addr, message) and restores rax, r10. } void MacroAssembler::verify_tlab() { @@ -7977,7 +8006,7 @@ case 2: return "special"; case 3: return "empty"; } - ShouldNotReachHere() + ShouldNotReachHere(); return NULL; } @@ -8191,9 +8220,14 @@ assert (Universe::heap() != NULL, "java heap should be initialized"); movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); if (Universe::narrow_oop_shift() != 0) { - assert(Address::times_8 == LogMinObjAlignmentInBytes && - Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong"); - movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (LogMinObjAlignmentInBytes == Address::times_8) { + movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); + } else { + // OK to use shift since we don't need to preserve flags. + shlq(dst, LogMinObjAlignmentInBytes); + movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); + } } else { movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); } @@ -8215,6 +8249,40 @@ movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src); } +void MacroAssembler::load_heap_oop(Register dst, Address src) { +#ifdef _LP64 + if (UseCompressedOops) { + movl(dst, src); + decode_heap_oop(dst); + } else +#endif + movptr(dst, src); +} + +void MacroAssembler::store_heap_oop(Address dst, Register src) { +#ifdef _LP64 + if (UseCompressedOops) { + assert(!dst.uses(src), "not enough registers"); + encode_heap_oop(src); + movl(dst, src); + } else +#endif + movptr(dst, src); +} + +// Used for storing NULLs. +void MacroAssembler::store_heap_oop_null(Address dst) { +#ifdef _LP64 + if (UseCompressedOops) { + movl(dst, (int32_t)NULL_WORD); + } else { + movslq(dst, (int32_t)NULL_WORD); + } +#else + movl(dst, (int32_t)NULL_WORD); +#endif +} + #ifdef _LP64 void MacroAssembler::store_klass_gap(Register dst, Register src) { if (UseCompressedOops) { @@ -8223,58 +8291,35 @@ } } -void MacroAssembler::load_heap_oop(Register dst, Address src) { - if (UseCompressedOops) { - movl(dst, src); - decode_heap_oop(dst); - } else { - movq(dst, src); - } -} - -void MacroAssembler::store_heap_oop(Address dst, Register src) { - if (UseCompressedOops) { - assert(!dst.uses(src), "not enough registers"); - encode_heap_oop(src); - movl(dst, src); - } else { - movq(dst, src); - } -} - -// Used for storing NULLs. -void MacroAssembler::store_heap_oop_null(Address dst) { - if (UseCompressedOops) { - movl(dst, (int32_t)NULL_WORD); - } else { - movslq(dst, (int32_t)NULL_WORD); - } -} +#ifdef ASSERT +void MacroAssembler::verify_heapbase(const char* msg) { + assert (UseCompressedOops, "should be compressed"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + if (CheckCompressedOops) { + Label ok; + push(rscratch1); // cmpptr trashes rscratch1 + cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr())); + jcc(Assembler::equal, ok); + stop(msg); + bind(ok); + pop(rscratch1); + } +} +#endif // Algorithm must match oop.inline.hpp encode_heap_oop. void MacroAssembler::encode_heap_oop(Register r) { - assert (UseCompressedOops, "should be compressed"); - assert (Universe::heap() != NULL, "java heap should be initialized"); +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); +#endif + verify_oop(r, "broken oop in encode_heap_oop"); if (Universe::narrow_oop_base() == NULL) { - verify_oop(r, "broken oop in encode_heap_oop"); if (Universe::narrow_oop_shift() != 0) { assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); shrq(r, LogMinObjAlignmentInBytes); } return; } -#ifdef ASSERT - if (CheckCompressedOops) { - Label ok; - push(rscratch1); // cmpptr trashes rscratch1 - cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr())); - jcc(Assembler::equal, ok); - stop("MacroAssembler::encode_heap_oop: heap base corrupted?"); - bind(ok); - pop(rscratch1); - } -#endif - verify_oop(r, "broken oop in encode_heap_oop"); testq(r, r); cmovq(Assembler::equal, r, r12_heapbase); subq(r, r12_heapbase); @@ -8282,9 +8327,8 @@ } void MacroAssembler::encode_heap_oop_not_null(Register r) { - assert (UseCompressedOops, "should be compressed"); - assert (Universe::heap() != NULL, "java heap should be initialized"); #ifdef ASSERT + verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?"); if (CheckCompressedOops) { Label ok; testq(r, r); @@ -8304,9 +8348,8 @@ } void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { - assert (UseCompressedOops, "should be compressed"); - assert (Universe::heap() != NULL, "java heap should be initialized"); #ifdef ASSERT + verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?"); if (CheckCompressedOops) { Label ok; testq(src, src); @@ -8329,72 +8372,67 @@ } void MacroAssembler::decode_heap_oop(Register r) { - assert (UseCompressedOops, "should be compressed"); - assert (Universe::heap() != NULL, "java heap should be initialized"); +#ifdef ASSERT + verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); +#endif if (Universe::narrow_oop_base() == NULL) { if (Universe::narrow_oop_shift() != 0) { assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); shlq(r, LogMinObjAlignmentInBytes); } - verify_oop(r, "broken oop in decode_heap_oop"); - return; - } -#ifdef ASSERT - if (CheckCompressedOops) { - Label ok; - push(rscratch1); - cmpptr(r12_heapbase, - ExternalAddress((address)Universe::narrow_oop_base_addr())); - jcc(Assembler::equal, ok); - stop("MacroAssembler::decode_heap_oop: heap base corrupted?"); - bind(ok); - pop(rscratch1); - } -#endif - - Label done; - shlq(r, LogMinObjAlignmentInBytes); - jccb(Assembler::equal, done); - addq(r, r12_heapbase); -#if 0 - // alternate decoding probably a wash. - testq(r, r); - jccb(Assembler::equal, done); - leaq(r, Address(r12_heapbase, r, Address::times_8, 0)); -#endif - bind(done); + } else { + Label done; + shlq(r, LogMinObjAlignmentInBytes); + jccb(Assembler::equal, done); + addq(r, r12_heapbase); + bind(done); + } verify_oop(r, "broken oop in decode_heap_oop"); } void MacroAssembler::decode_heap_oop_not_null(Register r) { + // Note: it will change flags assert (UseCompressedOops, "should only be used for compressed headers"); assert (Universe::heap() != NULL, "java heap should be initialized"); // Cannot assert, unverified entry point counts instructions (see .ad file) // vtableStubs also counts instructions in pd_code_size_limit. // Also do not verify_oop as this is called by verify_oop. if (Universe::narrow_oop_shift() != 0) { - assert (Address::times_8 == LogMinObjAlignmentInBytes && - Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong"); - // Don't use Shift since it modifies flags. - leaq(r, Address(r12_heapbase, r, Address::times_8, 0)); + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shlq(r, LogMinObjAlignmentInBytes); + if (Universe::narrow_oop_base() != NULL) { + addq(r, r12_heapbase); + } } else { assert (Universe::narrow_oop_base() == NULL, "sanity"); } } void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { + // Note: it will change flags assert (UseCompressedOops, "should only be used for compressed headers"); assert (Universe::heap() != NULL, "java heap should be initialized"); // Cannot assert, unverified entry point counts instructions (see .ad file) // vtableStubs also counts instructions in pd_code_size_limit. // Also do not verify_oop as this is called by verify_oop. if (Universe::narrow_oop_shift() != 0) { - assert (Address::times_8 == LogMinObjAlignmentInBytes && - Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong"); - leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); - } else if (dst != src) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (LogMinObjAlignmentInBytes == Address::times_8) { + leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); + } else { + if (dst != src) { + movq(dst, src); + } + shlq(dst, LogMinObjAlignmentInBytes); + if (Universe::narrow_oop_base() != NULL) { + addq(dst, r12_heapbase); + } + } + } else { assert (Universe::narrow_oop_base() == NULL, "sanity"); - movq(dst, src); + if (dst != src) { + movq(dst, src); + } } } @@ -8757,6 +8795,186 @@ bind(DONE); } +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) block_comment(str) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") +void MacroAssembler::generate_fill(BasicType t, bool aligned, + Register to, Register value, Register count, + Register rtmp, XMMRegister xtmp) { + assert_different_registers(to, value, count, rtmp); + Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte; + Label L_fill_2_bytes, L_fill_4_bytes; + + int shift = -1; + switch (t) { + case T_BYTE: + shift = 2; + break; + case T_SHORT: + shift = 1; + break; + case T_INT: + shift = 0; + break; + default: ShouldNotReachHere(); + } + + if (t == T_BYTE) { + andl(value, 0xff); + movl(rtmp, value); + shll(rtmp, 8); + orl(value, rtmp); + } + if (t == T_SHORT) { + andl(value, 0xffff); + } + if (t == T_BYTE || t == T_SHORT) { + movl(rtmp, value); + shll(rtmp, 16); + orl(value, rtmp); + } + + cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element + jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp + if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) { + // align source address at 4 bytes address boundary + if (t == T_BYTE) { + // One byte misalignment happens only for byte arrays + testptr(to, 1); + jccb(Assembler::zero, L_skip_align1); + movb(Address(to, 0), value); + increment(to); + decrement(count); + BIND(L_skip_align1); + } + // Two bytes misalignment happens only for byte and short (char) arrays + testptr(to, 2); + jccb(Assembler::zero, L_skip_align2); + movw(Address(to, 0), value); + addptr(to, 2); + subl(count, 1<<(shift-1)); + BIND(L_skip_align2); + } + if (UseSSE < 2) { + Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; + // Fill 32-byte chunks + subl(count, 8 << shift); + jcc(Assembler::less, L_check_fill_8_bytes); + align(16); + + BIND(L_fill_32_bytes_loop); + + for (int i = 0; i < 32; i += 4) { + movl(Address(to, i), value); + } + + addptr(to, 32); + subl(count, 8 << shift); + jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); + BIND(L_check_fill_8_bytes); + addl(count, 8 << shift); + jccb(Assembler::zero, L_exit); + jmpb(L_fill_8_bytes); + + // + // length is too short, just fill qwords + // + BIND(L_fill_8_bytes_loop); + movl(Address(to, 0), value); + movl(Address(to, 4), value); + addptr(to, 8); + BIND(L_fill_8_bytes); + subl(count, 1 << (shift + 1)); + jcc(Assembler::greaterEqual, L_fill_8_bytes_loop); + // fall through to fill 4 bytes + } else { + Label L_fill_32_bytes; + if (!UseUnalignedLoadStores) { + // align to 8 bytes, we know we are 4 byte aligned to start + testptr(to, 4); + jccb(Assembler::zero, L_fill_32_bytes); + movl(Address(to, 0), value); + addptr(to, 4); + subl(count, 1<<shift); + } + BIND(L_fill_32_bytes); + { + assert( UseSSE >= 2, "supported cpu only" ); + Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; + // Fill 32-byte chunks + movdl(xtmp, value); + pshufd(xtmp, xtmp, 0); + + subl(count, 8 << shift); + jcc(Assembler::less, L_check_fill_8_bytes); + align(16); + + BIND(L_fill_32_bytes_loop); + + if (UseUnalignedLoadStores) { + movdqu(Address(to, 0), xtmp); + movdqu(Address(to, 16), xtmp); + } else { + movq(Address(to, 0), xtmp); + movq(Address(to, 8), xtmp); + movq(Address(to, 16), xtmp); + movq(Address(to, 24), xtmp); + } + + addptr(to, 32); + subl(count, 8 << shift); + jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); + BIND(L_check_fill_8_bytes); + addl(count, 8 << shift); + jccb(Assembler::zero, L_exit); + jmpb(L_fill_8_bytes); + + // + // length is too short, just fill qwords + // + BIND(L_fill_8_bytes_loop); + movq(Address(to, 0), xtmp); + addptr(to, 8); + BIND(L_fill_8_bytes); + subl(count, 1 << (shift + 1)); + jcc(Assembler::greaterEqual, L_fill_8_bytes_loop); + } + } + // fill trailing 4 bytes + BIND(L_fill_4_bytes); + testl(count, 1<<shift); + jccb(Assembler::zero, L_fill_2_bytes); + movl(Address(to, 0), value); + if (t == T_BYTE || t == T_SHORT) { + addptr(to, 4); + BIND(L_fill_2_bytes); + // fill trailing 2 bytes + testl(count, 1<<(shift-1)); + jccb(Assembler::zero, L_fill_byte); + movw(Address(to, 0), value); + if (t == T_BYTE) { + addptr(to, 2); + BIND(L_fill_byte); + // fill trailing byte + testl(count, 1); + jccb(Assembler::zero, L_exit); + movb(Address(to, 0), value); + } else { + BIND(L_fill_byte); + } + } else { + BIND(L_fill_2_bytes); + } + BIND(L_exit); +} +#undef BIND +#undef BLOCK_COMMENT + + Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { switch (cond) { // Note some conditions are synonyms for others