Mercurial > hg > truffle
diff src/os_cpu/solaris_x86/vm/solaris_x86_64.s @ 0:a61af66fc99e jdk7-b24
Initial load
author | duke |
---|---|
date | Sat, 01 Dec 2007 00:00:00 +0000 |
parents | |
children | c18cbe5936b8 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/os_cpu/solaris_x86/vm/solaris_x86_64.s Sat Dec 01 00:00:00 2007 +0000 @@ -0,0 +1,406 @@ +/ +/ Copyright 2004-2005 Sun Microsystems, Inc. All Rights Reserved. +/ DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +/ +/ This code is free software; you can redistribute it and/or modify it +/ under the terms of the GNU General Public License version 2 only, as +/ published by the Free Software Foundation. +/ +/ This code is distributed in the hope that it will be useful, but WITHOUT +/ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +/ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +/ version 2 for more details (a copy is included in the LICENSE file that +/ accompanied this code). +/ +/ You should have received a copy of the GNU General Public License version +/ 2 along with this work; if not, write to the Free Software Foundation, +/ Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +/ +/ Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, +/ CA 95054 USA or visit www.sun.com if you need additional information or +/ have any questions. +/ + + .globl fs_load + .globl fs_thread + + // NOTE WELL! The _Copy functions are called directly + // from server-compiler-generated code via CallLeafNoFP, + // which means that they *must* either not use floating + // point or use it in the same manner as does the server + // compiler. 
+
+        .globl _Copy_arrayof_conjoint_bytes
+        .globl _Copy_conjoint_jshorts_atomic
+        .globl _Copy_arrayof_conjoint_jshorts
+        .globl _Copy_conjoint_jints_atomic
+        .globl _Copy_arrayof_conjoint_jints
+        .globl _Copy_conjoint_jlongs_atomic
+        .globl _Copy_arrayof_conjoint_jlongs
+
+        .section .text,"ax"
+
+        / Fast thread accessors, used by threadLS_solaris_amd64.cpp
+        /   fs_load:   returns in %rax the quadword at %fs:(%rdi)
+        /   fs_thread: returns in %rax the quadword at %fs:0x0
+        .align   16
+fs_load:
+        movq     %fs:(%rdi),%rax
+        ret
+
+        .align   16
+fs_thread:
+        movq     %fs:0x0,%rax
+        ret
+
+        .globl SafeFetch32, Fetch32PFI, Fetch32Resume
+        .align   16
+        // Prototype: int SafeFetch32 (int * Adr, int ErrValue)
+        // %eax is first loaded with ErrValue (%esi); the load from Adr (%rdi)
+        // at Fetch32PFI then overwrites it.
+        // NOTE(review): the exported Fetch32PFI / Fetch32Resume labels are
+        // presumably consumed by a fault handler outside this file that
+        // resumes at the ret with ErrValue still in %eax -- confirm there.
+SafeFetch32:
+        movl     %esi, %eax
+Fetch32PFI:
+        movl     (%rdi), %eax
+Fetch32Resume:
+        ret
+
+        .globl SafeFetchN, FetchNPFI, FetchNResume
+        .align   16
+        // Prototype: intptr_t SafeFetchN (intptr_t * Adr, intptr_t ErrValue)
+        // 64-bit counterpart of SafeFetch32: %rax = ErrValue (%rsi), then the
+        // load from Adr (%rdi) at FetchNPFI overwrites it.
+        // NOTE(review): FetchNPFI / FetchNResume are presumably used the same
+        // way as the Fetch32 labels -- confirm against the fault handler.
+SafeFetchN:
+        movq     %rsi, %rax
+FetchNPFI:
+        movq     (%rdi), %rax
+FetchNResume:
+        ret
+
+        / SpinPause takes no arguments: it executes "rep; nop" (the byte
+        / encoding of the PAUSE spin-wait hint) and returns 1 in %rax.
+        .globl SpinPause
+        .align   16
+SpinPause:
+        rep
+        nop
+        movq     $1, %rax
+        ret
+
+
+        / Support for void Copy::arrayof_conjoint_bytes(void* from,
+        /                                               void* to,
+        /                                               size_t count)
+        / rdi - from
+        / rsi - to
+        / rdx - count, treated as ssize_t
+        /
+        / Direction: if to <= from (unsigned compare), or 'to' lies above the
+        / last source byte, the copy runs from low to high addresses
+        / (acb_CopyRight).  Otherwise 'to' falls inside the source range and
+        / the copy runs from high to low addresses (acb_CopyLeft).
+        / Whole qwords are moved four per pass, then one per pass.  The
+        / remaining 0-7 bytes are moved as an optional dword, word and byte:
+        / last in the ascending path, first in the descending path.
+        / Writes rax, rcx, rdx, rsi, r8 and the flags; rdi is never written.
+        /
+        .align   16
+_Copy_arrayof_conjoint_bytes:
+        movq     %rdx,%r8             / byte count
+        shrq     $3,%rdx              / qword count
+        cmpq     %rdi,%rsi
+        leaq     -1(%rdi,%r8,1),%rax  / from + bcount*1 - 1
+        jbe      acb_CopyRight        / flags are from the cmpq; leaq leaves them alone
+        cmpq     %rax,%rsi
+        jbe      acb_CopyLeft
+acb_CopyRight:
+        / rax/rcx address the last whole qword of from/to; rdx becomes
+        / -qcount and is counted up towards zero.
+        leaq     -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
+        leaq     -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
+        negq     %rdx
+        jmp      7f
+        .align   16
+        / One qword per pass; entered with rdx in -3..-1.
+1:      movq     8(%rax,%rdx,8),%rsi
+        movq     %rsi,8(%rcx,%rdx,8)
+        addq     $1,%rdx
+        jnz      1b
+        / Tail: bits 2, 1 and 0 of the byte count select dword, word, byte.
+2:      testq    $4,%r8               / check for trailing dword
+        jz       3f
+        movl     8(%rax),%esi         / copy trailing dword
+        movl     %esi,8(%rcx)
+        addq     $4,%rax
+        addq     $4,%rcx              / original %rsi is trashed, so we
+                                      /  can't use it as a base register
+3:      testq    $2,%r8               / check for trailing word
+        jz       4f
+        movw     8(%rax),%si          / copy trailing word
+        movw     %si,8(%rcx)
+        addq     $2,%rcx
+        / %rdi and %r8 are still intact, so the last source byte is addressed
+        / directly; %rcx has been advanced past any dword/word copied above.
+4:      testq    $1,%r8               / check for trailing byte
+        jz       5f
+        movb     -1(%rdi,%r8,1),%al   / copy trailing byte
+        movb     %al,8(%rcx)
+5:      ret
+        .align   16
+        / Four qwords per pass.  rdx was already advanced by 4 at label 7,
+        / hence the -24..0 displacements.
+6:      movq     -24(%rax,%rdx,8),%rsi
+        movq     %rsi,-24(%rcx,%rdx,8)
+        movq     -16(%rax,%rdx,8),%rsi
+        movq     %rsi,-16(%rcx,%rdx,8)
+        movq     -8(%rax,%rdx,8),%rsi
+        movq     %rsi,-8(%rcx,%rdx,8)
+        movq     (%rax,%rdx,8),%rsi
+        movq     %rsi,(%rcx,%rdx,8)
+7:      addq     $4,%rdx              / still <= 0: at least four qwords remain
+        jle      6b
+        subq     $4,%rdx              / undo the probe; < 0: 1-3 qwords remain
+        jl       1b
+        jmp      2b
+acb_CopyLeft:
+        / Descending copy: the sub-qword tail at the top of the range goes
+        / first, then the whole qwords from the highest address downwards.
+        testq    $1,%r8               / check for trailing byte
+        jz       1f
+        movb     -1(%rdi,%r8,1),%cl   / copy trailing byte
+        movb     %cl,-1(%rsi,%r8,1)
+        subq     $1,%r8               / adjust for possible trailing word
+1:      testq    $2,%r8               / check for trailing word
+        jz       2f
+        movw     -2(%rdi,%r8,1),%cx   / copy trailing word
+        movw     %cx,-2(%rsi,%r8,1)
+2:      testq    $4,%r8               / check for trailing dword
+        jz       5f
+        movl     (%rdi,%rdx,8),%ecx   / copy trailing dword
+        movl     %ecx,(%rsi,%rdx,8)
+        jmp      5f
+        .align   16
+        / One qword per pass, highest remaining qword first; rdx counts down.
+3:      movq     -8(%rdi,%rdx,8),%rcx
+        movq     %rcx,-8(%rsi,%rdx,8)
+        subq     $1,%rdx
+        jnz      3b
+        ret
+        .align   16
+        / Four qwords per pass; rdx was already reduced by 4 at label 5.
+4:      movq     24(%rdi,%rdx,8),%rcx
+        movq     %rcx,24(%rsi,%rdx,8)
+        movq     16(%rdi,%rdx,8),%rcx
+        movq     %rcx,16(%rsi,%rdx,8)
+        movq     8(%rdi,%rdx,8),%rcx
+        movq     %rcx,8(%rsi,%rdx,8)
+        movq     (%rdi,%rdx,8),%rcx
+        movq     %rcx,(%rsi,%rdx,8)
+5:      subq     $4,%rdx              / >= 0: at least four qwords remain
+        jge      4b
+        addq     $4,%rdx              / undo the probe; > 0: 1-3 qwords remain
+        jg       3b
+        ret
+
+        / Support for void Copy::arrayof_conjoint_jshorts(void* from,
+        /                                                 void* to,
+        /                                                 size_t count)
+        / Equivalent to
+        /   conjoint_jshorts_atomic
+        /
+        / If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
+        / let the hardware handle it.  The two or four words within dwords
+        / or qwords that span cache line boundaries will still be loaded
+        / and stored atomically.
+
+        /
+        / rdi - from
+        / rsi - to
+        / rdx - count, treated as ssize_t
+        /
+        / count is in 16-bit words.  If to <= from (unsigned compare), or 'to'
+        / lies above the last source word, the copy runs from low to high
+        / addresses (acs_CopyRight); otherwise it runs from high to low
+        / addresses (acs_CopyLeft).  Whole qwords are moved four per pass,
+        / then one per pass; the remaining 0-3 words are moved as an optional
+        / dword and an optional word.
+        / Writes rax, rcx, rdx, rsi, r8 and the flags; rdi is never written.
+        .align   16
+_Copy_arrayof_conjoint_jshorts:
+_Copy_conjoint_jshorts_atomic:
+        movq     %rdx,%r8             / word count
+        shrq     $2,%rdx              / qword count
+        cmpq     %rdi,%rsi
+        leaq     -2(%rdi,%r8,2),%rax  / from + wcount*2 - 2
+        jbe      acs_CopyRight        / flags are from the cmpq; leaq leaves them alone
+        cmpq     %rax,%rsi
+        jbe      acs_CopyLeft
+acs_CopyRight:
+        / rax/rcx address the last whole qword of from/to; rdx becomes
+        / -qcount and is counted up towards zero.
+        leaq     -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
+        leaq     -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
+        negq     %rdx
+        jmp      6f
+        / One qword per pass; entered with rdx in -3..-1.
+1:      movq     8(%rax,%rdx,8),%rsi
+        movq     %rsi,8(%rcx,%rdx,8)
+        addq     $1,%rdx
+        jnz      1b
+        / Tail: bit 1 of the word count selects a dword, bit 0 a single word.
+2:      testq    $2,%r8               / check for trailing dword
+        jz       3f
+        movl     8(%rax),%esi         / copy trailing dword
+        movl     %esi,8(%rcx)
+        addq     $4,%rcx              / original %rsi is trashed, so we
+                                      /  can't use it as a base register
+        / %rdi and %r8 are still intact, so the last source word is addressed
+        / directly; %rcx has been advanced past any dword copied above.
+3:      testq    $1,%r8               / check for trailing word
+        jz       4f
+        movw     -2(%rdi,%r8,2),%si   / copy trailing word
+        movw     %si,8(%rcx)
+4:      ret
+        .align   16
+        / Four qwords per pass.  rdx was already advanced by 4 at label 6,
+        / hence the -24..0 displacements.
+5:      movq     -24(%rax,%rdx,8),%rsi
+        movq     %rsi,-24(%rcx,%rdx,8)
+        movq     -16(%rax,%rdx,8),%rsi
+        movq     %rsi,-16(%rcx,%rdx,8)
+        movq     -8(%rax,%rdx,8),%rsi
+        movq     %rsi,-8(%rcx,%rdx,8)
+        movq     (%rax,%rdx,8),%rsi
+        movq     %rsi,(%rcx,%rdx,8)
+6:      addq     $4,%rdx              / still <= 0: at least four qwords remain
+        jle      5b
+        subq     $4,%rdx              / undo the probe; < 0: 1-3 qwords remain
+        jl       1b
+        jmp      2b
+acs_CopyLeft:
+        / Descending copy: the sub-qword tail at the top of the range goes
+        / first, then the whole qwords from the highest address downwards.
+        testq    $1,%r8               / check for trailing word
+        jz       1f
+        movw     -2(%rdi,%r8,2),%cx   / copy trailing word
+        movw     %cx,-2(%rsi,%r8,2)
+1:      testq    $2,%r8               / check for trailing dword
+        jz       4f
+        movl     (%rdi,%rdx,8),%ecx   / copy trailing dword
+        movl     %ecx,(%rsi,%rdx,8)
+        jmp      4f
+        / One qword per pass, highest remaining qword first; rdx counts down.
+2:      movq     -8(%rdi,%rdx,8),%rcx
+        movq     %rcx,-8(%rsi,%rdx,8)
+        subq     $1,%rdx
+        jnz      2b
+        ret
+        .align   16
+        / Four qwords per pass; rdx was already reduced by 4 at label 4.
+3:      movq     24(%rdi,%rdx,8),%rcx
+        movq     %rcx,24(%rsi,%rdx,8)
+        movq     16(%rdi,%rdx,8),%rcx
+        movq     %rcx,16(%rsi,%rdx,8)
+        movq     8(%rdi,%rdx,8),%rcx
+        movq     %rcx,8(%rsi,%rdx,8)
+        movq     (%rdi,%rdx,8),%rcx
+        movq     %rcx,(%rsi,%rdx,8)
+4:      subq     $4,%rdx              / >= 0: at least four qwords remain
+        jge      3b
+        addq     $4,%rdx              / undo the probe; > 0: 1-3 qwords remain
+        jg       2b
+        ret
+
+        / Support for void Copy::arrayof_conjoint_jints(jint* from,
+        /                                               jint* to,
+        /                                               size_t count)
+        / Equivalent to
+        /   conjoint_jints_atomic
+        /
+        / If 'from' and/or 'to' are aligned on
+        / 4-byte boundaries, we let
+        / the hardware handle it.  The two dwords within qwords that span
+        / cache line boundaries will still be loaded and stored atomically.
+        /
+        / rdi - from
+        / rsi - to
+        / rdx - count, treated as ssize_t
+        /
+        / count is in 32-bit dwords.  If to <= from (unsigned compare), or
+        / 'to' lies above the last source dword, the copy runs from low to
+        / high addresses (aci_CopyRight); otherwise it runs from high to low
+        / addresses (aci_CopyLeft).  Whole qwords are moved four per pass,
+        / then one per pass; an odd count leaves one trailing dword.
+        / Writes rax, rcx, rdx, rsi, r8 and the flags; rdi is never written.
+        .align   16
+_Copy_arrayof_conjoint_jints:
+_Copy_conjoint_jints_atomic:
+        movq     %rdx,%r8             / dword count
+        shrq     %rdx                 / qword count
+        cmpq     %rdi,%rsi
+        leaq     -4(%rdi,%r8,4),%rax  / from + dcount*4 - 4
+        jbe      aci_CopyRight        / flags are from the cmpq; leaq leaves them alone
+        cmpq     %rax,%rsi
+        jbe      aci_CopyLeft
+aci_CopyRight:
+        / rax/rcx address the last whole qword of from/to; rdx becomes
+        / -qcount and is counted up towards zero.
+        leaq     -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
+        leaq     -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
+        negq     %rdx
+        jmp      5f
+        .align   16
+        / One qword per pass; entered with rdx in -3..-1.
+1:      movq     8(%rax,%rdx,8),%rsi
+        movq     %rsi,8(%rcx,%rdx,8)
+        addq     $1,%rdx
+        jnz      1b
+2:      testq    $1,%r8               / check for trailing dword
+        jz       3f
+        movl     8(%rax),%esi         / copy trailing dword
+        movl     %esi,8(%rcx)
+3:      ret
+        .align   16
+        / Four qwords per pass.  rdx was already advanced by 4 at label 5,
+        / hence the -24..0 displacements.
+4:      movq     -24(%rax,%rdx,8),%rsi
+        movq     %rsi,-24(%rcx,%rdx,8)
+        movq     -16(%rax,%rdx,8),%rsi
+        movq     %rsi,-16(%rcx,%rdx,8)
+        movq     -8(%rax,%rdx,8),%rsi
+        movq     %rsi,-8(%rcx,%rdx,8)
+        movq     (%rax,%rdx,8),%rsi
+        movq     %rsi,(%rcx,%rdx,8)
+5:      addq     $4,%rdx              / still <= 0: at least four qwords remain
+        jle      4b
+        subq     $4,%rdx              / undo the probe; < 0: 1-3 qwords remain
+        jl       1b
+        jmp      2b
+aci_CopyLeft:
+        / Descending copy: the odd trailing dword at the top of the range
+        / goes first, then the whole qwords from the highest address down.
+        testq    $1,%r8               / check for trailing dword
+        jz       3f
+        movl     -4(%rdi,%r8,4),%ecx  / copy trailing dword
+        movl     %ecx,-4(%rsi,%r8,4)
+        jmp      3f
+        / One qword per pass, highest remaining qword first; rdx counts down.
+1:      movq     -8(%rdi,%rdx,8),%rcx
+        movq     %rcx,-8(%rsi,%rdx,8)
+        subq     $1,%rdx
+        jnz      1b
+        ret
+        .align   16
+        / Four qwords per pass; rdx was already reduced by 4 at label 3.
+2:      movq     24(%rdi,%rdx,8),%rcx
+        movq     %rcx,24(%rsi,%rdx,8)
+        movq     16(%rdi,%rdx,8),%rcx
+        movq     %rcx,16(%rsi,%rdx,8)
+        movq     8(%rdi,%rdx,8),%rcx
+        movq     %rcx,8(%rsi,%rdx,8)
+        movq     (%rdi,%rdx,8),%rcx
+        movq     %rcx,(%rsi,%rdx,8)
+3:      subq     $4,%rdx              / >= 0: at least four qwords remain
+        jge      2b
+        addq     $4,%rdx              / undo the probe; > 0: 1-3 qwords remain
+        jg       1b
+        ret
+
+        / Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
+        /                                                jlong* to,
+        /                                                size_t count)
+        / Equivalent to
+        /   conjoint_jlongs_atomic
+        /   arrayof_conjoint_oops
+        /   conjoint_oops_atomic
+        /
+        / rdi - from
+        / rsi - to
+        / rdx - count, treated as ssize_t
+        /
+        / count is already in qwords, so there is no sub-qword tail.  If
+        / to <= from (unsigned compare), or 'to' lies above the last source
+        / qword, the copy runs from low to high addresses (acl_CopyRight);
+        / otherwise it runs from high to low addresses (acl_CopyLeft).
+        / Writes rax, rcx, rdx, rsi and the flags; rdi is never written.
+        .align   16
+_Copy_arrayof_conjoint_jlongs:
+_Copy_conjoint_jlongs_atomic:
+        cmpq     %rdi,%rsi
+        leaq     -8(%rdi,%rdx,8),%rax / from + count*8 - 8
+        jbe      acl_CopyRight        / flags are from the cmpq; leaq leaves them alone
+        cmpq     %rax,%rsi
+        jbe      acl_CopyLeft
+acl_CopyRight:
+        / rax (set above) and rcx address the last qword of from/to; rdx
+        / becomes -count and is counted up towards zero.
+        leaq     -8(%rsi,%rdx,8),%rcx / to + count*8 - 8
+        negq     %rdx
+        jmp      3f
+        / One qword per pass; entered with rdx in -3..-1.
+1:      movq     8(%rax,%rdx,8),%rsi
+        movq     %rsi,8(%rcx,%rdx,8)
+        addq     $1,%rdx
+        jnz      1b
+        ret
+        .align   16
+        / Four qwords per pass.  rdx was already advanced by 4 at label 3,
+        / hence the -24..0 displacements.
+2:      movq     -24(%rax,%rdx,8),%rsi
+        movq     %rsi,-24(%rcx,%rdx,8)
+        movq     -16(%rax,%rdx,8),%rsi
+        movq     %rsi,-16(%rcx,%rdx,8)
+        movq     -8(%rax,%rdx,8),%rsi
+        movq     %rsi,-8(%rcx,%rdx,8)
+        movq     (%rax,%rdx,8),%rsi
+        movq     %rsi,(%rcx,%rdx,8)
+3:      addq     $4,%rdx              / still <= 0: at least four qwords remain
+        jle      2b
+        subq     $4,%rdx              / undo the probe; < 0: 1-3 qwords remain
+        jl       1b
+        ret
+        / Descending path, one qword per pass; rdx counts down to zero.
+4:      movq     -8(%rdi,%rdx,8),%rcx
+        movq     %rcx,-8(%rsi,%rdx,8)
+        subq     $1,%rdx
+        jnz      4b
+        ret
+        .align   16
+        / Descending path, four qwords per pass; rdx was already reduced by 4
+        / at acl_CopyLeft.
+5:      movq     24(%rdi,%rdx,8),%rcx
+        movq     %rcx,24(%rsi,%rdx,8)
+        movq     16(%rdi,%rdx,8),%rcx
+        movq     %rcx,16(%rsi,%rdx,8)
+        movq     8(%rdi,%rdx,8),%rcx
+        movq     %rcx,8(%rsi,%rdx,8)
+        movq     (%rdi,%rdx,8),%rcx
+        movq     %rcx,(%rsi,%rdx,8)
+acl_CopyLeft:
+        / Entry point of the descending copy; it doubles as the loop control
+        / for labels 5 and 4.
+        subq     $4,%rdx              / >= 0: at least four qwords remain
+        jge      5b
+        addq     $4,%rdx              / undo the probe; > 0: 1-3 qwords remain
+        jg       4b
+        ret