diff src/os_cpu/linux_x86/vm/linux_x86_64.s @ 0:a61af66fc99e jdk7-b24
Initial load
author   | duke
date     | Sat, 01 Dec 2007 00:00:00 +0000
parents  |
children | c18cbe5936b8
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/linux_x86/vm/linux_x86_64.s	Sat Dec 01 00:00:00 2007 +0000
@@ -0,0 +1,402 @@
+#
+# Copyright 2004-2007 Sun Microsystems, Inc.  All Rights Reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+# CA 95054 USA or visit www.sun.com if you need additional information or
+# have any questions.
+#
+
+        # NOTE WELL!  The _Copy functions are called directly
+        # from server-compiler-generated code via CallLeafNoFP,
+        # which means that they *must* either not use floating
+        # point or use it in the same manner as does the server
+        # compiler.
+
+        .globl _Copy_arrayof_conjoint_bytes
+        .globl _Copy_arrayof_conjoint_jshorts
+        .globl _Copy_conjoint_jshorts_atomic
+        .globl _Copy_arrayof_conjoint_jints
+        .globl _Copy_conjoint_jints_atomic
+        .globl _Copy_arrayof_conjoint_jlongs
+        .globl _Copy_conjoint_jlongs_atomic
+
+        .text
+
+        .globl SafeFetch32, Fetch32PFI, Fetch32Resume
+        .align 16
+        .type  SafeFetch32,@function
+        // Prototype: int SafeFetch32 (int * Adr, int ErrValue)
+SafeFetch32:
+        movl     %esi, %eax
+Fetch32PFI:
+        movl     (%rdi), %eax
+Fetch32Resume:
+        ret
+
+        .globl SafeFetchN, FetchNPFI, FetchNResume
+        .align 16
+        .type  SafeFetchN,@function
+        // Prototype: intptr_t SafeFetchN (intptr_t * Adr, intptr_t ErrValue)
+SafeFetchN:
+        movq     %rsi, %rax
+FetchNPFI:
+        movq     (%rdi), %rax
+FetchNResume:
+        ret
+
+        .globl SpinPause
+        .align 16
+        .type  SpinPause,@function
+SpinPause:
+        rep
+        nop
+        movq     $1, %rax
+        ret
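
Note: the SafeFetch stubs above work by a signal-handler protocol rather than by anything visible in the instruction stream. The error value is preloaded into the return register, the load at Fetch32PFI/FetchNPFI may fault, and a handler that recognizes those PC labels can resume execution at the matching Resume label, leaving the preloaded error value as the result. A minimal C++ caller sketch, assuming this file is linked into a VM whose SIGSEGV handler implements that protocol; the helper name and sentinel below are illustrative, not HotSpot API:

```cpp
#include <cstdint>

// Exported by the assembly above (prototype taken from its comments).
extern "C" int SafeFetch32(int* adr, int err_value);

// Hypothetical helper: probe whether a word is readable without crashing.
// Only meaningful when the VM's fault handler knows Fetch32PFI/Fetch32Resume;
// in a plain process a faulting load would terminate as usual.
bool is_readable_word(int* p) {
  const int kSentinel = 0x5afef00d;  // assumed not to be the real value at *p
  return SafeFetch32(p, kSentinel) != kSentinel;
}
```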
+        # Support for void Copy::arrayof_conjoint_bytes(void* from,
+        #                                               void* to,
+        #                                               size_t count)
+        # rdi - from
+        # rsi - to
+        # rdx - count, treated as ssize_t
+        #
+        .p2align 4,,15
+        .type    _Copy_arrayof_conjoint_bytes,@function
+_Copy_arrayof_conjoint_bytes:
+        movq     %rdx,%r8             # byte count
+        shrq     $3,%rdx              # qword count
+        cmpq     %rdi,%rsi
+        leaq     -1(%rdi,%r8,1),%rax  # from + bcount*1 - 1
+        jbe      acb_CopyRight
+        cmpq     %rax,%rsi
+        jbe      acb_CopyLeft
+acb_CopyRight:
+        leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
+        leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
+        negq     %rdx
+        jmp      7f
+        .p2align 4,,15
+1:      movq     8(%rax,%rdx,8),%rsi
+        movq     %rsi,8(%rcx,%rdx,8)
+        addq     $1,%rdx
+        jnz      1b
+2:      testq    $4,%r8               # check for trailing dword
+        jz       3f
+        movl     8(%rax),%esi         # copy trailing dword
+        movl     %esi,8(%rcx)
+        addq     $4,%rax
+        addq     $4,%rcx              # original %rsi is trashed, so we
+                                      #  can't use it as a base register
+3:      testq    $2,%r8               # check for trailing word
+        jz       4f
+        movw     8(%rax),%si          # copy trailing word
+        movw     %si,8(%rcx)
+        addq     $2,%rcx
+4:      testq    $1,%r8               # check for trailing byte
+        jz       5f
+        movb     -1(%rdi,%r8,1),%al   # copy trailing byte
+        movb     %al,8(%rcx)
+5:      ret
+        .p2align 4,,15
+6:      movq     -24(%rax,%rdx,8),%rsi
+        movq     %rsi,-24(%rcx,%rdx,8)
+        movq     -16(%rax,%rdx,8),%rsi
+        movq     %rsi,-16(%rcx,%rdx,8)
+        movq     -8(%rax,%rdx,8),%rsi
+        movq     %rsi,-8(%rcx,%rdx,8)
+        movq     (%rax,%rdx,8),%rsi
+        movq     %rsi,(%rcx,%rdx,8)
+7:      addq     $4,%rdx
+        jle      6b
+        subq     $4,%rdx
+        jl       1b
+        jmp      2b
+acb_CopyLeft:
+        testq    $1,%r8               # check for trailing byte
+        jz       1f
+        movb     -1(%rdi,%r8,1),%cl   # copy trailing byte
+        movb     %cl,-1(%rsi,%r8,1)
+        subq     $1,%r8               # adjust for possible trailing word
+1:      testq    $2,%r8               # check for trailing word
+        jz       2f
+        movw     -2(%rdi,%r8,1),%cx   # copy trailing word
+        movw     %cx,-2(%rsi,%r8,1)
+2:      testq    $4,%r8               # check for trailing dword
+        jz       5f
+        movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
+        movl     %ecx,(%rsi,%rdx,8)
+        jmp      5f
+        .p2align 4,,15
+3:      movq     -8(%rdi,%rdx,8),%rcx
+        movq     %rcx,-8(%rsi,%rdx,8)
+        subq     $1,%rdx
+        jnz      3b
+        ret
+        .p2align 4,,15
+4:      movq     24(%rdi,%rdx,8),%rcx
+        movq     %rcx,24(%rsi,%rdx,8)
+        movq     16(%rdi,%rdx,8),%rcx
+        movq     %rcx,16(%rsi,%rdx,8)
+        movq     8(%rdi,%rdx,8),%rcx
+        movq     %rcx,8(%rsi,%rdx,8)
+        movq     (%rdi,%rdx,8),%rcx
+        movq     %rcx,(%rsi,%rdx,8)
+5:      subq     $4,%rdx
+        jge      4b
+        addq     $4,%rdx
+        jg       3b
+        ret
+
+        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
+        #                                                 void* to,
+        #                                                 size_t count)
+        # Equivalent to
+        #   conjoint_jshorts_atomic
+        #
+        # If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
+        # let the hardware handle it.  The two or four words within dwords
+        # or qwords that span cache line boundaries will still be loaded
+        # and stored atomically.
+        #
+        # rdi - from
+        # rsi - to
+        # rdx - count, treated as ssize_t
+        #
+        .p2align 4,,15
+        .type    _Copy_arrayof_conjoint_jshorts,@function
+        .type    _Copy_conjoint_jshorts_atomic,@function
+_Copy_arrayof_conjoint_jshorts:
+_Copy_conjoint_jshorts_atomic:
+        movq     %rdx,%r8             # word count
+        shrq     $2,%rdx              # qword count
+        cmpq     %rdi,%rsi
+        leaq     -2(%rdi,%r8,2),%rax  # from + wcount*2 - 2
+        jbe      acs_CopyRight
+        cmpq     %rax,%rsi
+        jbe      acs_CopyLeft
+acs_CopyRight:
+        leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
+        leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
+        negq     %rdx
+        jmp      6f
+1:      movq     8(%rax,%rdx,8),%rsi
+        movq     %rsi,8(%rcx,%rdx,8)
+        addq     $1,%rdx
+        jnz      1b
+2:      testq    $2,%r8               # check for trailing dword
+        jz       3f
+        movl     8(%rax),%esi         # copy trailing dword
+        movl     %esi,8(%rcx)
+        addq     $4,%rcx              # original %rsi is trashed, so we
+                                      #  can't use it as a base register
+3:      testq    $1,%r8               # check for trailing word
+        jz       4f
+        movw     -2(%rdi,%r8,2),%si   # copy trailing word
+        movw     %si,8(%rcx)
+4:      ret
+        .p2align 4,,15
+5:      movq     -24(%rax,%rdx,8),%rsi
+        movq     %rsi,-24(%rcx,%rdx,8)
+        movq     -16(%rax,%rdx,8),%rsi
+        movq     %rsi,-16(%rcx,%rdx,8)
+        movq     -8(%rax,%rdx,8),%rsi
+        movq     %rsi,-8(%rcx,%rdx,8)
+        movq     (%rax,%rdx,8),%rsi
+        movq     %rsi,(%rcx,%rdx,8)
+6:      addq     $4,%rdx
+        jle      5b
+        subq     $4,%rdx
+        jl       1b
+        jmp      2b
+acs_CopyLeft:
+        testq    $1,%r8               # check for trailing word
+        jz       1f
+        movw     -2(%rdi,%r8,2),%cx   # copy trailing word
+        movw     %cx,-2(%rsi,%r8,2)
+1:      testq    $2,%r8               # check for trailing dword
+        jz       4f
+        movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
+        movl     %ecx,(%rsi,%rdx,8)
+        jmp      4f
+2:      movq     -8(%rdi,%rdx,8),%rcx
+        movq     %rcx,-8(%rsi,%rdx,8)
+        subq     $1,%rdx
+        jnz      2b
+        ret
+        .p2align 4,,15
+3:      movq     24(%rdi,%rdx,8),%rcx
+        movq     %rcx,24(%rsi,%rdx,8)
+        movq     16(%rdi,%rdx,8),%rcx
+        movq     %rcx,16(%rsi,%rdx,8)
+        movq     8(%rdi,%rdx,8),%rcx
+        movq     %rcx,8(%rsi,%rdx,8)
+        movq     (%rdi,%rdx,8),%rcx
+        movq     %rcx,(%rsi,%rdx,8)
+4:      subq     $4,%rdx
+        jge      3b
+        addq     $4,%rdx
+        jg       2b
+        ret
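
Note: all four copy stubs open with the same three-instruction overlap test before choosing a direction: copy forward ("CopyRight") when `to` is at or below `from`, or when `to` starts past the last source byte; otherwise the destination overlaps the source from above and the stub copies backward ("CopyLeft"). This is memmove semantics, with the main loops unrolled four qwords at a time. A sketch of that decision in C++, with illustrative names (not HotSpot code) and the assembly's unsigned pointer comparison made explicit via uintptr_t:

```cpp
#include <cstddef>
#include <cstdint>

// Reference model of the stubs' direction choice; element type T stands in
// for byte/jshort/jint/jlong.
template <typename T>
void conjoint_copy_reference(const T* from, T* to, size_t count) {
  uintptr_t f    = reinterpret_cast<uintptr_t>(from);
  uintptr_t t    = reinterpret_cast<uintptr_t>(to);
  uintptr_t last = f + count * sizeof(T) - 1;  // address of last source byte
  if (t <= f || t > last) {
    for (size_t i = 0; i < count; i++)         // "CopyRight": forward copy
      to[i] = from[i];
  } else {
    for (size_t i = count; i-- > 0; )          // "CopyLeft": backward copy
      to[i] = from[i];                         //  destination overlaps above
  }
}
```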
+        # Support for void Copy::arrayof_conjoint_jints(jint* from,
+        #                                               jint* to,
+        #                                               size_t count)
+        # Equivalent to
+        #   conjoint_jints_atomic
+        #
+        # If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
+        # the hardware handle it.  The two dwords within qwords that span
+        # cache line boundaries will still be loaded and stored atomically.
+        #
+        # rdi - from
+        # rsi - to
+        # rdx - count, treated as ssize_t
+        #
+        .p2align 4,,15
+        .type    _Copy_arrayof_conjoint_jints,@function
+        .type    _Copy_conjoint_jints_atomic,@function
+_Copy_arrayof_conjoint_jints:
+_Copy_conjoint_jints_atomic:
+        movq     %rdx,%r8             # dword count
+        shrq     %rdx                 # qword count
+        cmpq     %rdi,%rsi
+        leaq     -4(%rdi,%r8,4),%rax  # from + dcount*4 - 4
+        jbe      aci_CopyRight
+        cmpq     %rax,%rsi
+        jbe      aci_CopyLeft
+aci_CopyRight:
+        leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
+        leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
+        negq     %rdx
+        jmp      5f
+        .p2align 4,,15
+1:      movq     8(%rax,%rdx,8),%rsi
+        movq     %rsi,8(%rcx,%rdx,8)
+        addq     $1,%rdx
+        jnz      1b
+2:      testq    $1,%r8               # check for trailing dword
+        jz       3f
+        movl     8(%rax),%esi         # copy trailing dword
+        movl     %esi,8(%rcx)
+3:      ret
+        .p2align 4,,15
+4:      movq     -24(%rax,%rdx,8),%rsi
+        movq     %rsi,-24(%rcx,%rdx,8)
+        movq     -16(%rax,%rdx,8),%rsi
+        movq     %rsi,-16(%rcx,%rdx,8)
+        movq     -8(%rax,%rdx,8),%rsi
+        movq     %rsi,-8(%rcx,%rdx,8)
+        movq     (%rax,%rdx,8),%rsi
+        movq     %rsi,(%rcx,%rdx,8)
+5:      addq     $4,%rdx
+        jle      4b
+        subq     $4,%rdx
+        jl       1b
+        jmp      2b
+aci_CopyLeft:
+        testq    $1,%r8               # check for trailing dword
+        jz       3f
+        movl     -4(%rdi,%r8,4),%ecx  # copy trailing dword
+        movl     %ecx,-4(%rsi,%r8,4)
+        jmp      3f
+1:      movq     -8(%rdi,%rdx,8),%rcx
+        movq     %rcx,-8(%rsi,%rdx,8)
+        subq     $1,%rdx
+        jnz      1b
+        ret
+        .p2align 4,,15
+2:      movq     24(%rdi,%rdx,8),%rcx
+        movq     %rcx,24(%rsi,%rdx,8)
+        movq     16(%rdi,%rdx,8),%rcx
+        movq     %rcx,16(%rsi,%rdx,8)
+        movq     8(%rdi,%rdx,8),%rcx
+        movq     %rcx,8(%rsi,%rdx,8)
+        movq     (%rdi,%rdx,8),%rcx
+        movq     %rcx,(%rsi,%rdx,8)
+3:      subq     $4,%rdx
+        jge      2b
+        addq     $4,%rdx
+        jg       1b
+        ret
+
+        # Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
+        #                                                jlong* to,
+        #                                                size_t count)
+        # Equivalent to
+        #   conjoint_jlongs_atomic
+        #   arrayof_conjoint_oops
+        #   conjoint_oops_atomic
+        #
+        # rdi - from
+        # rsi - to
+        # rdx - count, treated as ssize_t
+        #
+        .p2align 4,,15
+        .type    _Copy_arrayof_conjoint_jlongs,@function
+        .type    _Copy_conjoint_jlongs_atomic,@function
+_Copy_arrayof_conjoint_jlongs:
+_Copy_conjoint_jlongs_atomic:
+        cmpq     %rdi,%rsi
+        leaq     -8(%rdi,%rdx,8),%rax # from + count*8 - 8
+        jbe      acl_CopyRight
+        cmpq     %rax,%rsi
+        jbe      acl_CopyLeft
+acl_CopyRight:
+        leaq     -8(%rsi,%rdx,8),%rcx # to + count*8 - 8
+        negq     %rdx
+        jmp      3f
+1:      movq     8(%rax,%rdx,8),%rsi
+        movq     %rsi,8(%rcx,%rdx,8)
+        addq     $1,%rdx
+        jnz      1b
+        ret
+        .p2align 4,,15
+2:      movq     -24(%rax,%rdx,8),%rsi
+        movq     %rsi,-24(%rcx,%rdx,8)
+        movq     -16(%rax,%rdx,8),%rsi
+        movq     %rsi,-16(%rcx,%rdx,8)
+        movq     -8(%rax,%rdx,8),%rsi
+        movq     %rsi,-8(%rcx,%rdx,8)
+        movq     (%rax,%rdx,8),%rsi
+        movq     %rsi,(%rcx,%rdx,8)
+3:      addq     $4,%rdx
+        jle      2b
+        subq     $4,%rdx
+        jl       1b
+        ret
+4:      movq     -8(%rdi,%rdx,8),%rcx
+        movq     %rcx,-8(%rsi,%rdx,8)
+        subq     $1,%rdx
+        jnz      4b
+        ret
+        .p2align 4,,15
+5:      movq     24(%rdi,%rdx,8),%rcx
+        movq     %rcx,24(%rsi,%rdx,8)
+        movq     16(%rdi,%rdx,8),%rcx
+        movq     %rcx,16(%rsi,%rdx,8)
+        movq     8(%rdi,%rdx,8),%rcx
+        movq     %rcx,8(%rsi,%rdx,8)
+        movq     (%rdi,%rdx,8),%rcx
+        movq     %rcx,(%rsi,%rdx,8)
+acl_CopyLeft:
+        subq     $4,%rdx
+        jge      5b
+        addq     $4,%rdx
+        jg       4b
+        ret
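
Note: what distinguishes these stubs from a plain memmove is the per-element atomicity described in their comments: each jshort/jint/jlong is moved by a single load and a single store, so a Java thread racing with an arraycopy can observe stale or fresh elements but never a torn one. A hedged usage sketch against the exported jint entry point; the extern declaration mirrors the prototype given in the comment block above, and the wrapper name is illustrative:

```cpp
#include <cstddef>
#include <cstdint>

// Entry point defined in the assembly above ('jint' is a 32-bit int in HotSpot).
extern "C" void _Copy_conjoint_jints_atomic(int32_t* from, int32_t* to,
                                            size_t count);

// Illustrative wrapper: overlap-safe copy in which every element is read and
// written by one 32-bit (or wider, element-aligned) access, so concurrent
// readers of 'to' never see a half-updated value.
void atomic_arraycopy_jint(int32_t* from, int32_t* to, size_t count) {
  _Copy_conjoint_jints_atomic(from, to, count);
}
```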