diff src/os_cpu/solaris_x86/vm/solaris_x86_64.s @ 0:a61af66fc99e jdk7-b24

Initial load
author duke
date Sat, 01 Dec 2007 00:00:00 +0000
parents
children c18cbe5936b8
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/solaris_x86/vm/solaris_x86_64.s	Sat Dec 01 00:00:00 2007 +0000
@@ -0,0 +1,406 @@
+/
+/ Copyright 2004-2005 Sun Microsystems, Inc.  All Rights Reserved.
+/ DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+/
+/ This code is free software; you can redistribute it and/or modify it
+/ under the terms of the GNU General Public License version 2 only, as
+/ published by the Free Software Foundation.
+/
+/ This code is distributed in the hope that it will be useful, but WITHOUT
+/ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+/ FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+/ version 2 for more details (a copy is included in the LICENSE file that
+/ accompanied this code).
+/
+/ You should have received a copy of the GNU General Public License version
+/ 2 along with this work; if not, write to the Free Software Foundation,
+/ Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+/
+/ Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+/ CA 95054 USA or visit www.sun.com if you need additional information or
+/ have any questions.
+/
+
+	.globl fs_load
+	.globl fs_thread
+
+        // NOTE WELL!  The _Copy functions are called directly
+	// from server-compiler-generated code via CallLeafNoFP,
+	// which means that they *must* either not use floating
+	// point or use it in the same manner as does the server
+	// compiler.
+
+        .globl _Copy_arrayof_conjoint_bytes
+        .globl _Copy_conjoint_jshorts_atomic
+        .globl _Copy_arrayof_conjoint_jshorts
+        .globl _Copy_conjoint_jints_atomic
+        .globl _Copy_arrayof_conjoint_jints
+        .globl _Copy_conjoint_jlongs_atomic
+        .globl _Copy_arrayof_conjoint_jlongs
+
+	.section .text,"ax"
+
+        / Fast thread accessors, used by threadLS_solaris_amd64.cpp
+	.align   16
+fs_load:
+	movq %fs:(%rdi),%rax
+	ret
+
+	.align   16
+fs_thread:
+	movq %fs:0x0,%rax
+	ret
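+
+        / Sketch of the C-callable shapes implied by the two accessors
+        / above (the callers live in threadLS_solaris_amd64.cpp; the
+        / parameter and return types here are only illustrative):
+        /   intptr_t fs_load(ptrdiff_t off);  / returns the qword at %fs base + off
+        /   intptr_t fs_thread();             / returns the qword at %fs base + 0
+        / i.e. both read an 8-byte thread-local slot relative to the %fs
+        / segment base, keeping the current-thread lookup on a fast path.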
+
+        .globl SafeFetch32, Fetch32PFI, Fetch32Resume
+        .align  16
+        // Prototype: int SafeFetch32 (int * Adr, int ErrValue) 
+SafeFetch32:
+        movl    %esi, %eax
+Fetch32PFI:
+        movl    (%rdi), %eax
+Fetch32Resume:
+        ret
+
+        .globl SafeFetchN, FetchNPFI, FetchNResume
+        .align  16
+        // Prototype: intptr_t SafeFetchN (intptr_t * Adr, intptr_t ErrValue) 
+SafeFetchN:
+        movq    %rsi, %rax
+FetchNPFI:
+        movq    (%rdi), %rax
+FetchNResume:
+        ret
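+
+        / How the two probes above are meant to be used (a sketch; the
+        / handler wiring lives in the platform fault handling code, not
+        / here): the error value is loaded into %eax/%rax first, and the
+        / probing load is the single instruction at Fetch32PFI/FetchNPFI.
+        / If that load faults, the VM's fault handler is expected to
+        / resume execution at Fetch32Resume/FetchNResume, so the caller
+        / simply observes the error value, e.g. a hypothetical caller:
+        /   int v = SafeFetch32(p, -1);   / yields -1 if p is unmapped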
+
+        .globl  SpinPause
+        .align  16
+SpinPause:
+        rep
+        nop
+        movq    $1, %rax
+        ret
+        
+
+        / Support for void Copy::arrayof_conjoint_bytes(void* from,
+        /                                               void* to,
+        /                                               size_t count)
+        / rdi - from
+        / rsi - to
+        / rdx - count, treated as ssize_t
+        /
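+        / Direction choice and tail handling, worked through once: the
+        / copy runs with ascending addresses (acb_CopyRight) unless 'to'
+        / falls inside (from, from+count), in which case it runs with
+        / descending addresses (acb_CopyLeft) so overlapping ranges are
+        / copied correctly.  Example: count = 13 = 8 + 4 + 0 + 1 gives
+        / one qword in the main loop, then bit 2 of %r8 selects the
+        / trailing dword, bit 1 (clear) skips the trailing word, and
+        / bit 0 selects the final byte.
+        /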
+        .align   16
+_Copy_arrayof_conjoint_bytes:
+        movq     %rdx,%r8             / byte count
+        shrq     $3,%rdx              / qword count
+        cmpq     %rdi,%rsi
+        leaq     -1(%rdi,%r8,1),%rax  / from + bcount*1 - 1
+        jbe      acb_CopyRight
+        cmpq     %rax,%rsi
+        jbe      acb_CopyLeft 
+acb_CopyRight:
+        leaq     -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
+        leaq     -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
+        negq     %rdx
+        jmp      7f
+        .align   16
+1:      movq     8(%rax,%rdx,8),%rsi
+        movq     %rsi,8(%rcx,%rdx,8)
+        addq     $1,%rdx
+        jnz      1b
+2:      testq    $4,%r8               / check for trailing dword
+        jz       3f
+        movl     8(%rax),%esi         / copy trailing dword
+        movl     %esi,8(%rcx)
+        addq     $4,%rax
+        addq     $4,%rcx              / original %rsi is trashed, so we
+                                      /  can't use it as a base register
+3:      testq    $2,%r8               / check for trailing word
+        jz       4f
+        movw     8(%rax),%si          / copy trailing word
+        movw     %si,8(%rcx)
+        addq     $2,%rcx
+4:      testq    $1,%r8               / check for trailing byte
+        jz       5f
+        movb     -1(%rdi,%r8,1),%al   / copy trailing byte
+        movb     %al,8(%rcx)
+5:      ret
+        .align   16
+6:      movq     -24(%rax,%rdx,8),%rsi
+        movq     %rsi,-24(%rcx,%rdx,8)
+        movq     -16(%rax,%rdx,8),%rsi
+        movq     %rsi,-16(%rcx,%rdx,8)
+        movq     -8(%rax,%rdx,8),%rsi
+        movq     %rsi,-8(%rcx,%rdx,8)
+        movq     (%rax,%rdx,8),%rsi
+        movq     %rsi,(%rcx,%rdx,8)
+7:      addq     $4,%rdx
+        jle      6b
+        subq     $4,%rdx
+        jl       1b
+        jmp      2b
+acb_CopyLeft:
+        testq    $1,%r8               / check for trailing byte
+        jz       1f
+        movb     -1(%rdi,%r8,1),%cl   / copy trailing byte
+        movb     %cl,-1(%rsi,%r8,1)
+        subq     $1,%r8               / adjust for possible trailing word
+1:      testq    $2,%r8               / check for trailing word
+        jz       2f
+        movw     -2(%rdi,%r8,1),%cx   / copy trailing word
+        movw     %cx,-2(%rsi,%r8,1)
+2:      testq    $4,%r8               / check for trailing dword
+        jz       5f
+        movl     (%rdi,%rdx,8),%ecx   / copy trailing dword
+        movl     %ecx,(%rsi,%rdx,8)
+        jmp      5f
+        .align   16
+3:      movq     -8(%rdi,%rdx,8),%rcx
+        movq     %rcx,-8(%rsi,%rdx,8)
+        subq     $1,%rdx
+        jnz      3b
+        ret
+        .align   16
+4:      movq     24(%rdi,%rdx,8),%rcx
+        movq     %rcx,24(%rsi,%rdx,8)
+        movq     16(%rdi,%rdx,8),%rcx
+        movq     %rcx,16(%rsi,%rdx,8)
+        movq     8(%rdi,%rdx,8),%rcx
+        movq     %rcx,8(%rsi,%rdx,8)
+        movq     (%rdi,%rdx,8),%rcx
+        movq     %rcx,(%rsi,%rdx,8)
+5:      subq     $4,%rdx
+        jge      4b
+        addq     $4,%rdx
+        jg       3b
+        ret
+
+        / Support for void Copy::arrayof_conjoint_jshorts(void* from,
+        /                                                 void* to,
+        /                                                 size_t count)
+        / Equivalent to
+        /   conjoint_jshorts_atomic
+        /
+        / If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
+        / let the hardware handle it.  The two or four words within dwords
+        / or qwords that span cache line boundaries will still be loaded
+        / and stored atomically.
+        /
+        / rdi - from
+        / rsi - to
+        / rdx - count, treated as ssize_t
+        /
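+        / Tail handling, worked through once: %r8 holds the jshort count
+        / and %rdx the qword count (count >> 2).  Example: count = 7
+        / copies one qword (4 shorts) in the main loop, then bit 1 of %r8
+        / selects a trailing dword (2 shorts) and bit 0 the final short.
+        /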
+        .align   16
+_Copy_arrayof_conjoint_jshorts:
+_Copy_conjoint_jshorts_atomic:
+        movq     %rdx,%r8             / word count
+        shrq     $2,%rdx              / qword count
+        cmpq     %rdi,%rsi
+        leaq     -2(%rdi,%r8,2),%rax  / from + wcount*2 - 2
+        jbe      acs_CopyRight
+        cmpq     %rax,%rsi
+        jbe      acs_CopyLeft 
+acs_CopyRight:
+        leaq     -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
+        leaq     -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
+        negq     %rdx
+        jmp      6f
+1:      movq     8(%rax,%rdx,8),%rsi
+        movq     %rsi,8(%rcx,%rdx,8)
+        addq     $1,%rdx
+        jnz      1b
+2:      testq    $2,%r8               / check for trailing dword
+        jz       3f
+        movl     8(%rax),%esi         / copy trailing dword
+        movl     %esi,8(%rcx)
+        addq     $4,%rcx              / original %rsi is trashed, so we
+                                      /  can't use it as a base register
+3:      testq    $1,%r8               / check for trailing word
+        jz       4f
+        movw     -2(%rdi,%r8,2),%si   / copy trailing word
+        movw     %si,8(%rcx)
+4:      ret
+        .align   16
+5:      movq     -24(%rax,%rdx,8),%rsi
+        movq     %rsi,-24(%rcx,%rdx,8)
+        movq     -16(%rax,%rdx,8),%rsi
+        movq     %rsi,-16(%rcx,%rdx,8)
+        movq     -8(%rax,%rdx,8),%rsi
+        movq     %rsi,-8(%rcx,%rdx,8)
+        movq     (%rax,%rdx,8),%rsi
+        movq     %rsi,(%rcx,%rdx,8)
+6:      addq     $4,%rdx
+        jle      5b
+        subq     $4,%rdx
+        jl       1b
+        jmp      2b
+acs_CopyLeft:
+        testq    $1,%r8               / check for trailing word
+        jz       1f
+        movw     -2(%rdi,%r8,2),%cx   / copy trailing word
+        movw     %cx,-2(%rsi,%r8,2)
+1:      testq    $2,%r8               / check for trailing dword
+        jz       4f
+        movl     (%rdi,%rdx,8),%ecx   / copy trailing dword
+        movl     %ecx,(%rsi,%rdx,8)
+        jmp      4f
+2:      movq     -8(%rdi,%rdx,8),%rcx
+        movq     %rcx,-8(%rsi,%rdx,8)
+        subq     $1,%rdx
+        jnz      2b
+        ret
+        .align   16
+3:      movq     24(%rdi,%rdx,8),%rcx
+        movq     %rcx,24(%rsi,%rdx,8)
+        movq     16(%rdi,%rdx,8),%rcx
+        movq     %rcx,16(%rsi,%rdx,8)
+        movq     8(%rdi,%rdx,8),%rcx
+        movq     %rcx,8(%rsi,%rdx,8)
+        movq     (%rdi,%rdx,8),%rcx
+        movq     %rcx,(%rsi,%rdx,8)
+4:      subq     $4,%rdx
+        jge      3b
+        addq     $4,%rdx
+        jg       2b
+        ret
+
+        / Support for void Copy::arrayof_conjoint_jints(jint* from,
+        /                                               jint* to,
+        /                                               size_t count)
+        / Equivalent to
+        /   conjoint_jints_atomic
+        /
+        / If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
+        / the hardware handle it.  The two dwords within qwords that span
+        / cache line boundaries will still be loaded and stored atomically.
+        /
+        / rdi - from
+        / rsi - to
+        / rdx - count, treated as ssize_t
+        /
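+        / Worked example: %r8 holds the jint count and %rdx the qword
+        / count (count >> 1).  For count = 5 the main loop copies two
+        / qwords (4 jints) and bit 0 of %r8 selects the one trailing jint.
+        /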
+        .align   16
+_Copy_arrayof_conjoint_jints:
+_Copy_conjoint_jints_atomic:
+        movq     %rdx,%r8             / dword count
+        shrq     %rdx                 / qword count
+        cmpq     %rdi,%rsi
+        leaq     -4(%rdi,%r8,4),%rax  / from + dcount*4 - 4
+        jbe      aci_CopyRight
+        cmpq     %rax,%rsi
+        jbe      aci_CopyLeft 
+aci_CopyRight:
+        leaq     -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
+        leaq     -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
+        negq     %rdx
+        jmp      5f
+        .align   16
+1:      movq     8(%rax,%rdx,8),%rsi
+        movq     %rsi,8(%rcx,%rdx,8)
+        addq     $1,%rdx
+        jnz       1b
+2:      testq    $1,%r8               / check for trailing dword
+        jz       3f
+        movl     8(%rax),%esi         / copy trailing dword
+        movl     %esi,8(%rcx)
+3:      ret
+        .align   16
+4:      movq     -24(%rax,%rdx,8),%rsi
+        movq     %rsi,-24(%rcx,%rdx,8)
+        movq     -16(%rax,%rdx,8),%rsi
+        movq     %rsi,-16(%rcx,%rdx,8)
+        movq     -8(%rax,%rdx,8),%rsi
+        movq     %rsi,-8(%rcx,%rdx,8)
+        movq     (%rax,%rdx,8),%rsi
+        movq     %rsi,(%rcx,%rdx,8)
+5:      addq     $4,%rdx
+        jle      4b
+        subq     $4,%rdx
+        jl       1b
+        jmp      2b
+aci_CopyLeft:
+        testq    $1,%r8               / check for trailing dword
+        jz       3f
+        movl     -4(%rdi,%r8,4),%ecx  / copy trailing dword
+        movl     %ecx,-4(%rsi,%r8,4)
+        jmp      3f
+1:      movq     -8(%rdi,%rdx,8),%rcx
+        movq     %rcx,-8(%rsi,%rdx,8)
+        subq     $1,%rdx
+        jnz      1b
+        ret
+        .align   16
+2:      movq     24(%rdi,%rdx,8),%rcx
+        movq     %rcx,24(%rsi,%rdx,8)
+        movq     16(%rdi,%rdx,8),%rcx
+        movq     %rcx,16(%rsi,%rdx,8)
+        movq     8(%rdi,%rdx,8),%rcx
+        movq     %rcx,8(%rsi,%rdx,8)
+        movq     (%rdi,%rdx,8),%rcx
+        movq     %rcx,(%rsi,%rdx,8)
+3:      subq     $4,%rdx
+        jge      2b
+        addq     $4,%rdx
+        jg       1b
+        ret
+	
+        / Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
+        /                                                jlong* to,
+        /                                                size_t count)
+        / Equivalent to
+        /   conjoint_jlongs_atomic
+        /   arrayof_conjoint_oops
+        /   conjoint_oops_atomic
+        /
+        / rdi - from
+        / rsi - to
+        / rdx - count, treated as ssize_t
+        /
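+        / No tail handling is needed here: each element is already a full
+        / qword, so %rdx is used directly.  Worked example: count = 10
+        / copies two unrolled groups of four qwords, then the single-qword
+        / loop finishes the remaining two.
+        /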
+        .align   16
+_Copy_arrayof_conjoint_jlongs:
+_Copy_conjoint_jlongs_atomic:
+        cmpq     %rdi,%rsi
+        leaq     -8(%rdi,%rdx,8),%rax / from + count*8 - 8
+        jbe      acl_CopyRight
+        cmpq     %rax,%rsi
+        jbe      acl_CopyLeft 
+acl_CopyRight:
+        leaq     -8(%rsi,%rdx,8),%rcx / to + count*8 - 8
+        negq     %rdx
+        jmp      3f
+1:      movq     8(%rax,%rdx,8),%rsi
+        movq     %rsi,8(%rcx,%rdx,8)
+        addq     $1,%rdx
+        jnz      1b
+        ret
+        .align   16
+2:      movq     -24(%rax,%rdx,8),%rsi
+        movq     %rsi,-24(%rcx,%rdx,8)
+        movq     -16(%rax,%rdx,8),%rsi
+        movq     %rsi,-16(%rcx,%rdx,8)
+        movq     -8(%rax,%rdx,8),%rsi
+        movq     %rsi,-8(%rcx,%rdx,8)
+        movq     (%rax,%rdx,8),%rsi
+        movq     %rsi,(%rcx,%rdx,8)
+3:      addq     $4,%rdx
+        jle      2b
+        subq     $4,%rdx
+        jl       1b
+        ret
+4:      movq     -8(%rdi,%rdx,8),%rcx
+        movq     %rcx,-8(%rsi,%rdx,8)
+        subq     $1,%rdx
+        jnz      4b
+        ret
+        .align   16
+5:      movq     24(%rdi,%rdx,8),%rcx
+        movq     %rcx,24(%rsi,%rdx,8)
+        movq     16(%rdi,%rdx,8),%rcx
+        movq     %rcx,16(%rsi,%rdx,8)
+        movq     8(%rdi,%rdx,8),%rcx
+        movq     %rcx,8(%rsi,%rdx,8)
+        movq     (%rdi,%rdx,8),%rcx
+        movq     %rcx,(%rsi,%rdx,8)
+acl_CopyLeft:
+        subq     $4,%rdx
+        jge      5b
+        addq     $4,%rdx
+        jg       4b
+        ret