Mercurial > hg > graal-compiler
diff src/os_cpu/solaris_x86/vm/solaris_x86_32.s @ 0:a61af66fc99e jdk7-b24
Initial load
author | duke |
---|---|
date | Sat, 01 Dec 2007 00:00:00 +0000 |
parents | |
children | c18cbe5936b8 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/solaris_x86/vm/solaris_x86_32.s Sat Dec 01 00:00:00 2007 +0000
@@ -0,0 +1,685 @@
+//
+// Copyright 2004-2007 Sun Microsystems, Inc. All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+// CA 95054 USA or visit www.sun.com if you need additional information or
+// have any questions.
+//
+
+        .globl fixcw
+        .globl sse_check
+        .globl sse_unavailable
+        .globl gs_load
+        .globl gs_thread
+        .globl _Atomic_cmpxchg_long_gcc
+
+        // NOTE WELL! The _Copy functions are called directly
+        // from server-compiler-generated code via CallLeafNoFP,
+        // which means that they *must* either not use floating
+        // point or use it in the same manner as does the server
+        // compiler.
+
+        .globl _Copy_conjoint_bytes
+        .globl _Copy_arrayof_conjoint_bytes
+        .globl _Copy_conjoint_jshorts_atomic
+        .globl _Copy_arrayof_conjoint_jshorts
+        .globl _Copy_conjoint_jints_atomic
+        .globl _Copy_arrayof_conjoint_jints
+        .globl _Copy_conjoint_jlongs_atomic
+        .globl _mmx_Copy_arrayof_conjoint_jshorts
+
+        .section .text,"ax"
+
+/ Support for void os::Solaris::init_thread_fpu_state() in os_solaris_i486.cpp
+/ Set fpu to 53 bit precision. This happens too early to use a stub.
+        .align 16
+fixcw:
+        pushl    $0x27f  / new control word, placed on the stack for fldcw
+        fldcw    0(%esp)
+        popl     %eax  / drop the temporary again
+        ret
+
+/ SafeFetch32 and SafeFetchN share one entry point: the second argument is
+/ loaded into %eax first, then the word addressed by the first argument is
+/ loaded over it at Fetch32PFI. NOTE(review): presumably a fault handler
+/ resumes at Fetch32Resume so the second argument is returned; confirm.
+        .align 16
+        .globl SafeFetch32
+        .globl SafeFetchN
+        .globl Fetch32PFI, Fetch32Resume
+SafeFetch32:
+SafeFetchN:
+        movl     0x8(%esp), %eax
+        movl     0x4(%esp), %ecx
+Fetch32PFI:
+        movl     (%ecx), %eax
+Fetch32Resume:
+        ret
+
+
+/ SpinPause: rep; nop is the encoding of the PAUSE spin-wait hint; returns 1.
+        .align 16
+        .globl SpinPause
+SpinPause:
+        rep
+        nop
+        movl     $1, %eax
+        ret
+
+
+/ Test SSE availability, used by os_solaris_i486.cpp
+        .align 16
+sse_check:
+        / Fault if SSE not available
+        xorps    %xmm0,%xmm0
+        / No fault
+        movl     $1,%eax
+        ret
+        / Signal handler continues here if SSE is not available
+sse_unavailable:
+        xorl     %eax,%eax
+        ret
+
+/ Fast thread accessors, used by threadLS_solaris_i486.cpp
+        .align 16
+gs_load:
+        movl     4(%esp),%ecx  / first stack argument: offset into %gs
+        movl     %gs:(%ecx),%eax  / return the dword at %gs:offset
+        ret
+
+        .align 16
+gs_thread:
+        movl     %gs:0x0,%eax  / return the dword at %gs:0
+        ret
+
+        / Support for void Copy::conjoint_bytes(void* from,
+        /                                       void* to,
+        /                                       size_t count)
+        .align 16
+_Copy_conjoint_bytes:
+        pushl    %esi
+        movl     4+12(%esp),%ecx  / count
+        pushl    %edi
+        movl     8+ 4(%esp),%esi  / from
+        movl     8+ 8(%esp),%edi  / to
+        cmpl     %esi,%edi
+        leal     -1(%esi,%ecx),%eax  / from + count - 1
+        jbe      cb_CopyRight
+        cmpl     %eax,%edi
+        jbe      cb_CopyLeft
+        / copy from low to high
+cb_CopyRight:
+        cmpl     $3,%ecx
+        jbe      5f  / <= 3 bytes
+        / align source address at dword address boundary
+        movl     %ecx,%eax  / original count
+        movl     $4,%ecx
+        subl     %esi,%ecx
+        andl     $3,%ecx  / prefix byte count
+        jz       1f  / no prefix
+        subl     %ecx,%eax  / byte count less prefix
+        / copy prefix
+        subl     %esi,%edi
+0:      movb     (%esi),%dl
+        movb     %dl,(%edi,%esi,1)
+        addl     $1,%esi
+        subl     $1,%ecx
+        jnz      0b
+        addl     %esi,%edi
+1:      movl     %eax,%ecx  / byte count less prefix
+        shrl     $2,%ecx  / dword count
+        jz       4f  / no dwords to move
+        cmpl     $32,%ecx
+        jbe      2f  / <= 32 dwords
+        / copy aligned dwords
+        rep;     smovl
+        jmp      4f
+        / copy aligned dwords
+2:      subl     %esi,%edi
+        .align 16
+3:      movl     (%esi),%edx
+        movl     %edx,(%edi,%esi,1)
+        addl     $4,%esi
+        subl     $1,%ecx
+        jnz      3b
+        addl     %esi,%edi
+4:      movl     %eax,%ecx  / byte count less prefix
+5:      andl     $3,%ecx  / suffix byte count (count <= 3 enters here; 0 exits at 7)
+        jz       7f  / no suffix
+        / copy suffix
+        xorl     %eax,%eax
+6:      movb     (%esi,%eax,1),%dl
+        movb     %dl,(%edi,%eax,1)
+        addl     $1,%eax
+        subl     $1,%ecx
+        jnz      6b
+7:      popl     %edi
+        popl     %esi
+        ret
+        / copy from high to low
+cb_CopyLeft:
+        std
+        leal     -4(%edi,%ecx),%edi  / to + count - 4
+        movl     %eax,%esi  / from + count - 1
+        movl     %ecx,%eax
+        subl     $3,%esi  / from + count - 4
+        cmpl     $3,%ecx
+        jbe      5f  / <= 3 bytes
+1:      shrl     $2,%ecx  / dword count
+        jz       4f  / no dwords to move
+        cmpl     $32,%ecx
+        ja       3f  / > 32 dwords
+        / copy dwords, aligned or not
+        subl     %esi,%edi
+        .align 16
+2:      movl     (%esi),%edx
+        movl     %edx,(%edi,%esi,1)
+        subl     $4,%esi
+        subl     $1,%ecx
+        jnz      2b
+        addl     %esi,%edi
+        jmp      4f
+        / copy dwords, aligned or not
+3:      rep;     smovl
+4:      movl     %eax,%ecx  / byte count
+5:      andl     $3,%ecx  / suffix byte count (count <= 3 enters here; 0 exits at 7)
+        jz       7f  / no suffix
+        / copy suffix
+        subl     %esi,%edi
+        addl     $3,%esi
+6:      movb     (%esi),%dl
+        movb     %dl,(%edi,%esi,1)
+        subl     $1,%esi
+        subl     $1,%ecx
+        jnz      6b
+7:      cld  / restore the direction flag set by std above
+        popl     %edi
+        popl     %esi
+        ret
+
+        / Support for void Copy::arrayof_conjoint_bytes(void* from,
+        /                                               void* to,
+        /                                               size_t count)
+        /
+        / Same as _Copy_conjoint_bytes, except no source alignment check.
+        .align 16
+_Copy_arrayof_conjoint_bytes:
+        pushl    %esi
+        movl     4+12(%esp),%ecx  / count
+        pushl    %edi
+        movl     8+ 4(%esp),%esi  / from
+        movl     8+ 8(%esp),%edi  / to
+        cmpl     %esi,%edi
+        leal     -1(%esi,%ecx),%eax  / from + count - 1
+        jbe      acb_CopyRight
+        cmpl     %eax,%edi
+        jbe      acb_CopyLeft
+        / copy from low to high
+acb_CopyRight:
+        cmpl     $3,%ecx
+        jbe      5f  / <= 3 bytes
+1:      movl     %ecx,%eax
+        shrl     $2,%ecx
+        jz       4f
+        cmpl     $32,%ecx
+        ja       3f
+        / copy aligned dwords
+        subl     %esi,%edi
+        .align 16
+2:      movl     (%esi),%edx
+        movl     %edx,(%edi,%esi,1)
+        addl     $4,%esi
+        subl     $1,%ecx
+        jnz      2b
+        addl     %esi,%edi
+        jmp      4f
+        / copy aligned dwords
+3:      rep;     smovl
+4:      movl     %eax,%ecx
+5:      andl     $3,%ecx  / suffix byte count (count <= 3 enters here; 0 exits at 7)
+        jz       7f
+        / copy suffix
+        xorl     %eax,%eax
+6:      movb     (%esi,%eax,1),%dl
+        movb     %dl,(%edi,%eax,1)
+        addl     $1,%eax
+        subl     $1,%ecx
+        jnz      6b
+7:      popl     %edi
+        popl     %esi
+        ret
+acb_CopyLeft:
+        std
+        leal     -4(%edi,%ecx),%edi  / to + count - 4
+        movl     %eax,%esi  / from + count - 1
+        movl     %ecx,%eax
+        subl     $3,%esi  / from + count - 4
+        cmpl     $3,%ecx
+        jbe      5f  / <= 3 bytes
+1:      shrl     $2,%ecx
+        jz       4f
+        cmpl     $32,%ecx
+        jbe      2f  / <= 32 dwords
+        rep;     smovl
+        jmp      4f
+        .=.+8
+2:      subl     %esi,%edi
+        .align 16
+3:      movl     (%esi),%edx
+        movl     %edx,(%edi,%esi,1)
+        subl     $4,%esi
+        subl     $1,%ecx
+        jnz      3b
+        addl     %esi,%edi
+4:      movl     %eax,%ecx
+5:      andl     $3,%ecx  / suffix byte count (count <= 3 enters here; 0 exits at 7)
+        jz       7f
+        subl     %esi,%edi
+        addl     $3,%esi
+6:      movb     (%esi),%dl
+        movb     %dl,(%edi,%esi,1)
+        subl     $1,%esi
+        subl     $1,%ecx
+        jnz      6b
+7:      cld  / restore the direction flag set by std above
+        popl     %edi
+        popl     %esi
+        ret
+
+        / Support for void Copy::conjoint_jshorts_atomic(void* from,
+        /                                                void* to,
+        /                                                size_t count)
+        .align 16
+_Copy_conjoint_jshorts_atomic:
+        pushl    %esi
+        movl     4+12(%esp),%ecx  / count
+        pushl    %edi
+        movl     8+ 4(%esp),%esi  / from
+        movl     8+ 8(%esp),%edi  / to
+        cmpl     %esi,%edi
+        leal     -2(%esi,%ecx,2),%eax  / from + count*2 - 2
+        jbe      cs_CopyRight
+        cmpl     %eax,%edi
+        jbe      cs_CopyLeft
+        / copy from low to high
+cs_CopyRight:
+        / align source address at dword address boundary
+        movl     %esi,%eax  / original from
+        andl     $3,%eax  / either 0 or 2
+        jz       1f  / no prefix
+        / copy prefix
+        subl     $1,%ecx  / account for the prefix word before touching memory
+        jb       5f  / borrow: count was zero, nothing to copy
+        movw     (%esi),%dx
+        movw     %dx,(%edi)
+        addl     %eax,%esi  / %eax == 2
+        addl     %eax,%edi
+1:      movl     %ecx,%eax  / word count less prefix
+        sarl     %ecx  / dword count
+        jz       4f  / no dwords to move
+        cmpl     $32,%ecx
+        jbe      2f  / <= 32 dwords
+        / copy aligned dwords
+        rep;     smovl
+        jmp      4f
+        / copy aligned dwords
+2:      subl     %esi,%edi
+        .align 16
+3:      movl     (%esi),%edx
+        movl     %edx,(%edi,%esi,1)
+        addl     $4,%esi
+        subl     $1,%ecx
+        jnz      3b
+        addl     %esi,%edi
+4:      andl     $1,%eax  / suffix count
+        jz       5f  / no suffix
+        / copy suffix
+        movw     (%esi),%dx
+        movw     %dx,(%edi)
+5:      popl     %edi
+        popl     %esi
+        ret
+        / copy from high to low
+cs_CopyLeft:
+        std
+        leal     -4(%edi,%ecx,2),%edi  / to + count*2 - 4
+        movl     %eax,%esi  / from + count*2 - 2
+        movl     %ecx,%eax
+        subl     $2,%esi  / from + count*2 - 4
+1:      sarl     %ecx  / dword count
+        jz       4f  / no dwords to move
+        cmpl     $32,%ecx
+        ja       3f  / > 32 dwords
+        subl     %esi,%edi
+        .align 16
+2:      movl     (%esi),%edx
+        movl     %edx,(%edi,%esi,1)
+        subl     $4,%esi
+        subl     $1,%ecx
+        jnz      2b
+        addl     %esi,%edi
+        jmp      4f
+3:      rep;     smovl
+4:      andl     $1,%eax  / suffix count
+        jz       5f  / no suffix
+        / copy suffix
+        addl     $2,%esi
+        addl     $2,%edi
+        movw     (%esi),%dx
+        movw     %dx,(%edi)
+5:      cld  / restore the direction flag set by std above
+        popl     %edi
+        popl     %esi
+        ret
+
+        / Support for void Copy::arrayof_conjoint_jshorts(void* from,
+        /                                                 void* to,
+        /                                                 size_t count)
+        .align 16
+_Copy_arrayof_conjoint_jshorts:
+        pushl    %esi
+        movl     4+12(%esp),%ecx  / count
+        pushl    %edi
+        movl     8+ 4(%esp),%esi  / from
+        movl     8+ 8(%esp),%edi  / to
+        cmpl     %esi,%edi
+        leal     -2(%esi,%ecx,2),%eax  / from + count*2 - 2
+        jbe      acs_CopyRight
+        cmpl     %eax,%edi
+        jbe      acs_CopyLeft
+acs_CopyRight:
+        movl     %ecx,%eax  / word count
+        sarl     %ecx  / dword count
+        jz       4f  / no dwords to move
+        cmpl     $32,%ecx
+        jbe      2f  / <= 32 dwords
+        / copy aligned dwords
+        rep;     smovl
+        jmp      4f
+        / copy aligned dwords
+        .=.+5
+2:      subl     %esi,%edi
+        .align 16
+3:      movl     (%esi),%edx
+        movl     %edx,(%edi,%esi,1)
+        addl     $4,%esi
+        subl     $1,%ecx
+        jnz      3b
+        addl     %esi,%edi
+4:      andl     $1,%eax  / suffix count
+        jz       5f  / no suffix
+        / copy suffix
+        movw     (%esi),%dx
+        movw     %dx,(%edi)
+5:      popl     %edi
+        popl     %esi
+        ret
+acs_CopyLeft:
+        std
+        leal     -4(%edi,%ecx,2),%edi  / to + count*2 - 4
+        movl     %eax,%esi  / from + count*2 - 2
+        movl     %ecx,%eax
+        subl     $2,%esi  / from + count*2 - 4
+        sarl     %ecx  / dword count
+        jz       4f  / no dwords to move
+        cmpl     $32,%ecx
+        ja       3f  / > 32 dwords
+        subl     %esi,%edi
+        .align 16
+2:      movl     (%esi),%edx
+        movl     %edx,(%edi,%esi,1)
+        subl     $4,%esi
+        subl     $1,%ecx
+        jnz      2b
+        addl     %esi,%edi
+        jmp      4f
+3:      rep;     smovl
+4:      andl     $1,%eax  / suffix count
+        jz       5f  / no suffix
+        / copy suffix
+        addl     $2,%esi
+        addl     $2,%edi
+        movw     (%esi),%dx
+        movw     %dx,(%edi)
+5:      cld  / restore the direction flag set by std above
+        popl     %edi
+        popl     %esi
+        ret
+
+        / Support for void Copy::conjoint_jints_atomic(void* from,
+        /                                              void* to,
+        /                                              size_t count)
+        / Equivalent to
+        /   arrayof_conjoint_jints
+        .align 16
+_Copy_conjoint_jints_atomic:
+_Copy_arrayof_conjoint_jints:
+        pushl    %esi
+        movl     4+12(%esp),%ecx  / count
+        pushl    %edi
+        movl     8+ 4(%esp),%esi  / from
+        movl     8+ 8(%esp),%edi  / to
+        cmpl     %esi,%edi
+        leal     -4(%esi,%ecx,4),%eax  / from + count*4 - 4
+        jbe      ci_CopyRight
+        cmpl     %eax,%edi
+        jbe      ci_CopyLeft
+ci_CopyRight:
+        cmpl     $32,%ecx
+        jbe      2f  / <= 32 dwords
+        rep;     smovl
+        popl     %edi
+        popl     %esi
+        ret
+        .=.+10
+2:      subl     %esi,%edi
+        jmp      4f  / test the count first so that zero copies nothing
+        .align 16
+3:      movl     (%esi),%edx
+        movl     %edx,(%edi,%esi,1)
+        addl     $4,%esi
+4:      subl     $1,%ecx  / here 0 <= count <= 32, so the signed test is exact
+        jge      3b
+        popl     %edi
+        popl     %esi
+        ret
+ci_CopyLeft:
+        std
+        leal     -4(%edi,%ecx,4),%edi  / to + count*4 - 4
+        cmpl     $32,%ecx
+        ja       3f  / > 32 dwords
+        subl     %eax,%edi  / eax == from + count*4 - 4
+        jmp      4f  / test the count first so that zero copies nothing
+        .align 16
+2:      movl     (%eax),%edx
+        movl     %edx,(%edi,%eax,1)
+        subl     $4,%eax
+4:      subl     $1,%ecx  / here 0 <= count <= 32, so the signed test is exact
+        jge      2b
+        cld
+        popl     %edi
+        popl     %esi
+        ret
+3:      movl     %eax,%esi  / from + count*4 - 4
+        rep;     smovl
+        cld
+        popl     %edi
+        popl     %esi
+        ret
+
+        / Support for void Copy::conjoint_jlongs_atomic(jlong* from,
+        /                                               jlong* to,
+        /                                               size_t count)
+        /
+        / 32-bit
+        /
+        / count treated as signed
+        /
+        / if (from > to) {
+        /   while (--count >= 0) {
+        /     *to++ = *from++;
+        /   }
+        / } else {
+        /   while (--count >= 0) {
+        /     to[count] = from[count];
+        /   }
+        / }
+        .align 16
+_Copy_conjoint_jlongs_atomic:
+        movl     4+8(%esp),%ecx  / count
+        movl     4+0(%esp),%eax  / from
+        movl     4+4(%esp),%edx  / to
+        cmpl     %eax,%edx
+        jae      cla_CopyLeft
+cla_CopyRight:
+        subl     %eax,%edx  / edx = to - from, so (%edx,%eax,1) addresses to
+        jmp      2f
+        .align 16
+1:      fildll   (%eax)  / each jlong moves as one 64-bit x87 load and store
+        fistpll  (%edx,%eax,1)
+        addl     $8,%eax
+2:      subl     $1,%ecx
+        jge      1b
+        ret
+        .align 16
+3:      fildll   (%eax,%ecx,8)
+        fistpll  (%edx,%ecx,8)
+cla_CopyLeft:
+        subl     $1,%ecx
+        jge      3b
+        ret
+
+        / Support for void Copy::arrayof_conjoint_jshorts(void* from,
+        /                                                 void* to,
+        /                                                 size_t count)
+        / MMX variant: forward copies of 33 or more dwords use 64-byte movq blocks.
+        .align 16
+_mmx_Copy_arrayof_conjoint_jshorts:
+        pushl    %esi
+        movl     4+12(%esp),%ecx  / count
+        pushl    %edi
+        movl     8+ 4(%esp),%esi  / from
+        movl     8+ 8(%esp),%edi  / to
+        cmpl     %esi,%edi
+        leal     -2(%esi,%ecx,2),%eax  / from + count*2 - 2
+        jbe      mmx_acs_CopyRight
+        cmpl     %eax,%edi
+        jbe      mmx_acs_CopyLeft
+mmx_acs_CopyRight:
+        movl     %ecx,%eax  / word count
+        sarl     %ecx  / dword count
+        je       5f  / no dwords to move
+        cmpl     $33,%ecx
+        jae      3f  / >= 33 dwords
+1:      subl     %esi,%edi
+        .align 16
+2:      movl     (%esi),%edx
+        movl     %edx,(%edi,%esi,1)
+        addl     $4,%esi
+        subl     $1,%ecx
+        jnz      2b
+        addl     %esi,%edi
+        jmp      5f
+3:      smovl  / align to 8 bytes, we know we are 4 byte aligned to start
+        subl     $1,%ecx
+4:      .align 16
+        movq     0(%esi),%mm0
+        addl     $64,%edi
+        movq     8(%esi),%mm1
+        subl     $16,%ecx  / 16 dwords (64 bytes) per iteration
+        movq     16(%esi),%mm2
+        movq     %mm0,-64(%edi)
+        movq     24(%esi),%mm0
+        movq     %mm1,-56(%edi)
+        movq     32(%esi),%mm1
+        movq     %mm2,-48(%edi)
+        movq     40(%esi),%mm2
+        movq     %mm0,-40(%edi)
+        movq     48(%esi),%mm0
+        movq     %mm1,-32(%edi)
+        movq     56(%esi),%mm1
+        movq     %mm2,-24(%edi)
+        movq     %mm0,-16(%edi)
+        addl     $64,%esi
+        movq     %mm1,-8(%edi)
+        cmpl     $16,%ecx
+        jge      4b
+        emms  / leave MMX state before returning to code that may use the FPU
+        testl    %ecx,%ecx
+        ja       1b  / remaining dwords go through the dword loop
+5:      andl     $1,%eax  / suffix count
+        je       7f  / no suffix
+6:      movw     (%esi),%dx
+        movw     %dx,(%edi)
+7:      popl     %edi
+        popl     %esi
+        ret
+mmx_acs_CopyLeft:
+        std
+        leal     -4(%edi,%ecx,2),%edi  / to + count*2 - 4
+        movl     %eax,%esi  / from + count*2 - 2
+        movl     %ecx,%eax
+        subl     $2,%esi  / from + count*2 - 4
+        sarl     %ecx  / dword count
+        je       4f  / no dwords to move
+        cmpl     $32,%ecx
+        ja       3f  / > 32 dwords
+        subl     %esi,%edi
+        .align 16
+2:      movl     (%esi),%edx
+        movl     %edx,(%edi,%esi,1)
+        subl     $4,%esi
+        subl     $1,%ecx
+        jnz      2b
+        addl     %esi,%edi
+        jmp      4f
+3:      rep;     smovl
+4:      andl     $1,%eax  / suffix count
+        je       6f  / no suffix
+        addl     $2,%esi
+        addl     $2,%edi
+5:      movw     (%esi),%dx
+        movw     %dx,(%edi)
+6:      cld  / restore the direction flag set by std above
+        popl     %edi
+        popl     %esi
+        ret
+
+
+        / Support for jlong Atomic::cmpxchg(jlong exchange_value,
+        /                                   volatile jlong* dest,
+        /                                   jlong compare_value,
+        /                                   bool is_MP)
+        / Used only for Solaris/gcc builds
+        .align 16
+_Atomic_cmpxchg_long_gcc:
+        / 8(%esp) : return PC
+        pushl    %ebx  / 4(%esp) : old %ebx
+        pushl    %edi  / 0(%esp) : old %edi
+        movl     12(%esp), %ebx  / 12(%esp) : exchange_value (low)
+        movl     16(%esp), %ecx  / 16(%esp) : exchange_value (high)
+        movl     24(%esp), %eax  / 24(%esp) : compare_value (low)
+        movl     28(%esp), %edx  / 28(%esp) : compare_value (high)
+        movl     20(%esp), %edi  / 20(%esp) : dest
+        cmpl     $0, 32(%esp)  / 32(%esp) : is_MP
+        je       1f  / is_MP == 0: enter after the lock prefix
+        lock
+1:      cmpxchg8b (%edi)  / result is whatever cmpxchg8b leaves in %edx:%eax
+        popl     %edi
+        popl     %ebx
+        ret