comparison src/os_cpu/bsd_x86/vm/bsd_x86_64.s @ 3960:f08d439fab8c

7089790: integrate bsd-port changes Reviewed-by: kvn, twisti, jrose Contributed-by: Kurt Miller <kurt@intricatesoftware.com>, Greg Lewis <glewis@eyesbeyond.com>, Jung-uk Kim <jkim@freebsd.org>, Christos Zoulas <christos@zoulas.com>, Landon Fuller <landonf@plausible.coop>, The FreeBSD Foundation <board@freebsdfoundation.org>, Michael Franz <mvfranz@gmail.com>, Roger Hoover <rhoover@apple.com>, Alexander Strange <astrange@apple.com>
author never
date Sun, 25 Sep 2011 16:03:29 -0700
parents 3959:eda6988c0d81
children 980532a806a5
#
# Copyright (c) 2004, 2007, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#

#ifdef __APPLE__
# Darwin uses _ prefixed global symbols
#define SYMBOL(s) _ ## s
#define ELF_TYPE(name, description)
#else
#define SYMBOL(s) s
#define ELF_TYPE(name, description) .type name,description
#endif
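
For illustration, here is a minimal C++ sketch of how VM code reaches these entry points. The driver below is hypothetical, not HotSpot source; the declarations follow the prototypes documented in the comments later in this file. On Darwin, the C-level name SafeFetch32 becomes the object-file symbol _SafeFetch32, which is exactly what SYMBOL() produces on that platform.

    // Hypothetical standalone driver: link it against this .s file to
    // exercise the stubs. Declarations mirror the documented prototypes.
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    extern "C" {
      int      SafeFetch32(int* adr, int errValue);
      intptr_t SafeFetchN(intptr_t* adr, intptr_t errValue);
      int      SpinPause();
      void     _Copy_conjoint_jints_atomic(int* from, int* to, size_t count);
    }

    int main() {
      int value = 42;
      // The compiler emits "call SafeFetch32"; Darwin's toolchain mangles
      // that to _SafeFetch32, matching the SYMBOL(SafeFetch32) label below.
      std::printf("fetched: %d\n", SafeFetch32(&value, -1));

      int src[4] = {1, 2, 3, 4}, dst[4] = {0};
      _Copy_conjoint_jints_atomic(src, dst, 4);
      std::printf("dst[3] = %d\n", dst[3]);
      return 0;
    }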

        # NOTE WELL! The _Copy functions are called directly
        # from server-compiler-generated code via CallLeafNoFP,
        # which means that they *must* either not use floating
        # point or use it in the same manner as does the server
        # compiler.

        .globl SYMBOL(_Copy_arrayof_conjoint_bytes)
        .globl SYMBOL(_Copy_arrayof_conjoint_jshorts)
        .globl SYMBOL(_Copy_conjoint_jshorts_atomic)
        .globl SYMBOL(_Copy_arrayof_conjoint_jints)
        .globl SYMBOL(_Copy_conjoint_jints_atomic)
        .globl SYMBOL(_Copy_arrayof_conjoint_jlongs)
        .globl SYMBOL(_Copy_conjoint_jlongs_atomic)

        .text

        .globl SYMBOL(SafeFetch32), SYMBOL(Fetch32PFI), SYMBOL(Fetch32Resume)
        .p2align 4,,15
        ELF_TYPE(SafeFetch32,@function)
        // Prototype: int SafeFetch32 (int * Adr, int ErrValue)
SYMBOL(SafeFetch32):
        movl     %esi, %eax
SYMBOL(Fetch32PFI):
        movl     (%rdi), %eax
SYMBOL(Fetch32Resume):
        ret

        .globl SYMBOL(SafeFetchN), SYMBOL(FetchNPFI), SYMBOL(FetchNResume)
        .p2align 4,,15
        ELF_TYPE(SafeFetchN,@function)
        // Prototype: intptr_t SafeFetchN (intptr_t * Adr, intptr_t ErrValue)
SYMBOL(SafeFetchN):
        movq     %rsi, %rax
SYMBOL(FetchNPFI):
        movq     (%rdi), %rax
SYMBOL(FetchNResume):
        ret
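
SafeFetch32/SafeFetchN preload the error value into the return register, attempt the load at the Fetch32PFI/FetchNPFI label, and fall through to the Resume label. If the address is unmapped, the port's signal handler recognizes the faulting PC and restarts execution at the Resume label, so the caller sees the error value instead of a crash. The sketch below is an assumed, simplified illustration of that recovery idea, not the actual handler (which lives in the port's os_bsd_x86.cpp); the mcontext field spelling shown is the FreeBSD/amd64 one.

    // Simplified sketch of SafeFetch fault recovery (assumption: the real
    // HotSpot handler is more involved and platform-dependent).
    #include <csignal>
    #include <cstdint>
    #include <ucontext.h>

    extern "C" char Fetch32PFI;     // label of the potentially faulting movl
    extern "C" char Fetch32Resume;  // label of the instruction after it

    static void segv_handler(int, siginfo_t*, void* raw) {
      ucontext_t* uc = static_cast<ucontext_t*>(raw);
      uintptr_t pc = (uintptr_t)uc->uc_mcontext.mc_rip;  // FreeBSD/amd64 name
      if (pc == (uintptr_t)&Fetch32PFI) {
        // %eax already holds ErrValue (set before the faulting load), so
        // resuming after the load makes SafeFetch32 return ErrValue.
        uc->uc_mcontext.mc_rip = (uintptr_t)&Fetch32Resume;
        return;
      }
      // otherwise: fall through to normal crash handling
    }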

        .globl SYMBOL(SpinPause)
        .p2align 4,,15
        ELF_TYPE(SpinPause,@function)
SYMBOL(SpinPause):
        rep
        nop
        movq     $1, %rax
        ret
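
`rep; nop` assembles to the two-byte PAUSE instruction (F3 90), a hint that the thread is busy-waiting; it reduces pipeline flushes and power draw without changing semantics. A typical caller might look like the following hypothetical sketch (not HotSpot source):

    // Illustrative spin-wait loop built on the SpinPause stub.
    #include <atomic>

    extern "C" int SpinPause();

    void spin_until_set(std::atomic<int>& flag) {
      while (flag.load(std::memory_order_acquire) == 0) {
        SpinPause();  // this port always returns 1; the value is unused here
      }
    }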

        # Support for void Copy::arrayof_conjoint_bytes(void* from,
        #                                               void* to,
        #                                               size_t count)
        # rdi - from
        # rsi - to
        # rdx - count, treated as ssize_t
        #
        .p2align 4,,15
        ELF_TYPE(_Copy_arrayof_conjoint_bytes,@function)
SYMBOL(_Copy_arrayof_conjoint_bytes):
        movq     %rdx,%r8               # byte count
        shrq     $3,%rdx                # qword count
        cmpq     %rdi,%rsi
        leaq     -1(%rdi,%r8,1),%rax    # from + bcount*1 - 1
        jbe      acb_CopyRight
        cmpq     %rax,%rsi
        jbe      acb_CopyLeft
acb_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax   # from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx   # to + qcount*8 - 8
        negq     %rdx
        jmp      7f
        .p2align 4,,15
1:      movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
2:      testq    $4,%r8                 # check for trailing dword
        jz       3f
        movl     8(%rax),%esi           # copy trailing dword
        movl     %esi,8(%rcx)
        addq     $4,%rax
        addq     $4,%rcx                # original %rsi is trashed, so we
                                        #  can't use it as a base register
3:      testq    $2,%r8                 # check for trailing word
        jz       4f
        movw     8(%rax),%si            # copy trailing word
        movw     %si,8(%rcx)
        addq     $2,%rcx
4:      testq    $1,%r8                 # check for trailing byte
        jz       5f
        movb     -1(%rdi,%r8,1),%al     # copy trailing byte
        movb     %al,8(%rcx)
5:      ret
        .p2align 4,,15
6:      movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
7:      addq     $4,%rdx
        jle      6b
        subq     $4,%rdx
        jl       1b
        jmp      2b
acb_CopyLeft:
        testq    $1,%r8                 # check for trailing byte
        jz       1f
        movb     -1(%rdi,%r8,1),%cl     # copy trailing byte
        movb     %cl,-1(%rsi,%r8,1)
        subq     $1,%r8                 # adjust for possible trailing word
1:      testq    $2,%r8                 # check for trailing word
        jz       2f
        movw     -2(%rdi,%r8,1),%cx     # copy trailing word
        movw     %cx,-2(%rsi,%r8,1)
2:      testq    $4,%r8                 # check for trailing dword
        jz       5f
        movl     (%rdi,%rdx,8),%ecx     # copy trailing dword
        movl     %ecx,(%rsi,%rdx,8)
        jmp      5f
        .p2align 4,,15
3:      movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      3b
        ret
        .p2align 4,,15
4:      movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
5:      subq     $4,%rdx
        jge      4b
        addq     $4,%rdx
        jg       3b
        ret
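
The entry test above picks the copy direction for overlapping ("conjoint") ranges: copy forward (acb_CopyRight) when `to <= from` or when `to` starts past the last source byte, and backward (acb_CopyLeft) when `to` lands inside [from, from+count), so overlapping source bytes are read before they are overwritten. A rough C++ equivalent of just that dispatch, ignoring the 8-byte blocking and trailing-fragment handling:

    // Sketch of the direction dispatch only (illustration, not HotSpot code).
    #include <cstddef>

    void conjoint_bytes_sketch(const char* from, char* to, size_t count) {
      if (count == 0) return;
      if (to <= from || to > from + (count - 1)) {
        for (size_t i = 0; i < count; i++)   // acb_CopyRight: ascending
          to[i] = from[i];
      } else {
        for (size_t i = count; i > 0; i--)   // acb_CopyLeft: descending
          to[i - 1] = from[i - 1];
      }
    }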

        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
        #                                                 void* to,
        #                                                 size_t count)
        # Equivalent to
        #   conjoint_jshorts_atomic
        #
        # If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
        # let the hardware handle it.  The two or four words within dwords
        # or qwords that span cache line boundaries will still be loaded
        # and stored atomically.
        #
        # rdi - from
        # rsi - to
        # rdx - count, treated as ssize_t
        #
        .p2align 4,,15
        ELF_TYPE(_Copy_arrayof_conjoint_jshorts,@function)
        ELF_TYPE(_Copy_conjoint_jshorts_atomic,@function)
SYMBOL(_Copy_arrayof_conjoint_jshorts):
SYMBOL(_Copy_conjoint_jshorts_atomic):
        movq     %rdx,%r8               # word count
        shrq     $2,%rdx                # qword count
        cmpq     %rdi,%rsi
        leaq     -2(%rdi,%r8,2),%rax    # from + wcount*2 - 2
        jbe      acs_CopyRight
        cmpq     %rax,%rsi
        jbe      acs_CopyLeft
acs_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax   # from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx   # to + qcount*8 - 8
        negq     %rdx
        jmp      6f
1:      movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
2:      testq    $2,%r8                 # check for trailing dword
        jz       3f
        movl     8(%rax),%esi           # copy trailing dword
        movl     %esi,8(%rcx)
        addq     $4,%rcx                # original %rsi is trashed, so we
                                        #  can't use it as a base register
3:      testq    $1,%r8                 # check for trailing word
        jz       4f
        movw     -2(%rdi,%r8,2),%si     # copy trailing word
        movw     %si,8(%rcx)
4:      ret
        .p2align 4,,15
5:      movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
6:      addq     $4,%rdx
        jle      5b
        subq     $4,%rdx
        jl       1b
        jmp      2b
acs_CopyLeft:
        testq    $1,%r8                 # check for trailing word
        jz       1f
        movw     -2(%rdi,%r8,2),%cx     # copy trailing word
        movw     %cx,-2(%rsi,%r8,2)
1:      testq    $2,%r8                 # check for trailing dword
        jz       4f
        movl     (%rdi,%rdx,8),%ecx     # copy trailing dword
        movl     %ecx,(%rsi,%rdx,8)
        jmp      4f
2:      movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      2b
        ret
        .p2align 4,,15
3:      movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
4:      subq     $4,%rdx
        jge      3b
        addq     $4,%rdx
        jg       2b
        ret
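
The main loops move four qwords per iteration, and the low bits of the original element count (saved in %r8) finish the job: for jshorts, bit 1 selects a trailing dword (two words) and bit 0 a trailing word, each moved with a single naturally atomic load/store. A small illustrative sketch of that decomposition (not HotSpot code):

    // How a jshort count splits into qwords plus trailing fragments.
    #include <cstddef>
    #include <cstdio>

    void describe_jshort_copy(size_t word_count) {
      size_t qwords         = word_count >> 2;        // shrq $2,%rdx
      bool   trailing_dword = (word_count & 2) != 0;  // testq $2,%r8
      bool   trailing_word  = (word_count & 1) != 0;  // testq $1,%r8
      std::printf("%zu qword(s)%s%s\n", qwords,
                  trailing_dword ? " + dword" : "",
                  trailing_word  ? " + word"  : "");
    }
    // describe_jshort_copy(7) prints "1 qword(s) + dword + word":
    // 7 jshorts = 14 bytes = 8 + 4 + 2.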

        # Support for void Copy::arrayof_conjoint_jints(jint* from,
        #                                               jint* to,
        #                                               size_t count)
        # Equivalent to
        #   conjoint_jints_atomic
        #
        # If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
        # the hardware handle it.  The two dwords within qwords that span
        # cache line boundaries will still be loaded and stored atomically.
        #
        # rdi - from
        # rsi - to
        # rdx - count, treated as ssize_t
        #
        .p2align 4,,15
        ELF_TYPE(_Copy_arrayof_conjoint_jints,@function)
        ELF_TYPE(_Copy_conjoint_jints_atomic,@function)
SYMBOL(_Copy_arrayof_conjoint_jints):
SYMBOL(_Copy_conjoint_jints_atomic):
        movq     %rdx,%r8               # dword count
        shrq     %rdx                   # qword count
        cmpq     %rdi,%rsi
        leaq     -4(%rdi,%r8,4),%rax    # from + dcount*4 - 4
        jbe      aci_CopyRight
        cmpq     %rax,%rsi
        jbe      aci_CopyLeft
aci_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax   # from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx   # to + qcount*8 - 8
        negq     %rdx
        jmp      5f
        .p2align 4,,15
1:      movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
2:      testq    $1,%r8                 # check for trailing dword
        jz       3f
        movl     8(%rax),%esi           # copy trailing dword
        movl     %esi,8(%rcx)
3:      ret
        .p2align 4,,15
4:      movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
5:      addq     $4,%rdx
        jle      4b
        subq     $4,%rdx
        jl       1b
        jmp      2b
aci_CopyLeft:
        testq    $1,%r8                 # check for trailing dword
        jz       3f
        movl     -4(%rdi,%r8,4),%ecx    # copy trailing dword
        movl     %ecx,-4(%rsi,%r8,4)
        jmp      3f
1:      movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      1b
        ret
        .p2align 4,,15
2:      movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
3:      subq     $4,%rdx
        jge      2b
        addq     $4,%rdx
        jg       1b
        ret

        # Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
        #                                                jlong* to,
        #                                                size_t count)
        # Equivalent to
        #   conjoint_jlongs_atomic
        #   arrayof_conjoint_oops
        #   conjoint_oops_atomic
        #
        # rdi - from
        # rsi - to
        # rdx - count, treated as ssize_t
        #
        .p2align 4,,15
        ELF_TYPE(_Copy_arrayof_conjoint_jlongs,@function)
        ELF_TYPE(_Copy_conjoint_jlongs_atomic,@function)
SYMBOL(_Copy_arrayof_conjoint_jlongs):
SYMBOL(_Copy_conjoint_jlongs_atomic):
        cmpq     %rdi,%rsi
        leaq     -8(%rdi,%rdx,8),%rax   # from + count*8 - 8
        jbe      acl_CopyRight
        cmpq     %rax,%rsi
        jbe      acl_CopyLeft
acl_CopyRight:
        leaq     -8(%rsi,%rdx,8),%rcx   # to + count*8 - 8
        negq     %rdx
        jmp      3f
1:      movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
        ret
        .p2align 4,,15
2:      movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
3:      addq     $4,%rdx
        jle      2b
        subq     $4,%rdx
        jl       1b
        ret
4:      movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      4b
        ret
        .p2align 4,,15
5:      movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
acl_CopyLeft:
        subq     $4,%rdx
        jge      5b
        addq     $4,%rdx
        jg       4b
        ret
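
jlongs (and uncompressed oops) are already qword-sized, so every movq above is naturally atomic on an aligned address and no trailing-fragment code is needed, which is why this stub is the shortest of the four. A hypothetical overlap test (not HotSpot source; the declaration follows the documented register protocol, with long standing in for jlong on LP64):

    // Shifting an array one slot to the right overlaps source and
    // destination, forcing the backward (acl_CopyLeft) path.
    #include <cstddef>
    #include <cstdio>

    extern "C" void _Copy_conjoint_jlongs_atomic(long* from, long* to,
                                                 size_t count);

    int main() {
      long a[5] = {10, 20, 30, 40, 0};
      _Copy_conjoint_jlongs_atomic(a, a + 1, 4);  // to > from: copy backward
      for (long v : a) std::printf("%ld ", v);    // expected: 10 10 20 30 40
      std::printf("\n");
      return 0;
    }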