0
|
1 #
|
|
2 # Copyright 2004-2007 Sun Microsystems, Inc. All Rights Reserved.
|
|
3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
4 #
|
|
5 # This code is free software; you can redistribute it and/or modify it
|
|
6 # under the terms of the GNU General Public License version 2 only, as
|
|
7 # published by the Free Software Foundation.
|
|
8 #
|
|
9 # This code is distributed in the hope that it will be useful, but WITHOUT
|
|
10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
11 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
12 # version 2 for more details (a copy is included in the LICENSE file that
|
|
13 # accompanied this code).
|
|
14 #
|
|
15 # You should have received a copy of the GNU General Public License version
|
|
16 # 2 along with this work; if not, write to the Free Software Foundation,
|
|
17 # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
18 #
|
|
19 # Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
|
|
20 # CA 95054 USA or visit www.sun.com if you need additional information or
|
|
21 # have any questions.
|
|
22 #
|
|
23
|
|
24
|
|
25 # NOTE WELL! The _Copy functions are called directly
|
|
26 # from server-compiler-generated code via CallLeafNoFP,
|
|
27 # which means that they *must* either not use floating
|
|
28 # point or use it in the same manner as does the server
|
|
29 # compiler.
|
|
30
|
|
# Exported entry points of the array-copy routines defined in this file.
# The jshorts, jints and jlongs routines are each exported under two names
# that label the same code.
        .globl  _Copy_arrayof_conjoint_bytes
        .globl  _Copy_arrayof_conjoint_jshorts, _Copy_conjoint_jshorts_atomic
        .globl  _Copy_arrayof_conjoint_jints, _Copy_conjoint_jints_atomic
        .globl  _Copy_arrayof_conjoint_jlongs, _Copy_conjoint_jlongs_atomic

        .text
|
|
40
|
|
# int SafeFetch32(int *Adr, int ErrValue)
#
#   rdi - Adr
#   esi - ErrValue
#   eax - result
#
# ErrValue is placed in %eax first; the load at Fetch32PFI then replaces
# it with *Adr. Fetch32PFI labels the only instruction that dereferences
# Adr and Fetch32Resume labels the instruction immediately after it; both
# labels are exported.
# NOTE(review): presumably a fault handler elsewhere recognises a fault at
# Fetch32PFI and continues at Fetch32Resume, leaving ErrValue in %eax;
# confirm against that handler before moving either label.
        .globl  SafeFetch32
        .globl  Fetch32PFI
        .globl  Fetch32Resume
        .p2align 4
        .type   SafeFetch32,@function
SafeFetch32:
        movl    %esi, %eax              # result = ErrValue
Fetch32PFI:
        movl    (%rdi), %eax            # result = *Adr
Fetch32Resume:
        ret
|
|
51
|
|
# intptr_t SafeFetchN(intptr_t *Adr, intptr_t ErrValue)
#
#   rdi - Adr
#   rsi - ErrValue
#   rax - result
#
# Pointer-sized counterpart of SafeFetch32: ErrValue is placed in %rax
# first and the load at FetchNPFI then replaces it with *Adr. FetchNPFI
# labels the only instruction that dereferences Adr and FetchNResume
# labels the instruction immediately after it; both labels are exported.
# NOTE(review): presumably a fault handler elsewhere recognises a fault at
# FetchNPFI and continues at FetchNResume, leaving ErrValue in %rax;
# confirm against that handler before moving either label.
        .globl  SafeFetchN
        .globl  FetchNPFI
        .globl  FetchNResume
        .p2align 4
        .type   SafeFetchN,@function
SafeFetchN:
        movq    %rsi, %rax              # result = ErrValue
FetchNPFI:
        movq    (%rdi), %rax            # result = *Adr
FetchNResume:
        ret
|
|
62
|
|
# SpinPause
#
# Takes no arguments. Executes one PAUSE instruction (the same F3 90
# encoding as "rep; nop") and returns with %rax equal to 1.
        .globl  SpinPause
        .p2align 4
        .type   SpinPause,@function
SpinPause:
        pause
        movl    $1, %eax                # the 32-bit write zero-extends, so rax = 1
        ret
|
|
71
|
|
# Support for void Copy::arrayof_conjoint_bytes(void* from,
#                                               void* to,
#                                               size_t count)
#
# Copies count bytes from 'from' to 'to'; the two ranges may overlap.
#
#   rdi - from
#   rsi - to
#   rdx - count, treated as ssize_t
#
# Clobbers rax, rcx, rdx, rsi, r8 and the flags.
#
# Most of the data moves as qwords: four per iteration while at least
# four remain, then one at a time. The remaining (count & 7) bytes move
# as at most one dword, one word and one byte.
#
# Direction: when to <= from, or 'to' lies beyond the last source byte,
# the copy runs from low to high addresses (acb_CopyRight). Otherwise
# 'to' falls inside the source range and the copy runs from high to low
# addresses (acb_CopyLeft), so each source byte is read before it is
# overwritten.
#
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_bytes,@function
_Copy_arrayof_conjoint_bytes:
        movq     %rdx,%r8                 # r8  = byte count
        leaq     -1(%rdi,%r8,1),%rax      # rax = from + bcount - 1 (last source byte)
        shrq     $3,%rdx                  # rdx = qword count
        cmpq     %rdi,%rsi
        jbe      acb_CopyRight            # to <= from
        cmpq     %rax,%rsi
        jbe      acb_CopyLeft             # from < to <= last source byte

        # Low-to-high copy. rax and rcx point at the last whole qword of
        # the source and destination; rdx is the negated qword count and
        # climbs towards zero.
acb_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax     # rax = from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx     # rcx = to + qcount*8 - 8
        negq     %rdx
        jmp      .Lacb_fwd_dispatch

        # One qword per iteration; entered with -3 <= rdx <= -1.
        .p2align 4,,15
.Lacb_fwd_qword:
        movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      .Lacb_fwd_qword

        # Tail. 8(%rax) and 8(%rcx) address the first byte after the
        # copied qwords. %rsi doubles as the copy temporary, so the
        # destination is tracked in %rcx rather than in %rsi.
.Lacb_fwd_dword:
        testq    $4,%r8                   # trailing dword?
        jz       .Lacb_fwd_word
        movl     8(%rax),%esi
        movl     %esi,8(%rcx)
        addq     $4,%rax
        addq     $4,%rcx
.Lacb_fwd_word:
        testq    $2,%r8                   # trailing word?
        jz       .Lacb_fwd_byte
        movw     8(%rax),%si
        movw     %si,8(%rcx)
        addq     $2,%rcx
.Lacb_fwd_byte:
        testq    $1,%r8                   # trailing byte?
        jz       .Lacb_fwd_done
        movb     -1(%rdi,%r8,1),%al       # last source byte, addressed via from/count
        movb     %al,8(%rcx)
.Lacb_fwd_done:
        ret

        # Four qwords per iteration. rdx has already been advanced by 4,
        # hence the negative displacements.
        .p2align 4,,15
.Lacb_fwd_unroll4:
        movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
.Lacb_fwd_dispatch:
        addq     $4,%rdx
        jle      .Lacb_fwd_unroll4        # at least four qwords remain
        subq     $4,%rdx                  # undo the speculative advance
        jl       .Lacb_fwd_qword          # one to three qwords remain
        jmp      .Lacb_fwd_dword          # no qwords remain

        # High-to-low copy. The tail at the top of the range is moved
        # first, then the qwords in descending order; rdx is the number
        # of qwords still to copy.
acb_CopyLeft:
        testq    $1,%r8                   # trailing byte?
        jz       .Lacb_bwd_word
        movb     -1(%rdi,%r8,1),%cl
        movb     %cl,-1(%rsi,%r8,1)
        subq     $1,%r8                   # a trailing word now ends at the new count
.Lacb_bwd_word:
        testq    $2,%r8                   # trailing word?
        jz       .Lacb_bwd_dword
        movw     -2(%rdi,%r8,1),%cx
        movw     %cx,-2(%rsi,%r8,1)
.Lacb_bwd_dword:
        testq    $4,%r8                   # trailing dword?
        jz       .Lacb_bwd_dispatch
        movl     (%rdi,%rdx,8),%ecx       # it sits directly after the whole qwords
        movl     %ecx,(%rsi,%rdx,8)
        jmp      .Lacb_bwd_dispatch

        # One qword per iteration; entered with 1 <= rdx <= 3.
        .p2align 4,,15
.Lacb_bwd_qword:
        movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      .Lacb_bwd_qword
        ret

        # Four qwords per iteration. rdx has already been reduced by 4,
        # hence the positive displacements.
        .p2align 4,,15
.Lacb_bwd_unroll4:
        movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
.Lacb_bwd_dispatch:
        subq     $4,%rdx
        jge      .Lacb_bwd_unroll4        # at least four qwords remain
        addq     $4,%rdx                  # undo the speculative reduction
        jg       .Lacb_bwd_qword          # one to three qwords remain
        ret
|
|
165
|
|
# Support for void Copy::arrayof_conjoint_jshorts(void* from,
#                                                 void* to,
#                                                 size_t count)
# Equivalent to
#   conjoint_jshorts_atomic
#
# Copies count 16-bit words from 'from' to 'to'; the two ranges may
# overlap. Both exported names label the same code.
#
# If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
# let the hardware handle it.  The two or four words within dwords
# or qwords that span cache line boundaries will still be loaded
# and stored atomically.
#
#   rdi - from
#   rsi - to
#   rdx - count, treated as ssize_t
#
# Clobbers rax, rcx, rdx, rsi, r8 and the flags.
#
# Most of the data moves as qwords: four per iteration while at least
# four remain, then one at a time. The remaining (count & 3) words move
# as at most one dword and one word.
#
# Direction: when to <= from, or 'to' lies beyond the last source word,
# the copy runs from low to high addresses (acs_CopyRight). Otherwise
# the copy runs from high to low addresses (acs_CopyLeft).
#
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_jshorts,@function
        .type    _Copy_conjoint_jshorts_atomic,@function
_Copy_arrayof_conjoint_jshorts:
_Copy_conjoint_jshorts_atomic:
        movq     %rdx,%r8                 # r8  = word count
        leaq     -2(%rdi,%r8,2),%rax      # rax = from + wcount*2 - 2 (last source word)
        shrq     $2,%rdx                  # rdx = qword count
        cmpq     %rdi,%rsi
        jbe      acs_CopyRight            # to <= from
        cmpq     %rax,%rsi
        jbe      acs_CopyLeft             # from < to <= last source word

        # Low-to-high copy. rax and rcx point at the last whole qword of
        # the source and destination; rdx is the negated qword count and
        # climbs towards zero.
acs_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax     # rax = from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx     # rcx = to + qcount*8 - 8
        negq     %rdx
        jmp      .Lacs_fwd_dispatch

        # One qword per iteration; entered with -3 <= rdx <= -1.
.Lacs_fwd_qword:
        movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      .Lacs_fwd_qword

        # Tail. %rsi doubles as the copy temporary, so the destination
        # is tracked in %rcx rather than in %rsi.
.Lacs_fwd_dword:
        testq    $2,%r8                   # trailing dword?
        jz       .Lacs_fwd_word
        movl     8(%rax),%esi
        movl     %esi,8(%rcx)
        addq     $4,%rcx
.Lacs_fwd_word:
        testq    $1,%r8                   # trailing word?
        jz       .Lacs_fwd_done
        movw     -2(%rdi,%r8,2),%si       # last source word, addressed via from/count
        movw     %si,8(%rcx)
.Lacs_fwd_done:
        ret

        # Four qwords per iteration. rdx has already been advanced by 4,
        # hence the negative displacements.
        .p2align 4,,15
.Lacs_fwd_unroll4:
        movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
.Lacs_fwd_dispatch:
        addq     $4,%rdx
        jle      .Lacs_fwd_unroll4        # at least four qwords remain
        subq     $4,%rdx                  # undo the speculative advance
        jl       .Lacs_fwd_qword          # one to three qwords remain
        jmp      .Lacs_fwd_dword          # no qwords remain

        # High-to-low copy. The tail at the top of the range is moved
        # first, then the qwords in descending order; rdx is the number
        # of qwords still to copy.
acs_CopyLeft:
        testq    $1,%r8                   # trailing word?
        jz       .Lacs_bwd_dword
        movw     -2(%rdi,%r8,2),%cx
        movw     %cx,-2(%rsi,%r8,2)
.Lacs_bwd_dword:
        testq    $2,%r8                   # trailing dword?
        jz       .Lacs_bwd_dispatch
        movl     (%rdi,%rdx,8),%ecx       # it sits directly after the whole qwords
        movl     %ecx,(%rsi,%rdx,8)
        jmp      .Lacs_bwd_dispatch

        # One qword per iteration; entered with 1 <= rdx <= 3.
.Lacs_bwd_qword:
        movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      .Lacs_bwd_qword
        ret

        # Four qwords per iteration. rdx has already been reduced by 4,
        # hence the positive displacements.
        .p2align 4,,15
.Lacs_bwd_unroll4:
        movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
.Lacs_bwd_dispatch:
        subq     $4,%rdx
        jge      .Lacs_bwd_unroll4        # at least four qwords remain
        addq     $4,%rdx                  # undo the speculative reduction
        jg       .Lacs_bwd_qword          # one to three qwords remain
        ret
|
|
256
|
|
# Support for void Copy::arrayof_conjoint_jints(jint* from,
#                                               jint* to,
#                                               size_t count)
# Equivalent to
#   conjoint_jints_atomic
#
# Copies count dwords from 'from' to 'to'; the two ranges may overlap.
# Both exported names label the same code.
#
# If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
# the hardware handle it.  The two dwords within qwords that span
# cache line boundaries will still be loaded and stored atomically.
#
#   rdi - from
#   rsi - to
#   rdx - count, treated as ssize_t
#
# Clobbers rax, rcx, rdx, rsi, r8 and the flags.
#
# Most of the data moves as qwords: four per iteration while at least
# four remain, then one at a time. An odd count leaves one trailing
# dword.
#
# Direction: when to <= from, or 'to' lies beyond the last source dword,
# the copy runs from low to high addresses (aci_CopyRight). Otherwise
# the copy runs from high to low addresses (aci_CopyLeft).
#
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_jints,@function
        .type    _Copy_conjoint_jints_atomic,@function
_Copy_arrayof_conjoint_jints:
_Copy_conjoint_jints_atomic:
        movq     %rdx,%r8                 # r8  = dword count
        leaq     -4(%rdi,%r8,4),%rax      # rax = from + dcount*4 - 4 (last source dword)
        shrq     $1,%rdx                  # rdx = qword count
        cmpq     %rdi,%rsi
        jbe      aci_CopyRight            # to <= from
        cmpq     %rax,%rsi
        jbe      aci_CopyLeft             # from < to <= last source dword

        # Low-to-high copy. rax and rcx point at the last whole qword of
        # the source and destination; rdx is the negated qword count and
        # climbs towards zero.
aci_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax     # rax = from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx     # rcx = to + qcount*8 - 8
        negq     %rdx
        jmp      .Laci_fwd_dispatch

        # One qword per iteration; entered with -3 <= rdx <= -1.
        .p2align 4,,15
.Laci_fwd_qword:
        movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      .Laci_fwd_qword
.Laci_fwd_dword:
        testq    $1,%r8                   # trailing dword?
        jz       .Laci_fwd_done
        movl     8(%rax),%esi             # it sits directly after the whole qwords
        movl     %esi,8(%rcx)
.Laci_fwd_done:
        ret

        # Four qwords per iteration. rdx has already been advanced by 4,
        # hence the negative displacements.
        .p2align 4,,15
.Laci_fwd_unroll4:
        movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
.Laci_fwd_dispatch:
        addq     $4,%rdx
        jle      .Laci_fwd_unroll4        # at least four qwords remain
        subq     $4,%rdx                  # undo the speculative advance
        jl       .Laci_fwd_qword          # one to three qwords remain
        jmp      .Laci_fwd_dword          # no qwords remain

        # High-to-low copy. The trailing dword at the top of the range
        # is moved first, then the qwords in descending order; rdx is
        # the number of qwords still to copy.
aci_CopyLeft:
        testq    $1,%r8                   # trailing dword?
        jz       .Laci_bwd_dispatch
        movl     -4(%rdi,%r8,4),%ecx
        movl     %ecx,-4(%rsi,%r8,4)
        jmp      .Laci_bwd_dispatch

        # One qword per iteration; entered with 1 <= rdx <= 3.
.Laci_bwd_qword:
        movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      .Laci_bwd_qword
        ret

        # Four qwords per iteration. rdx has already been reduced by 4,
        # hence the positive displacements.
        .p2align 4,,15
.Laci_bwd_unroll4:
        movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
.Laci_bwd_dispatch:
        subq     $4,%rdx
        jge      .Laci_bwd_unroll4        # at least four qwords remain
        addq     $4,%rdx                  # undo the speculative reduction
        jg       .Laci_bwd_qword          # one to three qwords remain
        ret
|
|
337
|
|
# Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
#                                                jlong* to,
#                                                size_t count)
# Equivalent to
#   conjoint_jlongs_atomic
#   arrayof_conjoint_oops
#   conjoint_oops_atomic
#
# Copies count qwords from 'from' to 'to'; the two ranges may overlap.
# Both exported names label the same code.
#
#   rdi - from
#   rsi - to
#   rdx - count, treated as ssize_t
#
# Clobbers rax, rcx, rdx, rsi and the flags.
#
# Four qwords move per iteration while at least four remain, then one
# at a time.
#
# Direction: when to <= from, or 'to' lies beyond the last source qword,
# the copy runs from low to high addresses (acl_CopyRight). Otherwise
# the copy runs from high to low addresses (acl_CopyLeft).
#
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_jlongs,@function
        .type    _Copy_conjoint_jlongs_atomic,@function
_Copy_arrayof_conjoint_jlongs:
_Copy_conjoint_jlongs_atomic:
        leaq     -8(%rdi,%rdx,8),%rax     # rax = from + count*8 - 8 (last source qword)
        cmpq     %rdi,%rsi
        jbe      acl_CopyRight            # to <= from
        cmpq     %rax,%rsi
        jbe      acl_CopyLeft             # from < to <= last source qword

        # Low-to-high copy. rax (set above) and rcx point at the last
        # qword of the source and destination; rdx is the negated count
        # and climbs towards zero.
acl_CopyRight:
        leaq     -8(%rsi,%rdx,8),%rcx     # rcx = to + count*8 - 8
        negq     %rdx
        jmp      .Lacl_fwd_dispatch

        # One qword per iteration; entered with -3 <= rdx <= -1.
.Lacl_fwd_qword:
        movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      .Lacl_fwd_qword
        ret

        # Four qwords per iteration. rdx has already been advanced by 4,
        # hence the negative displacements.
        .p2align 4,,15
.Lacl_fwd_unroll4:
        movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
.Lacl_fwd_dispatch:
        addq     $4,%rdx
        jle      .Lacl_fwd_unroll4        # at least four qwords remain
        subq     $4,%rdx                  # undo the speculative advance
        jl       .Lacl_fwd_qword          # one to three qwords remain
        ret

        # High-to-low copy, one qword per iteration; entered with
        # 1 <= rdx <= 3. rdx is the number of qwords still to copy.
.Lacl_bwd_qword:
        movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      .Lacl_bwd_qword
        ret

        # High-to-low copy, four qwords per iteration. rdx has already
        # been reduced by 4, hence the positive displacements.
        .p2align 4,,15
.Lacl_bwd_unroll4:
        movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
        # Entry point of the high-to-low copy; it doubles as the loop
        # dispatch.
acl_CopyLeft:
        subq     $4,%rdx
        jge      .Lacl_bwd_unroll4        # at least four qwords remain
        addq     $4,%rdx                  # undo the speculative reduction
        jg       .Lacl_bwd_qword          # one to three qwords remain
        ret
|