Mercurial > hg > truffle
comparison src/os_cpu/bsd_x86/vm/bsd_x86_32.s @ 3960:f08d439fab8c
7089790: integrate bsd-port changes
Reviewed-by: kvn, twisti, jrose
Contributed-by: Kurt Miller <kurt@intricatesoftware.com>, Greg Lewis <glewis@eyesbeyond.com>, Jung-uk Kim <jkim@freebsd.org>, Christos Zoulas <christos@zoulas.com>, Landon Fuller <landonf@plausible.coop>, The FreeBSD Foundation <board@freebsdfoundation.org>, Michael Franz <mvfranz@gmail.com>, Roger Hoover <rhoover@apple.com>, Alexander Strange <astrange@apple.com>
author | never |
---|---|
date | Sun, 25 Sep 2011 16:03:29 -0700 |
parents | |
children | 436b4a3231bf |
comparison
equal
deleted
inserted
replaced
3959:eda6988c0d81 | 3960:f08d439fab8c |
---|---|
1 # | |
2 # Copyright (c) 2004, 2011, Oracle and/or its affiliates. All rights reserved. | |
3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
4 # | |
5 # This code is free software; you can redistribute it and/or modify it | |
6 # under the terms of the GNU General Public License version 2 only, as | |
7 # published by the Free Software Foundation. | |
8 # | |
9 # This code is distributed in the hope that it will be useful, but WITHOUT | |
10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 # version 2 for more details (a copy is included in the LICENSE file that | |
13 # accompanied this code). | |
14 # | |
15 # You should have received a copy of the GNU General Public License version | |
16 # 2 along with this work; if not, write to the Free Software Foundation, | |
17 # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 # | |
19 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | |
20 # or visit www.oracle.com if you need additional information or have any | |
21 # questions. | |
22 # | |
23 | |
24 | |
# Spelling of exported symbols and of the ELF type annotation, per platform. | |
25 #ifdef __APPLE__ | |
26 # Darwin uses _ prefixed global symbols | |
27 #define SYMBOL(s) _ ## s | |
# ELF_TYPE is defined empty in this branch, so ELF_TYPE(...) lines emit nothing. | |
28 #define ELF_TYPE(name, description) | |
29 #else | |
# Other targets use the name unchanged and emit a .type directive for it. | |
30 #define SYMBOL(s) s | |
31 #define ELF_TYPE(name, description) .type name,description | |
32 #endif | |
33 | |
34 .globl SYMBOL(fixcw) | |
35 | |
36 # NOTE WELL! The _Copy functions are called directly | |
37 # from server-compiler-generated code via CallLeafNoFP, | |
38 # which means that they *must* either not use floating | |
39 # point or use it in the same manner as does the server | |
40 # compiler. | |
# NOTE(review): _Copy_conjoint_jlongs_atomic in this file moves data with the | |
# x87 fildll/fistpll pair; confirm that this satisfies the rule above. | |
41 | |
42 .globl SYMBOL(_Copy_conjoint_bytes) | |
43 .globl SYMBOL(_Copy_arrayof_conjoint_bytes) | |
44 .globl SYMBOL(_Copy_conjoint_jshorts_atomic) | |
45 .globl SYMBOL(_Copy_arrayof_conjoint_jshorts) | |
46 .globl SYMBOL(_Copy_conjoint_jints_atomic) | |
47 .globl SYMBOL(_Copy_arrayof_conjoint_jints) | |
48 .globl SYMBOL(_Copy_conjoint_jlongs_atomic) | |
49 .globl SYMBOL(_mmx_Copy_arrayof_conjoint_jshorts) | |
50 | |
# Helpers for 64-bit compare-exchange and 64-bit move. | |
51 .globl SYMBOL(_Atomic_cmpxchg_long) | |
52 .globl SYMBOL(_Atomic_move_long) | |
53 | |
# Everything that follows is assembled into the text section. | |
54 .text | |
55 | |
56 # Support for void os::Solaris::init_thread_fpu_state() in os_solaris_i486.cpp | |
# NOTE(review): the line above is inherited from the Solaris port; this is the | |
# BSD file, so the real caller is presumably the BSD counterpart - confirm. | |
57 # Set fpu to 53 bit precision. This happens too early to use a stub. | |
58 # ported from solaris_x86_32.s | |
# | |
# Reads no stack arguments. Loads the constant 0x27f into the x87 control | |
# word and returns. Clobbers %eax, which only receives the discarded temporary. | |
59 .p2align 4,,15 | |
60 SYMBOL(fixcw): | |
# fldcw takes its operand from memory, so the constant goes through a stack slot | |
61 pushl $0x27f | |
62 fldcw 0(%esp) | |
# pop the temporary slot again; the value placed in %eax is not a result | |
63 popl %eax | |
64 ret | |
65 | |
66 .globl SYMBOL(SafeFetch32), SYMBOL(Fetch32PFI), SYMBOL(Fetch32Resume) | |
67 .globl SYMBOL(SafeFetchN) | |
68 ## TODO: avoid exposing Fetch32PFI and Fetch32Resume. | |
69 ## Instead, the signal handler would call a new SafeFetchTriage(FaultingEIP) | |
70 ## routine to vet the address. If the address is the faulting LD then | |
71 ## SafeFetchTriage() would return the resume-at EIP, otherwise null. | |
# | |
# SafeFetch32 and SafeFetchN are two names for the same entry point. | |
# Stack arguments: 4(%esp) = address to load from, 8(%esp) = fallback value. | |
# %eax is preloaded with the fallback so that it is the return value whenever | |
# control reaches Fetch32Resume without the load at Fetch32PFI having completed. | |
# %ecx is used as scratch for the address. | |
# NOTE(review): the handler that redirects a fault at Fetch32PFI to | |
# Fetch32Resume is not part of this file - confirm against the signal handler. | |
72 ELF_TYPE(SafeFetch32,@function) | |
73 .p2align 4,,15 | |
74 SYMBOL(SafeFetch32): | |
75 SYMBOL(SafeFetchN): | |
76 movl 0x8(%esp), %eax | |
77 movl 0x4(%esp), %ecx | |
# Fetch32PFI labels the only instruction here that dereferences the address. | |
78 SYMBOL(Fetch32PFI): | |
79 movl (%ecx), %eax | |
80 SYMBOL(Fetch32Resume): | |
81 ret | |
82 | |
83 | |
# SpinPause: issues one spin-wait hint and returns 1 in %eax. | |
# Reads no arguments and touches no register other than %eax. | |
# NOTE(review): the C prototype is not shown here; an int return is assumed. | |
84 .globl SYMBOL(SpinPause) | |
85 ELF_TYPE(SpinPause,@function) | |
86 .p2align 4,,15 | |
87 SYMBOL(SpinPause): | |
# rep followed by nop is the byte encoding of the PAUSE instruction | |
88 rep | |
89 nop | |
90 movl $1, %eax | |
91 ret | |
92 | |
93 # Support for void Copy::conjoint_bytes(void* from, | |
94 # void* to, | |
95 # size_t count) | |
# | |
# Stack arguments on entry: 4(%esp) = from, 8(%esp) = to, 12(%esp) = count in bytes. | |
# %esi and %edi are saved and restored; %eax, %ecx and %edx are scratch. | |
# The copy runs upwards (cb_CopyRight) when to <= from or to > from + count - 1, | |
# compared as unsigned values; otherwise the regions overlap with to above from | |
# and the copy runs downwards (cb_CopyLeft) with the direction flag set, which | |
# gets cleared again before returning. | |
96 .p2align 4,,15 | |
97 ELF_TYPE(_Copy_conjoint_bytes,@function) | |
98 SYMBOL(_Copy_conjoint_bytes): | |
99 pushl %esi | |
# offsets below are written as (bytes pushed so far)+(offset of the argument on entry) | |
100 movl 4+12(%esp),%ecx # count | |
101 pushl %edi | |
102 movl 8+ 4(%esp),%esi # from | |
103 movl 8+ 8(%esp),%edi # to | |
104 cmpl %esi,%edi | |
# leal leaves the flags of the cmpl above intact for the jbe that follows | |
105 leal -1(%esi,%ecx),%eax # from + count - 1 | |
106 jbe cb_CopyRight | |
107 cmpl %eax,%edi | |
108 jbe cb_CopyLeft | |
109 # copy from low to high | |
110 cb_CopyRight: | |
111 cmpl $3,%ecx | |
112 jbe 5f # <= 3 bytes | |
113 # align source address at dword address boundary | |
114 movl %ecx,%eax # original count | |
115 movl $4,%ecx | |
116 subl %esi,%ecx | |
117 andl $3,%ecx # prefix byte count | |
118 jz 1f # no prefix | |
119 subl %ecx,%eax # byte count less prefix | |
120 # copy prefix | |
# %edi becomes (to - from) so that (%edi,%esi,1) addresses the destination | |
# while only %esi is advanced; the addl after the loop turns it back into a pointer | |
121 subl %esi,%edi | |
122 0: movb (%esi),%dl | |
123 movb %dl,(%edi,%esi,1) | |
124 addl $1,%esi | |
125 subl $1,%ecx | |
126 jnz 0b | |
127 addl %esi,%edi | |
128 1: movl %eax,%ecx # byte count less prefix | |
129 shrl $2,%ecx # dword count | |
130 jz 4f # no dwords to move | |
131 cmpl $32,%ecx | |
132 jbe 2f # <= 32 dwords | |
133 # copy aligned dwords | |
# more than 32 dwords: use the repeated string move | |
134 rep; smovl | |
135 jmp 4f | |
136 # copy aligned dwords | |
# same (to - from) indexing trick as in the prefix loop | |
137 2: subl %esi,%edi | |
138 .p2align 4,,15 | |
139 3: movl (%esi),%edx | |
140 movl %edx,(%edi,%esi,1) | |
141 addl $4,%esi | |
142 subl $1,%ecx | |
143 jnz 3b | |
144 addl %esi,%edi | |
145 4: movl %eax,%ecx # byte count less prefix | |
146 5: andl $3,%ecx # suffix byte count | |
147 jz 7f # no suffix | |
148 # copy suffix | |
# %eax is reused as a byte index running from 0 up to the suffix count | |
149 xorl %eax,%eax | |
150 6: movb (%esi,%eax,1),%dl | |
151 movb %dl,(%edi,%eax,1) | |
152 addl $1,%eax | |
153 subl $1,%ecx | |
154 jnz 6b | |
155 7: popl %edi | |
156 popl %esi | |
157 ret | |
158 # copy from high to low | |
159 cb_CopyLeft: | |
# with the direction flag set, the string move at 3: below steps %esi and %edi downwards | |
160 std | |
161 leal -4(%edi,%ecx),%edi # to + count - 4 | |
162 movl %eax,%esi # from + count - 1 | |
163 movl %ecx,%eax | |
164 subl $3,%esi # from + count - 4 | |
165 cmpl $3,%ecx | |
166 jbe 5f # <= 3 bytes | |
167 1: shrl $2,%ecx # dword count | |
168 jz 4f # no dwords to move | |
169 cmpl $32,%ecx | |
170 ja 3f # > 32 dwords | |
171 # copy dwords, aligned or not | |
172 subl %esi,%edi | |
173 .p2align 4,,15 | |
174 2: movl (%esi),%edx | |
175 movl %edx,(%edi,%esi,1) | |
176 subl $4,%esi | |
177 subl $1,%ecx | |
178 jnz 2b | |
179 addl %esi,%edi | |
180 jmp 4f | |
181 # copy dwords, aligned or not | |
182 3: rep; smovl | |
183 4: movl %eax,%ecx # byte count | |
184 5: andl $3,%ecx # suffix byte count | |
185 jz 7f # no suffix | |
186 # copy suffix | |
# the dword at %esi is the next one below the data copied so far, so | |
# %esi + 3 is the highest byte still to be copied; the loop walks down from there | |
187 subl %esi,%edi | |
188 addl $3,%esi | |
189 6: movb (%esi),%dl | |
190 movb %dl,(%edi,%esi,1) | |
191 subl $1,%esi | |
192 subl $1,%ecx | |
193 jnz 6b | |
# clear the direction flag that was set on entry to cb_CopyLeft | |
194 7: cld | |
195 popl %edi | |
196 popl %esi | |
197 ret | |
198 | |
199 # Support for void Copy::arrayof_conjoint_bytes(void* from, | |
200 # void* to, | |
201 # size_t count) | |
202 # | |
203 # Same as _Copy_conjoint_bytes, except no source alignment check. | |
# | |
# Stack arguments on entry: 4(%esp) = from, 8(%esp) = to, 12(%esp) = count in bytes. | |
# %esi and %edi are saved and restored; %eax, %ecx and %edx are scratch. | |
# Copies upwards (acb_CopyRight) unless from < to <= from + count - 1 (unsigned), | |
# where it copies downwards (acb_CopyLeft) with the direction flag set and | |
# clears the flag again before returning. | |
204 .p2align 4,,15 | |
205 ELF_TYPE(_Copy_arrayof_conjoint_bytes,@function) | |
206 SYMBOL(_Copy_arrayof_conjoint_bytes): | |
207 pushl %esi | |
208 movl 4+12(%esp),%ecx # count | |
209 pushl %edi | |
210 movl 8+ 4(%esp),%esi # from | |
211 movl 8+ 8(%esp),%edi # to | |
212 cmpl %esi,%edi | |
# leal leaves the flags of the cmpl above intact for the jbe that follows | |
213 leal -1(%esi,%ecx),%eax # from + count - 1 | |
214 jbe acb_CopyRight | |
215 cmpl %eax,%edi | |
216 jbe acb_CopyLeft | |
217 # copy from low to high | |
218 acb_CopyRight: | |
# three bytes or fewer: go straight to the byte loop at 5: | |
219 cmpl $3,%ecx | |
220 jbe 5f | |
# %eax keeps the byte count while %ecx becomes the dword count | |
221 1: movl %ecx,%eax | |
222 shrl $2,%ecx | |
223 jz 4f | |
224 cmpl $32,%ecx | |
225 ja 3f | |
226 # copy aligned dwords | |
# %edi becomes (to - from) so that (%edi,%esi,1) addresses the destination | |
227 subl %esi,%edi | |
228 .p2align 4,,15 | |
229 2: movl (%esi),%edx | |
230 movl %edx,(%edi,%esi,1) | |
231 addl $4,%esi | |
232 subl $1,%ecx | |
233 jnz 2b | |
234 addl %esi,%edi | |
235 jmp 4f | |
236 # copy aligned dwords | |
237 3: rep; smovl | |
# low two bits of the byte count = bytes left after the dword copy | |
238 4: movl %eax,%ecx | |
239 5: andl $3,%ecx | |
240 jz 7f | |
241 # copy suffix | |
242 xorl %eax,%eax | |
243 6: movb (%esi,%eax,1),%dl | |
244 movb %dl,(%edi,%eax,1) | |
245 addl $1,%eax | |
246 subl $1,%ecx | |
247 jnz 6b | |
248 7: popl %edi | |
249 popl %esi | |
250 ret | |
# copy from high to low | |
251 acb_CopyLeft: | |
252 std | |
253 leal -4(%edi,%ecx),%edi # to + count - 4 | |
254 movl %eax,%esi # from + count - 1 | |
255 movl %ecx,%eax | |
256 subl $3,%esi # from + count - 4 | |
257 cmpl $3,%ecx | |
258 jbe 5f | |
259 1: shrl $2,%ecx | |
260 jz 4f | |
261 cmpl $32,%ecx | |
262 jbe 2f # <= 32 dwords | |
# direction flag is set, so this string move walks downwards | |
263 rep; smovl | |
264 jmp 4f | |
# moves the location counter forward by 8 bytes, leaving a gap before label 2. | |
# NOTE(review): the reason is not stated; presumably it tunes where the | |
# .p2align below places the loop - confirm before changing. | |
265 .=.+8 | |
266 2: subl %esi,%edi | |
267 .p2align 4,,15 | |
268 3: movl (%esi),%edx | |
269 movl %edx,(%edi,%esi,1) | |
270 subl $4,%esi | |
271 subl $1,%ecx | |
272 jnz 3b | |
273 addl %esi,%edi | |
274 4: movl %eax,%ecx | |
275 5: andl $3,%ecx | |
276 jz 7f | |
# %esi + 3 is the highest byte still to be copied; the loop walks down from there | |
277 subl %esi,%edi | |
278 addl $3,%esi | |
279 6: movb (%esi),%dl | |
280 movb %dl,(%edi,%esi,1) | |
281 subl $1,%esi | |
282 subl $1,%ecx | |
283 jnz 6b | |
284 7: cld | |
285 popl %edi | |
286 popl %esi | |
287 ret | |
288 | |
289 # Support for void Copy::conjoint_jshorts_atomic(void* from, | |
290 # void* to, | |
291 # size_t count) | |
# | |
# Stack arguments on entry: 4(%esp) = from, 8(%esp) = to, 12(%esp) = count in jshorts. | |
# %esi and %edi are saved and restored; %eax, %ecx and %edx are scratch. | |
# Copies upwards (cs_CopyRight) unless from < to <= from + count*2 - 2 (unsigned), | |
# where it copies downwards (cs_CopyLeft) with the direction flag set and | |
# clears the flag again before returning. | |
# Data is moved only with word and dword loads/stores and dword string moves. | |
292 .p2align 4,,15 | |
293 ELF_TYPE(_Copy_conjoint_jshorts_atomic,@function) | |
294 SYMBOL(_Copy_conjoint_jshorts_atomic): | |
295 pushl %esi | |
296 movl 4+12(%esp),%ecx # count | |
297 pushl %edi | |
298 movl 8+ 4(%esp),%esi # from | |
299 movl 8+ 8(%esp),%edi # to | |
300 cmpl %esi,%edi | |
# leal leaves the flags of the cmpl above intact for the jbe that follows | |
301 leal -2(%esi,%ecx,2),%eax # from + count*2 - 2 | |
302 jbe cs_CopyRight | |
303 cmpl %eax,%edi | |
304 jbe cs_CopyLeft | |
305 # copy from low to high | |
306 cs_CopyRight: | |
307 # align source address at dword address boundary | |
308 movl %esi,%eax # original from | |
# NOTE(review): the claim on the next line holds only when from is 2-byte | |
# aligned, which callers presumably guarantee - confirm. | |
309 andl $3,%eax # either 0 or 2 | |
310 jz 1f # no prefix | |
311 # copy prefix | |
# the decrement both accounts for the prefix element and detects count == 0 | |
312 subl $1,%ecx | |
313 jl 5f # zero count | |
314 movw (%esi),%dx | |
315 movw %dx,(%edi) | |
316 addl %eax,%esi # %eax == 2 | |
317 addl %eax,%edi | |
318 1: movl %ecx,%eax # word count less prefix | |
# one-operand sarl shifts right by one bit | |
319 sarl %ecx # dword count | |
320 jz 4f # no dwords to move | |
321 cmpl $32,%ecx | |
322 jbe 2f # <= 32 dwords | |
323 # copy aligned dwords | |
324 rep; smovl | |
325 jmp 4f | |
326 # copy aligned dwords | |
# %edi becomes (to - from) so that (%edi,%esi,1) addresses the destination | |
327 2: subl %esi,%edi | |
328 .p2align 4,,15 | |
329 3: movl (%esi),%edx | |
330 movl %edx,(%edi,%esi,1) | |
331 addl $4,%esi | |
332 subl $1,%ecx | |
333 jnz 3b | |
334 addl %esi,%edi | |
335 4: andl $1,%eax # suffix count | |
336 jz 5f # no suffix | |
337 # copy suffix | |
338 movw (%esi),%dx | |
339 movw %dx,(%edi) | |
340 5: popl %edi | |
341 popl %esi | |
342 ret | |
343 # copy from high to low | |
344 cs_CopyLeft: | |
345 std | |
346 leal -4(%edi,%ecx,2),%edi # to + count*2 - 4 | |
347 movl %eax,%esi # from + count*2 - 2 | |
348 movl %ecx,%eax | |
349 subl $2,%esi # from + count*2 - 4 | |
350 1: sarl %ecx # dword count | |
351 jz 4f # no dwords to move | |
352 cmpl $32,%ecx | |
353 ja 3f # > 32 dwords | |
354 subl %esi,%edi | |
355 .p2align 4,,15 | |
356 2: movl (%esi),%edx | |
357 movl %edx,(%edi,%esi,1) | |
358 subl $4,%esi | |
359 subl $1,%ecx | |
360 jnz 2b | |
361 addl %esi,%edi | |
362 jmp 4f | |
# direction flag is set, so this string move walks downwards | |
363 3: rep; smovl | |
364 4: andl $1,%eax # suffix count | |
365 jz 5f # no suffix | |
366 # copy suffix | |
# an odd count leaves one jshort, located 2 bytes above the current %esi / %edi | |
367 addl $2,%esi | |
368 addl $2,%edi | |
369 movw (%esi),%dx | |
370 movw %dx,(%edi) | |
371 5: cld | |
372 popl %edi | |
373 popl %esi | |
374 ret | |
375 | |
376 # Support for void Copy::arrayof_conjoint_jshorts(void* from, | |
377 # void* to, | |
378 # size_t count) | |
# | |
# Stack arguments on entry: 4(%esp) = from, 8(%esp) = to, 12(%esp) = count in jshorts. | |
# %esi and %edi are saved and restored; %eax, %ecx and %edx are scratch. | |
# Copies upwards (acs_CopyRight) unless from < to <= from + count*2 - 2 (unsigned), | |
# where it copies downwards (acs_CopyLeft) with the direction flag set and | |
# clears the flag again before returning. | |
# No prefix step is performed to align the source before the dword copy. | |
379 .p2align 4,,15 | |
380 ELF_TYPE(_Copy_arrayof_conjoint_jshorts,@function) | |
381 SYMBOL(_Copy_arrayof_conjoint_jshorts): | |
382 pushl %esi | |
383 movl 4+12(%esp),%ecx # count | |
384 pushl %edi | |
385 movl 8+ 4(%esp),%esi # from | |
386 movl 8+ 8(%esp),%edi # to | |
387 cmpl %esi,%edi | |
# leal leaves the flags of the cmpl above intact for the jbe that follows | |
388 leal -2(%esi,%ecx,2),%eax # from + count*2 - 2 | |
389 jbe acs_CopyRight | |
390 cmpl %eax,%edi | |
391 jbe acs_CopyLeft | |
# copy from low to high | |
392 acs_CopyRight: | |
393 movl %ecx,%eax # word count | |
# one-operand sarl shifts right by one bit | |
394 sarl %ecx # dword count | |
395 jz 4f # no dwords to move | |
396 cmpl $32,%ecx | |
397 jbe 2f # <= 32 dwords | |
398 # copy aligned dwords | |
399 rep; smovl | |
400 jmp 4f | |
401 # copy aligned dwords | |
# moves the location counter forward by 5 bytes, leaving a gap before label 2. | |
# NOTE(review): the reason is not stated; presumably it tunes where the | |
# .p2align below places the loop - confirm before changing. | |
402 .=.+5 | |
# %edi becomes (to - from) so that (%edi,%esi,1) addresses the destination | |
403 2: subl %esi,%edi | |
404 .p2align 4,,15 | |
405 3: movl (%esi),%edx | |
406 movl %edx,(%edi,%esi,1) | |
407 addl $4,%esi | |
408 subl $1,%ecx | |
409 jnz 3b | |
410 addl %esi,%edi | |
411 4: andl $1,%eax # suffix count | |
412 jz 5f # no suffix | |
413 # copy suffix | |
414 movw (%esi),%dx | |
415 movw %dx,(%edi) | |
416 5: popl %edi | |
417 popl %esi | |
418 ret | |
# copy from high to low | |
419 acs_CopyLeft: | |
420 std | |
421 leal -4(%edi,%ecx,2),%edi # to + count*2 - 4 | |
422 movl %eax,%esi # from + count*2 - 2 | |
423 movl %ecx,%eax | |
424 subl $2,%esi # from + count*2 - 4 | |
425 sarl %ecx # dword count | |
426 jz 4f # no dwords to move | |
427 cmpl $32,%ecx | |
428 ja 3f # > 32 dwords | |
429 subl %esi,%edi | |
430 .p2align 4,,15 | |
431 2: movl (%esi),%edx | |
432 movl %edx,(%edi,%esi,1) | |
433 subl $4,%esi | |
434 subl $1,%ecx | |
435 jnz 2b | |
436 addl %esi,%edi | |
437 jmp 4f | |
# direction flag is set, so this string move walks downwards | |
438 3: rep; smovl | |
439 4: andl $1,%eax # suffix count | |
440 jz 5f # no suffix | |
441 # copy suffix | |
# an odd count leaves one jshort, located 2 bytes above the current %esi / %edi | |
442 addl $2,%esi | |
443 addl $2,%edi | |
444 movw (%esi),%dx | |
445 movw %dx,(%edi) | |
446 5: cld | |
447 popl %edi | |
448 popl %esi | |
449 ret | |
450 | |
451 # Support for void Copy::conjoint_jints_atomic(void* from, | |
452 # void* to, | |
453 # size_t count) | |
454 # Equivalent to | |
455 # arrayof_conjoint_jints | |
# | |
# Stack arguments on entry: 4(%esp) = from, 8(%esp) = to, 12(%esp) = count in jints. | |
# Both exported names below label the same code. | |
# %esi and %edi are saved and restored; %eax, %ecx and %edx are scratch. | |
# Copies upwards (ci_CopyRight) unless from < to <= from + count*4 - 4 (unsigned), | |
# where it copies downwards (ci_CopyLeft) with the direction flag set and | |
# clears the flag again before returning. | |
# Every element is moved with a dword load/store or a dword string move. | |
456 .p2align 4,,15 | |
457 ELF_TYPE(_Copy_conjoint_jints_atomic,@function) | |
458 ELF_TYPE(_Copy_arrayof_conjoint_jints,@function) | |
459 SYMBOL(_Copy_conjoint_jints_atomic): | |
460 SYMBOL(_Copy_arrayof_conjoint_jints): | |
461 pushl %esi | |
462 movl 4+12(%esp),%ecx # count | |
463 pushl %edi | |
464 movl 8+ 4(%esp),%esi # from | |
465 movl 8+ 8(%esp),%edi # to | |
466 cmpl %esi,%edi | |
# leal leaves the flags of the cmpl above intact for the jbe that follows | |
467 leal -4(%esi,%ecx,4),%eax # from + count*4 - 4 | |
468 jbe ci_CopyRight | |
469 cmpl %eax,%edi | |
470 jbe ci_CopyLeft | |
# copy from low to high | |
471 ci_CopyRight: | |
472 cmpl $32,%ecx | |
473 jbe 2f # <= 32 dwords | |
474 rep; smovl | |
475 popl %edi | |
476 popl %esi | |
477 ret | |
# moves the location counter forward by 10 bytes, leaving a gap before label 2. | |
# NOTE(review): the reason is not stated; presumably it tunes where the | |
# .p2align below places the loop - confirm before changing. | |
478 .=.+10 | |
# %edi becomes (to - from) so that (%edi,%esi,1) addresses the destination | |
479 2: subl %esi,%edi | |
# enter the loop at its decrement so that a count of zero copies nothing | |
480 jmp 4f | |
481 .p2align 4,,15 | |
482 3: movl (%esi),%edx | |
483 movl %edx,(%edi,%esi,1) | |
484 addl $4,%esi | |
485 4: subl $1,%ecx | |
486 jge 3b | |
487 popl %edi | |
488 popl %esi | |
489 ret | |
# copy from high to low | |
490 ci_CopyLeft: | |
491 std | |
492 leal -4(%edi,%ecx,4),%edi # to + count*4 - 4 | |
493 cmpl $32,%ecx | |
494 ja 4f # > 32 dwords | |
# short case: %eax (address of the last source element) serves as the moving | |
# pointer and %edi becomes the constant distance between the two regions | |
495 subl %eax,%edi # eax == from + count*4 - 4 | |
496 jmp 3f | |
497 .p2align 4,,15 | |
498 2: movl (%eax),%edx | |
499 movl %edx,(%edi,%eax,1) | |
500 subl $4,%eax | |
501 3: subl $1,%ecx | |
502 jge 2b | |
503 cld | |
504 popl %edi | |
505 popl %esi | |
506 ret | |
# long case: direction flag is set, so the string move walks downwards | |
507 4: movl %eax,%esi # from + count*4 - 4 | |
508 rep; smovl | |
509 cld | |
510 popl %edi | |
511 popl %esi | |
512 ret | |
513 | |
514 # Support for void Copy::conjoint_jlongs_atomic(jlong* from, | |
515 # jlong* to, | |
516 # size_t count) | |
517 # | |
518 # 32-bit | |
519 # | |
520 # count treated as signed | |
521 # | |
522 # // if (from > to) { | |
523 # while (--count >= 0) { | |
524 # *to++ = *from++; | |
525 # } | |
526 # } else { | |
527 # while (--count >= 0) { | |
528 # to[count] = from[count]; | |
529 # } | |
530 # } | |
# | |
# Stack arguments on entry: 4(%esp) = from, 8(%esp) = to, 12(%esp) = count in jlongs. | |
# No registers are saved; %eax, %ecx, %edx and the x87 stack are used. | |
# Each jlong is transferred by one fildll load and one fistpll store. | |
# NOTE(review): the // on the first pseudo-code line presumably stops the C | |
# preprocessor from reading that line as a directive - confirm before editing. | |
531 .p2align 4,,15 | |
532 ELF_TYPE(_Copy_conjoint_jlongs_atomic,@function) | |
533 SYMBOL(_Copy_conjoint_jlongs_atomic): | |
534 movl 4+8(%esp),%ecx # count | |
535 movl 4+0(%esp),%eax # from | |
536 movl 4+4(%esp),%edx # to | |
# unsigned compare: to >= from selects the descending, index-based loop | |
537 cmpl %eax,%edx | |
538 jae cla_CopyLeft | |
539 cla_CopyRight: | |
# %edx becomes (to - from) so that (%edx,%eax,1) addresses the destination | |
540 subl %eax,%edx | |
# enter the loop at its decrement so that a count of zero copies nothing | |
541 jmp 2f | |
542 .p2align 4,,15 | |
543 1: fildll (%eax) | |
544 fistpll (%edx,%eax,1) | |
545 addl $8,%eax | |
546 2: subl $1,%ecx | |
547 jge 1b | |
548 ret | |
549 .p2align 4,,15 | |
# loop body placed ahead of its entry label; %ecx is the element index here | |
550 3: fildll (%eax,%ecx,8) | |
551 fistpll (%edx,%ecx,8) | |
552 cla_CopyLeft: | |
553 subl $1,%ecx | |
554 jge 3b | |
555 ret | |
556 | |
557 # Support for void Copy::arrayof_conjoint_jshorts(void* from, | |
558 # void* to, | |
559 # size_t count) | |
# | |
# MMX variant. Stack arguments on entry: 4(%esp) = from, 8(%esp) = to, | |
# 12(%esp) = count in jshorts. | |
# %esi and %edi are saved and restored; %eax, %ecx and %edx are scratch. | |
# Forward copies of 33 or more dwords go through an unrolled loop that moves | |
# 64 bytes per iteration using %mm0-%mm2, followed by emms. Smaller forward | |
# copies use a dword loop; backward copies use a dword loop or rep; smovl | |
# with the direction flag set, which gets cleared again before returning. | |
560 .p2align 4,,15 | |
561 ELF_TYPE(_mmx_Copy_arrayof_conjoint_jshorts,@function) | |
562 SYMBOL(_mmx_Copy_arrayof_conjoint_jshorts): | |
563 pushl %esi | |
# %ecx = count | |
564 movl 4+12(%esp),%ecx | |
565 pushl %edi | |
# %esi = from, %edi = to | |
566 movl 8+ 4(%esp),%esi | |
567 movl 8+ 8(%esp),%edi | |
568 cmpl %esi,%edi | |
# %eax = from + count*2 - 2; leal leaves the flags of the cmpl above intact | |
569 leal -2(%esi,%ecx,2),%eax | |
570 jbe mmx_acs_CopyRight | |
571 cmpl %eax,%edi | |
572 jbe mmx_acs_CopyLeft | |
# copy from low to high | |
573 mmx_acs_CopyRight: | |
# %eax keeps the jshort count; %ecx becomes the dword count (shift right by one) | |
574 movl %ecx,%eax | |
575 sarl %ecx | |
576 je 5f | |
# 33 dwords or more (unsigned compare) take the MMX path at 3: | |
577 cmpl $33,%ecx | |
578 jae 3f | |
# %edi becomes (to - from) so that (%edi,%esi,1) addresses the destination | |
579 1: subl %esi,%edi | |
580 .p2align 4,,15 | |
581 2: movl (%esi),%edx | |
582 movl %edx,(%edi,%esi,1) | |
583 addl $4,%esi | |
584 subl $1,%ecx | |
585 jnz 2b | |
586 addl %esi,%edi | |
587 jmp 5f | |
# single (non-repeated) string move: copies one dword; the subl that follows | |
# removes that dword from the remaining count. | |
# NOTE(review): the alignment statement on the next line depends on the source | |
# being 4 but not 8 byte aligned on entry - confirm against the callers. | |
588 3: smovl # align to 8 bytes, we know we are 4 byte aligned to start | |
589 subl $1,%ecx | |
# NOTE(review): label 4 is defined before the .p2align, so the backward branch | |
# lands on any padding emitted here rather than directly on the first movq. | |
590 4: .p2align 4,,15 | |
# unrolled body: 16 dwords (64 bytes) per pass; %edi is advanced first, hence | |
# the negative store offsets, and %ecx is reduced by 16 | |
591 movq 0(%esi),%mm0 | |
592 addl $64,%edi | |
593 movq 8(%esi),%mm1 | |
594 subl $16,%ecx | |
595 movq 16(%esi),%mm2 | |
596 movq %mm0,-64(%edi) | |
597 movq 24(%esi),%mm0 | |
598 movq %mm1,-56(%edi) | |
599 movq 32(%esi),%mm1 | |
600 movq %mm2,-48(%edi) | |
601 movq 40(%esi),%mm2 | |
602 movq %mm0,-40(%edi) | |
603 movq 48(%esi),%mm0 | |
604 movq %mm1,-32(%edi) | |
605 movq 56(%esi),%mm1 | |
606 movq %mm2,-24(%edi) | |
607 movq %mm0,-16(%edi) | |
608 addl $64,%esi | |
609 movq %mm1,-8(%edi) | |
# signed compare: repeat while at least 16 dwords remain | |
610 cmpl $16,%ecx | |
611 jge 4b | |
612 emms | |
# testl clears the carry flag, so ja is taken exactly when %ecx is nonzero: | |
# leftover dwords (fewer than 16) go through the dword loop at 1: | |
613 testl %ecx,%ecx | |
614 ja 1b | |
# bit 0 of the jshort count tells whether one trailing jshort remains | |
615 5: andl $1,%eax | |
616 je 7f | |
617 6: movw (%esi),%dx | |
618 movw %dx,(%edi) | |
619 7: popl %edi | |
620 popl %esi | |
621 ret | |
# copy from high to low | |
622 mmx_acs_CopyLeft: | |
623 std | |
# %edi = to + count*2 - 4, %esi = from + count*2 - 4, %eax = count | |
624 leal -4(%edi,%ecx,2),%edi | |
625 movl %eax,%esi | |
626 movl %ecx,%eax | |
627 subl $2,%esi | |
628 sarl %ecx | |
629 je 4f | |
630 cmpl $32,%ecx | |
631 ja 3f | |
632 subl %esi,%edi | |
633 .p2align 4,,15 | |
634 2: movl (%esi),%edx | |
635 movl %edx,(%edi,%esi,1) | |
636 subl $4,%esi | |
637 subl $1,%ecx | |
638 jnz 2b | |
639 addl %esi,%edi | |
640 jmp 4f | |
# direction flag is set, so this string move walks downwards | |
641 3: rep; smovl | |
642 4: andl $1,%eax | |
643 je 6f | |
# an odd count leaves one jshort, located 2 bytes above the current %esi / %edi | |
644 addl $2,%esi | |
645 addl $2,%edi | |
646 5: movw (%esi),%dx | |
647 movw %dx,(%edi) | |
648 6: cld | |
649 popl %edi | |
650 popl %esi | |
651 ret | |
652 | |
653 | |
654 # Support for jlong Atomic::cmpxchg(jlong exchange_value, | |
655 # volatile jlong* dest, | |
656 # jlong compare_value, | |
657 # bool is_MP) | |
658 # | |
# Saves and restores %ebx and %edi; %ecx is scratch. | |
# The result is whatever cmpxchg8b leaves in %edx:%eax: the compare value | |
# when the exchange took place, otherwise the 64-bit value read from dest. | |
# The stack offsets in the comments below are those in effect after both pushes. | |
659 .p2align 4,,15 | |
660 ELF_TYPE(_Atomic_cmpxchg_long,@function) | |
661 SYMBOL(_Atomic_cmpxchg_long): | |
662 # 8(%esp) : return PC | |
663 pushl %ebx # 4(%esp) : old %ebx | |
664 pushl %edi # 0(%esp) : old %edi | |
# cmpxchg8b expects the new value in %ecx:%ebx and the expected value in %edx:%eax | |
665 movl 12(%esp), %ebx # 12(%esp) : exchange_value (low) | |
666 movl 16(%esp), %ecx # 16(%esp) : exchange_value (high) | |
667 movl 24(%esp), %eax # 24(%esp) : compare_value (low) | |
668 movl 28(%esp), %edx # 28(%esp) : compare_value (high) | |
669 movl 20(%esp), %edi # 20(%esp) : dest | |
670 cmpl $0, 32(%esp) # 32(%esp) : is_MP | |
# when is_MP is zero the je lands on label 1, past the lock prefix, so the | |
# cmpxchg8b executes without it; otherwise lock and cmpxchg8b form one instruction | |
671 je 1f | |
672 lock | |
673 1: cmpxchg8b (%edi) | |
674 popl %edi | |
675 popl %ebx | |
676 ret | |
677 | |
678 | |
679 # Support for jlong Atomic::load and Atomic::store. | |
680 # void _Atomic_move_long(volatile jlong* src, volatile jlong* dst) | |
# | |
# Stack arguments on entry: 4(%esp) = src, 8(%esp) = dst. | |
# The 8-byte value is read with a single fildll and written with a single | |
# fistpll, passing through the x87 stack. %eax is scratch; nothing is returned. | |
681 .p2align 4,,15 | |
682 ELF_TYPE(_Atomic_move_long,@function) | |
683 SYMBOL(_Atomic_move_long): | |
684 movl 4(%esp), %eax # src | |
685 fildll (%eax) | |
# %eax is reused for the destination pointer; fistpll also pops the x87 stack | |
686 movl 8(%esp), %eax # dest | |
687 fistpll (%eax) | |
688 ret | |
689 |