comparison src/os_cpu/bsd_x86/vm/bsd_x86_32.s @ 3960:f08d439fab8c

7089790: integrate bsd-port changes Reviewed-by: kvn, twisti, jrose Contributed-by: Kurt Miller <kurt@intricatesoftware.com>, Greg Lewis <glewis@eyesbeyond.com>, Jung-uk Kim <jkim@freebsd.org>, Christos Zoulas <christos@zoulas.com>, Landon Fuller <landonf@plausible.coop>, The FreeBSD Foundation <board@freebsdfoundation.org>, Michael Franz <mvfranz@gmail.com>, Roger Hoover <rhoover@apple.com>, Alexander Strange <astrange@apple.com>
author never
date Sun, 25 Sep 2011 16:03:29 -0700
parents
children 436b4a3231bf
comparison
equal deleted inserted replaced
3959:eda6988c0d81 3960:f08d439fab8c
1 #
2 # Copyright (c) 2004, 2011, Oracle and/or its affiliates. All rights reserved.
3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 #
5 # This code is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License version 2 only, as
7 # published by the Free Software Foundation.
8 #
9 # This code is distributed in the hope that it will be useful, but WITHOUT
10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 # version 2 for more details (a copy is included in the LICENSE file that
13 # accompanied this code).
14 #
15 # You should have received a copy of the GNU General Public License version
16 # 2 along with this work; if not, write to the Free Software Foundation,
17 # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 #
19 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 # or visit www.oracle.com if you need additional information or have any
21 # questions.
22 #
23
24
25 #ifdef __APPLE__
26 # Darwin uses _ prefixed global symbols
# SYMBOL(s) wraps every exported label so one source serves both naming schemes:
# with __APPLE__ defined it pastes a leading underscore onto s, otherwise it is s.
# ELF_TYPE(name, description) emits a .type directive on non-Apple targets and
# expands to nothing when __APPLE__ is defined.
27 #define SYMBOL(s) _ ## s
28 #define ELF_TYPE(name, description)
29 #else
30 #define SYMBOL(s) s
31 #define ELF_TYPE(name, description) .type name,description
32 #endif
33
# Exported entry points. Each symbol declared in this run of .globl directives is
# defined further down in this file; SYMBOL() applies the platform name prefix.
34 .globl SYMBOL(fixcw)
35
36 # NOTE WELL! The _Copy functions are called directly
37 # from server-compiler-generated code via CallLeafNoFP,
38 # which means that they *must* either not use floating
39 # point or use it in the same manner as does the server
40 # compiler.
41
42 .globl SYMBOL(_Copy_conjoint_bytes)
43 .globl SYMBOL(_Copy_arrayof_conjoint_bytes)
44 .globl SYMBOL(_Copy_conjoint_jshorts_atomic)
45 .globl SYMBOL(_Copy_arrayof_conjoint_jshorts)
46 .globl SYMBOL(_Copy_conjoint_jints_atomic)
47 .globl SYMBOL(_Copy_arrayof_conjoint_jints)
48 .globl SYMBOL(_Copy_conjoint_jlongs_atomic)
49 .globl SYMBOL(_mmx_Copy_arrayof_conjoint_jshorts)
50
51 .globl SYMBOL(_Atomic_cmpxchg_long)
52 .globl SYMBOL(_Atomic_move_long)
53
# Everything below is assembled into the code section.
54 .text
55
56 # Support for void os::Solaris::init_thread_fpu_state() in os_solaris_i486.cpp
57 # Set fpu to 53 bit precision. This happens too early to use a stub.
58 # ported from solaris_x86_32.s
# NOTE(review): the header above still names the Solaris port this routine was
# copied from; presumably the BSD thread-init code is the real caller -- confirm.
#
# Takes no arguments and reads nothing from the caller's frame.
# Loads the x87 control word with 0x27f through a temporary stack slot.
# Clobbers: %eax (left holding 0x27f by the popl). %esp is balanced on return.
59 .p2align 4,,15
60 SYMBOL(fixcw):
61 pushl $0x27f # temporary stack slot holding the new control word
62 fldcw 0(%esp) # load the x87 control word from that slot
63 popl %eax # release the slot; %eax is used only as a scratch destination
64 ret
65
66 .globl SYMBOL(SafeFetch32), SYMBOL(Fetch32PFI), SYMBOL(Fetch32Resume)
67 .globl SYMBOL(SafeFetchN)
68 ## TODO: avoid exposing Fetch32PFI and Fetch32Resume.
69 ## Instead, the signal handler would call a new SafeFetchTriage(FaultingEIP)
70 ## routine to vet the address. If the address is the faulting LD then
71 ## SafeFetchTriage() would return the resume-at EIP, otherwise null.
#
# SafeFetch32 and SafeFetchN are two names for the same code (both labels sit on
# the same address).
# In:  4(%esp) = address to read, 8(%esp) = value preloaded into %eax.
# Out: %eax = the dword read from the address when the load completes.
# Clobbers: %ecx.
# Fetch32PFI marks the one load that may fault and Fetch32Resume marks the ret.
# NOTE(review): per the TODO above, the signal handler presumably recognises a
# fault at Fetch32PFI and resumes at Fetch32Resume, so the preloaded second
# argument is what gets returned -- the handler is not in this file; confirm.
72 ELF_TYPE(SafeFetch32,@function)
73 .p2align 4,,15
74 SYMBOL(SafeFetch32):
75 SYMBOL(SafeFetchN):
76 movl 0x8(%esp), %eax # second argument goes into the return register first
77 movl 0x4(%esp), %ecx # first argument: the address to read
78 SYMBOL(Fetch32PFI):
79 movl (%ecx), %eax # the only memory access through the caller-supplied address
80 SYMBOL(Fetch32Resume):
81 ret
82
83
# SpinPause: takes no arguments, executes a rep-prefixed nop (the byte sequence
# x86 defines as the PAUSE spin-loop hint) and returns 1 in %eax.
# Touches no memory and no register other than %eax.
84 .globl SYMBOL(SpinPause)
85 ELF_TYPE(SpinPause,@function)
86 .p2align 4,,15
87 SYMBOL(SpinPause):
88 rep
89 nop
90 movl $1, %eax # return value is always 1
91 ret
92
93 # Support for void Copy::conjoint_bytes(void* from,
94 # void* to,
95 # size_t count)
#
# Overlap-safe byte copy. Arguments are read from the stack:
#   4(%esp) = from, 8(%esp) = to, 12(%esp) = count (in bytes) at entry.
# Direction (unsigned compares):
#   to <= from                  -> copy low to high (cb_CopyRight)
#   from < to <= from+count-1   -> copy high to low (cb_CopyLeft)
#   otherwise                   -> copy low to high (falls into cb_CopyRight)
# Low-to-high: byte prefix until the source is dword aligned, then dwords
# (rep smovl above 32 dwords, an explicit loop otherwise), then 0-3 suffix bytes.
# High-to-low: runs with the direction flag set (std) and clears it (cld) on exit.
# %esi and %edi are saved on entry and restored before ret.
# Clobbers: %eax, %ecx, %edx, flags.
96 .p2align 4,,15
97 ELF_TYPE(_Copy_conjoint_bytes,@function)
98 SYMBOL(_Copy_conjoint_bytes):
99 pushl %esi
100 movl 4+12(%esp),%ecx # count
101 pushl %edi
102 movl 8+ 4(%esp),%esi # from
103 movl 8+ 8(%esp),%edi # to
104 cmpl %esi,%edi # compare to with from; leal below leaves these flags intact
105 leal -1(%esi,%ecx),%eax # from + count - 1
106 jbe cb_CopyRight
107 cmpl %eax,%edi # does to fall inside the source range?
108 jbe cb_CopyLeft
109 # copy from low to high
110 cb_CopyRight:
111 cmpl $3,%ecx
112 jbe 5f # <= 3 bytes
113 # align source address at dword address boundary
114 movl %ecx,%eax # original count
115 movl $4,%ecx
116 subl %esi,%ecx
117 andl $3,%ecx # prefix byte count
118 jz 1f # no prefix
119 subl %ecx,%eax # byte count less prefix
120 # copy prefix
# %edi temporarily holds (to - from) so the single index %esi addresses both buffers
121 subl %esi,%edi
122 0: movb (%esi),%dl
123 movb %dl,(%edi,%esi,1)
124 addl $1,%esi
125 subl $1,%ecx
126 jnz 0b
127 addl %esi,%edi # turn %edi back into an absolute destination pointer
128 1: movl %eax,%ecx # byte count less prefix
129 shrl $2,%ecx # dword count
130 jz 4f # no dwords to move
131 cmpl $32,%ecx
132 jbe 2f # <= 32 dwords
133 # copy aligned dwords
134 rep; smovl
135 jmp 4f
136 # copy aligned dwords
137 2: subl %esi,%edi # %edi = to - from, same single-index trick as the prefix loop
138 .p2align 4,,15
139 3: movl (%esi),%edx
140 movl %edx,(%edi,%esi,1)
141 addl $4,%esi
142 subl $1,%ecx
143 jnz 3b
144 addl %esi,%edi # restore absolute destination pointer
145 4: movl %eax,%ecx # byte count less prefix
146 5: andl $3,%ecx # suffix byte count
147 jz 7f # no suffix
148 # copy suffix
149 xorl %eax,%eax # %eax = byte index into the suffix
150 6: movb (%esi,%eax,1),%dl
151 movb %dl,(%edi,%eax,1)
152 addl $1,%eax
153 subl $1,%ecx
154 jnz 6b
155 7: popl %edi
156 popl %esi
157 ret
158 # copy from high to low
159 cb_CopyLeft:
160 std # direction flag set: string instructions now step downwards
161 leal -4(%edi,%ecx),%edi # to + count - 4
162 movl %eax,%esi # from + count - 1
163 movl %ecx,%eax # keep the byte count for the suffix computation
164 subl $3,%esi # from + count - 4
165 cmpl $3,%ecx
166 jbe 5f # <= 3 bytes
167 1: shrl $2,%ecx # dword count
168 jz 4f # no dwords to move
169 cmpl $32,%ecx
170 ja 3f # > 32 dwords
171 # copy dwords, aligned or not
172 subl %esi,%edi # %edi = to - from
173 .p2align 4,,15
174 2: movl (%esi),%edx
175 movl %edx,(%edi,%esi,1)
176 subl $4,%esi
177 subl $1,%ecx
178 jnz 2b
179 addl %esi,%edi # restore absolute destination pointer
180 jmp 4f
181 # copy dwords, aligned or not
182 3: rep; smovl
183 4: movl %eax,%ecx # byte count
184 5: andl $3,%ecx # suffix byte count
185 jz 7f # no suffix
186 # copy suffix
187 subl %esi,%edi # %edi = to - from
188 addl $3,%esi # point %esi at the highest byte not yet copied
189 6: movb (%esi),%dl
190 movb %dl,(%edi,%esi,1)
191 subl $1,%esi
192 subl $1,%ecx
193 jnz 6b
194 7: cld # clear the direction flag set at cb_CopyLeft
195 popl %edi
196 popl %esi
197 ret
198
199 # Support for void Copy::arrayof_conjoint_bytes(void* from,
200 # void* to,
201 # size_t count)
202 #
203 # Same as _Copy_conjoint_bytes, except no source alignment check.
#
# Arguments on the stack at entry: 4(%esp) = from, 8(%esp) = to,
# 12(%esp) = count in bytes.
# Direction (unsigned compares): high-to-low only when
# from < to <= from+count-1, otherwise low-to-high.
# Each direction copies count/4 dwords (rep smovl above 32 dwords, an explicit
# loop otherwise) followed by count&3 single bytes.
# %esi and %edi are saved on entry and restored before ret.
# Clobbers: %eax, %ecx, %edx, flags. The direction flag is clear on return.
204 .p2align 4,,15
205 ELF_TYPE(_Copy_arrayof_conjoint_bytes,@function)
206 SYMBOL(_Copy_arrayof_conjoint_bytes):
207 pushl %esi
208 movl 4+12(%esp),%ecx # count
209 pushl %edi
210 movl 8+ 4(%esp),%esi # from
211 movl 8+ 8(%esp),%edi # to
212 cmpl %esi,%edi # compare to with from; leal below leaves these flags intact
213 leal -1(%esi,%ecx),%eax # from + count - 1
214 jbe acb_CopyRight
215 cmpl %eax,%edi # does to fall inside the source range?
216 jbe acb_CopyLeft
217 # copy from low to high
218 acb_CopyRight:
219 cmpl $3,%ecx
220 jbe 5f # <= 3 bytes: only the byte loop runs
221 1: movl %ecx,%eax # keep the byte count for the suffix computation
222 shrl $2,%ecx # dword count
223 jz 4f
224 cmpl $32,%ecx
225 ja 3f # > 32 dwords
226 # copy aligned dwords
227 subl %esi,%edi # %edi = to - from so %esi alone indexes both buffers
228 .p2align 4,,15
229 2: movl (%esi),%edx
230 movl %edx,(%edi,%esi,1)
231 addl $4,%esi
232 subl $1,%ecx
233 jnz 2b
234 addl %esi,%edi # restore absolute destination pointer
235 jmp 4f
236 # copy aligned dwords
237 3: rep; smovl
238 4: movl %eax,%ecx # byte count
239 5: andl $3,%ecx # suffix byte count
240 jz 7f
241 # copy suffix
242 xorl %eax,%eax # %eax = byte index into the suffix
243 6: movb (%esi,%eax,1),%dl
244 movb %dl,(%edi,%eax,1)
245 addl $1,%eax
246 subl $1,%ecx
247 jnz 6b
248 7: popl %edi
249 popl %esi
250 ret
# copy from high to low
251 acb_CopyLeft:
252 std # direction flag set: string instructions now step downwards
253 leal -4(%edi,%ecx),%edi # to + count - 4
254 movl %eax,%esi # from + count - 1
255 movl %ecx,%eax # keep the byte count for the suffix computation
256 subl $3,%esi # from + count - 4
257 cmpl $3,%ecx
258 jbe 5f # <= 3 bytes
259 1: shrl $2,%ecx # dword count
260 jz 4f
261 cmpl $32,%ecx
262 jbe 2f # <= 32 dwords
263 rep; smovl
264 jmp 4f
# The next directive advances the location counter by 8 bytes (never executed:
# it follows an unconditional jmp). NOTE(review): presumably layout tuning for
# the loop that follows; the reason is not stated in this file.
265 .=.+8
266 2: subl %esi,%edi # %edi = to - from
267 .p2align 4,,15
268 3: movl (%esi),%edx
269 movl %edx,(%edi,%esi,1)
270 subl $4,%esi
271 subl $1,%ecx
272 jnz 3b
273 addl %esi,%edi # restore absolute destination pointer
274 4: movl %eax,%ecx # byte count
275 5: andl $3,%ecx # suffix byte count
276 jz 7f
277 subl %esi,%edi # %edi = to - from
278 addl $3,%esi # point %esi at the highest byte not yet copied
279 6: movb (%esi),%dl
280 movb %dl,(%edi,%esi,1)
281 subl $1,%esi
282 subl $1,%ecx
283 jnz 6b
284 7: cld # clear the direction flag set at acb_CopyLeft
285 popl %edi
286 popl %esi
287 ret
288
289 # Support for void Copy::conjoint_jshorts_atomic(void* from,
290 # void* to,
291 # size_t count)
#
# Overlap-safe copy of count 16-bit elements. Arguments on the stack at entry:
#   4(%esp) = from, 8(%esp) = to, 12(%esp) = count (in 2-byte elements).
# Direction (unsigned compares): high-to-low only when
# from < to <= from + count*2 - 2, otherwise low-to-high.
# Low-to-high: one leading word if the source is not dword aligned, then
# count/2 dwords (rep smovl above 32 dwords, explicit loop otherwise), then one
# trailing word if an odd element remains.
# High-to-low: dwords from the top down with the direction flag set, then the
# remaining lowest word; the direction flag is cleared before returning.
# Every access is a 16-bit or 32-bit move; no element is copied byte by byte.
# %esi and %edi are saved on entry and restored before ret.
# Clobbers: %eax, %ecx, %edx, flags.
292 .p2align 4,,15
293 ELF_TYPE(_Copy_conjoint_jshorts_atomic,@function)
294 SYMBOL(_Copy_conjoint_jshorts_atomic):
295 pushl %esi
296 movl 4+12(%esp),%ecx # count
297 pushl %edi
298 movl 8+ 4(%esp),%esi # from
299 movl 8+ 8(%esp),%edi # to
300 cmpl %esi,%edi # compare to with from; leal below leaves these flags intact
301 leal -2(%esi,%ecx,2),%eax # from + count*2 - 2
302 jbe cs_CopyRight
303 cmpl %eax,%edi # does to fall inside the source range?
304 jbe cs_CopyLeft
305 # copy from low to high
306 cs_CopyRight:
307 # align source address at dword address boundary
308 movl %esi,%eax # original from
309 andl $3,%eax # either 0 or 2
310 jz 1f # no prefix
311 # copy prefix
312 subl $1,%ecx
313 jl 5f # zero count
314 movw (%esi),%dx
315 movw %dx,(%edi)
316 addl %eax,%esi # %eax == 2
317 addl %eax,%edi
318 1: movl %ecx,%eax # word count less prefix
319 sarl %ecx # dword count
320 jz 4f # no dwords to move
321 cmpl $32,%ecx
322 jbe 2f # <= 32 dwords
323 # copy aligned dwords
324 rep; smovl
325 jmp 4f
326 # copy aligned dwords
327 2: subl %esi,%edi # %edi = to - from so %esi alone indexes both buffers
328 .p2align 4,,15
329 3: movl (%esi),%edx
330 movl %edx,(%edi,%esi,1)
331 addl $4,%esi
332 subl $1,%ecx
333 jnz 3b
334 addl %esi,%edi # restore absolute destination pointer
335 4: andl $1,%eax # suffix count
336 jz 5f # no suffix
337 # copy suffix
338 movw (%esi),%dx
339 movw %dx,(%edi)
340 5: popl %edi
341 popl %esi
342 ret
343 # copy from high to low
344 cs_CopyLeft:
345 std # direction flag set: string instructions now step downwards
346 leal -4(%edi,%ecx,2),%edi # to + count*2 - 4
347 movl %eax,%esi # from + count*2 - 2
348 movl %ecx,%eax # keep the word count for the suffix test
349 subl $2,%esi # from + count*2 - 4
350 1: sarl %ecx # dword count
351 jz 4f # no dwords to move
352 cmpl $32,%ecx
353 ja 3f # > 32 dwords
354 subl %esi,%edi # %edi = to - from
355 .p2align 4,,15
356 2: movl (%esi),%edx
357 movl %edx,(%edi,%esi,1)
358 subl $4,%esi
359 subl $1,%ecx
360 jnz 2b
361 addl %esi,%edi # restore absolute destination pointer
362 jmp 4f
363 3: rep; smovl
364 4: andl $1,%eax # suffix count
365 jz 5f # no suffix
366 # copy suffix
# both pointers sit one dword below the copied region; +2 selects the word just under it
367 addl $2,%esi
368 addl $2,%edi
369 movw (%esi),%dx
370 movw %dx,(%edi)
371 5: cld # clear the direction flag set at cs_CopyLeft
372 popl %edi
373 popl %esi
374 ret
375
376 # Support for void Copy::arrayof_conjoint_jshorts(void* from,
377 # void* to,
378 # size_t count)
#
# Overlap-safe copy of count 16-bit elements, without the leading-word source
# alignment step that _Copy_conjoint_jshorts_atomic performs.
# Arguments on the stack at entry: 4(%esp) = from, 8(%esp) = to,
# 12(%esp) = count (in 2-byte elements).
# Direction (unsigned compares): high-to-low only when
# from < to <= from + count*2 - 2, otherwise low-to-high.
# Each direction copies count/2 dwords (rep smovl above 32 dwords, explicit loop
# otherwise) and then one word if count is odd.
# %esi and %edi are saved on entry and restored before ret.
# Clobbers: %eax, %ecx, %edx, flags. The direction flag is clear on return.
379 .p2align 4,,15
380 ELF_TYPE(_Copy_arrayof_conjoint_jshorts,@function)
381 SYMBOL(_Copy_arrayof_conjoint_jshorts):
382 pushl %esi
383 movl 4+12(%esp),%ecx # count
384 pushl %edi
385 movl 8+ 4(%esp),%esi # from
386 movl 8+ 8(%esp),%edi # to
387 cmpl %esi,%edi # compare to with from; leal below leaves these flags intact
388 leal -2(%esi,%ecx,2),%eax # from + count*2 - 2
389 jbe acs_CopyRight
390 cmpl %eax,%edi # does to fall inside the source range?
391 jbe acs_CopyLeft
# copy from low to high
392 acs_CopyRight:
393 movl %ecx,%eax # word count
394 sarl %ecx # dword count
395 jz 4f # no dwords to move
396 cmpl $32,%ecx
397 jbe 2f # <= 32 dwords
398 # copy aligned dwords
399 rep; smovl
400 jmp 4f
401 # copy aligned dwords
# The next directive advances the location counter by 5 bytes (never executed:
# it follows an unconditional jmp). NOTE(review): presumably layout tuning for
# the loop that follows; the reason is not stated in this file.
402 .=.+5
403 2: subl %esi,%edi # %edi = to - from so %esi alone indexes both buffers
404 .p2align 4,,15
405 3: movl (%esi),%edx
406 movl %edx,(%edi,%esi,1)
407 addl $4,%esi
408 subl $1,%ecx
409 jnz 3b
410 addl %esi,%edi # restore absolute destination pointer
411 4: andl $1,%eax # suffix count
412 jz 5f # no suffix
413 # copy suffix
414 movw (%esi),%dx
415 movw %dx,(%edi)
416 5: popl %edi
417 popl %esi
418 ret
# copy from high to low
419 acs_CopyLeft:
420 std # direction flag set: string instructions now step downwards
421 leal -4(%edi,%ecx,2),%edi # to + count*2 - 4
422 movl %eax,%esi # from + count*2 - 2
423 movl %ecx,%eax # keep the word count for the suffix test
424 subl $2,%esi # from + count*2 - 4
425 sarl %ecx # dword count
426 jz 4f # no dwords to move
427 cmpl $32,%ecx
428 ja 3f # > 32 dwords
429 subl %esi,%edi # %edi = to - from
430 .p2align 4,,15
431 2: movl (%esi),%edx
432 movl %edx,(%edi,%esi,1)
433 subl $4,%esi
434 subl $1,%ecx
435 jnz 2b
436 addl %esi,%edi # restore absolute destination pointer
437 jmp 4f
438 3: rep; smovl
439 4: andl $1,%eax # suffix count
440 jz 5f # no suffix
441 # copy suffix
# both pointers sit one dword below the copied region; +2 selects the word just under it
442 addl $2,%esi
443 addl $2,%edi
444 movw (%esi),%dx
445 movw %dx,(%edi)
446 5: cld # clear the direction flag set at acs_CopyLeft
447 popl %edi
448 popl %esi
449 ret
450
451 # Support for void Copy::conjoint_jints_atomic(void* from,
452 # void* to,
453 # size_t count)
454 # Equivalent to
455 # arrayof_conjoint_jints
#
# Overlap-safe copy of count 32-bit elements. Both exported names label the same
# address. Arguments on the stack at entry: 4(%esp) = from, 8(%esp) = to,
# 12(%esp) = count (in 4-byte elements).
# Direction (unsigned compares): high-to-low only when
# from < to <= from + count*4 - 4, otherwise low-to-high.
# More than 32 elements (unsigned compare) use rep smovl; otherwise an explicit
# dword loop runs whose exit test (subl $1 / jge) treats the count as signed.
# %esi and %edi are saved on entry and restored before ret.
# Clobbers: %eax, %ecx, %edx, flags. The direction flag is clear on return.
456 .p2align 4,,15
457 ELF_TYPE(_Copy_conjoint_jints_atomic,@function)
458 ELF_TYPE(_Copy_arrayof_conjoint_jints,@function)
459 SYMBOL(_Copy_conjoint_jints_atomic):
460 SYMBOL(_Copy_arrayof_conjoint_jints):
461 pushl %esi
462 movl 4+12(%esp),%ecx # count
463 pushl %edi
464 movl 8+ 4(%esp),%esi # from
465 movl 8+ 8(%esp),%edi # to
466 cmpl %esi,%edi # compare to with from; leal below leaves these flags intact
467 leal -4(%esi,%ecx,4),%eax # from + count*4 - 4
468 jbe ci_CopyRight
469 cmpl %eax,%edi # does to fall inside the source range?
470 jbe ci_CopyLeft
# copy from low to high
471 ci_CopyRight:
472 cmpl $32,%ecx
473 jbe 2f # <= 32 dwords
474 rep; smovl
475 popl %edi
476 popl %esi
477 ret
# The next directive advances the location counter by 10 bytes (never executed:
# it follows a ret). NOTE(review): presumably layout tuning for the loop that
# follows; the reason is not stated in this file.
478 .=.+10
479 2: subl %esi,%edi # %edi = to - from so %esi alone indexes both buffers
480 jmp 4f # enter at the loop test so a zero count copies nothing
481 .p2align 4,,15
482 3: movl (%esi),%edx
483 movl %edx,(%edi,%esi,1)
484 addl $4,%esi
485 4: subl $1,%ecx
486 jge 3b
487 popl %edi
488 popl %esi
489 ret
# copy from high to low
490 ci_CopyLeft:
491 std # direction flag set: string instructions now step downwards
492 leal -4(%edi,%ecx,4),%edi # to + count*4 - 4
493 cmpl $32,%ecx
494 ja 4f # > 32 dwords
495 subl %eax,%edi # eax == from + count*4 - 4
496 jmp 3f # enter at the loop test
497 .p2align 4,,15
# explicit loop: %eax walks the source downwards, %edi holds (to - from)
498 2: movl (%eax),%edx
499 movl %edx,(%edi,%eax,1)
500 subl $4,%eax
501 3: subl $1,%ecx
502 jge 2b
503 cld # clear the direction flag set at ci_CopyLeft
504 popl %edi
505 popl %esi
506 ret
507 4: movl %eax,%esi # from + count*4 - 4
508 rep; smovl
509 cld # clear the direction flag set at ci_CopyLeft
510 popl %edi
511 popl %esi
512 ret
513
514 # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
515 # jlong* to,
516 # size_t count)
517 #
518 # 32-bit
519 #
520 # count treated as signed
521 #
522 # // if (from > to) {
523 # while (--count >= 0) {
524 # *to++ = *from++;
525 # }
526 # } else {
527 # while (--count >= 0) {
528 # to[count] = from[count];
529 # }
530 # }
#
# Arguments on the stack at entry: 4(%esp) = from, 8(%esp) = to,
# 12(%esp) = count (in 8-byte elements). No registers are saved.
# Each element is moved with one fildll (64-bit integer load onto the x87 stack)
# and one fistpll (64-bit store and pop), so the x87 stack depth is unchanged
# after every element.
# NOTE(review): this routine does use the x87 unit; see the NOTE WELL comment
# near the top of the file about the _Copy functions and floating point.
# Clobbers: %eax, %ecx, %edx, flags.
531 .p2align 4,,15
532 ELF_TYPE(_Copy_conjoint_jlongs_atomic,@function)
533 SYMBOL(_Copy_conjoint_jlongs_atomic):
534 movl 4+8(%esp),%ecx # count
535 movl 4+0(%esp),%eax # from
536 movl 4+4(%esp),%edx # to
537 cmpl %eax,%edx
538 jae cla_CopyLeft # to >= from (unsigned): copy from the highest index down
539 cla_CopyRight:
540 subl %eax,%edx # %edx = to - from so %eax alone indexes both buffers
541 jmp 2f # enter at the loop test so a zero count copies nothing
542 .p2align 4,,15
543 1: fildll (%eax)
544 fistpll (%edx,%eax,1)
545 addl $8,%eax
546 2: subl $1,%ecx
547 jge 1b
548 ret
549 .p2align 4,,15
# loop body for the high-to-low case; %ecx is the element index being copied
550 3: fildll (%eax,%ecx,8)
551 fistpll (%edx,%ecx,8)
552 cla_CopyLeft:
553 subl $1,%ecx
554 jge 3b
555 ret
556
557 # Support for void Copy::arrayof_conjoint_jshorts(void* from,
558 # void* to,
559 # size_t count)
#
# MMX variant of the 16-bit element copy. Arguments on the stack at entry:
#   4(%esp) = from, 8(%esp) = to, 12(%esp) = count (in 2-byte elements).
# Direction (unsigned compares): high-to-low only when
# from < to <= from + count*2 - 2, otherwise low-to-high.
# Low-to-high with 33 or more dwords: one dword is copied with smovl, then
# 64-byte chunks go through %mm0-%mm2 while at least 16 dwords remain, emms is
# executed, and any leftover dwords fall back to the plain dword loop.
# Low-to-high with fewer dwords, and the whole high-to-low path, use no MMX.
# A trailing word is copied when count is odd.
# %esi and %edi are saved on entry and restored before ret.
# Clobbers: %eax, %ecx, %edx, %mm0-%mm2 (MMX path only), flags.
# The direction flag is clear on return.
560 .p2align 4,,15
561 ELF_TYPE(_mmx_Copy_arrayof_conjoint_jshorts,@function)
562 SYMBOL(_mmx_Copy_arrayof_conjoint_jshorts):
563 pushl %esi
564 movl 4+12(%esp),%ecx # count
565 pushl %edi
566 movl 8+ 4(%esp),%esi # from
567 movl 8+ 8(%esp),%edi # to
568 cmpl %esi,%edi # compare to with from; leal below leaves these flags intact
569 leal -2(%esi,%ecx,2),%eax # from + count*2 - 2
570 jbe mmx_acs_CopyRight
571 cmpl %eax,%edi # does to fall inside the source range?
572 jbe mmx_acs_CopyLeft
# copy from low to high
573 mmx_acs_CopyRight:
574 movl %ecx,%eax # word count, kept for the suffix test
575 sarl %ecx # dword count
576 je 5f # no dwords to move
577 cmpl $33,%ecx
578 jae 3f # >= 33 dwords: take the MMX path
579 1: subl %esi,%edi # %edi = to - from so %esi alone indexes both buffers
580 .p2align 4,,15
581 2: movl (%esi),%edx
582 movl %edx,(%edi,%esi,1)
583 addl $4,%esi
584 subl $1,%ecx
585 jnz 2b
586 addl %esi,%edi # restore absolute destination pointer
587 jmp 5f
588 3: smovl # align to 8 bytes, we know we are 4 byte aligned to start
589 subl $1,%ecx # account for the dword just copied
# label 4 precedes the alignment directive, so any padding it emits is inside the loop
590 4: .p2align 4,,15
591 movq 0(%esi),%mm0
592 addl $64,%edi # destination advanced early; stores below use negative offsets
593 movq 8(%esi),%mm1
594 subl $16,%ecx # 16 dwords (64 bytes) per iteration
595 movq 16(%esi),%mm2
596 movq %mm0,-64(%edi)
597 movq 24(%esi),%mm0
598 movq %mm1,-56(%edi)
599 movq 32(%esi),%mm1
600 movq %mm2,-48(%edi)
601 movq 40(%esi),%mm2
602 movq %mm0,-40(%edi)
603 movq 48(%esi),%mm0
604 movq %mm1,-32(%edi)
605 movq 56(%esi),%mm1
606 movq %mm2,-24(%edi)
607 movq %mm0,-16(%edi)
608 addl $64,%esi
609 movq %mm1,-8(%edi)
610 cmpl $16,%ecx
611 jge 4b # another full 64-byte chunk remains
612 emms # leave MMX state before returning to non-MMX code
613 testl %ecx,%ecx
614 ja 1b # testl clears CF, so this branches exactly when %ecx != 0
615 5: andl $1,%eax # suffix count
616 je 7f # no suffix
617 6: movw (%esi),%dx
618 movw %dx,(%edi)
619 7: popl %edi
620 popl %esi
621 ret
# copy from high to low
622 mmx_acs_CopyLeft:
623 std # direction flag set: string instructions now step downwards
624 leal -4(%edi,%ecx,2),%edi # to + count*2 - 4
625 movl %eax,%esi # from + count*2 - 2
626 movl %ecx,%eax # keep the word count for the suffix test
627 subl $2,%esi # from + count*2 - 4
628 sarl %ecx # dword count
629 je 4f # no dwords to move
630 cmpl $32,%ecx
631 ja 3f # > 32 dwords
632 subl %esi,%edi # %edi = to - from
633 .p2align 4,,15
634 2: movl (%esi),%edx
635 movl %edx,(%edi,%esi,1)
636 subl $4,%esi
637 subl $1,%ecx
638 jnz 2b
639 addl %esi,%edi # restore absolute destination pointer
640 jmp 4f
641 3: rep; smovl
642 4: andl $1,%eax # suffix count
643 je 6f # no suffix
# both pointers sit one dword below the copied region; +2 selects the word just under it
644 addl $2,%esi
645 addl $2,%edi
646 5: movw (%esi),%dx
647 movw %dx,(%edi)
648 6: cld # clear the direction flag set at mmx_acs_CopyLeft
649 popl %edi
650 popl %esi
651 ret
652
653
654 # Support for jlong Atomic::cmpxchg(jlong exchange_value,
655 # volatile jlong* dest,
656 # jlong compare_value,
657 # bool is_MP)
658 #
# 64-bit compare-and-exchange built on cmpxchg8b. All arguments are read from
# the stack; the offsets in the comments below are relative to %esp after the
# two pushes.
# Out: %edx:%eax as left by cmpxchg8b (the compare value when the exchange took
# place, otherwise the 64-bit value found at dest).
# The lock prefix is emitted on its own line and is skipped by the je when
# is_MP is zero, so the cmpxchg8b runs unlocked in that case.
# %ebx and %edi are saved on entry and restored before ret.
# Clobbers: %eax, %ecx, %edx, flags.
659 .p2align 4,,15
660 ELF_TYPE(_Atomic_cmpxchg_long,@function)
661 SYMBOL(_Atomic_cmpxchg_long):
662 # 8(%esp) : return PC
663 pushl %ebx # 4(%esp) : old %ebx
664 pushl %edi # 0(%esp) : old %edi
665 movl 12(%esp), %ebx # 12(%esp) : exchange_value (low)
666 movl 16(%esp), %ecx # 16(%esp) : exchange_value (high)
667 movl 24(%esp), %eax # 24(%esp) : compare_value (low)
668 movl 28(%esp), %edx # 28(%esp) : compare_value (high)
669 movl 20(%esp), %edi # 20(%esp) : dest
670 cmpl $0, 32(%esp) # 32(%esp) : is_MP
671 je 1f # is_MP == 0: jump past the lock prefix
672 lock
673 1: cmpxchg8b (%edi)
674 popl %edi
675 popl %ebx
676 ret
677
678
679 # Support for jlong Atomic::load and Atomic::store.
680 # void _Atomic_move_long(volatile jlong* src, volatile jlong* dst)
#
# Moves one 64-bit integer from *src to *dst through the x87 stack: fildll
# pushes the value and fistpll stores and pops it, so the x87 stack depth is
# the same on exit as on entry.
# In:  4(%esp) = src, 8(%esp) = dst.
# Clobbers: %eax (used for both pointers in turn). No return value is set up.
681 .p2align 4,,15
682 ELF_TYPE(_Atomic_move_long,@function)
683 SYMBOL(_Atomic_move_long):
684 movl 4(%esp), %eax # src
685 fildll (%eax)
686 movl 8(%esp), %eax # dest
687 fistpll (%eax)
688 ret
689