comparison src/os_cpu/solaris_x86/vm/solaris_x86_32.s @ 0:a61af66fc99e jdk7-b24

Initial load
author duke
date Sat, 01 Dec 2007 00:00:00 +0000
parents
children c18cbe5936b8
comparison
equal deleted inserted replaced
-1:000000000000 0:a61af66fc99e
1 //
2 // Copyright 2004-2007 Sun Microsystems, Inc. All Rights Reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 // CA 95054 USA or visit www.sun.com if you need additional information or
21 // have any questions.
22 //
23
// Symbols made visible to the linker. Every name listed in this group is
// defined as a label further down in this file. SafeFetch32, SafeFetchN,
// Fetch32PFI, Fetch32Resume and SpinPause are exported by separate .globl
// directives placed next to their definitions.
24 .globl fixcw
25 .globl sse_check
26 .globl sse_unavailable
27 .globl gs_load
28 .globl gs_thread
29 .globl _Atomic_cmpxchg_long_gcc
30
31 // NOTE WELL! The _Copy functions are called directly
32 // from server-compiler-generated code via CallLeafNoFP,
33 // which means that they *must* either not use floating
34 // point or use it in the same manner as does the server
35 // compiler.
36
37 .globl _Copy_conjoint_bytes
38 .globl _Copy_arrayof_conjoint_bytes
39 .globl _Copy_conjoint_jshorts_atomic
40 .globl _Copy_arrayof_conjoint_jshorts
41 .globl _Copy_conjoint_jints_atomic
42 .globl _Copy_arrayof_conjoint_jints
43 .globl _Copy_conjoint_jlongs_atomic
44 .globl _mmx_Copy_arrayof_conjoint_jshorts
45
// All code below is placed in .text; the "ax" flags mark the section as
// allocatable and executable.
46 .section .text,"ax"
47
48 / Support for void os::Solaris::init_thread_fpu_state() in os_solaris_i486.cpp
49 / Set fpu to 53 bit precision. This happens too early to use a stub.
//
// Takes no arguments. The constant 0x27f is pushed so that fldcw, which only
// accepts a memory operand, can load it into the x87 control word. In 0x27f
// the precision-control field (bits 9:8) is 10b (53-bit significand), the
// rounding-control field (bits 11:10) is 00b, and exception mask bits 0-5 are
// set. The temporary is popped into %eax, so %eax holds 0x27f on return.
50 .align 16
51 fixcw:
52 pushl $0x27f // temporary stack slot holding the new control word
53 fldcw 0(%esp) // load the x87 control word from that slot
54 popl %eax // release the slot; %eax is overwritten with 0x27f
55 ret
56
// SafeFetch32 and SafeFetchN are two names for the same address.
// Stack arguments: 4(%esp) = address to read, 8(%esp) = fallback value.
// %eax is first loaded with the fallback and then overwritten by the 32-bit
// load from the address, so the normal return value is the word read.
// Fetch32PFI labels the load instruction itself and Fetch32Resume labels the
// ret that follows it; both are exported. NOTE(review): presumably a fault
// handler recognises a fault at Fetch32PFI and resumes at Fetch32Resume, which
// would return the fallback still held in %eax -- confirm against the signal
// handling code. The instruction layout between these labels must not change.
57 .align 16
58 .globl SafeFetch32
59 .globl SafeFetchN
60 .globl Fetch32PFI, Fetch32Resume
61 SafeFetch32:
62 SafeFetchN:
63 movl 0x8(%esp), %eax // %eax = fallback value (second argument)
64 movl 0x4(%esp), %ecx // %ecx = address to read (first argument)
65 Fetch32PFI:
66 movl (%ecx), %eax // the only instruction here that dereferences the address
67 Fetch32Resume:
68 ret
69
70
// SpinPause takes no arguments, executes a rep-prefixed nop and returns 1 in
// %eax. The byte sequence for "rep; nop" (F3 90) is the encoding of the PAUSE
// spin-wait hint; processors that predate PAUSE execute it as an ordinary nop.
71 .align 16
72 .globl SpinPause
73 SpinPause:
74 rep
75 nop
76 movl $1, %eax // constant return value
77 ret
78
79
80 / Test SSE availability, used by os_solaris_i486.cpp
//
// sse_check takes no arguments. It executes one SSE instruction (xorps) and,
// if that completes, returns 1 in %eax. sse_unavailable is a second exported
// entry point in the same function body that returns 0 in %eax; per the
// comment below, the signal handler redirects execution there when the xorps
// faults. xorps zeroes %xmm0 as a side effect.
81 .align 16
82 sse_check:
83 / Fault if SSE not available
84 xorps %xmm0,%xmm0
85 / No fault
86 movl $1,%eax
87 ret
88 / Signal handler continues here if SSE is not available
89 sse_unavailable:
90 xorl %eax,%eax // return 0
91 ret
92
93 / Fast thread accessors, used by threadLS_solaris_i486.cpp
//
// gs_load: the single stack argument at 4(%esp) is an offset. Returns in %eax
// the 32-bit word read from that offset within the %gs segment.
// Clobbers %ecx.
94 .align 16
95 gs_load:
96 movl 4(%esp),%ecx // %ecx = offset argument
97 movl %gs:(%ecx),%eax // %eax = word at %gs:offset
98 ret
99
// gs_thread: takes no arguments; returns in %eax the 32-bit word stored at
// offset 0 of the %gs segment.
100 .align 16
101 gs_thread:
102 movl %gs:0x0,%eax
103 ret
104
105 / Support for void Copy::conjoint_bytes(void* from,
106 / void* to,
107 / size_t count)
//
// Copies count bytes between two ranges that may overlap.
// Stack arguments (above the return address): from, to, count.
//   to <= from, or to > from + count - 1 : ascending copy (cb_CopyRight)
//   from < to <= from + count - 1        : descending copy (cb_CopyLeft),
//                                          run with the direction flag set and
//                                          cleared again before returning.
// Clobbers %eax, %ecx, %edx and the flags; %esi and %edi are saved/restored.
//
// count == 0 is a no-op. The "<= 3 bytes" shortcut in cb_CopyRight lands on
// the suffix-length test (label 5 on the andl) instead of directly inside the
// byte loop; that loop copies before it decrements, so entering it with
// %ecx == 0 would wrap the counter to 0xffffffff and keep copying.
108 .align 16
109 _Copy_conjoint_bytes:
110 pushl %esi
111 movl 4+12(%esp),%ecx / count
112 pushl %edi
113 movl 8+ 4(%esp),%esi / from
114 movl 8+ 8(%esp),%edi / to
115 cmpl %esi,%edi
116 leal -1(%esi,%ecx),%eax / from + count - 1
117 jbe cb_CopyRight // uses the flags from the cmpl above; leal leaves flags untouched
118 cmpl %eax,%edi
119 jbe cb_CopyLeft
120 / copy from low to high
121 cb_CopyRight:
122 cmpl $3,%ecx
123 jbe 5f / <= 3 bytes
124 / align source address at dword address boundary
125 movl %ecx,%eax / original count
126 movl $4,%ecx
127 subl %esi,%ecx
128 andl $3,%ecx / prefix byte count
129 jz 1f / no prefix
130 subl %ecx,%eax / byte count less prefix
131 / copy prefix
132 subl %esi,%edi // %edi = to - from, so (%edi,%esi,1) addresses the destination
133 0: movb (%esi),%dl
134 movb %dl,(%edi,%esi,1)
135 addl $1,%esi
136 subl $1,%ecx
137 jnz 0b
138 addl %esi,%edi // turn %edi back into an absolute destination pointer
139 1: movl %eax,%ecx / byte count less prefix
140 shrl $2,%ecx / dword count
141 jz 4f / no dwords to move
142 cmpl $32,%ecx
143 jbe 2f / <= 32 dwords
144 / copy aligned dwords
145 rep; smovl
146 jmp 4f
147 / copy aligned dwords
148 2: subl %esi,%edi
149 .align 16
150 3: movl (%esi),%edx
151 movl %edx,(%edi,%esi,1)
152 addl $4,%esi
153 subl $1,%ecx
154 jnz 3b
155 addl %esi,%edi
156 4: movl %eax,%ecx / byte count less prefix
// Label 5 is the target of the "<= 3 bytes" shortcut: %ecx then still holds
// the full count (0..3), which the mask leaves unchanged, and a zero count
// exits through 7 without touching memory.
157 5: andl $3,%ecx / suffix byte count
158 jz 7f / no suffix
159 / copy suffix
160 xorl %eax,%eax
161 6: movb (%esi,%eax,1),%dl
162 movb %dl,(%edi,%eax,1)
163 addl $1,%eax
164 subl $1,%ecx
165 jnz 6b
166 7: popl %edi
167 popl %esi
168 ret
169 / copy from high to low
// Reached only when from < to <= from + count - 1, which (barring address
// wrap-around) implies count >= 2, so the byte loop at 5/6 below is never
// entered with a zero count.
170 cb_CopyLeft:
171 std
172 leal -4(%edi,%ecx),%edi / to + count - 4
173 movl %eax,%esi / from + count - 1
174 movl %ecx,%eax
175 subl $3,%esi / from + count - 4
176 cmpl $3,%ecx
177 jbe 5f / <= 3 bytes
178 1: shrl $2,%ecx / dword count
179 jz 4f / no dwords to move
180 cmpl $32,%ecx
181 ja 3f / > 32 dwords
182 / copy dwords, aligned or not
183 subl %esi,%edi
184 .align 16
185 2: movl (%esi),%edx
186 movl %edx,(%edi,%esi,1)
187 subl $4,%esi
188 subl $1,%ecx
189 jnz 2b
190 addl %esi,%edi
191 jmp 4f
192 / copy dwords, aligned or not
193 3: rep; smovl
194 4: movl %eax,%ecx / byte count
195 andl $3,%ecx / suffix byte count
196 jz 7f / no suffix
197 / copy suffix
198 5: subl %esi,%edi
199 addl $3,%esi // step from the dword slot to its highest byte
200 6: movb (%esi),%dl
201 movb %dl,(%edi,%esi,1)
202 subl $1,%esi
203 subl $1,%ecx
204 jnz 6b
205 7: cld
206 popl %edi
207 popl %esi
208 ret
209
210 / Support for void Copy::arrayof_conjoint_bytes(void* from,
211 / void* to,
212 / size_t count)
213 /
214 / Same as _Copy_conjoint_bytes, except no source alignment check.
//
// Stack arguments (above the return address): from, to, count.
//   to <= from, or to > from + count - 1 : ascending copy (acb_CopyRight)
//   from < to <= from + count - 1        : descending copy (acb_CopyLeft),
//                                          with the direction flag set and
//                                          cleared again before returning.
// Clobbers %eax, %ecx, %edx and the flags; %esi and %edi are saved/restored.
//
// count == 0 is a no-op. The "<= 3 bytes" shortcut in acb_CopyRight lands on
// the suffix-length test (label 5 on the andl) instead of directly inside the
// byte loop; that loop copies before it decrements, so entering it with
// %ecx == 0 would wrap the counter to 0xffffffff and keep copying.
215 .align 16
216 _Copy_arrayof_conjoint_bytes:
217 pushl %esi
218 movl 4+12(%esp),%ecx / count
219 pushl %edi
220 movl 8+ 4(%esp),%esi / from
221 movl 8+ 8(%esp),%edi / to
222 cmpl %esi,%edi
223 leal -1(%esi,%ecx),%eax / from + count - 1
224 jbe acb_CopyRight // uses the flags from the cmpl above; leal leaves flags untouched
225 cmpl %eax,%edi
226 jbe acb_CopyLeft
227 / copy from low to high
228 acb_CopyRight:
229 cmpl $3,%ecx
230 jbe 5f // 0..3 bytes: go straight to the suffix-length test
231 1: movl %ecx,%eax // keep the byte count for the suffix computation
232 shrl $2,%ecx // dword count
233 jz 4f
234 cmpl $32,%ecx
235 ja 3f // more than 32 dwords: use the string instruction
236 / copy aligned dwords
237 subl %esi,%edi // %edi = to - from, so (%edi,%esi,1) addresses the destination
238 .align 16
239 2: movl (%esi),%edx
240 movl %edx,(%edi,%esi,1)
241 addl $4,%esi
242 subl $1,%ecx
243 jnz 2b
244 addl %esi,%edi // turn %edi back into an absolute destination pointer
245 jmp 4f
246 / copy aligned dwords
247 3: rep; smovl
248 4: movl %eax,%ecx
// Label 5 is the target of the "<= 3 bytes" shortcut: %ecx then still holds
// the full count (0..3), which the mask leaves unchanged, and a zero count
// exits through 7 without touching memory.
249 5: andl $3,%ecx
250 jz 7f
251 / copy suffix
252 xorl %eax,%eax
253 6: movb (%esi,%eax,1),%dl
254 movb %dl,(%edi,%eax,1)
255 addl $1,%eax
256 subl $1,%ecx
257 jnz 6b
258 7: popl %edi
259 popl %esi
260 ret
// Reached only when from < to <= from + count - 1, which (barring address
// wrap-around) implies count >= 2, so the byte loop at 5/6 below is never
// entered with a zero count.
261 acb_CopyLeft:
262 std
263 leal -4(%edi,%ecx),%edi / to + count - 4
264 movl %eax,%esi / from + count - 1
265 movl %ecx,%eax
266 subl $3,%esi / from + count - 4
267 cmpl $3,%ecx
268 jbe 5f
269 1: shrl $2,%ecx
270 jz 4f
271 cmpl $32,%ecx
272 jbe 2f / <= 32 dwords
273 rep; smovl
274 jmp 4f
// Advance the location counter by 8 bytes of padding ahead of the loop.
// NOTE(review): presumably code-placement tuning -- confirm before changing.
275 .=.+8
276 2: subl %esi,%edi
277 .align 16
278 3: movl (%esi),%edx
279 movl %edx,(%edi,%esi,1)
280 subl $4,%esi
281 subl $1,%ecx
282 jnz 3b
283 addl %esi,%edi
284 4: movl %eax,%ecx
285 andl $3,%ecx
286 jz 7f
287 5: subl %esi,%edi
288 addl $3,%esi // step from the dword slot to its highest byte
289 6: movb (%esi),%dl
290 movb %dl,(%edi,%esi,1)
291 subl $1,%esi
292 subl $1,%ecx
293 jnz 6b
294 7: cld
295 popl %edi
296 popl %esi
297 ret
298
299 / Support for void Copy::conjoint_jshorts_atomic(void* from,
300 / void* to,
301 / size_t count)
//
// Copies count 16-bit elements between two ranges that may overlap.
// Stack arguments (above the return address): from, to, count (in elements).
//   to <= from, or to > from + count*2 - 2 : ascending copy (cs_CopyRight)
//   from < to <= from + count*2 - 2        : descending copy (cs_CopyLeft),
//                                            with the direction flag set and
//                                            cleared again before returning.
// The bulk of the data moves as 32-bit dwords; a single 16-bit move handles a
// leading element when the source address is 2 mod 4 (ascending path only)
// and a trailing odd element.
// Clobbers %eax, %ecx, %edx and the flags; %esi and %edi are saved/restored.
//
// count == 0 is a no-op even when the source is 2 mod 4: the prefix step
// tests the count first, because it would otherwise copy one element
// unconditionally and drive the element count to -1.
302 .align 16
303 _Copy_conjoint_jshorts_atomic:
304 pushl %esi
305 movl 4+12(%esp),%ecx / count
306 pushl %edi
307 movl 8+ 4(%esp),%esi / from
308 movl 8+ 8(%esp),%edi / to
309 cmpl %esi,%edi
310 leal -2(%esi,%ecx,2),%eax / from + count*2 - 2
311 jbe cs_CopyRight // uses the flags from the cmpl above; leal leaves flags untouched
312 cmpl %eax,%edi
313 jbe cs_CopyLeft
314 / copy from low to high
315 cs_CopyRight:
316 / align source address at dword address boundary
317 movl %esi,%eax / original from
318 andl $3,%eax / either 0 or 2
319 jz 1f / no prefix
320 / copy prefix
testl %ecx,%ecx // zero count: there is no element to use as a prefix
jz 5f // leave without reading or writing anything
321 movw (%esi),%dx
322 movw %dx,(%edi)
323 addl %eax,%esi / %eax == 2
324 addl %eax,%edi
325 subl $1,%ecx
326 1: movl %ecx,%eax / word count less prefix
327 sarl %ecx / dword count
328 jz 4f / no dwords to move
329 cmpl $32,%ecx
330 jbe 2f / <= 32 dwords
331 / copy aligned dwords
332 rep; smovl
333 jmp 4f
334 / copy aligned dwords
335 2: subl %esi,%edi // %edi = to - from, so (%edi,%esi,1) addresses the destination
336 .align 16
337 3: movl (%esi),%edx
338 movl %edx,(%edi,%esi,1)
339 addl $4,%esi
340 subl $1,%ecx
341 jnz 3b
342 addl %esi,%edi // turn %edi back into an absolute destination pointer
343 4: andl $1,%eax / suffix count
344 jz 5f / no suffix
345 / copy suffix
346 movw (%esi),%dx
347 movw %dx,(%edi)
348 5: popl %edi
349 popl %esi
350 ret
351 / copy from high to low
// Reached only when from < to <= from + count*2 - 2, which (barring address
// wrap-around) implies count >= 2.
352 cs_CopyLeft:
353 std
354 leal -4(%edi,%ecx,2),%edi / to + count*2 - 4
355 movl %eax,%esi / from + count*2 - 2
356 movl %ecx,%eax
357 subl $2,%esi / from + count*2 - 4
358 1: sarl %ecx / dword count
359 jz 4f / no dwords to move
360 cmpl $32,%ecx
361 ja 3f / > 32 dwords
362 subl %esi,%edi
363 .align 16
364 2: movl (%esi),%edx
365 movl %edx,(%edi,%esi,1)
366 subl $4,%esi
367 subl $1,%ecx
368 jnz 2b
369 addl %esi,%edi
370 jmp 4f
371 3: rep; smovl
372 4: andl $1,%eax / suffix count
373 jz 5f / no suffix
374 / copy suffix
375 addl $2,%esi // the remaining element sits in the upper half of the dword slot
376 addl $2,%edi
377 movw (%esi),%dx
378 movw %dx,(%edi)
379 5: cld
380 popl %edi
381 popl %esi
382 ret
383
384 / Support for void Copy::arrayof_conjoint_jshorts(void* from,
385 / void* to,
386 / size_t count)
//
// Copies count 16-bit elements between two ranges that may overlap.
// Stack arguments (above the return address): from, to, count (in elements).
//   to <= from, or to > from + count*2 - 2 : ascending copy (acs_CopyRight)
//   from < to <= from + count*2 - 2        : descending copy (acs_CopyLeft),
//                                            with the direction flag set and
//                                            cleared again before returning.
// Unlike _Copy_conjoint_jshorts_atomic there is no prefix step to align the
// source address: pairs of elements are moved as 32-bit dwords from the start
// and a trailing odd element is moved with one 16-bit load/store.
// Up to 32 dwords are moved by an explicit loop, more by "rep; smovl".
// A zero count falls through both "jz" tests without touching memory.
// Clobbers %eax, %ecx, %edx and the flags; %esi and %edi are saved/restored.
387 .align 16
388 _Copy_arrayof_conjoint_jshorts:
389 pushl %esi
390 movl 4+12(%esp),%ecx / count
391 pushl %edi
392 movl 8+ 4(%esp),%esi / from
393 movl 8+ 8(%esp),%edi / to
394 cmpl %esi,%edi
395 leal -2(%esi,%ecx,2),%eax / from + count*2 - 2
396 jbe acs_CopyRight // uses the flags from the cmpl above; leal leaves flags untouched
397 cmpl %eax,%edi
398 jbe acs_CopyLeft
399 acs_CopyRight:
400 movl %ecx,%eax / word count
401 sarl %ecx / dword count
402 jz 4f / no dwords to move
403 cmpl $32,%ecx
404 jbe 2f / <= 32 dwords
405 / copy aligned dwords
406 rep; smovl
407 jmp 4f
408 / copy aligned dwords
// Advance the location counter by 5 bytes of padding ahead of the loop.
// NOTE(review): presumably code-placement tuning -- confirm before changing.
409 .=.+5
410 2: subl %esi,%edi // %edi = to - from, so (%edi,%esi,1) addresses the destination
411 .align 16
412 3: movl (%esi),%edx
413 movl %edx,(%edi,%esi,1)
414 addl $4,%esi
415 subl $1,%ecx
416 jnz 3b
417 addl %esi,%edi // turn %edi back into an absolute destination pointer
418 4: andl $1,%eax / suffix count
419 jz 5f / no suffix
420 / copy suffix
421 movw (%esi),%dx
422 movw %dx,(%edi)
423 5: popl %edi
424 popl %esi
425 ret
426 acs_CopyLeft:
427 std
428 leal -4(%edi,%ecx,2),%edi / to + count*2 - 4
429 movl %eax,%esi / from + count*2 - 2
430 movl %ecx,%eax
431 subl $2,%esi / from + count*2 - 4
432 sarl %ecx / dword count
433 jz 4f / no dwords to move
434 cmpl $32,%ecx
435 ja 3f / > 32 dwords
436 subl %esi,%edi
437 .align 16
438 2: movl (%esi),%edx
439 movl %edx,(%edi,%esi,1)
440 subl $4,%esi
441 subl $1,%ecx
442 jnz 2b
443 addl %esi,%edi
444 jmp 4f
445 3: rep; smovl
446 4: andl $1,%eax / suffix count
447 jz 5f / no suffix
448 / copy suffix
449 addl $2,%esi // the remaining element sits in the upper half of the dword slot
450 addl $2,%edi
451 movw (%esi),%dx
452 movw %dx,(%edi)
453 5: cld
454 popl %edi
455 popl %esi
456 ret
457
458 / Support for void Copy::conjoint_jints_atomic(void* from,
459 / void* to,
460 / size_t count)
461 / Equivalent to
462 / arrayof_conjoint_jints
//
// Copies count 32-bit elements between two ranges that may overlap; both
// exported names label the same entry point.
// Stack arguments (above the return address): from, to, count (in elements).
//   to <= from, or to > from + count*4 - 4 : ascending copy (ci_CopyRight)
//   from < to <= from + count*4 - 4        : descending copy (ci_CopyLeft),
//                                            with the direction flag set and
//                                            cleared again before returning.
// Up to 32 elements are moved by an explicit dword loop, more by "rep; smovl".
// Clobbers %eax, %ecx, %edx and the flags; %esi and %edi are saved/restored.
//
// count == 0 is a no-op: the ascending dword loop is entered at its counter
// test (label 4) and exits with jge, so no element is copied before the count
// has been checked. With a copy-first loop ending in subl/jnz, a zero count
// would wrap %ecx to 0xffffffff and keep copying.
463 .align 16
464 _Copy_conjoint_jints_atomic:
465 _Copy_arrayof_conjoint_jints:
466 pushl %esi
467 movl 4+12(%esp),%ecx / count
468 pushl %edi
469 movl 8+ 4(%esp),%esi / from
470 movl 8+ 8(%esp),%edi / to
471 cmpl %esi,%edi
472 leal -4(%esi,%ecx,4),%eax / from + count*4 - 4
473 jbe ci_CopyRight // uses the flags from the cmpl above; leal leaves flags untouched
474 cmpl %eax,%edi
475 jbe ci_CopyLeft
476 ci_CopyRight:
477 cmpl $32,%ecx
478 jbe 2f / <= 32 dwords
479 rep; smovl
480 popl %edi
481 popl %esi
482 ret
// Advance the location counter by 10 bytes of padding ahead of the loop.
// NOTE(review): presumably code-placement tuning -- confirm before changing.
483 .=.+10
484 2: subl %esi,%edi // %edi = to - from, so (%edi,%esi,1) addresses the destination
jmp 4f // test the count before the first copy
485 .align 16
486 3: movl (%esi),%edx
487 movl %edx,(%edi,%esi,1)
488 addl $4,%esi
489 4: subl $1,%ecx // %ecx is 0..32 here, so the signed test below is exact
490 jge 3b // copy while elements remain; falls through when the count is used up
491 popl %edi
492 popl %esi
493 ret
// Reached only when from < to <= from + count*4 - 4, which (barring address
// wrap-around) implies count >= 2, so the copy-first loop below is safe.
494 ci_CopyLeft:
495 std
496 leal -4(%edi,%ecx,4),%edi / to + count*4 - 4
497 cmpl $32,%ecx
498 ja 3f / > 32 dwords
499 subl %eax,%edi / eax == from + count*4 - 4
500 .align 16
501 2: movl (%eax),%edx
502 movl %edx,(%edi,%eax,1)
503 subl $4,%eax
504 subl $1,%ecx
505 jnz 2b
506 cld
507 popl %edi
508 popl %esi
509 ret
510 3: movl %eax,%esi / from + count*4 - 4
511 rep; smovl
512 cld
513 popl %edi
514 popl %esi
515 ret
516
517 / Support for void Copy::conjoint_jlongs_atomic(jlong* from,
518 / jlong* to,
519 / size_t count)
520 /
521 / 32-bit
522 /
523 / count treated as signed
524 /
525 / if (from > to) {
526 / while (--count >= 0) {
527 / *to++ = *from++;
528 / }
529 / } else {
530 / while (--count >= 0) {
531 / to[count] = from[count];
532 / }
533 / }
//
// Stack arguments (above the return address): from, to, count (in elements).
// Each 64-bit element is moved with one x87 integer load (fildll) followed by
// one store-and-pop (fistpll), i.e. as a single 8-byte read and a single
// 8-byte write rather than two 32-bit halves.
// Both loops are entered at their counter test, so a count of zero (or a
// negative count, since jge is a signed test) copies nothing.
// No registers are saved: only %eax, %ecx, %edx, the flags and the x87 stack
// top are used.
534 .align 16
535 _Copy_conjoint_jlongs_atomic:
536 movl 4+8(%esp),%ecx / count
537 movl 4+0(%esp),%eax / from
538 movl 4+4(%esp),%edx / to
539 cmpl %eax,%edx
540 jae cla_CopyLeft // to >= from (unsigned): copy from the highest index down
541 cla_CopyRight:
542 subl %eax,%edx // %edx = to - from, so (%edx,%eax,1) addresses the destination
543 jmp 2f // enter the loop at its counter test
544 .align 16
545 1: fildll (%eax)
546 fistpll (%edx,%eax,1)
547 addl $8,%eax
548 2: subl $1,%ecx
549 jge 1b
550 ret
551 .align 16
552 3: fildll (%eax,%ecx,8) // from[count], with count already decremented
553 fistpll (%edx,%ecx,8) // to[count]
554 cla_CopyLeft:
555 subl $1,%ecx
556 jge 3b
557 ret
558
559 / Support for void Copy::arrayof_conjoint_jshorts(void* from,
560 / void* to,
561 / size_t count)
//
// This is the variant exported as _mmx_Copy_arrayof_conjoint_jshorts: it has
// the same stack arguments (from, to, count in 16-bit elements) and the same
// overlap test as _Copy_arrayof_conjoint_jshorts, but its ascending path moves
// large copies through the MMX registers %mm0-%mm2.
//
// Ascending path (mmx_acs_CopyRight):
//   - fewer than 33 dwords: explicit dword loop (labels 1/2);
//   - 33 dwords or more: one dword is moved with a single smovl, then the loop
//     at label 4 moves 64 bytes (16 dwords) per iteration while at least 16
//     dwords remain; emms is executed after the loop, and any 1..15 leftover
//     dwords go back through the dword loop at label 1;
//   - a trailing odd element is moved with one 16-bit load/store.
// Descending path (mmx_acs_CopyLeft): uses no MMX; it runs with the direction
// flag set and clears it before returning.
// A zero count falls through both "je" tests without touching memory.
// Clobbers %eax, %ecx, %edx, %mm0-%mm2 and the flags; %esi and %edi are
// saved/restored.
562 .align 16
563 _mmx_Copy_arrayof_conjoint_jshorts:
564 pushl %esi
565 movl 4+12(%esp),%ecx // count (elements)
566 pushl %edi
567 movl 8+ 4(%esp),%esi // from
568 movl 8+ 8(%esp),%edi // to
569 cmpl %esi,%edi
570 leal -2(%esi,%ecx,2),%eax // from + count*2 - 2; leal leaves the cmpl flags intact
571 jbe mmx_acs_CopyRight
572 cmpl %eax,%edi
573 jbe mmx_acs_CopyLeft
574 mmx_acs_CopyRight:
575 movl %ecx,%eax // keep the element count for the odd-element test at 5
576 sarl %ecx // dword count
577 je 5f // no whole dwords
578 cmpl $33,%ecx
579 jae 3f // 33 or more dwords: MMX path
580 1: subl %esi,%edi // %edi = to - from, so (%edi,%esi,1) addresses the destination
581 .align 16
582 2: movl (%esi),%edx
583 movl %edx,(%edi,%esi,1)
584 addl $4,%esi
585 subl $1,%ecx
586 jnz 2b
587 addl %esi,%edi // turn %edi back into an absolute destination pointer
588 jmp 5f
589 3: smovl / align to 8 bytes, we know we are 4 byte aligned to start
590 subl $1,%ecx // account for the dword just moved
// Label 4 is placed before the .align, so the backward branch at the end of
// the loop lands ahead of any alignment padding emitted here.
591 4: .align 16
592 movq 0(%esi),%mm0
593 addl $64,%edi // destination advanced early; stores below use negative offsets
594 movq 8(%esi),%mm1
595 subl $16,%ecx // 16 dwords are consumed per iteration
596 movq 16(%esi),%mm2
597 movq %mm0,-64(%edi)
598 movq 24(%esi),%mm0
599 movq %mm1,-56(%edi)
600 movq 32(%esi),%mm1
601 movq %mm2,-48(%edi)
602 movq 40(%esi),%mm2
603 movq %mm0,-40(%edi)
604 movq 48(%esi),%mm0
605 movq %mm1,-32(%edi)
606 movq 56(%esi),%mm1
607 movq %mm2,-24(%edi)
608 movq %mm0,-16(%edi)
609 addl $64,%esi
610 movq %mm1,-8(%edi)
611 cmpl $16,%ecx
612 jge 4b // another full 64-byte block remains
613 emms // leave MMX state before returning to x87-using code
614 testl %ecx,%ecx
615 ja 1b // testl clears CF, so this is taken exactly when %ecx != 0
616 5: andl $1,%eax // odd trailing element?
617 je 7f
618 6: movw (%esi),%dx
619 movw %dx,(%edi)
620 7: popl %edi
621 popl %esi
622 ret
623 mmx_acs_CopyLeft:
624 std
625 leal -4(%edi,%ecx,2),%edi // to + count*2 - 4
626 movl %eax,%esi // from + count*2 - 2
627 movl %ecx,%eax // keep the element count for the odd-element test at 4
628 subl $2,%esi // from + count*2 - 4
629 sarl %ecx // dword count
630 je 4f
631 cmpl $32,%ecx
632 ja 3f // more than 32 dwords: use the string instruction
633 subl %esi,%edi
634 .align 16
635 2: movl (%esi),%edx
636 movl %edx,(%edi,%esi,1)
637 subl $4,%esi
638 subl $1,%ecx
639 jnz 2b
640 addl %esi,%edi
641 jmp 4f
642 3: rep; smovl
643 4: andl $1,%eax // odd trailing element?
644 je 6f
645 addl $2,%esi // the remaining element sits in the upper half of the dword slot
646 addl $2,%edi
647 5: movw (%esi),%dx
648 movw %dx,(%edi)
649 6: cld
650 popl %edi
651 popl %esi
652 ret
653
654
655 / Support for jlong Atomic::cmpxchg(jlong exchange_value,
656 / volatile jlong* dest,
657 / jlong compare_value,
658 / bool is_MP)
659 / Used only for Solaris/gcc builds
//
// 64-bit compare-and-exchange built on cmpxchg8b. The instruction compares
// %edx:%eax (compare_value) with the 8 bytes at (%edi); if they are equal it
// stores %ecx:%ebx (exchange_value) there, otherwise it loads the current
// memory contents into %edx:%eax. Either way %edx:%eax ends up holding the
// value that was in *dest before the operation, and that pair is what the
// function returns.
// When is_MP is zero the branch to label 1 skips the lock prefix, so the
// cmpxchg8b executes without it.
// The stack offsets in the comments below are as seen after both pushes.
// %ebx and %edi are saved/restored; %eax, %ecx, %edx and the flags are
// clobbered.
660 .align 16
661 _Atomic_cmpxchg_long_gcc:
662 / 8(%esp) : return PC
663 pushl %ebx / 4(%esp) : old %ebx
664 pushl %edi / 0(%esp) : old %edi
665 movl 12(%esp), %ebx / 12(%esp) : exchange_value (low)
666 movl 16(%esp), %ecx / 16(%esp) : exchange_value (high)
667 movl 24(%esp), %eax / 24(%esp) : compare_value (low)
668 movl 28(%esp), %edx / 28(%esp) : compare_value (high)
669 movl 20(%esp), %edi / 20(%esp) : dest
670 cmpl $0, 32(%esp) / 32(%esp) : is_MP
671 je 1f
672 lock
673 1: cmpxchg8b (%edi)
674 popl %edi
675 popl %ebx
676 ret