Mercurial > hg > truffle
annotate src/os_cpu/bsd_x86/vm/bsd_x86_32.s @ 20543:e7d0505c8a30
8059758: Footprint regressions with JDK-8038423
Summary: Changes in JDK-8038423 always initialize (zero out) virtual memory used for auxiliary data structures. This causes a footprint regression for G1 in startup benchmarks. This is because they do not touch that memory at all, so the operating system does not actually commit these pages. The fix is to, if the initialization value of the data structures matches the default value of just committed memory (=0), do not do anything.
Reviewed-by: jwilhelm, brutisso
author | tschatzl |
---|---|
date | Fri, 10 Oct 2014 15:51:58 +0200 |
parents | 55fb97c4c58d |
children | 4ca6dc0799b6 |
rev | line source |
---|---|
4006 | 1 # |
17467
55fb97c4c58d
8029233: Update copyright year to match last edit in jdk8 hotspot repository for 2013
mikael
parents:
11127
diff
changeset
|
2 # Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. |
3960 | 3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 # | |
5 # This code is free software; you can redistribute it and/or modify it | |
6 # under the terms of the GNU General Public License version 2 only, as | |
7 # published by the Free Software Foundation. | |
8 # | |
9 # This code is distributed in the hope that it will be useful, but WITHOUT | |
10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 # version 2 for more details (a copy is included in the LICENSE file that | |
13 # accompanied this code). | |
14 # | |
15 # You should have received a copy of the GNU General Public License version | |
16 # 2 along with this work; if not, write to the Free Software Foundation, | |
17 # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 # | |
19 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | |
20 # or visit www.oracle.com if you need additional information or have any | |
21 # questions. | |
4006 | 22 # |
3960 | 23 |
4006 | 24 |
3960 | 25 #ifdef __APPLE__ |
26 # Darwin uses _ prefixed global symbols | |
# SYMBOL(s) gives the assembler-level name for s: _s when __APPLE__ is
# defined, plain s otherwise.
# ELF_TYPE(name, description) emits a .type directive for name; when
# __APPLE__ is defined it expands to nothing.
27 #define SYMBOL(s) _ ## s | |
28 #define ELF_TYPE(name, description) | |
29 #else | |
30 #define SYMBOL(s) s | |
31 #define ELF_TYPE(name, description) .type name,description | |
32 #endif | |
33 | |
4006 | 34 .globl SYMBOL(fixcw) |
35 | |
3960 | 36 # NOTE WELL! The _Copy functions are called directly |
4006 | 37 # from server-compiler-generated code via CallLeafNoFP, |
38 # which means that they *must* either not use floating | |
39 # point or use it in the same manner as does the server | |
40 # compiler. | |
41 | |
# Global entry points for the copy routines defined later in this file.
3960 | 42 .globl SYMBOL(_Copy_conjoint_bytes) |
43 .globl SYMBOL(_Copy_arrayof_conjoint_bytes) | |
44 .globl SYMBOL(_Copy_conjoint_jshorts_atomic) | |
4006 | 45 .globl SYMBOL(_Copy_arrayof_conjoint_jshorts) |
3960 | 46 .globl SYMBOL(_Copy_conjoint_jints_atomic) |
47 .globl SYMBOL(_Copy_arrayof_conjoint_jints) | |
4006 | 48 .globl SYMBOL(_Copy_conjoint_jlongs_atomic) |
49 .globl SYMBOL(_mmx_Copy_arrayof_conjoint_jshorts) | |
3960 | 50 |
# Global entry points for the jlong helpers defined at the end of this file.
51 .globl SYMBOL(_Atomic_cmpxchg_long) | |
52 .globl SYMBOL(_Atomic_move_long) | |
53 | |
# Everything below is assembled into the code section.
4006 | 54 .text |
3960 | 55 |
56 # Support for void os::Solaris::init_thread_fpu_state() in os_solaris_i486.cpp | |
57 # Set fpu to 53 bit precision. This happens too early to use a stub. | |
58 # ported from solaris_x86_32.s | |
# fixcw reads no arguments. It pushes the constant 0x27f, loads the x87
# control word from that stack slot with fldcw, then pops the slot into
# %eax, so %eax is overwritten and the stack pointer is back at its entry
# value when ret executes.
# In the x87 control word layout, 0x27f sets all six exception mask bits,
# selects precision control 10b (53-bit significand) and rounding control
# 00b (round to nearest).
# NOTE(review): the header above still names the Solaris caller because the
# routine was ported; presumably the BSD caller is the matching os:: routine
# - confirm.
59 .p2align 4,,15 | |
60 SYMBOL(fixcw): | |
4006 | 61 pushl $0x27f |
62 fldcw 0(%esp) | |
63 popl %eax | |
64 ret | |
3960 | 65 |
66 .globl SYMBOL(SpinPause) | |
# SpinPause reads no arguments. It executes rep followed by nop, which
# together form the encoding of the PAUSE spin-wait hint, and then returns
# with %eax set to 1. No other register is written.
4006 | 67 ELF_TYPE(SpinPause,@function) |
3960 | 68 .p2align 4,,15 |
69 SYMBOL(SpinPause): | |
70 rep | |
71 nop | |
72 movl $1, %eax | |
73 ret | |
74 | |
75 # Support for void Copy::conjoint_bytes(void* from, | |
76 # void* to, | |
77 # size_t count) | |
#
# Stack arguments at entry: 4(%esp) = from, 8(%esp) = to, 12(%esp) = count
# in bytes. The offsets written as 4+12, 8+4 and 8+8 below account for the
# one or two registers already pushed at that point.
#
# Register roles: %esi = source cursor, %edi = destination cursor,
# %ecx = working count, %eax and %edx = scratch. %esi and %edi are pushed on
# entry and popped before every ret; %eax, %ecx and %edx are overwritten.
#
# Direction (unsigned compares): the ascending copy at cb_CopyRight is used
# when to <= from or when to > from + count - 1; otherwise the descending
# copy at cb_CopyLeft is used. cb_CopyLeft sets the direction flag with std
# and clears it again with cld before returning.
78 .p2align 4,,15 | |
4006 | 79 ELF_TYPE(_Copy_conjoint_bytes,@function) |
3960 | 80 SYMBOL(_Copy_conjoint_bytes): |
81 pushl %esi | |
82 movl 4+12(%esp),%ecx # count | |
83 pushl %edi | |
84 movl 8+ 4(%esp),%esi # from | |
85 movl 8+ 8(%esp),%edi # to | |
86 cmpl %esi,%edi | |
87 leal -1(%esi,%ecx),%eax # from + count - 1 | |
# leal does not modify the flags, so the jbe below still tests the
# comparison of to against from made two instructions earlier.
88 jbe cb_CopyRight | |
89 cmpl %eax,%edi | |
90 jbe cb_CopyLeft | |
91 # copy from low to high | |
92 cb_CopyRight: | |
93 cmpl $3,%ecx | |
94 jbe 5f # <= 3 bytes | |
95 # align source address at dword address boundary | |
96 movl %ecx,%eax # original count | |
97 movl $4,%ecx | |
98 subl %esi,%ecx | |
99 andl $3,%ecx # prefix byte count | |
100 jz 1f # no prefix | |
101 subl %ecx,%eax # byte count less prefix | |
102 # copy prefix | |
# %edi becomes (to - from) here, so (%edi,%esi,1) addresses the destination
# byte for the current %esi and only %esi has to be advanced in the loop.
103 subl %esi,%edi | |
104 0: movb (%esi),%dl | |
105 movb %dl,(%edi,%esi,1) | |
106 addl $1,%esi | |
107 subl $1,%ecx | |
108 jnz 0b | |
# turn %edi back into an absolute destination pointer
109 addl %esi,%edi | |
110 1: movl %eax,%ecx # byte count less prefix | |
111 shrl $2,%ecx # dword count | |
112 jz 4f # no dwords to move | |
113 cmpl $32,%ecx | |
114 jbe 2f # <= 32 dwords | |
115 # copy aligned dwords | |
116 rep; smovl | |
117 jmp 4f | |
118 # copy aligned dwords | |
# same (to - from) offset trick as in the prefix loop, four bytes per step
119 2: subl %esi,%edi | |
120 .p2align 4,,15 | |
121 3: movl (%esi),%edx | |
122 movl %edx,(%edi,%esi,1) | |
123 addl $4,%esi | |
124 subl $1,%ecx | |
125 jnz 3b | |
126 addl %esi,%edi | |
127 4: movl %eax,%ecx # byte count less prefix | |
128 5: andl $3,%ecx # suffix byte count | |
129 jz 7f # no suffix | |
130 # copy suffix | |
# %eax is reused as an ascending byte index from the current cursors
131 xorl %eax,%eax | |
132 6: movb (%esi,%eax,1),%dl | |
133 movb %dl,(%edi,%eax,1) | |
134 addl $1,%eax | |
135 subl $1,%ecx | |
136 jnz 6b | |
137 7: popl %edi | |
138 popl %esi | |
139 ret | |
140 # copy from high to low | |
141 cb_CopyLeft: | |
# With the direction flag set, rep; smovl walks both cursors downwards.
# Both cursors start at the last whole dword of their buffer and %eax keeps
# the original byte count for the suffix computation.
142 std | |
143 leal -4(%edi,%ecx),%edi # to + count - 4 | |
144 movl %eax,%esi # from + count - 1 | |
145 movl %ecx,%eax | |
146 subl $3,%esi # from + count - 4 | |
147 cmpl $3,%ecx | |
148 jbe 5f # <= 3 bytes | |
149 1: shrl $2,%ecx # dword count | |
150 jz 4f # no dwords to move | |
151 cmpl $32,%ecx | |
152 ja 3f # > 32 dwords | |
153 # copy dwords, aligned or not | |
154 subl %esi,%edi | |
155 .p2align 4,,15 | |
156 2: movl (%esi),%edx | |
157 movl %edx,(%edi,%esi,1) | |
158 subl $4,%esi | |
159 subl $1,%ecx | |
160 jnz 2b | |
161 addl %esi,%edi | |
162 jmp 4f | |
163 # copy dwords, aligned or not | |
164 3: rep; smovl | |
165 4: movl %eax,%ecx # byte count | |
166 5: andl $3,%ecx # suffix byte count | |
167 jz 7f # no suffix | |
168 # copy suffix | |
# %esi is moved up by 3 so that it points at the highest byte not yet
# copied; the remaining bytes are then copied one at a time, downwards.
169 subl %esi,%edi | |
170 addl $3,%esi | |
171 6: movb (%esi),%dl | |
172 movb %dl,(%edi,%esi,1) | |
4006 | 173 subl $1,%esi |
3960 | 174 subl $1,%ecx |
175 jnz 6b | |
# restore the direction flag to clear before returning
176 7: cld | |
177 popl %edi | |
178 popl %esi | |
179 ret | |
180 | |
181 # Support for void Copy::arrayof_conjoint_bytes(void* from, | |
182 # void* to, | |
183 # size_t count) | |
184 # | |
185 # Same as _Copy_conjoint_bytes, except no source alignment check. | |
#
# Stack arguments at entry: 4(%esp) = from, 8(%esp) = to, 12(%esp) = count
# in bytes. %esi = source cursor, %edi = destination cursor, %ecx = working
# count, %eax holds the original byte count once a copy path is entered.
# %esi and %edi are pushed on entry and popped before every ret; %eax, %ecx
# and %edx are overwritten.
#
# Direction (unsigned compares): ascending copy at acb_CopyRight when
# to <= from or to > from + count - 1, otherwise descending copy at
# acb_CopyLeft, which runs with the direction flag set (std) and clears it
# (cld) before returning.
186 .p2align 4,,15 | |
4006 | 187 ELF_TYPE(_Copy_arrayof_conjoint_bytes,@function) |
3960 | 188 SYMBOL(_Copy_arrayof_conjoint_bytes): |
189 pushl %esi | |
190 movl 4+12(%esp),%ecx # count | |
191 pushl %edi | |
192 movl 8+ 4(%esp),%esi # from | |
193 movl 8+ 8(%esp),%edi # to | |
194 cmpl %esi,%edi | |
195 leal -1(%esi,%ecx),%eax # from + count - 1 | |
# leal leaves the flags untouched, so this jbe tests to against from
196 jbe acb_CopyRight | |
197 cmpl %eax,%edi | |
4006 | 198 jbe acb_CopyLeft |
3960 | 199 # copy from low to high |
200 acb_CopyRight: | |
# counts of 3 or fewer skip straight to the byte-wise suffix copy
201 cmpl $3,%ecx | |
202 jbe 5f | |
203 1: movl %ecx,%eax | |
204 shrl $2,%ecx | |
205 jz 4f | |
206 cmpl $32,%ecx | |
207 ja 3f | |
208 # copy aligned dwords | |
# %edi becomes (to - from) so that only %esi is advanced inside the loop
209 subl %esi,%edi | |
210 .p2align 4,,15 | |
211 2: movl (%esi),%edx | |
212 movl %edx,(%edi,%esi,1) | |
213 addl $4,%esi | |
214 subl $1,%ecx | |
215 jnz 2b | |
216 addl %esi,%edi | |
217 jmp 4f | |
218 # copy aligned dwords | |
219 3: rep; smovl | |
# %ecx = original byte count; its low two bits give the suffix length
220 4: movl %eax,%ecx | |
221 5: andl $3,%ecx | |
222 jz 7f | |
223 # copy suffix | |
224 xorl %eax,%eax | |
225 6: movb (%esi,%eax,1),%dl | |
226 movb %dl,(%edi,%eax,1) | |
227 addl $1,%eax | |
228 subl $1,%ecx | |
229 jnz 6b | |
230 7: popl %edi | |
231 popl %esi | |
232 ret | |
233 acb_CopyLeft: | |
# descending copy: both cursors start at the last whole dword
234 std | |
235 leal -4(%edi,%ecx),%edi # to + count - 4 | |
236 movl %eax,%esi # from + count - 1 | |
237 movl %ecx,%eax | |
238 subl $3,%esi # from + count - 4 | |
239 cmpl $3,%ecx | |
240 jbe 5f | |
241 1: shrl $2,%ecx | |
242 jz 4f | |
243 cmpl $32,%ecx | |
244 jbe 2f # <= 32 dwords | |
245 rep; smovl | |
246 jmp 4f | |
# The .space bytes below sit directly after an unconditional jmp and before
# a label that is only reached by a branch, so they are never executed.
# NOTE(review): presumably padding that tunes the placement of the loop that
# follows - confirm.
4006 | 247 .space 8 |
3960 | 248 2: subl %esi,%edi |
249 .p2align 4,,15 | |
250 3: movl (%esi),%edx | |
251 movl %edx,(%edi,%esi,1) | |
252 subl $4,%esi | |
253 subl $1,%ecx | |
254 jnz 3b | |
255 addl %esi,%edi | |
256 4: movl %eax,%ecx | |
257 5: andl $3,%ecx | |
258 jz 7f | |
# suffix: move %esi up to the highest byte not yet copied, then copy the
# remaining bytes one at a time, downwards
259 subl %esi,%edi | |
260 addl $3,%esi | |
261 6: movb (%esi),%dl | |
262 movb %dl,(%edi,%esi,1) | |
4006 | 263 subl $1,%esi |
3960 | 264 subl $1,%ecx |
265 jnz 6b | |
266 7: cld | |
267 popl %edi | |
268 popl %esi | |
269 ret | |
270 | |
271 # Support for void Copy::conjoint_jshorts_atomic(void* from, | |
272 # void* to, | |
273 # size_t count) | |
#
# Stack arguments at entry: 4(%esp) = from, 8(%esp) = to, 12(%esp) = count.
# count is a number of 2-byte elements: the end-of-source address is formed
# with a scale of 2 and single elements are moved with 16-bit movw.
# %esi = source cursor, %edi = destination cursor, %ecx = working count,
# %eax and %edx = scratch. %esi and %edi are pushed on entry and popped
# before every ret.
#
# Direction (unsigned compares): ascending copy at cs_CopyRight when
# to <= from or to > from + count*2 - 2, otherwise descending copy at
# cs_CopyLeft, which runs with the direction flag set (std) and clears it
# (cld) before returning.
#
# Data is moved either as single 16-bit elements or as 32-bit dwords that
# hold two elements.
274 .p2align 4,,15 | |
4006 | 275 ELF_TYPE(_Copy_conjoint_jshorts_atomic,@function) |
3960 | 276 SYMBOL(_Copy_conjoint_jshorts_atomic): |
277 pushl %esi | |
278 movl 4+12(%esp),%ecx # count | |
279 pushl %edi | |
280 movl 8+ 4(%esp),%esi # from | |
281 movl 8+ 8(%esp),%edi # to | |
282 cmpl %esi,%edi | |
283 leal -2(%esi,%ecx,2),%eax # from + count*2 - 2 | |
# leal leaves the flags untouched, so this jbe tests to against from
284 jbe cs_CopyRight | |
285 cmpl %eax,%edi | |
4006 | 286 jbe cs_CopyLeft |
3960 | 287 # copy from low to high |
288 cs_CopyRight: | |
289 # align source address at dword address boundary | |
290 movl %esi,%eax # original from | |
291 andl $3,%eax # either 0 or 2 | |
292 jz 1f # no prefix | |
293 # copy prefix | |
# one element is copied on its own so that the source cursor reaches a
# 4-byte boundary; the jl exit covers a count of zero
294 subl $1,%ecx | |
295 jl 5f # zero count | |
296 movw (%esi),%dx | |
297 movw %dx,(%edi) | |
298 addl %eax,%esi # %eax == 2 | |
299 addl %eax,%edi | |
300 1: movl %ecx,%eax # word count less prefix | |
# sarl with a single operand shifts by one bit: two elements per dword
301 sarl %ecx # dword count | |
302 jz 4f # no dwords to move | |
303 cmpl $32,%ecx | |
304 jbe 2f # <= 32 dwords | |
305 # copy aligned dwords | |
306 rep; smovl | |
4006 | 307 jmp 4f |
3960 | 308 # copy aligned dwords |
# %edi becomes (to - from) so that only %esi is advanced inside the loop
309 2: subl %esi,%edi | |
310 .p2align 4,,15 | |
311 3: movl (%esi),%edx | |
312 movl %edx,(%edi,%esi,1) | |
313 addl $4,%esi | |
314 subl $1,%ecx | |
315 jnz 3b | |
316 addl %esi,%edi | |
# an odd remaining element count leaves exactly one trailing element
317 4: andl $1,%eax # suffix count | |
318 jz 5f # no suffix | |
319 # copy suffix | |
320 movw (%esi),%dx | |
321 movw %dx,(%edi) | |
322 5: popl %edi | |
323 popl %esi | |
324 ret | |
325 # copy from high to low | |
326 cs_CopyLeft: | |
# descending copy: both cursors start at the last whole dword and %eax
# keeps the original element count for the odd-count test at the end
327 std | |
328 leal -4(%edi,%ecx,2),%edi # to + count*2 - 4 | |
329 movl %eax,%esi # from + count*2 - 2 | |
330 movl %ecx,%eax | |
331 subl $2,%esi # from + count*2 - 4 | |
332 1: sarl %ecx # dword count | |
333 jz 4f # no dwords to move | |
334 cmpl $32,%ecx | |
335 ja 3f # > 32 dwords | |
336 subl %esi,%edi | |
337 .p2align 4,,15 | |
338 2: movl (%esi),%edx | |
339 movl %edx,(%edi,%esi,1) | |
340 subl $4,%esi | |
341 subl $1,%ecx | |
342 jnz 2b | |
343 addl %esi,%edi | |
344 jmp 4f | |
345 3: rep; smovl | |
346 4: andl $1,%eax # suffix count | |
347 jz 5f # no suffix | |
348 # copy suffix | |
# after the dword copies both cursors sit 4 bytes below the last dword
# copied; adding 2 addresses the single element still to be copied
349 addl $2,%esi | |
350 addl $2,%edi | |
351 movw (%esi),%dx | |
352 movw %dx,(%edi) | |
353 5: cld | |
354 popl %edi | |
355 popl %esi | |
356 ret | |
357 | |
358 # Support for void Copy::arrayof_conjoint_jshorts(void* from, | |
359 # void* to, | |
360 # size_t count) | |
#
# Stack arguments at entry: 4(%esp) = from, 8(%esp) = to, 12(%esp) = count,
# where count is a number of 2-byte elements. Unlike
# _Copy_conjoint_jshorts_atomic there is no prefix step that aligns the
# source cursor before the dword copy.
# %esi = source cursor, %edi = destination cursor, %ecx = dword count,
# %eax = original element count, %edx = scratch. %esi and %edi are pushed on
# entry and popped before every ret.
#
# Direction (unsigned compares): ascending copy at acs_CopyRight when
# to <= from or to > from + count*2 - 2, otherwise descending copy at
# acs_CopyLeft, which runs with the direction flag set (std) and clears it
# (cld) before returning.
361 .p2align 4,,15 | |
4006 | 362 ELF_TYPE(_Copy_arrayof_conjoint_jshorts,@function) |
3960 | 363 SYMBOL(_Copy_arrayof_conjoint_jshorts): |
364 pushl %esi | |
365 movl 4+12(%esp),%ecx # count | |
366 pushl %edi | |
367 movl 8+ 4(%esp),%esi # from | |
368 movl 8+ 8(%esp),%edi # to | |
369 cmpl %esi,%edi | |
370 leal -2(%esi,%ecx,2),%eax # from + count*2 - 2 | |
# leal leaves the flags untouched, so this jbe tests to against from
371 jbe acs_CopyRight | |
372 cmpl %eax,%edi | |
4006 | 373 jbe acs_CopyLeft |
3960 | 374 acs_CopyRight: |
375 movl %ecx,%eax # word count | |
376 sarl %ecx # dword count | |
377 jz 4f # no dwords to move | |
378 cmpl $32,%ecx | |
379 jbe 2f # <= 32 dwords | |
380 # copy aligned dwords | |
381 rep; smovl | |
4006 | 382 jmp 4f |
3960 | 383 # copy aligned dwords |
# The .space bytes below follow an unconditional jmp and precede a label
# that is only reached by a branch, so they are never executed.
# NOTE(review): presumably padding that tunes the placement of the loop that
# follows - confirm.
4006 | 384 .space 5 |
385 2: subl %esi,%edi | |
3960 | 386 .p2align 4,,15 |
387 3: movl (%esi),%edx | |
388 movl %edx,(%edi,%esi,1) | |
389 addl $4,%esi | |
390 subl $1,%ecx | |
391 jnz 3b | |
392 addl %esi,%edi | |
# an odd element count leaves exactly one trailing element
393 4: andl $1,%eax # suffix count | |
394 jz 5f # no suffix | |
395 # copy suffix | |
396 movw (%esi),%dx | |
397 movw %dx,(%edi) | |
398 5: popl %edi | |
399 popl %esi | |
400 ret | |
401 acs_CopyLeft: | |
# descending copy: both cursors start at the last whole dword
402 std | |
403 leal -4(%edi,%ecx,2),%edi # to + count*2 - 4 | |
404 movl %eax,%esi # from + count*2 - 2 | |
405 movl %ecx,%eax | |
406 subl $2,%esi # from + count*2 - 4 | |
407 sarl %ecx # dword count | |
408 jz 4f # no dwords to move | |
409 cmpl $32,%ecx | |
410 ja 3f # > 32 dwords | |
411 subl %esi,%edi | |
412 .p2align 4,,15 | |
413 2: movl (%esi),%edx | |
414 movl %edx,(%edi,%esi,1) | |
415 subl $4,%esi | |
416 subl $1,%ecx | |
417 jnz 2b | |
418 addl %esi,%edi | |
419 jmp 4f | |
420 3: rep; smovl | |
421 4: andl $1,%eax # suffix count | |
422 jz 5f # no suffix | |
423 # copy suffix | |
# after the dword copies both cursors sit 4 bytes below the last dword
# copied; adding 2 addresses the single element still to be copied
424 addl $2,%esi | |
425 addl $2,%edi | |
426 movw (%esi),%dx | |
427 movw %dx,(%edi) | |
428 5: cld | |
429 popl %edi | |
430 popl %esi | |
431 ret | |
432 | |
433 # Support for void Copy::conjoint_jints_atomic(void* from, | |
434 # void* to, | |
435 # size_t count) | |
436 # Equivalent to | |
437 # arrayof_conjoint_jints | |
#
# Both global labels below mark the same address, so the two names share
# one implementation.
#
# Stack arguments at entry: 4(%esp) = from, 8(%esp) = to, 12(%esp) = count,
# where count is a number of 4-byte elements. Every element is moved with a
# single 32-bit load and a single 32-bit store, or by rep; smovl.
# %esi and %edi are pushed on entry and popped before every ret; %eax, %ecx
# and %edx are overwritten.
#
# Direction (unsigned compares): ascending copy at ci_CopyRight when
# to <= from or to > from + count*4 - 4, otherwise descending copy at
# ci_CopyLeft, which runs with the direction flag set (std) and clears it
# (cld) before returning.
438 .p2align 4,,15 | |
4006 | 439 ELF_TYPE(_Copy_conjoint_jints_atomic,@function) |
440 ELF_TYPE(_Copy_arrayof_conjoint_jints,@function) | |
3960 | 441 SYMBOL(_Copy_conjoint_jints_atomic): |
442 SYMBOL(_Copy_arrayof_conjoint_jints): | |
443 pushl %esi | |
444 movl 4+12(%esp),%ecx # count | |
445 pushl %edi | |
446 movl 8+ 4(%esp),%esi # from | |
447 movl 8+ 8(%esp),%edi # to | |
448 cmpl %esi,%edi | |
449 leal -4(%esi,%ecx,4),%eax # from + count*4 - 4 | |
# leal leaves the flags untouched, so this jbe tests to against from
450 jbe ci_CopyRight | |
451 cmpl %eax,%edi | |
4006 | 452 jbe ci_CopyLeft |
3960 | 453 ci_CopyRight: |
454 cmpl $32,%ecx | |
455 jbe 2f # <= 32 dwords | |
456 rep; smovl | |
457 popl %edi | |
458 popl %esi | |
459 ret | |
# The .space bytes below follow a ret and precede a label that is only
# reached by a branch, so they are never executed.
# NOTE(review): presumably padding that tunes the placement of the loop that
# follows - confirm.
4006 | 460 .space 10 |
# %edi becomes (to - from); the loop is entered at its decrement-and-test
# at label 4, so a count of zero copies nothing
3960 | 461 2: subl %esi,%edi |
462 jmp 4f | |
463 .p2align 4,,15 | |
464 3: movl (%esi),%edx | |
465 movl %edx,(%edi,%esi,1) | |
466 addl $4,%esi | |
467 4: subl $1,%ecx | |
468 jge 3b | |
469 popl %edi | |
470 popl %esi | |
471 ret | |
472 ci_CopyLeft: | |
473 std | |
474 leal -4(%edi,%ecx,4),%edi # to + count*4 - 4 | |
475 cmpl $32,%ecx | |
476 ja 4f # > 32 dwords | |
# short path: %eax (address of the last source element) is the descending
# source cursor and %edi holds the destination minus source offset
477 subl %eax,%edi # eax == from + count*4 - 4 | |
478 jmp 3f | |
479 .p2align 4,,15 | |
480 2: movl (%eax),%edx | |
481 movl %edx,(%edi,%eax,1) | |
482 subl $4,%eax | |
483 3: subl $1,%ecx | |
484 jge 2b | |
485 cld | |
486 popl %edi | |
487 popl %esi | |
488 ret | |
# long path: descending rep; smovl starting from the last element
489 4: movl %eax,%esi # from + count*4 - 4 | |
490 rep; smovl | |
491 cld | |
492 popl %edi | |
493 popl %esi | |
494 ret | |
4006 | 495 |
3960 | 496 # Support for void Copy::conjoint_jlongs_atomic(jlong* from, |
497 # jlong* to, | |
498 # size_t count) | |
499 # | |
500 # 32-bit | |
501 # | |
502 # count treated as signed | |
503 # | |
504 # // if (from > to) { | |
505 # while (--count >= 0) { | |
506 # *to++ = *from++; | |
507 # } | |
508 # } else { | |
509 # while (--count >= 0) { | |
510 # to[count] = from[count]; | |
511 # } | |
512 # } | |
# NOTE(review): the // on the first pseudo-code line presumably keeps the
# C preprocessor from reading that line as a conditional directive -
# confirm before editing it.
#
# Stack arguments at entry: 4(%esp) = from, 8(%esp) = to, 12(%esp) = count,
# where count is a number of 8-byte elements. No register is pushed; %eax,
# %ecx and %edx are overwritten.
#
# Each element is moved through the x87 stack: fildll loads the 8 bytes as
# a 64-bit integer and fistpll stores them and pops the x87 stack again.
# NOTE(review): presumably chosen so that every element is transferred with
# one 64-bit load and one 64-bit store - confirm.
#
# Direction (unsigned compare): when to >= from the code runs cla_CopyLeft,
# which indexes both arrays with the decremented count and so copies from
# the highest element down; otherwise cla_CopyRight copies upwards from the
# first element.
513 .p2align 4,,15 | |
4006 | 514 ELF_TYPE(_Copy_conjoint_jlongs_atomic,@function) |
3960 | 515 SYMBOL(_Copy_conjoint_jlongs_atomic): |
516 movl 4+8(%esp),%ecx # count | |
517 movl 4+0(%esp),%eax # from | |
518 movl 4+4(%esp),%edx # to | |
519 cmpl %eax,%edx | |
520 jae cla_CopyLeft | |
521 cla_CopyRight: | |
# %edx becomes (to - from), so (%edx,%eax,1) addresses the destination for
# the current source cursor in %eax; the loop is entered at its test
522 subl %eax,%edx | |
523 jmp 2f | |
524 .p2align 4,,15 | |
525 1: fildll (%eax) | |
526 fistpll (%edx,%eax,1) | |
527 addl $8,%eax | |
528 2: subl $1,%ecx | |
529 jge 1b | |
530 ret | |
531 .p2align 4,,15 | |
# loop body for cla_CopyLeft; the entry label sits at the test below
532 3: fildll (%eax,%ecx,8) | |
533 fistpll (%edx,%ecx,8) | |
534 cla_CopyLeft: | |
535 subl $1,%ecx | |
536 jge 3b | |
537 ret | |
538 | |
539 # Support for void Copy::arrayof_conjoint_jshorts(void* from, | |
540 # void* to, | |
541 # size_t count) | |
#
# Variant of the jshort array copy whose ascending path uses the MMX
# registers %mm0 to %mm2 for large copies.
#
# Stack arguments at entry: 4(%esp) = from, 8(%esp) = to, 12(%esp) = count,
# where count is a number of 2-byte elements.
# %esi = source cursor, %edi = destination cursor, %ecx = dword count,
# %eax = original element count, %edx = scratch. %esi and %edi are pushed on
# entry and popped before every ret.
#
# Direction (unsigned compares): ascending copy at mmx_acs_CopyRight when
# to <= from or to > from + count*2 - 2, otherwise descending copy at
# mmx_acs_CopyLeft, which does not use MMX, runs with the direction flag set
# (std) and clears it (cld) before returning.
542 .p2align 4,,15 | |
4006 | 543 ELF_TYPE(_mmx_Copy_arrayof_conjoint_jshorts,@function) |
3960 | 544 SYMBOL(_mmx_Copy_arrayof_conjoint_jshorts): |
545 pushl %esi | |
546 movl 4+12(%esp),%ecx | |
547 pushl %edi | |
548 movl 8+ 4(%esp),%esi | |
549 movl 8+ 8(%esp),%edi | |
550 cmpl %esi,%edi | |
551 leal -2(%esi,%ecx,2),%eax | |
# leal leaves the flags untouched, so this jbe tests to against from
552 jbe mmx_acs_CopyRight | |
553 cmpl %eax,%edi | |
554 jbe mmx_acs_CopyLeft | |
555 mmx_acs_CopyRight: | |
# %eax keeps the element count; %ecx becomes the dword count. Fewer than
# 33 dwords are copied by the plain dword loop at label 1.
556 movl %ecx,%eax | |
557 sarl %ecx | |
558 je 5f | |
559 cmpl $33,%ecx | |
560 jae 3f | |
4006 | 561 1: subl %esi,%edi |
3960 | 562 .p2align 4,,15 |
563 2: movl (%esi),%edx | |
564 movl %edx,(%edi,%esi,1) | |
565 addl $4,%esi | |
566 subl $1,%ecx | |
567 jnz 2b | |
568 addl %esi,%edi | |
4006 | 569 jmp 5f |
# A single smovl copies one dword and steps both cursors.
# NOTE(review): this relies on the direction flag being clear at entry -
# confirm against the calling convention.
3960 | 570 3: smovl # align to 8 bytes, we know we are 4 byte aligned to start |
571 subl $1,%ecx | |
# Each pass of the loop below moves 64 bytes (16 dwords) through %mm0 to
# %mm2, with loads and stores interleaved; it repeats while at least 16
# dwords remain.
572 4: .p2align 4,,15 | |
573 movq 0(%esi),%mm0 | |
574 addl $64,%edi | |
575 movq 8(%esi),%mm1 | |
576 subl $16,%ecx | |
577 movq 16(%esi),%mm2 | |
578 movq %mm0,-64(%edi) | |
579 movq 24(%esi),%mm0 | |
580 movq %mm1,-56(%edi) | |
581 movq 32(%esi),%mm1 | |
582 movq %mm2,-48(%edi) | |
583 movq 40(%esi),%mm2 | |
584 movq %mm0,-40(%edi) | |
585 movq 48(%esi),%mm0 | |
586 movq %mm1,-32(%edi) | |
587 movq 56(%esi),%mm1 | |
588 movq %mm2,-24(%edi) | |
589 movq %mm0,-16(%edi) | |
590 addl $64,%esi | |
591 movq %mm1,-8(%edi) | |
592 cmpl $16,%ecx | |
593 jge 4b | |
# emms empties the MMX state. testl clears the carry flag, so the ja below
# is taken exactly when %ecx is non-zero and sends the leftover dwords
# through the plain loop at label 1.
594 emms | |
4006 | 595 testl %ecx,%ecx |
596 ja 1b | |
# an odd element count leaves exactly one trailing element
3960 | 597 5: andl $1,%eax |
598 je 7f | |
599 6: movw (%esi),%dx | |
600 movw %dx,(%edi) | |
4006 | 601 7: popl %edi |
3960 | 602 popl %esi |
603 ret | |
604 mmx_acs_CopyLeft: | |
# descending copy: both cursors start at the last whole dword
605 std | |
606 leal -4(%edi,%ecx,2),%edi | |
607 movl %eax,%esi | |
608 movl %ecx,%eax | |
609 subl $2,%esi | |
610 sarl %ecx | |
611 je 4f | |
612 cmpl $32,%ecx | |
613 ja 3f | |
614 subl %esi,%edi | |
615 .p2align 4,,15 | |
616 2: movl (%esi),%edx | |
617 movl %edx,(%edi,%esi,1) | |
618 subl $4,%esi | |
619 subl $1,%ecx | |
620 jnz 2b | |
621 addl %esi,%edi | |
622 jmp 4f | |
623 3: rep; smovl | |
624 4: andl $1,%eax | |
625 je 6f | |
# after the dword copies both cursors sit 4 bytes below the last dword
# copied; adding 2 addresses the single element still to be copied
626 addl $2,%esi | |
627 addl $2,%edi | |
628 5: movw (%esi),%dx | |
629 movw %dx,(%edi) | |
630 6: cld | |
631 popl %edi | |
632 popl %esi | |
633 ret | |
634 | |
635 | |
636 # Support for jlong Atomic::cmpxchg(jlong exchange_value, | |
637 # volatile jlong* dest, | |
638 # jlong compare_value, | |
639 # bool is_MP) | |
640 # | |
# All arguments are read from the stack; the offsets in the comments below
# are relative to %esp after the two pushes.
#
# cmpxchg8b compares %edx:%eax with the 8 bytes at (%edi). On a match it
# stores %ecx:%ebx there; otherwise it loads the memory value into
# %edx:%eax. Either way %edx:%eax ends up holding the value that was at
# dest, and that pair is what is left in the registers at ret.
#
# %ebx and %edi are pushed on entry and popped before ret; %eax, %ecx and
# %edx are overwritten.
641 .p2align 4,,15 | |
4006 | 642 ELF_TYPE(_Atomic_cmpxchg_long,@function) |
3960 | 643 SYMBOL(_Atomic_cmpxchg_long): |
644 # 8(%esp) : return PC | |
645 pushl %ebx # 4(%esp) : old %ebx | |
646 pushl %edi # 0(%esp) : old %edi | |
647 movl 12(%esp), %ebx # 12(%esp) : exchange_value (low) | |
648 movl 16(%esp), %ecx # 16(%esp) : exchange_value (high) | |
649 movl 24(%esp), %eax # 24(%esp) : compare_value (low) | |
650 movl 28(%esp), %edx # 28(%esp) : compare_value (high) | |
651 movl 20(%esp), %edi # 20(%esp) : dest | |
652 cmpl $0, 32(%esp) # 32(%esp) : is_MP | |
# when is_MP is zero the branch lands on label 1, past the lock prefix, so
# cmpxchg8b runs without it
653 je 1f | |
654 lock | |
655 1: cmpxchg8b (%edi) | |
656 popl %edi | |
657 popl %ebx | |
658 ret | |
659 | |
660 | |
661 # Support for jlong Atomic::load and Atomic::store. | |
662 # void _Atomic_move_long(volatile jlong* src, volatile jlong* dst) | |
# Stack arguments at entry: 4(%esp) = src, 8(%esp) = dst.
# The 8 bytes at src are loaded onto the x87 stack as a 64-bit integer with
# fildll and written to dst with fistpll, which also pops the x87 stack.
# %eax is used for both pointers in turn and is therefore overwritten.
663 .p2align 4,,15 | |
4006 | 664 ELF_TYPE(_Atomic_move_long,@function) |
3960 | 665 SYMBOL(_Atomic_move_long): |
666 movl 4(%esp), %eax # src | |
667 fildll (%eax) | |
668 movl 8(%esp), %eax # dest | |
669 fistpll (%eax) | |
670 ret | |
671 |