comparison src/cpu/x86/vm/x86_32.ad @ 4761:65149e74c706

7121648: Use 3-operands SIMD instructions on x86 with AVX Summary: Use 3-operands SIMD instructions in C2 generated code for machines with AVX. Reviewed-by: never
author kvn
date Tue, 20 Dec 2011 00:55:02 -0800
parents 127b3692c168
children 1dc233a8c7fe
comparison
equal deleted inserted replaced
4760:669f6a7d5b70 4761:65149e74c706
1773 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1773 enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1774 $$$emit8$primary; 1774 $$$emit8$primary;
1775 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1775 emit_cc(cbuf, $secondary, $cop$$cmpcode);
1776 %} 1776 %}
1777 1777
1778 enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV 1778 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1779 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1779 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1780 emit_d8(cbuf, op >> 8 ); 1780 emit_d8(cbuf, op >> 8 );
1781 emit_d8(cbuf, op & 255); 1781 emit_d8(cbuf, op & 255);
1782 %} 1782 %}
1783 1783
2061 enc_class Con32 (immI src) %{ // Con32(storeImmI) 2061 enc_class Con32 (immI src) %{ // Con32(storeImmI)
2062 // Output immediate 2062 // Output immediate
2063 $$$emit32$src$$constant; 2063 $$$emit32$src$$constant;
2064 %} 2064 %}
2065 2065
2066 enc_class Con32F_as_bits(immF src) %{ // storeF_imm 2066 enc_class Con32FPR_as_bits(immFPR src) %{ // storeF_imm
2067 // Output Float immediate bits 2067 // Output Float immediate bits
2068 jfloat jf = $src$$constant; 2068 jfloat jf = $src$$constant;
2069 int jf_as_bits = jint_cast( jf ); 2069 int jf_as_bits = jint_cast( jf );
2070 emit_d32(cbuf, jf_as_bits); 2070 emit_d32(cbuf, jf_as_bits);
2071 %} 2071 %}
2072 2072
2073 enc_class Con32XF_as_bits(immXF src) %{ // storeX_imm 2073 enc_class Con32F_as_bits(immF src) %{ // storeX_imm
2074 // Output Float immediate bits 2074 // Output Float immediate bits
2075 jfloat jf = $src$$constant; 2075 jfloat jf = $src$$constant;
2076 int jf_as_bits = jint_cast( jf ); 2076 int jf_as_bits = jint_cast( jf );
2077 emit_d32(cbuf, jf_as_bits); 2077 emit_d32(cbuf, jf_as_bits);
2078 %} 2078 %}
2281 // move dst,src 2281 // move dst,src
2282 emit_opcode(cbuf,0x8B); 2282 emit_opcode(cbuf,0x8B);
2283 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2283 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2284 %} 2284 %}
2285 2285
2286 enc_class enc_FP_store(memory mem, regD src) %{ 2286 enc_class enc_FPR_store(memory mem, regDPR src) %{
2287 // If src is FPR1, we can just FST to store it. 2287 // If src is FPR1, we can just FST to store it.
2288 // Else we need to FLD it to FPR1, then FSTP to store/pop it. 2288 // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2289 int reg_encoding = 0x2; // Just store 2289 int reg_encoding = 0x2; // Just store
2290 int base = $mem$$base; 2290 int base = $mem$$base;
2291 int index = $mem$$index; 2291 int index = $mem$$index;
2430 %} 2430 %}
2431 2431
2432 2432
2433 // ----------------- Encodings for floating point unit ----------------- 2433 // ----------------- Encodings for floating point unit -----------------
2434 // May leave result in FPU-TOS or FPU reg depending on opcodes 2434 // May leave result in FPU-TOS or FPU reg depending on opcodes
2435 enc_class OpcReg_F (regF src) %{ // FMUL, FDIV 2435 enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV
2436 $$$emit8$primary; 2436 $$$emit8$primary;
2437 emit_rm(cbuf, 0x3, $secondary, $src$$reg ); 2437 emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2438 %} 2438 %}
2439 2439
2440 // Pop argument in FPR0 with FSTP ST(0) 2440 // Pop argument in FPR0 with FSTP ST(0)
2442 emit_opcode( cbuf, 0xDD ); 2442 emit_opcode( cbuf, 0xDD );
2443 emit_d8( cbuf, 0xD8 ); 2443 emit_d8( cbuf, 0xD8 );
2444 %} 2444 %}
2445 2445
2446 // !!!!! equivalent to Pop_Reg_F 2446 // !!!!! equivalent to Pop_Reg_F
2447 enc_class Pop_Reg_D( regD dst ) %{ 2447 enc_class Pop_Reg_DPR( regDPR dst ) %{
2448 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2448 emit_opcode( cbuf, 0xDD ); // FSTP ST(i)
2449 emit_d8( cbuf, 0xD8+$dst$$reg ); 2449 emit_d8( cbuf, 0xD8+$dst$$reg );
2450 %} 2450 %}
2451 2451
2452 enc_class Push_Reg_D( regD dst ) %{ 2452 enc_class Push_Reg_DPR( regDPR dst ) %{
2453 emit_opcode( cbuf, 0xD9 ); 2453 emit_opcode( cbuf, 0xD9 );
2454 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1) 2454 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1)
2455 %} 2455 %}
2456 2456
2457 enc_class strictfp_bias1( regD dst ) %{ 2457 enc_class strictfp_bias1( regDPR dst ) %{
2458 emit_opcode( cbuf, 0xDB ); // FLD m80real 2458 emit_opcode( cbuf, 0xDB ); // FLD m80real
2459 emit_opcode( cbuf, 0x2D ); 2459 emit_opcode( cbuf, 0x2D );
2460 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() ); 2460 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2461 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2461 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0
2462 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2462 emit_opcode( cbuf, 0xC8+$dst$$reg );
2463 %} 2463 %}
2464 2464
2465 enc_class strictfp_bias2( regD dst ) %{ 2465 enc_class strictfp_bias2( regDPR dst ) %{
2466 emit_opcode( cbuf, 0xDB ); // FLD m80real 2466 emit_opcode( cbuf, 0xDB ); // FLD m80real
2467 emit_opcode( cbuf, 0x2D ); 2467 emit_opcode( cbuf, 0x2D );
2468 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() ); 2468 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2469 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2469 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0
2470 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2470 emit_opcode( cbuf, 0xC8+$dst$$reg );
2486 // Push the integer in stackSlot 'src' onto FP-stack 2486 // Push the integer in stackSlot 'src' onto FP-stack
2487 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2487 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src]
2488 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 2488 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2489 %} 2489 %}
2490 2490
2491 // Push the float in stackSlot 'src' onto FP-stack
2492 enc_class Push_Mem_F( memory src ) %{ // FLD_S [ESP+src]
2493 store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp );
2494 %}
2495
2496 // Push the double in stackSlot 'src' onto FP-stack
2497 enc_class Push_Mem_D( memory src ) %{ // FLD_D [ESP+src]
2498 store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp );
2499 %}
2500
2501 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2491 // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2502 enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2492 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2503 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 2493 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2504 %} 2494 %}
2505 2495
2506 // Same as Pop_Mem_F except for opcode 2496 // Same as Pop_Mem_F except for opcode
2507 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2497 // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2508 enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2498 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2509 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 2499 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2510 %} 2500 %}
2511 2501
2512 enc_class Pop_Reg_F( regF dst ) %{ 2502 enc_class Pop_Reg_FPR( regFPR dst ) %{
2513 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2503 emit_opcode( cbuf, 0xDD ); // FSTP ST(i)
2514 emit_d8( cbuf, 0xD8+$dst$$reg ); 2504 emit_d8( cbuf, 0xD8+$dst$$reg );
2515 %} 2505 %}
2516 2506
2517 enc_class Push_Reg_F( regF dst ) %{ 2507 enc_class Push_Reg_FPR( regFPR dst ) %{
2518 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2508 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
2519 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2509 emit_d8( cbuf, 0xC0-1+$dst$$reg );
2520 %} 2510 %}
2521 2511
2522 // Push FPU's float to a stack-slot, and pop FPU-stack 2512 // Push FPU's float to a stack-slot, and pop FPU-stack
2523 enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{ 2513 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2524 int pop = 0x02; 2514 int pop = 0x02;
2525 if ($src$$reg != FPR1L_enc) { 2515 if ($src$$reg != FPR1L_enc) {
2526 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2516 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
2527 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2517 emit_d8( cbuf, 0xC0-1+$src$$reg );
2528 pop = 0x03; 2518 pop = 0x03;
2529 } 2519 }
2530 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2520 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst]
2531 %} 2521 %}
2532 2522
2533 // Push FPU's double to a stack-slot, and pop FPU-stack 2523 // Push FPU's double to a stack-slot, and pop FPU-stack
2534 enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{ 2524 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2535 int pop = 0x02; 2525 int pop = 0x02;
2536 if ($src$$reg != FPR1L_enc) { 2526 if ($src$$reg != FPR1L_enc) {
2537 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2527 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
2538 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2528 emit_d8( cbuf, 0xC0-1+$src$$reg );
2539 pop = 0x03; 2529 pop = 0x03;
2540 } 2530 }
2541 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] 2531 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst]
2542 %} 2532 %}
2543 2533
2544 // Push FPU's double to a FPU-stack-slot, and pop FPU-stack 2534 // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2545 enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{ 2535 enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2546 int pop = 0xD0 - 1; // -1 since we skip FLD 2536 int pop = 0xD0 - 1; // -1 since we skip FLD
2547 if ($src$$reg != FPR1L_enc) { 2537 if ($src$$reg != FPR1L_enc) {
2548 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1) 2538 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1)
2549 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2539 emit_d8( cbuf, 0xC0-1+$src$$reg );
2550 pop = 0xD8; 2540 pop = 0xD8;
2552 emit_opcode( cbuf, 0xDD ); 2542 emit_opcode( cbuf, 0xDD );
2553 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i) 2543 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i)
2554 %} 2544 %}
2555 2545
2556 2546
2557 enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{ 2547 enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2558 MacroAssembler masm(&cbuf);
2559 masm.fld_s( $src1$$reg-1); // nothing at TOS, load TOS from src1.reg
2560 masm.fmul( $src2$$reg+0); // value at TOS
2561 masm.fadd( $src$$reg+0); // value at TOS
2562 masm.fstp_d( $dst$$reg+0); // value at TOS, popped off after store
2563 %}
2564
2565
2566 enc_class Push_Reg_Mod_D( regD dst, regD src) %{
2567 // load dst in FPR0 2548 // load dst in FPR0
2568 emit_opcode( cbuf, 0xD9 ); 2549 emit_opcode( cbuf, 0xD9 );
2569 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2550 emit_d8( cbuf, 0xC0-1+$dst$$reg );
2570 if ($src$$reg != FPR1L_enc) { 2551 if ($src$$reg != FPR1L_enc) {
2571 // fincstp 2552 // fincstp
2579 emit_opcode (cbuf, 0xD9); 2560 emit_opcode (cbuf, 0xD9);
2580 emit_opcode (cbuf, 0xF6); 2561 emit_opcode (cbuf, 0xF6);
2581 } 2562 }
2582 %} 2563 %}
2583 2564
2584 enc_class Push_ModD_encoding(regXD src0, regXD src1) %{ 2565 enc_class Push_ModD_encoding(regD src0, regD src1) %{
2585 MacroAssembler _masm(&cbuf); 2566 MacroAssembler _masm(&cbuf);
2586 __ subptr(rsp, 8); 2567 __ subptr(rsp, 8);
2587 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 2568 __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2588 __ fld_d(Address(rsp, 0)); 2569 __ fld_d(Address(rsp, 0));
2589 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 2570 __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2590 __ fld_d(Address(rsp, 0)); 2571 __ fld_d(Address(rsp, 0));
2591 %} 2572 %}
2592 2573
2593 enc_class Push_ModX_encoding(regX src0, regX src1) %{ 2574 enc_class Push_ModF_encoding(regF src0, regF src1) %{
2594 MacroAssembler _masm(&cbuf); 2575 MacroAssembler _masm(&cbuf);
2595 __ subptr(rsp, 4); 2576 __ subptr(rsp, 4);
2596 __ movflt(Address(rsp, 0), $src1$$XMMRegister); 2577 __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2597 __ fld_s(Address(rsp, 0)); 2578 __ fld_s(Address(rsp, 0));
2598 __ movflt(Address(rsp, 0), $src0$$XMMRegister); 2579 __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2599 __ fld_s(Address(rsp, 0)); 2580 __ fld_s(Address(rsp, 0));
2600 %} 2581 %}
2601 2582
2602 enc_class Push_ResultXD(regXD dst) %{ 2583 enc_class Push_ResultD(regD dst) %{
2603 MacroAssembler _masm(&cbuf); 2584 MacroAssembler _masm(&cbuf);
2604 __ fstp_d(Address(rsp, 0)); 2585 __ fstp_d(Address(rsp, 0));
2605 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 2586 __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2606 __ addptr(rsp, 8); 2587 __ addptr(rsp, 8);
2607 %} 2588 %}
2608 2589
2609 enc_class Push_ResultX(regX dst, immI d8) %{ 2590 enc_class Push_ResultF(regF dst, immI d8) %{
2610 MacroAssembler _masm(&cbuf); 2591 MacroAssembler _masm(&cbuf);
2611 __ fstp_s(Address(rsp, 0)); 2592 __ fstp_s(Address(rsp, 0));
2612 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 2593 __ movflt($dst$$XMMRegister, Address(rsp, 0));
2613 __ addptr(rsp, $d8$$constant); 2594 __ addptr(rsp, $d8$$constant);
2614 %} 2595 %}
2615 2596
2616 enc_class Push_SrcXD(regXD src) %{ 2597 enc_class Push_SrcD(regD src) %{
2617 MacroAssembler _masm(&cbuf); 2598 MacroAssembler _masm(&cbuf);
2618 __ subptr(rsp, 8); 2599 __ subptr(rsp, 8);
2619 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2600 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2620 __ fld_d(Address(rsp, 0)); 2601 __ fld_d(Address(rsp, 0));
2621 %} 2602 %}
2628 enc_class pop_stack_temp_qword() %{ 2609 enc_class pop_stack_temp_qword() %{
2629 MacroAssembler _masm(&cbuf); 2610 MacroAssembler _masm(&cbuf);
2630 __ addptr(rsp, 8); 2611 __ addptr(rsp, 8);
2631 %} 2612 %}
2632 2613
2633 enc_class push_xmm_to_fpr1(regXD src) %{ 2614 enc_class push_xmm_to_fpr1(regD src) %{
2634 MacroAssembler _masm(&cbuf); 2615 MacroAssembler _masm(&cbuf);
2635 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2616 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2636 __ fld_d(Address(rsp, 0)); 2617 __ fld_d(Address(rsp, 0));
2637 %} 2618 %}
2638 2619
2673 emit_d32(cbuf,0); 2654 emit_d32(cbuf,0);
2674 emit_opcode(cbuf,0xDC); // fmul dword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q 2655 emit_opcode(cbuf,0xDC); // fmul dword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q
2675 encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false); 2656 encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
2676 %} 2657 %}
2677 2658
2678 // enc_class Pop_Reg_Mod_D( regD dst, regD src) 2659 enc_class Push_Result_Mod_DPR( regDPR src) %{
2679 // was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X()
2680
2681 enc_class Push_Result_Mod_D( regD src) %{
2682 if ($src$$reg != FPR1L_enc) { 2660 if ($src$$reg != FPR1L_enc) {
2683 // fincstp 2661 // fincstp
2684 emit_opcode (cbuf, 0xD9); 2662 emit_opcode (cbuf, 0xD9);
2685 emit_opcode (cbuf, 0xF7); 2663 emit_opcode (cbuf, 0xF7);
2686 // FXCH FPR1 with src 2664 // FXCH FPR1 with src
2705 // jnp ::skip 2683 // jnp ::skip
2706 emit_opcode( cbuf, 0x7B ); 2684 emit_opcode( cbuf, 0x7B );
2707 emit_opcode( cbuf, 0x05 ); 2685 emit_opcode( cbuf, 0x05 );
2708 %} 2686 %}
2709 2687
2710 enc_class emitModD() %{ 2688 enc_class emitModDPR() %{
2711 // fprem must be iterative 2689 // fprem must be iterative
2712 // :: loop 2690 // :: loop
2713 // fprem 2691 // fprem
2714 emit_opcode( cbuf, 0xD9 ); 2692 emit_opcode( cbuf, 0xD9 );
2715 emit_opcode( cbuf, 0xF8 ); 2693 emit_opcode( cbuf, 0xF8 );
3585 // Convert a double to an int. Java semantics require we do complex 3563 // Convert a double to an int. Java semantics require we do complex
3586 // manglelations in the corner cases. So we set the rounding mode to 3564 // manglelations in the corner cases. So we set the rounding mode to
3587 // 'zero', store the darned double down as an int, and reset the 3565 // 'zero', store the darned double down as an int, and reset the
3588 // rounding mode to 'nearest'. The hardware throws an exception which 3566 // rounding mode to 'nearest'. The hardware throws an exception which
3589 // patches up the correct value directly to the stack. 3567 // patches up the correct value directly to the stack.
3590 enc_class D2I_encoding( regD src ) %{ 3568 enc_class DPR2I_encoding( regDPR src ) %{
3591 // Flip to round-to-zero mode. We attempted to allow invalid-op 3569 // Flip to round-to-zero mode. We attempted to allow invalid-op
3592 // exceptions here, so that a NAN or other corner-case value will 3570 // exceptions here, so that a NAN or other corner-case value will
3593 // thrown an exception (but normal values get converted at full speed). 3571 // thrown an exception (but normal values get converted at full speed).
3594 // However, I2C adapters and other float-stack manglers leave pending 3572 // However, I2C adapters and other float-stack manglers leave pending
3595 // invalid-op exceptions hanging. We would have to clear them before 3573 // invalid-op exceptions hanging. We would have to clear them before
3628 emit_opcode(cbuf,0xE8); // Call into runtime 3606 emit_opcode(cbuf,0xE8); // Call into runtime
3629 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3607 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3630 // Carry on here... 3608 // Carry on here...
3631 %} 3609 %}
3632 3610
3633 enc_class D2L_encoding( regD src ) %{ 3611 enc_class DPR2L_encoding( regDPR src ) %{
3634 emit_opcode(cbuf,0xD9); // FLDCW trunc 3612 emit_opcode(cbuf,0xD9); // FLDCW trunc
3635 emit_opcode(cbuf,0x2D); 3613 emit_opcode(cbuf,0x2D);
3636 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 3614 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3637 // Allocate a word 3615 // Allocate a word
3638 emit_opcode(cbuf,0x83); // SUB ESP,8 3616 emit_opcode(cbuf,0x83); // SUB ESP,8
3670 emit_opcode(cbuf,0xE8); // Call into runtime 3648 emit_opcode(cbuf,0xE8); // Call into runtime
3671 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3649 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3672 // Carry on here... 3650 // Carry on here...
3673 %} 3651 %}
3674 3652
3675 enc_class FMul_ST_reg( eRegF src1 ) %{ 3653 enc_class FMul_ST_reg( eRegFPR src1 ) %{
3676 // Operand was loaded from memory into fp ST (stack top) 3654 // Operand was loaded from memory into fp ST (stack top)
3677 // FMUL ST,$src /* D8 C8+i */ 3655 // FMUL ST,$src /* D8 C8+i */
3678 emit_opcode(cbuf, 0xD8); 3656 emit_opcode(cbuf, 0xD8);
3679 emit_opcode(cbuf, 0xC8 + $src1$$reg); 3657 emit_opcode(cbuf, 0xC8 + $src1$$reg);
3680 %} 3658 %}
3681 3659
3682 enc_class FAdd_ST_reg( eRegF src2 ) %{ 3660 enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3683 // FADDP ST,src2 /* D8 C0+i */ 3661 // FADDP ST,src2 /* D8 C0+i */
3684 emit_opcode(cbuf, 0xD8); 3662 emit_opcode(cbuf, 0xD8);
3685 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3663 emit_opcode(cbuf, 0xC0 + $src2$$reg);
3686 //could use FADDP src2,fpST /* DE C0+i */ 3664 //could use FADDP src2,fpST /* DE C0+i */
3687 %} 3665 %}
3688 3666
3689 enc_class FAddP_reg_ST( eRegF src2 ) %{ 3667 enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3690 // FADDP src2,ST /* DE C0+i */ 3668 // FADDP src2,ST /* DE C0+i */
3691 emit_opcode(cbuf, 0xDE); 3669 emit_opcode(cbuf, 0xDE);
3692 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3670 emit_opcode(cbuf, 0xC0 + $src2$$reg);
3693 %} 3671 %}
3694 3672
3695 enc_class subF_divF_encode( eRegF src1, eRegF src2) %{ 3673 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3696 // Operand has been loaded into fp ST (stack top) 3674 // Operand has been loaded into fp ST (stack top)
3697 // FSUB ST,$src1 3675 // FSUB ST,$src1
3698 emit_opcode(cbuf, 0xD8); 3676 emit_opcode(cbuf, 0xD8);
3699 emit_opcode(cbuf, 0xE0 + $src1$$reg); 3677 emit_opcode(cbuf, 0xE0 + $src1$$reg);
3700 3678
3701 // FDIV 3679 // FDIV
3702 emit_opcode(cbuf, 0xD8); 3680 emit_opcode(cbuf, 0xD8);
3703 emit_opcode(cbuf, 0xF0 + $src2$$reg); 3681 emit_opcode(cbuf, 0xF0 + $src2$$reg);
3704 %} 3682 %}
3705 3683
3706 enc_class MulFAddF (eRegF src1, eRegF src2) %{ 3684 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3707 // Operand was loaded from memory into fp ST (stack top) 3685 // Operand was loaded from memory into fp ST (stack top)
3708 // FADD ST,$src /* D8 C0+i */ 3686 // FADD ST,$src /* D8 C0+i */
3709 emit_opcode(cbuf, 0xD8); 3687 emit_opcode(cbuf, 0xD8);
3710 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3688 emit_opcode(cbuf, 0xC0 + $src1$$reg);
3711 3689
3713 emit_opcode(cbuf, 0xD8); 3691 emit_opcode(cbuf, 0xD8);
3714 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3692 emit_opcode(cbuf, 0xC8 + $src2$$reg);
3715 %} 3693 %}
3716 3694
3717 3695
3718 enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{ 3696 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3719 // Operand was loaded from memory into fp ST (stack top) 3697 // Operand was loaded from memory into fp ST (stack top)
3720 // FADD ST,$src /* D8 C0+i */ 3698 // FADD ST,$src /* D8 C0+i */
3721 emit_opcode(cbuf, 0xD8); 3699 emit_opcode(cbuf, 0xD8);
3722 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3700 emit_opcode(cbuf, 0xC0 + $src1$$reg);
3723 3701
4146 format %{ %} 4124 format %{ %}
4147 interface(CONST_INTER); 4125 interface(CONST_INTER);
4148 %} 4126 %}
4149 4127
4150 //Double Immediate zero 4128 //Double Immediate zero
4151 operand immD0() %{ 4129 operand immDPR0() %{
4152 // Do additional (and counter-intuitive) test against NaN to work around VC++ 4130 // Do additional (and counter-intuitive) test against NaN to work around VC++
4153 // bug that generates code such that NaNs compare equal to 0.0 4131 // bug that generates code such that NaNs compare equal to 0.0
4154 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) ); 4132 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
4155 match(ConD); 4133 match(ConD);
4156 4134
4158 format %{ %} 4136 format %{ %}
4159 interface(CONST_INTER); 4137 interface(CONST_INTER);
4160 %} 4138 %}
4161 4139
4162 // Double Immediate one 4140 // Double Immediate one
4163 operand immD1() %{ 4141 operand immDPR1() %{
4164 predicate( UseSSE<=1 && n->getd() == 1.0 ); 4142 predicate( UseSSE<=1 && n->getd() == 1.0 );
4165 match(ConD); 4143 match(ConD);
4166 4144
4167 op_cost(5); 4145 op_cost(5);
4168 format %{ %} 4146 format %{ %}
4169 interface(CONST_INTER); 4147 interface(CONST_INTER);
4170 %} 4148 %}
4171 4149
4172 // Double Immediate 4150 // Double Immediate
4173 operand immD() %{ 4151 operand immDPR() %{
4174 predicate(UseSSE<=1); 4152 predicate(UseSSE<=1);
4175 match(ConD); 4153 match(ConD);
4176 4154
4177 op_cost(5); 4155 op_cost(5);
4178 format %{ %} 4156 format %{ %}
4179 interface(CONST_INTER); 4157 interface(CONST_INTER);
4180 %} 4158 %}
4181 4159
4182 operand immXD() %{ 4160 operand immD() %{
4183 predicate(UseSSE>=2); 4161 predicate(UseSSE>=2);
4184 match(ConD); 4162 match(ConD);
4185 4163
4186 op_cost(5); 4164 op_cost(5);
4187 format %{ %} 4165 format %{ %}
4188 interface(CONST_INTER); 4166 interface(CONST_INTER);
4189 %} 4167 %}
4190 4168
4191 // Double Immediate zero 4169 // Double Immediate zero
4192 operand immXD0() %{ 4170 operand immD0() %{
4193 // Do additional (and counter-intuitive) test against NaN to work around VC++ 4171 // Do additional (and counter-intuitive) test against NaN to work around VC++
4194 // bug that generates code such that NaNs compare equal to 0.0 AND do not 4172 // bug that generates code such that NaNs compare equal to 0.0 AND do not
4195 // compare equal to -0.0. 4173 // compare equal to -0.0.
4196 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 ); 4174 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
4197 match(ConD); 4175 match(ConD);
4199 format %{ %} 4177 format %{ %}
4200 interface(CONST_INTER); 4178 interface(CONST_INTER);
4201 %} 4179 %}
4202 4180
4203 // Float Immediate zero 4181 // Float Immediate zero
4204 operand immF0() %{ 4182 operand immFPR0() %{
4205 predicate(UseSSE == 0 && n->getf() == 0.0F); 4183 predicate(UseSSE == 0 && n->getf() == 0.0F);
4206 match(ConF); 4184 match(ConF);
4207 4185
4208 op_cost(5); 4186 op_cost(5);
4209 format %{ %} 4187 format %{ %}
4210 interface(CONST_INTER); 4188 interface(CONST_INTER);
4211 %} 4189 %}
4212 4190
4213 // Float Immediate one 4191 // Float Immediate one
4214 operand immF1() %{ 4192 operand immFPR1() %{
4215 predicate(UseSSE == 0 && n->getf() == 1.0F); 4193 predicate(UseSSE == 0 && n->getf() == 1.0F);
4216 match(ConF); 4194 match(ConF);
4217 4195
4218 op_cost(5); 4196 op_cost(5);
4219 format %{ %} 4197 format %{ %}
4220 interface(CONST_INTER); 4198 interface(CONST_INTER);
4221 %} 4199 %}
4222 4200
4223 // Float Immediate 4201 // Float Immediate
4224 operand immF() %{ 4202 operand immFPR() %{
4225 predicate( UseSSE == 0 ); 4203 predicate( UseSSE == 0 );
4226 match(ConF); 4204 match(ConF);
4227 4205
4228 op_cost(5); 4206 op_cost(5);
4229 format %{ %} 4207 format %{ %}
4230 interface(CONST_INTER); 4208 interface(CONST_INTER);
4231 %} 4209 %}
4232 4210
4233 // Float Immediate 4211 // Float Immediate
4234 operand immXF() %{ 4212 operand immF() %{
4235 predicate(UseSSE >= 1); 4213 predicate(UseSSE >= 1);
4236 match(ConF); 4214 match(ConF);
4237 4215
4238 op_cost(5); 4216 op_cost(5);
4239 format %{ %} 4217 format %{ %}
4240 interface(CONST_INTER); 4218 interface(CONST_INTER);
4241 %} 4219 %}
4242 4220
4243 // Float Immediate zero. Zero and not -0.0 4221 // Float Immediate zero. Zero and not -0.0
4244 operand immXF0() %{ 4222 operand immF0() %{
4245 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 ); 4223 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
4246 match(ConF); 4224 match(ConF);
4247 4225
4248 op_cost(5); 4226 op_cost(5);
4249 format %{ %} 4227 format %{ %}
4615 format %{ "FLAGS_LEGT" %} 4593 format %{ "FLAGS_LEGT" %}
4616 interface(REG_INTER); 4594 interface(REG_INTER);
4617 %} 4595 %}
4618 4596
4619 // Float register operands 4597 // Float register operands
4620 operand regD() %{ 4598 operand regDPR() %{
4621 predicate( UseSSE < 2 ); 4599 predicate( UseSSE < 2 );
4622 constraint(ALLOC_IN_RC(dbl_reg)); 4600 constraint(ALLOC_IN_RC(dbl_reg));
4623 match(RegD); 4601 match(RegD);
4624 match(regDPR1); 4602 match(regDPR1);
4625 match(regDPR2); 4603 match(regDPR2);
4626 format %{ %} 4604 format %{ %}
4627 interface(REG_INTER); 4605 interface(REG_INTER);
4628 %} 4606 %}
4629 4607
4630 operand regDPR1(regD reg) %{ 4608 operand regDPR1(regDPR reg) %{
4631 predicate( UseSSE < 2 ); 4609 predicate( UseSSE < 2 );
4632 constraint(ALLOC_IN_RC(dbl_reg0)); 4610 constraint(ALLOC_IN_RC(dbl_reg0));
4633 match(reg); 4611 match(reg);
4634 format %{ "FPR1" %} 4612 format %{ "FPR1" %}
4635 interface(REG_INTER); 4613 interface(REG_INTER);
4636 %} 4614 %}
4637 4615
4638 operand regDPR2(regD reg) %{ 4616 operand regDPR2(regDPR reg) %{
4639 predicate( UseSSE < 2 ); 4617 predicate( UseSSE < 2 );
4640 constraint(ALLOC_IN_RC(dbl_reg1)); 4618 constraint(ALLOC_IN_RC(dbl_reg1));
4641 match(reg); 4619 match(reg);
4642 format %{ "FPR2" %} 4620 format %{ "FPR2" %}
4643 interface(REG_INTER); 4621 interface(REG_INTER);
4644 %} 4622 %}
4645 4623
4646 operand regnotDPR1(regD reg) %{ 4624 operand regnotDPR1(regDPR reg) %{
4647 predicate( UseSSE < 2 ); 4625 predicate( UseSSE < 2 );
4648 constraint(ALLOC_IN_RC(dbl_notreg0)); 4626 constraint(ALLOC_IN_RC(dbl_notreg0));
4649 match(reg); 4627 match(reg);
4650 format %{ %} 4628 format %{ %}
4651 interface(REG_INTER); 4629 interface(REG_INTER);
4652 %} 4630 %}
4653 4631
4654 // XMM Double register operands 4632 // XMM Double register operands
4655 operand regXD() %{ 4633 operand regD() %{
4656 predicate( UseSSE>=2 ); 4634 predicate( UseSSE>=2 );
4657 constraint(ALLOC_IN_RC(xdb_reg)); 4635 constraint(ALLOC_IN_RC(xdb_reg));
4658 match(RegD); 4636 match(RegD);
4659 match(regXD6); 4637 match(regD6);
4660 match(regXD7); 4638 match(regD7);
4661 format %{ %} 4639 format %{ %}
4662 interface(REG_INTER); 4640 interface(REG_INTER);
4663 %} 4641 %}
4664 4642
4665 // XMM6 double register operands 4643 // XMM6 double register operands
4666 operand regXD6(regXD reg) %{ 4644 operand regD6(regD reg) %{
4667 predicate( UseSSE>=2 ); 4645 predicate( UseSSE>=2 );
4668 constraint(ALLOC_IN_RC(xdb_reg6)); 4646 constraint(ALLOC_IN_RC(xdb_reg6));
4669 match(reg); 4647 match(reg);
4670 format %{ "XMM6" %} 4648 format %{ "XMM6" %}
4671 interface(REG_INTER); 4649 interface(REG_INTER);
4672 %} 4650 %}
4673 4651
4674 // XMM7 double register operands 4652 // XMM7 double register operands
4675 operand regXD7(regXD reg) %{ 4653 operand regD7(regD reg) %{
4676 predicate( UseSSE>=2 ); 4654 predicate( UseSSE>=2 );
4677 constraint(ALLOC_IN_RC(xdb_reg7)); 4655 constraint(ALLOC_IN_RC(xdb_reg7));
4678 match(reg); 4656 match(reg);
4679 format %{ "XMM7" %} 4657 format %{ "XMM7" %}
4680 interface(REG_INTER); 4658 interface(REG_INTER);
4681 %} 4659 %}
4682 4660
4683 // Float register operands 4661 // Float register operands
4684 operand regF() %{ 4662 operand regFPR() %{
4685 predicate( UseSSE < 2 ); 4663 predicate( UseSSE < 2 );
4686 constraint(ALLOC_IN_RC(flt_reg)); 4664 constraint(ALLOC_IN_RC(flt_reg));
4687 match(RegF); 4665 match(RegF);
4688 match(regFPR1); 4666 match(regFPR1);
4689 format %{ %} 4667 format %{ %}
4690 interface(REG_INTER); 4668 interface(REG_INTER);
4691 %} 4669 %}
4692 4670
4693 // Float register operands 4671 // Float register operands
4694 operand regFPR1(regF reg) %{ 4672 operand regFPR1(regFPR reg) %{
4695 predicate( UseSSE < 2 ); 4673 predicate( UseSSE < 2 );
4696 constraint(ALLOC_IN_RC(flt_reg0)); 4674 constraint(ALLOC_IN_RC(flt_reg0));
4697 match(reg); 4675 match(reg);
4698 format %{ "FPR1" %} 4676 format %{ "FPR1" %}
4699 interface(REG_INTER); 4677 interface(REG_INTER);
4700 %} 4678 %}
4701 4679
4702 // XMM register operands 4680 // XMM register operands
4703 operand regX() %{ 4681 operand regF() %{
4704 predicate( UseSSE>=1 ); 4682 predicate( UseSSE>=1 );
4705 constraint(ALLOC_IN_RC(xmm_reg)); 4683 constraint(ALLOC_IN_RC(xmm_reg));
4706 match(RegF); 4684 match(RegF);
4707 format %{ %} 4685 format %{ %}
4708 interface(REG_INTER); 4686 interface(REG_INTER);
5442 cr : S3(read); 5420 cr : S3(read);
5443 DECODE : S0(2); // any 2 decoders 5421 DECODE : S0(2); // any 2 decoders
5444 %} 5422 %}
5445 5423
5446 // Conditional move double reg-reg 5424 // Conditional move double reg-reg
5447 pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{ 5425 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
5448 single_instruction; 5426 single_instruction;
5449 dst : S4(write); 5427 dst : S4(write);
5450 src : S3(read); 5428 src : S3(read);
5451 cr : S3(read); 5429 cr : S3(read);
5452 DECODE : S0; // any decoder 5430 DECODE : S0; // any decoder
5453 %} 5431 %}
5454 5432
5455 // Float reg-reg operation 5433 // Float reg-reg operation
5456 pipe_class fpu_reg(regD dst) %{ 5434 pipe_class fpu_reg(regDPR dst) %{
5457 instruction_count(2); 5435 instruction_count(2);
5458 dst : S3(read); 5436 dst : S3(read);
5459 DECODE : S0(2); // any 2 decoders 5437 DECODE : S0(2); // any 2 decoders
5460 FPU : S3; 5438 FPU : S3;
5461 %} 5439 %}
5462 5440
5463 // Float reg-reg operation 5441 // Float reg-reg operation
5464 pipe_class fpu_reg_reg(regD dst, regD src) %{ 5442 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
5465 instruction_count(2); 5443 instruction_count(2);
5466 dst : S4(write); 5444 dst : S4(write);
5467 src : S3(read); 5445 src : S3(read);
5468 DECODE : S0(2); // any 2 decoders 5446 DECODE : S0(2); // any 2 decoders
5469 FPU : S3; 5447 FPU : S3;
5470 %} 5448 %}
5471 5449
5472 // Float reg-reg operation 5450 // Float reg-reg operation
// Side-by-side diff rows: old revision on the left, new revision on the right.
// Pipeline class for a three-register FPU operation (dst plus two sources);
// this revision only changes the operand types from regD to regDPR.
// Counts as 3 instructions: dst is tagged S4(write), src1 and src2 are tagged
// S3(read), three DECODE resources are claimed at S0 and FPU is claimed as
// S3(2).
5473 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{ 5451 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
5474 instruction_count(3); 5452 instruction_count(3);
5475 dst : S4(write); 5453 dst : S4(write);
5476 src1 : S3(read); 5454 src1 : S3(read);
5477 src2 : S3(read); 5455 src2 : S3(read);
5478 DECODE : S0(3); // any 3 decoders 5456 DECODE : S0(3); // any 3 decoders
5479 FPU : S3(2); 5457 FPU : S3(2);
5480 %} 5458 %}
5481 5459
5482 // Float reg-reg operation 5460 // Float reg-reg operation
// Side-by-side diff rows: old revision on the left, new revision on the right.
// Pipeline class for a four-register FPU operation (dst plus three sources);
// this revision only changes the operand types from regD to regDPR.
// Counts as 4 instructions: dst is tagged S4(write), src1..src3 are tagged
// S3(read), DECODE is claimed as S0(4) and FPU as S3(2).
// NOTE(review): the trailing comment on the DECODE line says "any 3 decoders"
// while the declared count is 4; the comment looks stale in both revisions.
5483 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ 5461 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
5484 instruction_count(4); 5462 instruction_count(4);
5485 dst : S4(write); 5463 dst : S4(write);
5486 src1 : S3(read); 5464 src1 : S3(read);
5487 src2 : S3(read); 5465 src2 : S3(read);
5488 src3 : S3(read); 5466 src3 : S3(read);
5489 DECODE : S0(4); // any 3 decoders 5467 DECODE : S0(4); // any 3 decoders
5490 FPU : S3(2); 5468 FPU : S3(2);
5491 %} 5469 %}
5492 5470
5493 // Float reg-reg operation 5471 // Float reg-reg operation
5494 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{ 5472 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
5495 instruction_count(4); 5473 instruction_count(4);
5496 dst : S4(write); 5474 dst : S4(write);
5497 src1 : S3(read); 5475 src1 : S3(read);
5498 src2 : S3(read); 5476 src2 : S3(read);
5499 src3 : S3(read); 5477 src3 : S3(read);
5502 FPU : S3(2); 5480 FPU : S3(2);
5503 MEM : S3; 5481 MEM : S3;
5504 %} 5482 %}
5505 5483
5506 // Float reg-mem operation 5484 // Float reg-mem operation
// Side-by-side diff rows: old revision on the left, new revision on the right.
// Pipeline class for an FPU operation with a register destination and a memory
// source; this revision only changes the dst operand type regD -> regDPR.
// Counts as 2 instructions: dst is tagged S5(write), mem is tagged S3(read).
// Resources claimed: D0 at S0, DECODE at S1, FPU at S4 and MEM at S3.
5507 pipe_class fpu_reg_mem(regD dst, memory mem) %{ 5485 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
5508 instruction_count(2); 5486 instruction_count(2);
5509 dst : S5(write); 5487 dst : S5(write);
5510 mem : S3(read); 5488 mem : S3(read);
5511 D0 : S0; // big decoder only 5489 D0 : S0; // big decoder only
5512 DECODE : S1; // any decoder for FPU POP 5490 DECODE : S1; // any decoder for FPU POP
5513 FPU : S4; 5491 FPU : S4;
5514 MEM : S3; // any mem 5492 MEM : S3; // any mem
5515 %} 5493 %}
5516 5494
5517 // Float reg-mem operation 5495 // Float reg-mem operation
5518 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{ 5496 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
5519 instruction_count(3); 5497 instruction_count(3);
5520 dst : S5(write); 5498 dst : S5(write);
5521 src1 : S3(read); 5499 src1 : S3(read);
5522 mem : S3(read); 5500 mem : S3(read);
5523 D0 : S0; // big decoder only 5501 D0 : S0; // big decoder only
5525 FPU : S4; 5503 FPU : S4;
5526 MEM : S3; // any mem 5504 MEM : S3; // any mem
5527 %} 5505 %}
5528 5506
5529 // Float mem-reg operation 5507 // Float mem-reg operation
// Side-by-side diff rows: old revision on the left, new revision on the right.
// Pipeline class for an FPU operation with a memory operand and a register
// source; this revision only changes the src operand type regD -> regDPR.
// Counts as 2 instructions: src is tagged S5(read), mem is tagged S3(read).
// Resources claimed: DECODE at S0, D0 at S1, FPU at S4 and MEM at S3.
5530 pipe_class fpu_mem_reg(memory mem, regD src) %{ 5508 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
5531 instruction_count(2); 5509 instruction_count(2);
5532 src : S5(read); 5510 src : S5(read);
5533 mem : S3(read); 5511 mem : S3(read);
5534 DECODE : S0; // any decoder for FPU PUSH 5512 DECODE : S0; // any decoder for FPU PUSH
5535 D0 : S1; // big decoder only 5513 D0 : S1; // big decoder only
5536 FPU : S4; 5514 FPU : S4;
5537 MEM : S3; // any mem 5515 MEM : S3; // any mem
5538 %} 5516 %}
5539 5517
// Side-by-side diff rows: old revision on the left, new revision on the right.
// Pipeline class for an FPU operation with a memory operand and two register
// sources; this revision only changes src1/src2 from regD to regDPR.
// Counts as 3 instructions: src1, src2 and mem are all tagged S3(read).
// Resources claimed: DECODE as S0(2), D0 at S1, FPU at S4 and MEM at S3.
5540 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{ 5518 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
5541 instruction_count(3); 5519 instruction_count(3);
5542 src1 : S3(read); 5520 src1 : S3(read);
5543 src2 : S3(read); 5521 src2 : S3(read);
5544 mem : S3(read); 5522 mem : S3(read);
5545 DECODE : S0(2); // any decoder for FPU PUSH 5523 DECODE : S0(2); // any decoder for FPU PUSH
5546 D0 : S1; // big decoder only 5524 D0 : S1; // big decoder only
5547 FPU : S4; 5525 FPU : S4;
5548 MEM : S3; // any mem 5526 MEM : S3; // any mem
5549 %} 5527 %}
5550 5528
5551 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{ 5529 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
5552 instruction_count(3); 5530 instruction_count(3);
5553 src1 : S3(read); 5531 src1 : S3(read);
5554 src2 : S3(read); 5532 src2 : S3(read);
5555 mem : S4(read); 5533 mem : S4(read);
5556 DECODE : S0; // any decoder for FPU PUSH 5534 DECODE : S0; // any decoder for FPU PUSH
5575 D0 : S0(3); // big decoder only 5553 D0 : S0(3); // big decoder only
5576 FPU : S4; 5554 FPU : S4;
5577 MEM : S3(3); // any mem 5555 MEM : S3(3); // any mem
5578 %} 5556 %}
5579 5557
// Side-by-side diff rows: old revision on the left, new revision on the right.
// Pipeline class taking a memory operand and one register source; this
// revision only changes the src1 operand type regD -> regDPR.
// Counts as 3 instructions: src1 and mem are both tagged S4(read).
// Resources claimed: DECODE at S0, D0 as S0(2), FPU at S4 and MEM as S3(2).
// NOTE(review): the "_con" suffix presumably denotes a constant operand that is
// not listed in the signature -- confirm against the instructs using this class.
5580 pipe_class fpu_mem_reg_con(memory mem, regD src1) %{ 5558 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
5581 instruction_count(3); 5559 instruction_count(3);
5582 src1 : S4(read); 5560 src1 : S4(read);
5583 mem : S4(read); 5561 mem : S4(read);
5584 DECODE : S0; // any decoder for FPU PUSH 5562 DECODE : S0; // any decoder for FPU PUSH
5585 D0 : S0(2); // big decoder only 5563 D0 : S0(2); // big decoder only
5586 FPU : S4; 5564 FPU : S4;
5587 MEM : S3(2); // any mem 5565 MEM : S3(2); // any mem
5588 %} 5566 %}
5589 5567
5590 // Float load constant 5568 // Float load constant
// Side-by-side diff rows: old revision on the left, new revision on the right.
// Pipeline class for loading a float constant into a register; this revision
// only changes the dst operand type regD -> regDPR.
// Counts as 2 instructions: dst is tagged S5(write).
// Resources claimed: D0 at S0, DECODE at S1, FPU at S4 and MEM at S3.
5591 pipe_class fpu_reg_con(regD dst) %{ 5569 pipe_class fpu_reg_con(regDPR dst) %{
5592 instruction_count(2); 5570 instruction_count(2);
5593 dst : S5(write); 5571 dst : S5(write);
5594 D0 : S0; // big decoder only for the load 5572 D0 : S0; // big decoder only for the load
5595 DECODE : S1; // any decoder for FPU POP 5573 DECODE : S1; // any decoder for FPU POP
5596 FPU : S4; 5574 FPU : S4;
5597 MEM : S3; // any mem 5575 MEM : S3; // any mem
5598 %} 5576 %}
5599 5577
5600 // Float load constant 5578 // Float load constant
5601 pipe_class fpu_reg_reg_con(regD dst, regD src) %{ 5579 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5602 instruction_count(3); 5580 instruction_count(3);
5603 dst : S5(write); 5581 dst : S5(write);
5604 src : S3(read); 5582 src : S3(read);
5605 D0 : S0; // big decoder only for the load 5583 D0 : S0; // big decoder only for the load
5606 DECODE : S1(2); // any decoder for FPU POP 5584 DECODE : S1(2); // any decoder for FPU POP
6311 "FISTp $dst" %} 6289 "FISTp $dst" %}
6312 ins_encode(enc_loadL_volatile(mem,dst)); 6290 ins_encode(enc_loadL_volatile(mem,dst));
6313 ins_pipe( fpu_reg_mem ); 6291 ins_pipe( fpu_reg_mem );
6314 %} 6292 %}
6315 6293
6316 instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{ 6294 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
6317 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 6295 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
6318 match(Set dst (LoadL mem)); 6296 match(Set dst (LoadL mem));
6319 effect(TEMP tmp); 6297 effect(TEMP tmp);
6320 ins_cost(180); 6298 ins_cost(180);
6321 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 6299 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
6325 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); 6303 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
6326 %} 6304 %}
6327 ins_pipe( pipe_slow ); 6305 ins_pipe( pipe_slow );
6328 %} 6306 %}
6329 6307
6330 instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{ 6308 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
6331 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 6309 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
6332 match(Set dst (LoadL mem)); 6310 match(Set dst (LoadL mem));
6333 effect(TEMP tmp); 6311 effect(TEMP tmp);
6334 ins_cost(160); 6312 ins_cost(160);
6335 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 6313 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
6378 ins_encode( OpcP, RegMem(dst,mem)); 6356 ins_encode( OpcP, RegMem(dst,mem));
6379 ins_pipe( ialu_reg_mem ); 6357 ins_pipe( ialu_reg_mem );
6380 %} 6358 %}
6381 6359
6382 // Load Double 6360 // Load Double
// Side-by-side diff rows: old revision on the left, new revision on the right.
// Double load through the FPU stack, selected when UseSSE<=1. This revision
// renames the instruct loadD -> loadDPR, its dst operand regD -> regDPR and the
// encoding helper Pop_Reg_D -> Pop_Reg_DPR; everything else is unchanged.
// Matches (Set dst (LoadD mem)) at cost 150. The format shows FLD_D from $mem
// followed by FSTP into $dst; the encoding is opcode 0xDD with a 0x00 opcode
// extension on mem (the "DD /0" in the trailing comment), then the pop helper.
6383 instruct loadD(regD dst, memory mem) %{ 6361 instruct loadDPR(regDPR dst, memory mem) %{
6384 predicate(UseSSE<=1); 6362 predicate(UseSSE<=1);
6385 match(Set dst (LoadD mem)); 6363 match(Set dst (LoadD mem));
6386 6364
6387 ins_cost(150); 6365 ins_cost(150);
6388 format %{ "FLD_D ST,$mem\n\t" 6366 format %{ "FLD_D ST,$mem\n\t"
6389 "FSTP $dst" %} 6367 "FSTP $dst" %}
6390 opcode(0xDD); /* DD /0 */ 6368 opcode(0xDD); /* DD /0 */
6391 ins_encode( OpcP, RMopc_Mem(0x00,mem), 6369 ins_encode( OpcP, RMopc_Mem(0x00,mem),
6392 Pop_Reg_D(dst) ); 6370 Pop_Reg_DPR(dst) );
6393 ins_pipe( fpu_reg_mem ); 6371 ins_pipe( fpu_reg_mem );
6394 %} 6372 %}
6395 6373
6396 // Load Double to XMM 6374 // Load Double to XMM
// Side-by-side diff rows: old revision on the left, new revision on the right.
// Double load into an XMM register, selected when UseSSE>=2 and
// UseXmmLoadAndClearUpper is set. This revision renames the instruct
// loadXD -> loadD and its dst operand regXD -> regD; the body is unchanged.
// Matches (Set dst (LoadD mem)) at cost 145 and emits a single
// movdbl(dst, mem) through the macro assembler (format: MOVSD $dst,$mem).
6397 instruct loadXD(regXD dst, memory mem) %{ 6375 instruct loadD(regD dst, memory mem) %{
6398 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 6376 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
6399 match(Set dst (LoadD mem)); 6377 match(Set dst (LoadD mem));
6400 ins_cost(145); 6378 ins_cost(145);
6401 format %{ "MOVSD $dst,$mem" %} 6379 format %{ "MOVSD $dst,$mem" %}
6402 ins_encode %{ 6380 ins_encode %{
6403 __ movdbl ($dst$$XMMRegister, $mem$$Address); 6381 __ movdbl ($dst$$XMMRegister, $mem$$Address);
6404 %} 6382 %}
6405 ins_pipe( pipe_slow ); 6383 ins_pipe( pipe_slow );
6406 %} 6384 %}
6407 6385
6408 instruct loadXD_partial(regXD dst, memory mem) %{ 6386 instruct loadD_partial(regD dst, memory mem) %{
6409 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 6387 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
6410 match(Set dst (LoadD mem)); 6388 match(Set dst (LoadD mem));
6411 ins_cost(145); 6389 ins_cost(145);
6412 format %{ "MOVLPD $dst,$mem" %} 6390 format %{ "MOVLPD $dst,$mem" %}
6413 ins_encode %{ 6391 ins_encode %{
6416 ins_pipe( pipe_slow ); 6394 ins_pipe( pipe_slow );
6417 %} 6395 %}
6418 6396
6419 // Load to XMM register (single-precision floating point) 6397 // Load to XMM register (single-precision floating point)
6420 // MOVSS instruction 6398 // MOVSS instruction
6421 instruct loadX(regX dst, memory mem) %{ 6399 instruct loadF(regF dst, memory mem) %{
6422 predicate(UseSSE>=1); 6400 predicate(UseSSE>=1);
6423 match(Set dst (LoadF mem)); 6401 match(Set dst (LoadF mem));
6424 ins_cost(145); 6402 ins_cost(145);
6425 format %{ "MOVSS $dst,$mem" %} 6403 format %{ "MOVSS $dst,$mem" %}
6426 ins_encode %{ 6404 ins_encode %{
6428 %} 6406 %}
6429 ins_pipe( pipe_slow ); 6407 ins_pipe( pipe_slow );
6430 %} 6408 %}
6431 6409
6432 // Load Float 6410 // Load Float
// Side-by-side diff rows: old revision on the left, new revision on the right.
// Single-precision load through the FPU stack, selected only when UseSSE==0.
// This revision renames the instruct loadF -> loadFPR, its dst operand
// regF -> regFPR and the encoding helper Pop_Reg_F -> Pop_Reg_FPR.
// Matches (Set dst (LoadF mem)) at cost 150. The format shows FLD_S from $mem
// followed by FSTP into $dst; the encoding is opcode 0xD9 with a 0x00 opcode
// extension on mem (the "D9 /0" in the trailing comment), then the pop helper.
6433 instruct loadF(regF dst, memory mem) %{ 6411 instruct loadFPR(regFPR dst, memory mem) %{
6434 predicate(UseSSE==0); 6412 predicate(UseSSE==0);
6435 match(Set dst (LoadF mem)); 6413 match(Set dst (LoadF mem));
6436 6414
6437 ins_cost(150); 6415 ins_cost(150);
6438 format %{ "FLD_S ST,$mem\n\t" 6416 format %{ "FLD_S ST,$mem\n\t"
6439 "FSTP $dst" %} 6417 "FSTP $dst" %}
6440 opcode(0xD9); /* D9 /0 */ 6418 opcode(0xD9); /* D9 /0 */
6441 ins_encode( OpcP, RMopc_Mem(0x00,mem), 6419 ins_encode( OpcP, RMopc_Mem(0x00,mem),
6442 Pop_Reg_F(dst) ); 6420 Pop_Reg_FPR(dst) );
6443 ins_pipe( fpu_reg_mem ); 6421 ins_pipe( fpu_reg_mem );
6444 %} 6422 %}
6445 6423
6446 // Load Aligned Packed Byte to XMM register 6424 // Load Aligned Packed Byte to XMM register
6447 instruct loadA8B(regXD dst, memory mem) %{ 6425 instruct loadA8B(regD dst, memory mem) %{
6448 predicate(UseSSE>=1); 6426 predicate(UseSSE>=1);
6449 match(Set dst (Load8B mem)); 6427 match(Set dst (Load8B mem));
6450 ins_cost(125); 6428 ins_cost(125);
6451 format %{ "MOVQ $dst,$mem\t! packed8B" %} 6429 format %{ "MOVQ $dst,$mem\t! packed8B" %}
6452 ins_encode %{ 6430 ins_encode %{
6454 %} 6432 %}
6455 ins_pipe( pipe_slow ); 6433 ins_pipe( pipe_slow );
6456 %} 6434 %}
6457 6435
6458 // Load Aligned Packed Short to XMM register 6436 // Load Aligned Packed Short to XMM register
6459 instruct loadA4S(regXD dst, memory mem) %{ 6437 instruct loadA4S(regD dst, memory mem) %{
6460 predicate(UseSSE>=1); 6438 predicate(UseSSE>=1);
6461 match(Set dst (Load4S mem)); 6439 match(Set dst (Load4S mem));
6462 ins_cost(125); 6440 ins_cost(125);
6463 format %{ "MOVQ $dst,$mem\t! packed4S" %} 6441 format %{ "MOVQ $dst,$mem\t! packed4S" %}
6464 ins_encode %{ 6442 ins_encode %{
6466 %} 6444 %}
6467 ins_pipe( pipe_slow ); 6445 ins_pipe( pipe_slow );
6468 %} 6446 %}
6469 6447
6470 // Load Aligned Packed Char to XMM register 6448 // Load Aligned Packed Char to XMM register
6471 instruct loadA4C(regXD dst, memory mem) %{ 6449 instruct loadA4C(regD dst, memory mem) %{
6472 predicate(UseSSE>=1); 6450 predicate(UseSSE>=1);
6473 match(Set dst (Load4C mem)); 6451 match(Set dst (Load4C mem));
6474 ins_cost(125); 6452 ins_cost(125);
6475 format %{ "MOVQ $dst,$mem\t! packed4C" %} 6453 format %{ "MOVQ $dst,$mem\t! packed4C" %}
6476 ins_encode %{ 6454 ins_encode %{
6478 %} 6456 %}
6479 ins_pipe( pipe_slow ); 6457 ins_pipe( pipe_slow );
6480 %} 6458 %}
6481 6459
6482 // Load Aligned Packed Integer to XMM register 6460 // Load Aligned Packed Integer to XMM register
6483 instruct load2IU(regXD dst, memory mem) %{ 6461 instruct load2IU(regD dst, memory mem) %{
6484 predicate(UseSSE>=1); 6462 predicate(UseSSE>=1);
6485 match(Set dst (Load2I mem)); 6463 match(Set dst (Load2I mem));
6486 ins_cost(125); 6464 ins_cost(125);
6487 format %{ "MOVQ $dst,$mem\t! packed2I" %} 6465 format %{ "MOVQ $dst,$mem\t! packed2I" %}
6488 ins_encode %{ 6466 ins_encode %{
6490 %} 6468 %}
6491 ins_pipe( pipe_slow ); 6469 ins_pipe( pipe_slow );
6492 %} 6470 %}
6493 6471
6494 // Load Aligned Packed Single to XMM 6472 // Load Aligned Packed Single to XMM
6495 instruct loadA2F(regXD dst, memory mem) %{ 6473 instruct loadA2F(regD dst, memory mem) %{
6496 predicate(UseSSE>=1); 6474 predicate(UseSSE>=1);
6497 match(Set dst (Load2F mem)); 6475 match(Set dst (Load2F mem));
6498 ins_cost(145); 6476 ins_cost(145);
6499 format %{ "MOVQ $dst,$mem\t! packed2F" %} 6477 format %{ "MOVQ $dst,$mem\t! packed2F" %}
6500 ins_encode %{ 6478 ins_encode %{
6604 opcode(0x33,0x33); 6582 opcode(0x33,0x33);
6605 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) ); 6583 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
6606 ins_pipe( ialu_reg_long ); 6584 ins_pipe( ialu_reg_long );
6607 %} 6585 %}
6608 6586
6587 // The instruction usage is guarded by predicate in operand immFPR().
// Rows inserted by this revision (new-revision line numbers only).
// Loads a float constant of operand type immFPR into a regFPR register.
// Matches (Set dst con) at cost 125 and has no predicate of its own.
// Emits fld_s from the constant-table address of $con, then fstp_d with the
// destination register number.
// NOTE(review): fstp_d is passed $dst$$reg (a register number, not an address),
// presumably the register form of the pop -- confirm against the assembler.
6588 instruct loadConFPR(regFPR dst, immFPR con) %{
6589 match(Set dst con);
6590 ins_cost(125);
6591 format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6592 "FSTP $dst" %}
6593 ins_encode %{
6594 __ fld_s($constantaddress($con));
6595 __ fstp_d($dst$$reg);
6596 %}
6597 ins_pipe(fpu_reg_con);
6598 %}
6599
6600 // The instruction usage is guarded by predicate in operand immFPR0().
// Rows inserted by this revision (new-revision line numbers only).
// Loads a constant of operand type immFPR0 into a regFPR register without
// touching the constant table. Matches (Set dst con) at cost 125 and has no
// predicate of its own.
// Emits fldz followed by fstp_d with the destination register number.
6601 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6602 match(Set dst con);
6603 ins_cost(125);
6604 format %{ "FLDZ ST\n\t"
6605 "FSTP $dst" %}
6606 ins_encode %{
6607 __ fldz();
6608 __ fstp_d($dst$$reg);
6609 %}
6610 ins_pipe(fpu_reg_con);
6611 %}
6612
6613 // The instruction usage is guarded by predicate in operand immFPR1().
// Rows inserted by this revision (new-revision line numbers only).
// Loads a constant of operand type immFPR1 into a regFPR register without
// touching the constant table. Matches (Set dst con) at cost 125 and has no
// predicate of its own.
// Emits fld1 followed by fstp_d with the destination register number.
6614 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6615 match(Set dst con);
6616 ins_cost(125);
6617 format %{ "FLD1 ST\n\t"
6618 "FSTP $dst" %}
6619 ins_encode %{
6620 __ fld1();
6621 __ fstp_d($dst$$reg);
6622 %}
6623 ins_pipe(fpu_reg_con);
6624 %}
6625
6609 // The instruction usage is guarded by predicate in operand immF(). 6626 // The instruction usage is guarded by predicate in operand immF().
6610 instruct loadConF(regF dst, immF con) %{ 6627 instruct loadConF(regF dst, immF con) %{
6611 match(Set dst con); 6628 match(Set dst con);
6612 ins_cost(125); 6629 ins_cost(125);
6613 format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" 6630 format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6614 "FSTP $dst" %} 6631 ins_encode %{
6615 ins_encode %{ 6632 __ movflt($dst$$XMMRegister, $constantaddress($con));
6616 __ fld_s($constantaddress($con)); 6633 %}
6617 __ fstp_d($dst$$reg); 6634 ins_pipe(pipe_slow);
6618 %}
6619 ins_pipe(fpu_reg_con);
6620 %} 6635 %}
6621 6636
6622 // The instruction usage is guarded by predicate in operand immF0(). 6637 // The instruction usage is guarded by predicate in operand immF0().
6623 instruct loadConF0(regF dst, immF0 con) %{ 6638 instruct loadConF0(regF dst, immF0 src) %{
6624 match(Set dst con);
6625 ins_cost(125);
6626 format %{ "FLDZ ST\n\t"
6627 "FSTP $dst" %}
6628 ins_encode %{
6629 __ fldz();
6630 __ fstp_d($dst$$reg);
6631 %}
6632 ins_pipe(fpu_reg_con);
6633 %}
6634
6635 // The instruction usage is guarded by predicate in operand immF1().
6636 instruct loadConF1(regF dst, immF1 con) %{
6637 match(Set dst con);
6638 ins_cost(125);
6639 format %{ "FLD1 ST\n\t"
6640 "FSTP $dst" %}
6641 ins_encode %{
6642 __ fld1();
6643 __ fstp_d($dst$$reg);
6644 %}
6645 ins_pipe(fpu_reg_con);
6646 %}
6647
6648 // The instruction usage is guarded by predicate in operand immXF().
6649 instruct loadConX(regX dst, immXF con) %{
6650 match(Set dst con);
6651 ins_cost(125);
6652 format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6653 ins_encode %{
6654 __ movflt($dst$$XMMRegister, $constantaddress($con));
6655 %}
6656 ins_pipe(pipe_slow);
6657 %}
6658
6659 // The instruction usage is guarded by predicate in operand immXF0().
6660 instruct loadConX0(regX dst, immXF0 src) %{
6661 match(Set dst src); 6639 match(Set dst src);
6662 ins_cost(100); 6640 ins_cost(100);
6663 format %{ "XORPS $dst,$dst\t# float 0.0" %} 6641 format %{ "XORPS $dst,$dst\t# float 0.0" %}
6664 ins_encode %{ 6642 ins_encode %{
6665 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 6643 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6666 %} 6644 %}
6667 ins_pipe(pipe_slow); 6645 ins_pipe(pipe_slow);
6646 %}
6647
6648 // The instruction usage is guarded by predicate in operand immDPR().
// Rows inserted by this revision (new-revision line numbers only).
// Loads a double constant of operand type immDPR into a regDPR register.
// Matches (Set dst con) at cost 125 and has no predicate of its own.
// Emits fld_d from the constant-table address of $con, then fstp_d with the
// destination register number.
6649 instruct loadConDPR(regDPR dst, immDPR con) %{
6650 match(Set dst con);
6651 ins_cost(125);
6652
6653 format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6654 "FSTP $dst" %}
6655 ins_encode %{
6656 __ fld_d($constantaddress($con));
6657 __ fstp_d($dst$$reg);
6658 %}
6659 ins_pipe(fpu_reg_con);
6660 %}
6661
6662 // The instruction usage is guarded by predicate in operand immDPR0().
// Rows inserted by this revision (new-revision line numbers only).
// Loads a constant of operand type immDPR0 into a regDPR register without
// touching the constant table. Matches (Set dst con) at cost 125 and has no
// predicate of its own.
// Emits fldz followed by fstp_d with the destination register number.
6663 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6664 match(Set dst con);
6665 ins_cost(125);
6666
6667 format %{ "FLDZ ST\n\t"
6668 "FSTP $dst" %}
6669 ins_encode %{
6670 __ fldz();
6671 __ fstp_d($dst$$reg);
6672 %}
6673 ins_pipe(fpu_reg_con);
6674 %}
6675
6676 // The instruction usage is guarded by predicate in operand immDPR1().
// Rows inserted by this revision (new-revision line numbers only), except the
// final closing row, which the diff pairs with a closing "%}" of the old
// revision and therefore shows in both columns.
// Loads a constant of operand type immDPR1 into a regDPR register without
// touching the constant table. Matches (Set dst con) at cost 125 and has no
// predicate of its own.
// Emits fld1 followed by fstp_d with the destination register number.
6677 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6678 match(Set dst con);
6679 ins_cost(125);
6680
6681 format %{ "FLD1 ST\n\t"
6682 "FSTP $dst" %}
6683 ins_encode %{
6684 __ fld1();
6685 __ fstp_d($dst$$reg);
6686 %}
6687 ins_pipe(fpu_reg_con);
6668 %} 6688 %}
6669 6689
6670 // The instruction usage is guarded by predicate in operand immD(). 6690 // The instruction usage is guarded by predicate in operand immD().
6671 instruct loadConD(regD dst, immD con) %{ 6691 instruct loadConD(regD dst, immD con) %{
6672 match(Set dst con); 6692 match(Set dst con);
6673 ins_cost(125); 6693 ins_cost(125);
6674 6694 format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6675 format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" 6695 ins_encode %{
6676 "FSTP $dst" %} 6696 __ movdbl($dst$$XMMRegister, $constantaddress($con));
6677 ins_encode %{ 6697 %}
6678 __ fld_d($constantaddress($con)); 6698 ins_pipe(pipe_slow);
6679 __ fstp_d($dst$$reg);
6680 %}
6681 ins_pipe(fpu_reg_con);
6682 %} 6699 %}
6683 6700
6684 // The instruction usage is guarded by predicate in operand immD0(). 6701 // The instruction usage is guarded by predicate in operand immD0().
6685 instruct loadConD0(regD dst, immD0 con) %{ 6702 instruct loadConD0(regD dst, immD0 src) %{
6686 match(Set dst con);
6687 ins_cost(125);
6688
6689 format %{ "FLDZ ST\n\t"
6690 "FSTP $dst" %}
6691 ins_encode %{
6692 __ fldz();
6693 __ fstp_d($dst$$reg);
6694 %}
6695 ins_pipe(fpu_reg_con);
6696 %}
6697
6698 // The instruction usage is guarded by predicate in operand immD1().
6699 instruct loadConD1(regD dst, immD1 con) %{
6700 match(Set dst con);
6701 ins_cost(125);
6702
6703 format %{ "FLD1 ST\n\t"
6704 "FSTP $dst" %}
6705 ins_encode %{
6706 __ fld1();
6707 __ fstp_d($dst$$reg);
6708 %}
6709 ins_pipe(fpu_reg_con);
6710 %}
6711
6712 // The instruction usage is guarded by predicate in operand immXD().
6713 instruct loadConXD(regXD dst, immXD con) %{
6714 match(Set dst con);
6715 ins_cost(125);
6716 format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6717 ins_encode %{
6718 __ movdbl($dst$$XMMRegister, $constantaddress($con));
6719 %}
6720 ins_pipe(pipe_slow);
6721 %}
6722
6723 // The instruction usage is guarded by predicate in operand immXD0().
6724 instruct loadConXD0(regXD dst, immXD0 src) %{
6725 match(Set dst src); 6703 match(Set dst src);
6726 ins_cost(100); 6704 ins_cost(100);
6727 format %{ "XORPD $dst,$dst\t# double 0.0" %} 6705 format %{ "XORPD $dst,$dst\t# double 0.0" %}
6728 ins_encode %{ 6706 ins_encode %{
6729 __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister); 6707 __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6763 ins_encode( OpcP, RegMem(dst,src)); 6741 ins_encode( OpcP, RegMem(dst,src));
6764 ins_pipe( ialu_reg_mem ); 6742 ins_pipe( ialu_reg_mem );
6765 %} 6743 %}
6766 6744
6767 // Load Stack Slot 6745 // Load Stack Slot
// Side-by-side diff rows: old revision on the left, new revision on the right.
// Loads a float from a stack slot (stackSlotF) through the FPU stack. The
// instruct keeps its name; this revision changes the dst operand
// regF -> regFPR and the encoding helper Pop_Reg_F -> Pop_Reg_FPR.
// Matches (Set dst src) at cost 125 and has no predicate. The format shows
// FLD_S from $src followed by FSTP into $dst; the encoding is opcode 0xD9 with
// a 0x00 opcode extension on src via RMopc_Mem_no_oop, then the pop helper.
6768 instruct loadSSF(regF dst, stackSlotF src) %{ 6746 instruct loadSSF(regFPR dst, stackSlotF src) %{
6769 match(Set dst src); 6747 match(Set dst src);
6770 ins_cost(125); 6748 ins_cost(125);
6771 6749
6772 format %{ "FLD_S $src\n\t" 6750 format %{ "FLD_S $src\n\t"
6773 "FSTP $dst" %} 6751 "FSTP $dst" %}
6774 opcode(0xD9); /* D9 /0, FLD m32real */ 6752 opcode(0xD9); /* D9 /0, FLD m32real */
6775 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6753 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6776 Pop_Reg_F(dst) ); 6754 Pop_Reg_FPR(dst) );
6777 ins_pipe( fpu_reg_mem ); 6755 ins_pipe( fpu_reg_mem );
6778 %} 6756 %}
6779 6757
6780 // Load Stack Slot 6758 // Load Stack Slot
// Side-by-side diff rows: old revision on the left, new revision on the right.
// Loads a double from a stack slot (stackSlotD) through the FPU stack. The
// instruct keeps its name; this revision changes the dst operand
// regD -> regDPR and the encoding helper Pop_Reg_D -> Pop_Reg_DPR.
// Matches (Set dst src) at cost 125 and has no predicate. The format shows
// FLD_D from $src followed by FSTP into $dst; the encoding is opcode 0xDD with
// a 0x00 opcode extension on src via RMopc_Mem_no_oop, then the pop helper.
6781 instruct loadSSD(regD dst, stackSlotD src) %{ 6759 instruct loadSSD(regDPR dst, stackSlotD src) %{
6782 match(Set dst src); 6760 match(Set dst src);
6783 ins_cost(125); 6761 ins_cost(125);
6784 6762
6785 format %{ "FLD_D $src\n\t" 6763 format %{ "FLD_D $src\n\t"
6786 "FSTP $dst" %} 6764 "FSTP $dst" %}
6787 opcode(0xDD); /* DD /0, FLD m64real */ 6765 opcode(0xDD); /* DD /0, FLD m64real */
6788 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6766 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6789 Pop_Reg_D(dst) ); 6767 Pop_Reg_DPR(dst) );
6790 ins_pipe( fpu_reg_mem ); 6768 ins_pipe( fpu_reg_mem );
6791 %} 6769 %}
6792 6770
6793 // Prefetch instructions. 6771 // Prefetch instructions.
6794 // Must be safe to execute with invalid address (cannot fault). 6772 // Must be safe to execute with invalid address (cannot fault).
7019 opcode(0x3B); 6997 opcode(0x3B);
7020 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src)); 6998 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
7021 ins_pipe( fpu_reg_mem ); 6999 ins_pipe( fpu_reg_mem );
7022 %} 7000 %}
7023 7001
7024 instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{ 7002 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
7025 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 7003 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
7026 match(Set mem (StoreL mem src)); 7004 match(Set mem (StoreL mem src));
7027 effect( TEMP tmp, KILL cr ); 7005 effect( TEMP tmp, KILL cr );
7028 ins_cost(380); 7006 ins_cost(380);
7029 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 7007 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7035 __ movdbl($mem$$Address, $tmp$$XMMRegister); 7013 __ movdbl($mem$$Address, $tmp$$XMMRegister);
7036 %} 7014 %}
7037 ins_pipe( pipe_slow ); 7015 ins_pipe( pipe_slow );
7038 %} 7016 %}
7039 7017
7040 instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{ 7018 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
7041 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 7019 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
7042 match(Set mem (StoreL mem src)); 7020 match(Set mem (StoreL mem src));
7043 effect( TEMP tmp2 , TEMP tmp, KILL cr ); 7021 effect( TEMP tmp2 , TEMP tmp, KILL cr );
7044 ins_cost(360); 7022 ins_cost(360);
7045 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 7023 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7113 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); 7091 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
7114 ins_pipe( ialu_mem_imm ); 7092 ins_pipe( ialu_mem_imm );
7115 %} 7093 %}
7116 7094
7117 // Store Aligned Packed Byte XMM register to memory 7095 // Store Aligned Packed Byte XMM register to memory
7118 instruct storeA8B(memory mem, regXD src) %{ 7096 instruct storeA8B(memory mem, regD src) %{
7119 predicate(UseSSE>=1); 7097 predicate(UseSSE>=1);
7120 match(Set mem (Store8B mem src)); 7098 match(Set mem (Store8B mem src));
7121 ins_cost(145); 7099 ins_cost(145);
7122 format %{ "MOVQ $mem,$src\t! packed8B" %} 7100 format %{ "MOVQ $mem,$src\t! packed8B" %}
7123 ins_encode %{ 7101 ins_encode %{
7125 %} 7103 %}
7126 ins_pipe( pipe_slow ); 7104 ins_pipe( pipe_slow );
7127 %} 7105 %}
7128 7106
7129 // Store Aligned Packed Char/Short XMM register to memory 7107 // Store Aligned Packed Char/Short XMM register to memory
7130 instruct storeA4C(memory mem, regXD src) %{ 7108 instruct storeA4C(memory mem, regD src) %{
7131 predicate(UseSSE>=1); 7109 predicate(UseSSE>=1);
7132 match(Set mem (Store4C mem src)); 7110 match(Set mem (Store4C mem src));
7133 ins_cost(145); 7111 ins_cost(145);
7134 format %{ "MOVQ $mem,$src\t! packed4C" %} 7112 format %{ "MOVQ $mem,$src\t! packed4C" %}
7135 ins_encode %{ 7113 ins_encode %{
7137 %} 7115 %}
7138 ins_pipe( pipe_slow ); 7116 ins_pipe( pipe_slow );
7139 %} 7117 %}
7140 7118
7141 // Store Aligned Packed Integer XMM register to memory 7119 // Store Aligned Packed Integer XMM register to memory
7142 instruct storeA2I(memory mem, regXD src) %{ 7120 instruct storeA2I(memory mem, regD src) %{
7143 predicate(UseSSE>=1); 7121 predicate(UseSSE>=1);
7144 match(Set mem (Store2I mem src)); 7122 match(Set mem (Store2I mem src));
7145 ins_cost(145); 7123 ins_cost(145);
7146 format %{ "MOVQ $mem,$src\t! packed2I" %} 7124 format %{ "MOVQ $mem,$src\t! packed2I" %}
7147 ins_encode %{ 7125 ins_encode %{
7160 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); 7138 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
7161 ins_pipe( ialu_mem_imm ); 7139 ins_pipe( ialu_mem_imm );
7162 %} 7140 %}
7163 7141
7164 // Store Double 7142 // Store Double
// Side-by-side diff rows: old revision on the left, new revision on the right.
// Double store from a regDPR1 source, selected when UseSSE<=1. This revision
// renames the instruct storeD -> storeDPR and the encoding class
// enc_FP_store -> enc_FPR_store; the operand types are unchanged.
// Matches (Set mem (StoreD mem src)) at cost 100 with opcode 0xDD (the
// trailing comment gives "DD /2"); the format string is FST_D $mem,$src.
7165 instruct storeD( memory mem, regDPR1 src) %{ 7143 instruct storeDPR( memory mem, regDPR1 src) %{
7166 predicate(UseSSE<=1); 7144 predicate(UseSSE<=1);
7167 match(Set mem (StoreD mem src)); 7145 match(Set mem (StoreD mem src));
7168 7146
7169 ins_cost(100); 7147 ins_cost(100);
7170 format %{ "FST_D $mem,$src" %} 7148 format %{ "FST_D $mem,$src" %}
7171 opcode(0xDD); /* DD /2 */ 7149 opcode(0xDD); /* DD /2 */
7172 ins_encode( enc_FP_store(mem,src) ); 7150 ins_encode( enc_FPR_store(mem,src) );
7173 ins_pipe( fpu_mem_reg ); 7151 ins_pipe( fpu_mem_reg );
7174 %} 7152 %}
7175 7153
7176 // Store double does rounding on x86 7154 // Store double does rounding on x86
// Side-by-side diff rows: old revision on the left, new revision on the right.
// Double store whose value is wrapped in RoundDouble, selected when UseSSE<=1.
// This revision renames the instruct storeD_rounded -> storeDPR_rounded and the
// encoding class enc_FP_store -> enc_FPR_store.
// Matches (Set mem (StoreD mem (RoundDouble src))) at cost 100. ins_encode
// lists only the store encoding with opcode 0xDD ("DD /2"), so the RoundDouble
// node is folded into the store rather than encoded separately.
7177 instruct storeD_rounded( memory mem, regDPR1 src) %{ 7155 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
7178 predicate(UseSSE<=1); 7156 predicate(UseSSE<=1);
7179 match(Set mem (StoreD mem (RoundDouble src))); 7157 match(Set mem (StoreD mem (RoundDouble src)));
7180 7158
7181 ins_cost(100); 7159 ins_cost(100);
7182 format %{ "FST_D $mem,$src\t# round" %} 7160 format %{ "FST_D $mem,$src\t# round" %}
7183 opcode(0xDD); /* DD /2 */ 7161 opcode(0xDD); /* DD /2 */
7184 ins_encode( enc_FP_store(mem,src) ); 7162 ins_encode( enc_FPR_store(mem,src) );
7185 ins_pipe( fpu_mem_reg ); 7163 ins_pipe( fpu_mem_reg );
7186 %} 7164 %}
7187 7165
7188 // Store XMM register to memory (double-precision floating points) 7166 // Store XMM register to memory (double-precision floating points)
7189 // MOVSD instruction 7167 // MOVSD instruction
7190 instruct storeXD(memory mem, regXD src) %{ 7168 instruct storeD(memory mem, regD src) %{
7191 predicate(UseSSE>=2); 7169 predicate(UseSSE>=2);
7192 match(Set mem (StoreD mem src)); 7170 match(Set mem (StoreD mem src));
7193 ins_cost(95); 7171 ins_cost(95);
7194 format %{ "MOVSD $mem,$src" %} 7172 format %{ "MOVSD $mem,$src" %}
7195 ins_encode %{ 7173 ins_encode %{
7198 ins_pipe( pipe_slow ); 7176 ins_pipe( pipe_slow );
7199 %} 7177 %}
7200 7178
7201 // Store XMM register to memory (single-precision floating point) 7179 // Store XMM register to memory (single-precision floating point)
7202 // MOVSS instruction 7180 // MOVSS instruction
7203 instruct storeX(memory mem, regX src) %{ 7181 instruct storeF(memory mem, regF src) %{
7204 predicate(UseSSE>=1); 7182 predicate(UseSSE>=1);
7205 match(Set mem (StoreF mem src)); 7183 match(Set mem (StoreF mem src));
7206 ins_cost(95); 7184 ins_cost(95);
7207 format %{ "MOVSS $mem,$src" %} 7185 format %{ "MOVSS $mem,$src" %}
7208 ins_encode %{ 7186 ins_encode %{
7210 %} 7188 %}
7211 ins_pipe( pipe_slow ); 7189 ins_pipe( pipe_slow );
7212 %} 7190 %}
7213 7191
7214 // Store Aligned Packed Single Float XMM register to memory 7192 // Store Aligned Packed Single Float XMM register to memory
7215 instruct storeA2F(memory mem, regXD src) %{ 7193 instruct storeA2F(memory mem, regD src) %{
7216 predicate(UseSSE>=1); 7194 predicate(UseSSE>=1);
7217 match(Set mem (Store2F mem src)); 7195 match(Set mem (Store2F mem src));
7218 ins_cost(145); 7196 ins_cost(145);
7219 format %{ "MOVQ $mem,$src\t! packed2F" %} 7197 format %{ "MOVQ $mem,$src\t! packed2F" %}
7220 ins_encode %{ 7198 ins_encode %{
7222 %} 7200 %}
7223 ins_pipe( pipe_slow ); 7201 ins_pipe( pipe_slow );
7224 %} 7202 %}
7225 7203
7226 // Store Float 7204 // Store Float
// Side-by-side diff rows: old revision on the left, new revision on the right.
// Float store from a regFPR1 source, selected only when UseSSE==0. This
// revision renames the instruct storeF -> storeFPR and the encoding class
// enc_FP_store -> enc_FPR_store; the operand types are unchanged.
// Matches (Set mem (StoreF mem src)) at cost 100 with opcode 0xD9 (the
// trailing comment gives "D9 /2"); the format string is FST_S $mem,$src.
7227 instruct storeF( memory mem, regFPR1 src) %{ 7205 instruct storeFPR( memory mem, regFPR1 src) %{
7228 predicate(UseSSE==0); 7206 predicate(UseSSE==0);
7229 match(Set mem (StoreF mem src)); 7207 match(Set mem (StoreF mem src));
7230 7208
7231 ins_cost(100); 7209 ins_cost(100);
7232 format %{ "FST_S $mem,$src" %} 7210 format %{ "FST_S $mem,$src" %}
7233 opcode(0xD9); /* D9 /2 */ 7211 opcode(0xD9); /* D9 /2 */
7234 ins_encode( enc_FP_store(mem,src) ); 7212 ins_encode( enc_FPR_store(mem,src) );
7235 ins_pipe( fpu_mem_reg ); 7213 ins_pipe( fpu_mem_reg );
7236 %} 7214 %}
7237 7215
7238 // Store Float does rounding on x86 7216 // Store Float does rounding on x86
// Side-by-side diff rows: old revision on the left, new revision on the right.
// Float store whose value is wrapped in RoundFloat, selected only when
// UseSSE==0. This revision renames the instruct
// storeF_rounded -> storeFPR_rounded and the encoding class
// enc_FP_store -> enc_FPR_store.
// Matches (Set mem (StoreF mem (RoundFloat src))) at cost 100. ins_encode lists
// only the store encoding with opcode 0xD9 ("D9 /2"), so the RoundFloat node is
// folded into the store rather than encoded separately.
7239 instruct storeF_rounded( memory mem, regFPR1 src) %{ 7217 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
7240 predicate(UseSSE==0); 7218 predicate(UseSSE==0);
7241 match(Set mem (StoreF mem (RoundFloat src))); 7219 match(Set mem (StoreF mem (RoundFloat src)));
7242 7220
7243 ins_cost(100); 7221 ins_cost(100);
7244 format %{ "FST_S $mem,$src\t# round" %} 7222 format %{ "FST_S $mem,$src\t# round" %}
7245 opcode(0xD9); /* D9 /2 */ 7223 opcode(0xD9); /* D9 /2 */
7246 ins_encode( enc_FP_store(mem,src) ); 7224 ins_encode( enc_FPR_store(mem,src) );
7247 ins_pipe( fpu_mem_reg ); 7225 ins_pipe( fpu_mem_reg );
7248 %} 7226 %}
7249 7227
7250 // Store Float does rounding on x86 7228 // Store Float does rounding on x86
// Side-by-side diff rows: old revision on the left, new revision on the right.
// Float store of a double-to-float conversion: the source is a double register
// (regDPR1) and the pattern is (Set mem (StoreF mem (ConvD2F src))), selected
// when UseSSE<=1. This revision renames the instruct
// storeF_Drounded -> storeFPR_Drounded and the encoding class
// enc_FP_store -> enc_FPR_store.
// Cost 100. ins_encode lists only the store encoding with opcode 0xD9
// ("D9 /2"), so the ConvD2F node is folded into the single-precision store.
7251 instruct storeF_Drounded( memory mem, regDPR1 src) %{ 7229 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
7252 predicate(UseSSE<=1); 7230 predicate(UseSSE<=1);
7253 match(Set mem (StoreF mem (ConvD2F src))); 7231 match(Set mem (StoreF mem (ConvD2F src)));
7254 7232
7255 ins_cost(100); 7233 ins_cost(100);
7256 format %{ "FST_S $mem,$src\t# D-round" %} 7234 format %{ "FST_S $mem,$src\t# D-round" %}
7257 opcode(0xD9); /* D9 /2 */ 7235 opcode(0xD9); /* D9 /2 */
7258 ins_encode( enc_FP_store(mem,src) ); 7236 ins_encode( enc_FPR_store(mem,src) );
7259 ins_pipe( fpu_mem_reg ); 7237 ins_pipe( fpu_mem_reg );
7260 %} 7238 %}
7261 7239
7262 // Store immediate Float value (it is faster than store from FPU register) 7240 // Store immediate Float value (it is faster than store from FPU register)
7241 // The instruction usage is guarded by predicate in operand immFPR().
7242 instruct storeFPR_imm( memory mem, immFPR src) %{
7243 match(Set mem (StoreF mem src));
7244
7245 ins_cost(50);
7246 format %{ "MOV $mem,$src\t# store float" %}
7247 opcode(0xC7); /* C7 /0 */
7248 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
7249 ins_pipe( ialu_mem_imm );
7250 %}
7251
7252 // Store immediate Float value (it is faster than store from XMM register)
7263 // The instruction usage is guarded by predicate in operand immF(). 7253 // The instruction usage is guarded by predicate in operand immF().
7264 instruct storeF_imm( memory mem, immF src) %{ 7254 instruct storeF_imm( memory mem, immF src) %{
7265 match(Set mem (StoreF mem src)); 7255 match(Set mem (StoreF mem src));
7266 7256
7267 ins_cost(50); 7257 ins_cost(50);
7268 format %{ "MOV $mem,$src\t# store float" %} 7258 format %{ "MOV $mem,$src\t# store float" %}
7269 opcode(0xC7); /* C7 /0 */ 7259 opcode(0xC7); /* C7 /0 */
7270 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src )); 7260 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
7271 ins_pipe( ialu_mem_imm );
7272 %}
7273
7274 // Store immediate Float value (it is faster than store from XMM register)
7275 // The instruction usage is guarded by predicate in operand immXF().
7276 instruct storeX_imm( memory mem, immXF src) %{
7277 match(Set mem (StoreF mem src));
7278
7279 ins_cost(50);
7280 format %{ "MOV $mem,$src\t# store float" %}
7281 opcode(0xC7); /* C7 /0 */
7282 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32XF_as_bits( src ));
7283 ins_pipe( ialu_mem_imm ); 7261 ins_pipe( ialu_mem_imm );
7284 %} 7262 %}
7285 7263
7286 // Store Integer to stack slot 7264 // Store Integer to stack slot
7287 instruct storeSSI(stackSlotI dst, eRegI src) %{ 7265 instruct storeSSI(stackSlotI dst, eRegI src) %{
7575 // ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 7553 // ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7576 // ins_pipe( pipe_cmov_mem ); 7554 // ins_pipe( pipe_cmov_mem );
7577 //%} 7555 //%}
7578 7556
7579 // Conditional move 7557 // Conditional move
7580 instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{ 7558 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
7581 predicate(UseSSE<=1); 7559 predicate(UseSSE<=1);
7582 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 7560 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7583 ins_cost(200); 7561 ins_cost(200);
7584 format %{ "FCMOV$cop $dst,$src\t# double" %} 7562 format %{ "FCMOV$cop $dst,$src\t# double" %}
7585 opcode(0xDA); 7563 opcode(0xDA);
7586 ins_encode( enc_cmov_d(cop,src) ); 7564 ins_encode( enc_cmov_dpr(cop,src) );
7587 ins_pipe( pipe_cmovD_reg ); 7565 ins_pipe( pipe_cmovDPR_reg );
7588 %} 7566 %}
7589 7567
7590 // Conditional move 7568 // Conditional move
7591 instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{ 7569 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
7592 predicate(UseSSE==0); 7570 predicate(UseSSE==0);
7593 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 7571 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7594 ins_cost(200); 7572 ins_cost(200);
7595 format %{ "FCMOV$cop $dst,$src\t# float" %} 7573 format %{ "FCMOV$cop $dst,$src\t# float" %}
7596 opcode(0xDA); 7574 opcode(0xDA);
7597 ins_encode( enc_cmov_d(cop,src) ); 7575 ins_encode( enc_cmov_dpr(cop,src) );
7598 ins_pipe( pipe_cmovD_reg ); 7576 ins_pipe( pipe_cmovDPR_reg );
7599 %} 7577 %}
7600 7578
7601 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 7579 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
7602 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ 7580 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
7603 predicate(UseSSE<=1); 7581 predicate(UseSSE<=1);
7604 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 7582 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7605 ins_cost(200); 7583 ins_cost(200);
7606 format %{ "Jn$cop skip\n\t" 7584 format %{ "Jn$cop skip\n\t"
7607 "MOV $dst,$src\t# double\n" 7585 "MOV $dst,$src\t# double\n"
7608 "skip:" %} 7586 "skip:" %}
7609 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 7587 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
7610 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) ); 7588 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
7611 ins_pipe( pipe_cmovD_reg ); 7589 ins_pipe( pipe_cmovDPR_reg );
7612 %} 7590 %}
7613 7591
7614 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 7592 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
7615 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ 7593 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
7616 predicate(UseSSE==0); 7594 predicate(UseSSE==0);
7617 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 7595 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7618 ins_cost(200); 7596 ins_cost(200);
7619 format %{ "Jn$cop skip\n\t" 7597 format %{ "Jn$cop skip\n\t"
7620 "MOV $dst,$src\t# float\n" 7598 "MOV $dst,$src\t# float\n"
7621 "skip:" %} 7599 "skip:" %}
7622 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 7600 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
7623 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) ); 7601 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
7624 ins_pipe( pipe_cmovD_reg ); 7602 ins_pipe( pipe_cmovDPR_reg );
7625 %} 7603 %}
7626 7604
7627 // No CMOVE with SSE/SSE2 7605 // No CMOVE with SSE/SSE2
7628 instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{ 7606 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
7629 predicate (UseSSE>=1); 7607 predicate (UseSSE>=1);
7630 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 7608 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7631 ins_cost(200); 7609 ins_cost(200);
7632 format %{ "Jn$cop skip\n\t" 7610 format %{ "Jn$cop skip\n\t"
7633 "MOVSS $dst,$src\t# float\n" 7611 "MOVSS $dst,$src\t# float\n"
7641 %} 7619 %}
7642 ins_pipe( pipe_slow ); 7620 ins_pipe( pipe_slow );
7643 %} 7621 %}
7644 7622
7645 // No CMOVE with SSE/SSE2 7623 // No CMOVE with SSE/SSE2
7646 instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{ 7624 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
7647 predicate (UseSSE>=2); 7625 predicate (UseSSE>=2);
7648 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 7626 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7649 ins_cost(200); 7627 ins_cost(200);
7650 format %{ "Jn$cop skip\n\t" 7628 format %{ "Jn$cop skip\n\t"
7651 "MOVSD $dst,$src\t# float\n" 7629 "MOVSD $dst,$src\t# float\n"
7659 %} 7637 %}
7660 ins_pipe( pipe_slow ); 7638 ins_pipe( pipe_slow );
7661 %} 7639 %}
7662 7640
7663 // unsigned version 7641 // unsigned version
7664 instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{ 7642 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
7665 predicate (UseSSE>=1); 7643 predicate (UseSSE>=1);
7666 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 7644 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7667 ins_cost(200); 7645 ins_cost(200);
7668 format %{ "Jn$cop skip\n\t" 7646 format %{ "Jn$cop skip\n\t"
7669 "MOVSS $dst,$src\t# float\n" 7647 "MOVSS $dst,$src\t# float\n"
7676 __ bind(skip); 7654 __ bind(skip);
7677 %} 7655 %}
7678 ins_pipe( pipe_slow ); 7656 ins_pipe( pipe_slow );
7679 %} 7657 %}
7680 7658
7681 instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{ 7659 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
7682 predicate (UseSSE>=1); 7660 predicate (UseSSE>=1);
7683 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 7661 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7684 ins_cost(200); 7662 ins_cost(200);
7685 expand %{ 7663 expand %{
7686 fcmovX_regU(cop, cr, dst, src); 7664 fcmovF_regU(cop, cr, dst, src);
7687 %} 7665 %}
7688 %} 7666 %}
7689 7667
7690 // unsigned version 7668 // unsigned version
7691 instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{ 7669 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
7692 predicate (UseSSE>=2); 7670 predicate (UseSSE>=2);
7693 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 7671 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7694 ins_cost(200); 7672 ins_cost(200);
7695 format %{ "Jn$cop skip\n\t" 7673 format %{ "Jn$cop skip\n\t"
7696 "MOVSD $dst,$src\t# float\n" 7674 "MOVSD $dst,$src\t# float\n"
7703 __ bind(skip); 7681 __ bind(skip);
7704 %} 7682 %}
7705 ins_pipe( pipe_slow ); 7683 ins_pipe( pipe_slow );
7706 %} 7684 %}
7707 7685
7708 instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{ 7686 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
7709 predicate (UseSSE>=2); 7687 predicate (UseSSE>=2);
7710 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 7688 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7711 ins_cost(200); 7689 ins_cost(200);
7712 expand %{ 7690 expand %{
7713 fcmovXD_regU(cop, cr, dst, src); 7691 fcmovD_regU(cop, cr, dst, src);
7714 %} 7692 %}
7715 %} 7693 %}
7716 7694
7717 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{ 7695 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
7718 predicate(VM_Version::supports_cmov() ); 7696 predicate(VM_Version::supports_cmov() );
7938 "FISTp $dst" %} 7916 "FISTp $dst" %}
7939 ins_encode(enc_loadL_volatile(mem,dst)); 7917 ins_encode(enc_loadL_volatile(mem,dst));
7940 ins_pipe( fpu_reg_mem ); 7918 ins_pipe( fpu_reg_mem );
7941 %} 7919 %}
7942 7920
7943 instruct loadLX_Locked(stackSlotL dst, memory mem, regXD tmp) %{ 7921 instruct loadLX_Locked(stackSlotL dst, memory mem, regD tmp) %{
7944 predicate(UseSSE>=2); 7922 predicate(UseSSE>=2);
7945 match(Set dst (LoadLLocked mem)); 7923 match(Set dst (LoadLLocked mem));
7946 effect(TEMP tmp); 7924 effect(TEMP tmp);
7947 ins_cost(180); 7925 ins_cost(180);
7948 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 7926 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
7952 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); 7930 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
7953 %} 7931 %}
7954 ins_pipe( pipe_slow ); 7932 ins_pipe( pipe_slow );
7955 %} 7933 %}
7956 7934
7957 instruct loadLX_reg_Locked(eRegL dst, memory mem, regXD tmp) %{ 7935 instruct loadLX_reg_Locked(eRegL dst, memory mem, regD tmp) %{
7958 predicate(UseSSE>=2); 7936 predicate(UseSSE>=2);
7959 match(Set dst (LoadLLocked mem)); 7937 match(Set dst (LoadLLocked mem));
7960 effect(TEMP tmp); 7938 effect(TEMP tmp);
7961 ins_cost(160); 7939 ins_cost(160);
7962 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 7940 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
9549 // Double Math 9527 // Double Math
9550 9528
9551 // Compare & branch 9529 // Compare & branch
9552 9530
9553 // P6 version of float compare, sets condition codes in EFLAGS 9531 // P6 version of float compare, sets condition codes in EFLAGS
9554 instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{ 9532 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9555 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9533 predicate(VM_Version::supports_cmov() && UseSSE <=1);
9556 match(Set cr (CmpD src1 src2)); 9534 match(Set cr (CmpD src1 src2));
9557 effect(KILL rax); 9535 effect(KILL rax);
9558 ins_cost(150); 9536 ins_cost(150);
9559 format %{ "FLD $src1\n\t" 9537 format %{ "FLD $src1\n\t"
9561 "JNP exit\n\t" 9539 "JNP exit\n\t"
9562 "MOV ah,1 // saw a NaN, set CF\n\t" 9540 "MOV ah,1 // saw a NaN, set CF\n\t"
9563 "SAHF\n" 9541 "SAHF\n"
9564 "exit:\tNOP // avoid branch to branch" %} 9542 "exit:\tNOP // avoid branch to branch" %}
9565 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9543 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9566 ins_encode( Push_Reg_D(src1), 9544 ins_encode( Push_Reg_DPR(src1),
9567 OpcP, RegOpc(src2), 9545 OpcP, RegOpc(src2),
9568 cmpF_P6_fixup ); 9546 cmpF_P6_fixup );
9569 ins_pipe( pipe_slow ); 9547 ins_pipe( pipe_slow );
9570 %} 9548 %}
9571 9549
9572 instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9550 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9573 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9551 predicate(VM_Version::supports_cmov() && UseSSE <=1);
9574 match(Set cr (CmpD src1 src2)); 9552 match(Set cr (CmpD src1 src2));
9575 ins_cost(150); 9553 ins_cost(150);
9576 format %{ "FLD $src1\n\t" 9554 format %{ "FLD $src1\n\t"
9577 "FUCOMIP ST,$src2 // P6 instruction" %} 9555 "FUCOMIP ST,$src2 // P6 instruction" %}
9578 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9556 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9579 ins_encode( Push_Reg_D(src1), 9557 ins_encode( Push_Reg_DPR(src1),
9580 OpcP, RegOpc(src2)); 9558 OpcP, RegOpc(src2));
9581 ins_pipe( pipe_slow ); 9559 ins_pipe( pipe_slow );
9582 %} 9560 %}
9583 9561
9584 // Compare & branch 9562 // Compare & branch
9585 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{ 9563 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9586 predicate(UseSSE<=1); 9564 predicate(UseSSE<=1);
9587 match(Set cr (CmpD src1 src2)); 9565 match(Set cr (CmpD src1 src2));
9588 effect(KILL rax); 9566 effect(KILL rax);
9589 ins_cost(200); 9567 ins_cost(200);
9590 format %{ "FLD $src1\n\t" 9568 format %{ "FLD $src1\n\t"
9593 "TEST AX,0x400\n\t" 9571 "TEST AX,0x400\n\t"
9594 "JZ,s flags\n\t" 9572 "JZ,s flags\n\t"
9595 "MOV AH,1\t# unordered treat as LT\n" 9573 "MOV AH,1\t# unordered treat as LT\n"
9596 "flags:\tSAHF" %} 9574 "flags:\tSAHF" %}
9597 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9575 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9598 ins_encode( Push_Reg_D(src1), 9576 ins_encode( Push_Reg_DPR(src1),
9599 OpcP, RegOpc(src2), 9577 OpcP, RegOpc(src2),
9600 fpu_flags); 9578 fpu_flags);
9601 ins_pipe( pipe_slow ); 9579 ins_pipe( pipe_slow );
9602 %} 9580 %}
9603 9581
9604 // Compare vs zero into -1,0,1 9582 // Compare vs zero into -1,0,1
9605 instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{ 9583 instruct cmpDPR_0(eRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9606 predicate(UseSSE<=1); 9584 predicate(UseSSE<=1);
9607 match(Set dst (CmpD3 src1 zero)); 9585 match(Set dst (CmpD3 src1 zero));
9608 effect(KILL cr, KILL rax); 9586 effect(KILL cr, KILL rax);
9609 ins_cost(280); 9587 ins_cost(280);
9610 format %{ "FTSTD $dst,$src1" %} 9588 format %{ "FTSTD $dst,$src1" %}
9611 opcode(0xE4, 0xD9); 9589 opcode(0xE4, 0xD9);
9612 ins_encode( Push_Reg_D(src1), 9590 ins_encode( Push_Reg_DPR(src1),
9613 OpcS, OpcP, PopFPU, 9591 OpcS, OpcP, PopFPU,
9614 CmpF_Result(dst)); 9592 CmpF_Result(dst));
9615 ins_pipe( pipe_slow ); 9593 ins_pipe( pipe_slow );
9616 %} 9594 %}
9617 9595
9618 // Compare into -1,0,1 9596 // Compare into -1,0,1
9619 instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{ 9597 instruct cmpDPR_reg(eRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9620 predicate(UseSSE<=1); 9598 predicate(UseSSE<=1);
9621 match(Set dst (CmpD3 src1 src2)); 9599 match(Set dst (CmpD3 src1 src2));
9622 effect(KILL cr, KILL rax); 9600 effect(KILL cr, KILL rax);
9623 ins_cost(300); 9601 ins_cost(300);
9624 format %{ "FCMPD $dst,$src1,$src2" %} 9602 format %{ "FCMPD $dst,$src1,$src2" %}
9625 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9603 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9626 ins_encode( Push_Reg_D(src1), 9604 ins_encode( Push_Reg_DPR(src1),
9627 OpcP, RegOpc(src2), 9605 OpcP, RegOpc(src2),
9628 CmpF_Result(dst)); 9606 CmpF_Result(dst));
9629 ins_pipe( pipe_slow ); 9607 ins_pipe( pipe_slow );
9630 %} 9608 %}
9631 9609
9632 // float compare and set condition codes in EFLAGS by XMM regs 9610 // float compare and set condition codes in EFLAGS by XMM regs
9633 instruct cmpXD_cc(eFlagsRegU cr, regXD src1, regXD src2) %{ 9611 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9634 predicate(UseSSE>=2); 9612 predicate(UseSSE>=2);
9635 match(Set cr (CmpD src1 src2)); 9613 match(Set cr (CmpD src1 src2));
9636 ins_cost(145); 9614 ins_cost(145);
9637 format %{ "UCOMISD $src1,$src2\n\t" 9615 format %{ "UCOMISD $src1,$src2\n\t"
9638 "JNP,s exit\n\t" 9616 "JNP,s exit\n\t"
9645 emit_cmpfp_fixup(_masm); 9623 emit_cmpfp_fixup(_masm);
9646 %} 9624 %}
9647 ins_pipe( pipe_slow ); 9625 ins_pipe( pipe_slow );
9648 %} 9626 %}
9649 9627
9650 instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD src1, regXD src2) %{ 9628 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9651 predicate(UseSSE>=2); 9629 predicate(UseSSE>=2);
9652 match(Set cr (CmpD src1 src2)); 9630 match(Set cr (CmpD src1 src2));
9653 ins_cost(100); 9631 ins_cost(100);
9654 format %{ "UCOMISD $src1,$src2" %} 9632 format %{ "UCOMISD $src1,$src2" %}
9655 ins_encode %{ 9633 ins_encode %{
9657 %} 9635 %}
9658 ins_pipe( pipe_slow ); 9636 ins_pipe( pipe_slow );
9659 %} 9637 %}
9660 9638
9661 // float compare and set condition codes in EFLAGS by XMM regs 9639 // float compare and set condition codes in EFLAGS by XMM regs
9662 instruct cmpXD_ccmem(eFlagsRegU cr, regXD src1, memory src2) %{ 9640 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9663 predicate(UseSSE>=2); 9641 predicate(UseSSE>=2);
9664 match(Set cr (CmpD src1 (LoadD src2))); 9642 match(Set cr (CmpD src1 (LoadD src2)));
9665 ins_cost(145); 9643 ins_cost(145);
9666 format %{ "UCOMISD $src1,$src2\n\t" 9644 format %{ "UCOMISD $src1,$src2\n\t"
9667 "JNP,s exit\n\t" 9645 "JNP,s exit\n\t"
9674 emit_cmpfp_fixup(_masm); 9652 emit_cmpfp_fixup(_masm);
9675 %} 9653 %}
9676 ins_pipe( pipe_slow ); 9654 ins_pipe( pipe_slow );
9677 %} 9655 %}
9678 9656
9679 instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD src1, memory src2) %{ 9657 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9680 predicate(UseSSE>=2); 9658 predicate(UseSSE>=2);
9681 match(Set cr (CmpD src1 (LoadD src2))); 9659 match(Set cr (CmpD src1 (LoadD src2)));
9682 ins_cost(100); 9660 ins_cost(100);
9683 format %{ "UCOMISD $src1,$src2" %} 9661 format %{ "UCOMISD $src1,$src2" %}
9684 ins_encode %{ 9662 ins_encode %{
9686 %} 9664 %}
9687 ins_pipe( pipe_slow ); 9665 ins_pipe( pipe_slow );
9688 %} 9666 %}
9689 9667
9690 // Compare into -1,0,1 in XMM 9668 // Compare into -1,0,1 in XMM
9691 instruct cmpXD_reg(xRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{ 9669 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9692 predicate(UseSSE>=2); 9670 predicate(UseSSE>=2);
9693 match(Set dst (CmpD3 src1 src2)); 9671 match(Set dst (CmpD3 src1 src2));
9694 effect(KILL cr); 9672 effect(KILL cr);
9695 ins_cost(255); 9673 ins_cost(255);
9696 format %{ "UCOMISD $src1, $src2\n\t" 9674 format %{ "UCOMISD $src1, $src2\n\t"
9706 %} 9684 %}
9707 ins_pipe( pipe_slow ); 9685 ins_pipe( pipe_slow );
9708 %} 9686 %}
9709 9687
9710 // Compare into -1,0,1 in XMM and memory 9688 // Compare into -1,0,1 in XMM and memory
9711 instruct cmpXD_regmem(xRegI dst, regXD src1, memory src2, eFlagsReg cr) %{ 9689 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9712 predicate(UseSSE>=2); 9690 predicate(UseSSE>=2);
9713 match(Set dst (CmpD3 src1 (LoadD src2))); 9691 match(Set dst (CmpD3 src1 (LoadD src2)));
9714 effect(KILL cr); 9692 effect(KILL cr);
9715 ins_cost(275); 9693 ins_cost(275);
9716 format %{ "UCOMISD $src1, $src2\n\t" 9694 format %{ "UCOMISD $src1, $src2\n\t"
9726 %} 9704 %}
9727 ins_pipe( pipe_slow ); 9705 ins_pipe( pipe_slow );
9728 %} 9706 %}
9729 9707
9730 9708
9731 instruct subD_reg(regD dst, regD src) %{ 9709 instruct subDPR_reg(regDPR dst, regDPR src) %{
9732 predicate (UseSSE <=1); 9710 predicate (UseSSE <=1);
9733 match(Set dst (SubD dst src)); 9711 match(Set dst (SubD dst src));
9734 9712
9735 format %{ "FLD $src\n\t" 9713 format %{ "FLD $src\n\t"
9736 "DSUBp $dst,ST" %} 9714 "DSUBp $dst,ST" %}
9737 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9715 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
9738 ins_cost(150); 9716 ins_cost(150);
9739 ins_encode( Push_Reg_D(src), 9717 ins_encode( Push_Reg_DPR(src),
9740 OpcP, RegOpc(dst) ); 9718 OpcP, RegOpc(dst) );
9741 ins_pipe( fpu_reg_reg ); 9719 ins_pipe( fpu_reg_reg );
9742 %} 9720 %}
9743 9721
9744 instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{ 9722 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9745 predicate (UseSSE <=1); 9723 predicate (UseSSE <=1);
9746 match(Set dst (RoundDouble (SubD src1 src2))); 9724 match(Set dst (RoundDouble (SubD src1 src2)));
9747 ins_cost(250); 9725 ins_cost(250);
9748 9726
9749 format %{ "FLD $src2\n\t" 9727 format %{ "FLD $src2\n\t"
9750 "DSUB ST,$src1\n\t" 9728 "DSUB ST,$src1\n\t"
9751 "FSTP_D $dst\t# D-round" %} 9729 "FSTP_D $dst\t# D-round" %}
9752 opcode(0xD8, 0x5); 9730 opcode(0xD8, 0x5);
9753 ins_encode( Push_Reg_D(src2), 9731 ins_encode( Push_Reg_DPR(src2),
9754 OpcP, RegOpc(src1), Pop_Mem_D(dst) ); 9732 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9755 ins_pipe( fpu_mem_reg_reg ); 9733 ins_pipe( fpu_mem_reg_reg );
9756 %} 9734 %}
9757 9735
9758 9736
9759 instruct subD_reg_mem(regD dst, memory src) %{ 9737 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9760 predicate (UseSSE <=1); 9738 predicate (UseSSE <=1);
9761 match(Set dst (SubD dst (LoadD src))); 9739 match(Set dst (SubD dst (LoadD src)));
9762 ins_cost(150); 9740 ins_cost(150);
9763 9741
9764 format %{ "FLD $src\n\t" 9742 format %{ "FLD $src\n\t"
9767 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9745 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9768 OpcP, RegOpc(dst) ); 9746 OpcP, RegOpc(dst) );
9769 ins_pipe( fpu_reg_mem ); 9747 ins_pipe( fpu_reg_mem );
9770 %} 9748 %}
9771 9749
9772 instruct absD_reg(regDPR1 dst, regDPR1 src) %{ 9750 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9773 predicate (UseSSE<=1); 9751 predicate (UseSSE<=1);
9774 match(Set dst (AbsD src)); 9752 match(Set dst (AbsD src));
9775 ins_cost(100); 9753 ins_cost(100);
9776 format %{ "FABS" %} 9754 format %{ "FABS" %}
9777 opcode(0xE1, 0xD9); 9755 opcode(0xE1, 0xD9);
9778 ins_encode( OpcS, OpcP ); 9756 ins_encode( OpcS, OpcP );
9779 ins_pipe( fpu_reg_reg ); 9757 ins_pipe( fpu_reg_reg );
9780 %} 9758 %}
9781 9759
9782 instruct absXD_reg( regXD dst ) %{ 9760 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9783 predicate(UseSSE>=2);
9784 match(Set dst (AbsD dst));
9785 ins_cost(150);
9786 format %{ "ANDPD $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
9787 ins_encode %{
9788 __ andpd($dst$$XMMRegister,
9789 ExternalAddress((address)double_signmask_pool));
9790 %}
9791 ins_pipe( pipe_slow );
9792 %}
9793
9794 instruct negD_reg(regDPR1 dst, regDPR1 src) %{
9795 predicate(UseSSE<=1); 9761 predicate(UseSSE<=1);
9796 match(Set dst (NegD src)); 9762 match(Set dst (NegD src));
9797 ins_cost(100); 9763 ins_cost(100);
9798 format %{ "FCHS" %} 9764 format %{ "FCHS" %}
9799 opcode(0xE0, 0xD9); 9765 opcode(0xE0, 0xD9);
9800 ins_encode( OpcS, OpcP ); 9766 ins_encode( OpcS, OpcP );
9801 ins_pipe( fpu_reg_reg ); 9767 ins_pipe( fpu_reg_reg );
9802 %} 9768 %}
9803 9769
9804 instruct negXD_reg( regXD dst ) %{ 9770 instruct addDPR_reg(regDPR dst, regDPR src) %{
9805 predicate(UseSSE>=2);
9806 match(Set dst (NegD dst));
9807 ins_cost(150);
9808 format %{ "XORPD $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
9809 ins_encode %{
9810 __ xorpd($dst$$XMMRegister,
9811 ExternalAddress((address)double_signflip_pool));
9812 %}
9813 ins_pipe( pipe_slow );
9814 %}
9815
9816 instruct addD_reg(regD dst, regD src) %{
9817 predicate(UseSSE<=1); 9771 predicate(UseSSE<=1);
9818 match(Set dst (AddD dst src)); 9772 match(Set dst (AddD dst src));
9819 format %{ "FLD $src\n\t" 9773 format %{ "FLD $src\n\t"
9820 "DADD $dst,ST" %} 9774 "DADD $dst,ST" %}
9821 size(4); 9775 size(4);
9822 ins_cost(150); 9776 ins_cost(150);
9823 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9777 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9824 ins_encode( Push_Reg_D(src), 9778 ins_encode( Push_Reg_DPR(src),
9825 OpcP, RegOpc(dst) ); 9779 OpcP, RegOpc(dst) );
9826 ins_pipe( fpu_reg_reg ); 9780 ins_pipe( fpu_reg_reg );
9827 %} 9781 %}
9828 9782
9829 9783
9830 instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{ 9784 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9831 predicate(UseSSE<=1); 9785 predicate(UseSSE<=1);
9832 match(Set dst (RoundDouble (AddD src1 src2))); 9786 match(Set dst (RoundDouble (AddD src1 src2)));
9833 ins_cost(250); 9787 ins_cost(250);
9834 9788
9835 format %{ "FLD $src2\n\t" 9789 format %{ "FLD $src2\n\t"
9836 "DADD ST,$src1\n\t" 9790 "DADD ST,$src1\n\t"
9837 "FSTP_D $dst\t# D-round" %} 9791 "FSTP_D $dst\t# D-round" %}
9838 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9792 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9839 ins_encode( Push_Reg_D(src2), 9793 ins_encode( Push_Reg_DPR(src2),
9840 OpcP, RegOpc(src1), Pop_Mem_D(dst) ); 9794 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9841 ins_pipe( fpu_mem_reg_reg ); 9795 ins_pipe( fpu_mem_reg_reg );
9842 %} 9796 %}
9843 9797
9844 9798
9845 instruct addD_reg_mem(regD dst, memory src) %{ 9799 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9846 predicate(UseSSE<=1); 9800 predicate(UseSSE<=1);
9847 match(Set dst (AddD dst (LoadD src))); 9801 match(Set dst (AddD dst (LoadD src)));
9848 ins_cost(150); 9802 ins_cost(150);
9849 9803
9850 format %{ "FLD $src\n\t" 9804 format %{ "FLD $src\n\t"
9854 OpcP, RegOpc(dst) ); 9808 OpcP, RegOpc(dst) );
9855 ins_pipe( fpu_reg_mem ); 9809 ins_pipe( fpu_reg_mem );
9856 %} 9810 %}
9857 9811
9858 // add-to-memory 9812 // add-to-memory
9859 instruct addD_mem_reg(memory dst, regD src) %{ 9813 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9860 predicate(UseSSE<=1); 9814 predicate(UseSSE<=1);
9861 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); 9815 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9862 ins_cost(150); 9816 ins_cost(150);
9863 9817
9864 format %{ "FLD_D $dst\n\t" 9818 format %{ "FLD_D $dst\n\t"
9870 set_instruction_start, 9824 set_instruction_start,
9871 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9825 Opcode(0xDD), RMopc_Mem(0x03,dst) );
9872 ins_pipe( fpu_reg_mem ); 9826 ins_pipe( fpu_reg_mem );
9873 %} 9827 %}
9874 9828
9875 instruct addD_reg_imm1(regD dst, immD1 con) %{ 9829 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9876 predicate(UseSSE<=1); 9830 predicate(UseSSE<=1);
9877 match(Set dst (AddD dst con)); 9831 match(Set dst (AddD dst con));
9878 ins_cost(125); 9832 ins_cost(125);
9879 format %{ "FLD1\n\t" 9833 format %{ "FLD1\n\t"
9880 "DADDp $dst,ST" %} 9834 "DADDp $dst,ST" %}
9883 __ faddp($dst$$reg); 9837 __ faddp($dst$$reg);
9884 %} 9838 %}
9885 ins_pipe(fpu_reg); 9839 ins_pipe(fpu_reg);
9886 %} 9840 %}
9887 9841
9888 instruct addD_reg_imm(regD dst, immD con) %{ 9842 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9889 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9843 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9890 match(Set dst (AddD dst con)); 9844 match(Set dst (AddD dst con));
9891 ins_cost(200); 9845 ins_cost(200);
9892 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9846 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
9893 "DADDp $dst,ST" %} 9847 "DADDp $dst,ST" %}
9896 __ faddp($dst$$reg); 9850 __ faddp($dst$$reg);
9897 %} 9851 %}
9898 ins_pipe(fpu_reg_mem); 9852 ins_pipe(fpu_reg_mem);
9899 %} 9853 %}
9900 9854
9901 instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{ 9855 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9902 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9856 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9903 match(Set dst (RoundDouble (AddD src con))); 9857 match(Set dst (RoundDouble (AddD src con)));
9904 ins_cost(200); 9858 ins_cost(200);
9905 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9859 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
9906 "DADD ST,$src\n\t" 9860 "DADD ST,$src\n\t"
9911 __ fstp_d(Address(rsp, $dst$$disp)); 9865 __ fstp_d(Address(rsp, $dst$$disp));
9912 %} 9866 %}
9913 ins_pipe(fpu_mem_reg_con); 9867 ins_pipe(fpu_mem_reg_con);
9914 %} 9868 %}
9915 9869
9916 // Add two double precision floating point values in xmm 9870 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9917 instruct addXD_reg(regXD dst, regXD src) %{
9918 predicate(UseSSE>=2);
9919 match(Set dst (AddD dst src));
9920 format %{ "ADDSD $dst,$src" %}
9921 ins_encode %{
9922 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
9923 %}
9924 ins_pipe( pipe_slow );
9925 %}
9926
9927 instruct addXD_imm(regXD dst, immXD con) %{
9928 predicate(UseSSE>=2);
9929 match(Set dst (AddD dst con));
9930 format %{ "ADDSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
9931 ins_encode %{
9932 __ addsd($dst$$XMMRegister, $constantaddress($con));
9933 %}
9934 ins_pipe(pipe_slow);
9935 %}
9936
9937 instruct addXD_mem(regXD dst, memory mem) %{
9938 predicate(UseSSE>=2);
9939 match(Set dst (AddD dst (LoadD mem)));
9940 format %{ "ADDSD $dst,$mem" %}
9941 ins_encode %{
9942 __ addsd($dst$$XMMRegister, $mem$$Address);
9943 %}
9944 ins_pipe( pipe_slow );
9945 %}
9946
9947 // Sub two double precision floating point values in xmm
9948 instruct subXD_reg(regXD dst, regXD src) %{
9949 predicate(UseSSE>=2);
9950 match(Set dst (SubD dst src));
9951 ins_cost(150);
9952 format %{ "SUBSD $dst,$src" %}
9953 ins_encode %{
9954 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
9955 %}
9956 ins_pipe( pipe_slow );
9957 %}
9958
9959 instruct subXD_imm(regXD dst, immXD con) %{
9960 predicate(UseSSE>=2);
9961 match(Set dst (SubD dst con));
9962 ins_cost(150);
9963 format %{ "SUBSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
9964 ins_encode %{
9965 __ subsd($dst$$XMMRegister, $constantaddress($con));
9966 %}
9967 ins_pipe(pipe_slow);
9968 %}
9969
9970 instruct subXD_mem(regXD dst, memory mem) %{
9971 predicate(UseSSE>=2);
9972 match(Set dst (SubD dst (LoadD mem)));
9973 ins_cost(150);
9974 format %{ "SUBSD $dst,$mem" %}
9975 ins_encode %{
9976 __ subsd($dst$$XMMRegister, $mem$$Address);
9977 %}
9978 ins_pipe( pipe_slow );
9979 %}
9980
9981 // Mul two double precision floating point values in xmm
9982 instruct mulXD_reg(regXD dst, regXD src) %{
9983 predicate(UseSSE>=2);
9984 match(Set dst (MulD dst src));
9985 format %{ "MULSD $dst,$src" %}
9986 ins_encode %{
9987 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
9988 %}
9989 ins_pipe( pipe_slow );
9990 %}
9991
9992 instruct mulXD_imm(regXD dst, immXD con) %{
9993 predicate(UseSSE>=2);
9994 match(Set dst (MulD dst con));
9995 format %{ "MULSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
9996 ins_encode %{
9997 __ mulsd($dst$$XMMRegister, $constantaddress($con));
9998 %}
9999 ins_pipe(pipe_slow);
10000 %}
10001
10002 instruct mulXD_mem(regXD dst, memory mem) %{
10003 predicate(UseSSE>=2);
10004 match(Set dst (MulD dst (LoadD mem)));
10005 format %{ "MULSD $dst,$mem" %}
10006 ins_encode %{
10007 __ mulsd($dst$$XMMRegister, $mem$$Address);
10008 %}
10009 ins_pipe( pipe_slow );
10010 %}
10011
10012 // Div two double precision floating point values in xmm
10013 instruct divXD_reg(regXD dst, regXD src) %{
10014 predicate(UseSSE>=2);
10015 match(Set dst (DivD dst src));
10016 format %{ "DIVSD $dst,$src" %}
10017 opcode(0xF2, 0x0F, 0x5E);
10018 ins_encode %{
10019 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
10020 %}
10021 ins_pipe( pipe_slow );
10022 %}
10023
10024 instruct divXD_imm(regXD dst, immXD con) %{
10025 predicate(UseSSE>=2);
10026 match(Set dst (DivD dst con));
10027 format %{ "DIVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
10028 ins_encode %{
10029 __ divsd($dst$$XMMRegister, $constantaddress($con));
10030 %}
10031 ins_pipe(pipe_slow);
10032 %}
10033
10034 instruct divXD_mem(regXD dst, memory mem) %{
10035 predicate(UseSSE>=2);
10036 match(Set dst (DivD dst (LoadD mem)));
10037 format %{ "DIVSD $dst,$mem" %}
10038 ins_encode %{
10039 __ divsd($dst$$XMMRegister, $mem$$Address);
10040 %}
10041 ins_pipe( pipe_slow );
10042 %}
10043
10044
10045 instruct mulD_reg(regD dst, regD src) %{
10046 predicate(UseSSE<=1); 9871 predicate(UseSSE<=1);
10047 match(Set dst (MulD dst src)); 9872 match(Set dst (MulD dst src));
10048 format %{ "FLD $src\n\t" 9873 format %{ "FLD $src\n\t"
10049 "DMULp $dst,ST" %} 9874 "DMULp $dst,ST" %}
10050 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9875 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
10051 ins_cost(150); 9876 ins_cost(150);
10052 ins_encode( Push_Reg_D(src), 9877 ins_encode( Push_Reg_DPR(src),
10053 OpcP, RegOpc(dst) ); 9878 OpcP, RegOpc(dst) );
10054 ins_pipe( fpu_reg_reg ); 9879 ins_pipe( fpu_reg_reg );
10055 %} 9880 %}
10056 9881
10057 // Strict FP instruction biases argument before multiply then 9882 // Strict FP instruction biases argument before multiply then
10060 // scale arg1 by multiplying arg1 by 2^(-15360) 9885 // scale arg1 by multiplying arg1 by 2^(-15360)
10061 // load arg2 9886 // load arg2
10062 // multiply scaled arg1 by arg2 9887 // multiply scaled arg1 by arg2
10063 // rescale product by 2^(15360) 9888 // rescale product by 2^(15360)
10064 // 9889 //
10065 instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{ 9890 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
10066 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9891 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
10067 match(Set dst (MulD dst src)); 9892 match(Set dst (MulD dst src));
10068 ins_cost(1); // Select this instruction for all strict FP double multiplies 9893 ins_cost(1); // Select this instruction for all strict FP double multiplies
10069 9894
10070 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9895 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
10073 "DMULp $dst,ST\n\t" 9898 "DMULp $dst,ST\n\t"
10074 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9899 "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
10075 "DMULp $dst,ST\n\t" %} 9900 "DMULp $dst,ST\n\t" %}
10076 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9901 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
10077 ins_encode( strictfp_bias1(dst), 9902 ins_encode( strictfp_bias1(dst),
10078 Push_Reg_D(src), 9903 Push_Reg_DPR(src),
10079 OpcP, RegOpc(dst), 9904 OpcP, RegOpc(dst),
10080 strictfp_bias2(dst) ); 9905 strictfp_bias2(dst) );
10081 ins_pipe( fpu_reg_reg ); 9906 ins_pipe( fpu_reg_reg );
10082 %} 9907 %}
10083 9908
10084 instruct mulD_reg_imm(regD dst, immD con) %{ 9909 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
10085 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9910 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
10086 match(Set dst (MulD dst con)); 9911 match(Set dst (MulD dst con));
10087 ins_cost(200); 9912 ins_cost(200);
10088 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9913 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
10089 "DMULp $dst,ST" %} 9914 "DMULp $dst,ST" %}
10093 %} 9918 %}
10094 ins_pipe(fpu_reg_mem); 9919 ins_pipe(fpu_reg_mem);
10095 %} 9920 %}
10096 9921
10097 9922
10098 instruct mulD_reg_mem(regD dst, memory src) %{ 9923 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
10099 predicate( UseSSE<=1 ); 9924 predicate( UseSSE<=1 );
10100 match(Set dst (MulD dst (LoadD src))); 9925 match(Set dst (MulD dst (LoadD src)));
10101 ins_cost(200); 9926 ins_cost(200);
10102 format %{ "FLD_D $src\n\t" 9927 format %{ "FLD_D $src\n\t"
10103 "DMULp $dst,ST" %} 9928 "DMULp $dst,ST" %}
10107 ins_pipe( fpu_reg_mem ); 9932 ins_pipe( fpu_reg_mem );
10108 %} 9933 %}
10109 9934
10110 // 9935 //
10111 // Cisc-alternate to reg-reg multiply 9936 // Cisc-alternate to reg-reg multiply
10112 instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{ 9937 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
10113 predicate( UseSSE<=1 ); 9938 predicate( UseSSE<=1 );
10114 match(Set dst (MulD src (LoadD mem))); 9939 match(Set dst (MulD src (LoadD mem)));
10115 ins_cost(250); 9940 ins_cost(250);
10116 format %{ "FLD_D $mem\n\t" 9941 format %{ "FLD_D $mem\n\t"
10117 "DMUL ST,$src\n\t" 9942 "DMUL ST,$src\n\t"
10118 "FSTP_D $dst" %} 9943 "FSTP_D $dst" %}
10119 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9944 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
10120 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9945 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
10121 OpcReg_F(src), 9946 OpcReg_FPR(src),
10122 Pop_Reg_D(dst) ); 9947 Pop_Reg_DPR(dst) );
10123 ins_pipe( fpu_reg_reg_mem ); 9948 ins_pipe( fpu_reg_reg_mem );
10124 %} 9949 %}
10125 9950
10126 9951
10127 // MACRO3 -- addD a mulD 9952 // MACRO3 -- addDPR a mulDPR
10128 // This instruction is a '2-address' instruction in that the result goes 9953 // This instruction is a '2-address' instruction in that the result goes
10129 // back to src2. This eliminates a move from the macro; possibly the 9954 // back to src2. This eliminates a move from the macro; possibly the
10130 // register allocator will have to add it back (and maybe not). 9955 // register allocator will have to add it back (and maybe not).
10131 instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{ 9956 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
10132 predicate( UseSSE<=1 ); 9957 predicate( UseSSE<=1 );
10133 match(Set src2 (AddD (MulD src0 src1) src2)); 9958 match(Set src2 (AddD (MulD src0 src1) src2));
10134 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9959 format %{ "FLD $src0\t# ===MACRO3d===\n\t"
10135 "DMUL ST,$src1\n\t" 9960 "DMUL ST,$src1\n\t"
10136 "DADDp $src2,ST" %} 9961 "DADDp $src2,ST" %}
10137 ins_cost(250); 9962 ins_cost(250);
10138 opcode(0xDD); /* LoadD DD /0 */ 9963 opcode(0xDD); /* LoadD DD /0 */
10139 ins_encode( Push_Reg_F(src0), 9964 ins_encode( Push_Reg_FPR(src0),
10140 FMul_ST_reg(src1), 9965 FMul_ST_reg(src1),
10141 FAddP_reg_ST(src2) ); 9966 FAddP_reg_ST(src2) );
10142 ins_pipe( fpu_reg_reg_reg ); 9967 ins_pipe( fpu_reg_reg_reg );
10143 %} 9968 %}
10144 9969
10145 9970
10146 // MACRO3 -- subD a mulD 9971 // MACRO3 -- subDPR a mulDPR
10147 instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{ 9972 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
10148 predicate( UseSSE<=1 ); 9973 predicate( UseSSE<=1 );
10149 match(Set src2 (SubD (MulD src0 src1) src2)); 9974 match(Set src2 (SubD (MulD src0 src1) src2));
10150 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9975 format %{ "FLD $src0\t# ===MACRO3d===\n\t"
10151 "DMUL ST,$src1\n\t" 9976 "DMUL ST,$src1\n\t"
10152 "DSUBRp $src2,ST" %} 9977 "DSUBRp $src2,ST" %}
10153 ins_cost(250); 9978 ins_cost(250);
10154 ins_encode( Push_Reg_F(src0), 9979 ins_encode( Push_Reg_FPR(src0),
10155 FMul_ST_reg(src1), 9980 FMul_ST_reg(src1),
10156 Opcode(0xDE), Opc_plus(0xE0,src2)); 9981 Opcode(0xDE), Opc_plus(0xE0,src2));
10157 ins_pipe( fpu_reg_reg_reg ); 9982 ins_pipe( fpu_reg_reg_reg );
10158 %} 9983 %}
10159 9984
10160 9985
10161 instruct divD_reg(regD dst, regD src) %{ 9986 instruct divDPR_reg(regDPR dst, regDPR src) %{
10162 predicate( UseSSE<=1 ); 9987 predicate( UseSSE<=1 );
10163 match(Set dst (DivD dst src)); 9988 match(Set dst (DivD dst src));
10164 9989
10165 format %{ "FLD $src\n\t" 9990 format %{ "FLD $src\n\t"
10166 "FDIVp $dst,ST" %} 9991 "FDIVp $dst,ST" %}
10167 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9992 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10168 ins_cost(150); 9993 ins_cost(150);
10169 ins_encode( Push_Reg_D(src), 9994 ins_encode( Push_Reg_DPR(src),
10170 OpcP, RegOpc(dst) ); 9995 OpcP, RegOpc(dst) );
10171 ins_pipe( fpu_reg_reg ); 9996 ins_pipe( fpu_reg_reg );
10172 %} 9997 %}
10173 9998
10174 // Strict FP instruction biases argument before division then 9999 // Strict FP instruction biases argument before division then
10177 // scale dividend by multiplying dividend by 2^(-15360) 10002 // scale dividend by multiplying dividend by 2^(-15360)
10178 // load divisor 10003 // load divisor
10179 // divide scaled dividend by divisor 10004 // divide scaled dividend by divisor
10180 // rescale quotient by 2^(15360) 10005 // rescale quotient by 2^(15360)
10181 // 10006 //
10182 instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{ 10007 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
10183 predicate (UseSSE<=1); 10008 predicate (UseSSE<=1);
10184 match(Set dst (DivD dst src)); 10009 match(Set dst (DivD dst src));
10185 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 10010 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
10186 ins_cost(01); 10011 ins_cost(01);
10187 10012
10191 "FDIVp $dst,ST\n\t" 10016 "FDIVp $dst,ST\n\t"
10192 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 10017 "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
10193 "DMULp $dst,ST\n\t" %} 10018 "DMULp $dst,ST\n\t" %}
10194 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10019 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10195 ins_encode( strictfp_bias1(dst), 10020 ins_encode( strictfp_bias1(dst),
10196 Push_Reg_D(src), 10021 Push_Reg_DPR(src),
10197 OpcP, RegOpc(dst), 10022 OpcP, RegOpc(dst),
10198 strictfp_bias2(dst) ); 10023 strictfp_bias2(dst) );
10199 ins_pipe( fpu_reg_reg ); 10024 ins_pipe( fpu_reg_reg );
10200 %} 10025 %}
10201 10026
10202 instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{ 10027 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
10203 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) ); 10028 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
10204 match(Set dst (RoundDouble (DivD src1 src2))); 10029 match(Set dst (RoundDouble (DivD src1 src2)));
10205 10030
10206 format %{ "FLD $src1\n\t" 10031 format %{ "FLD $src1\n\t"
10207 "FDIV ST,$src2\n\t" 10032 "FDIV ST,$src2\n\t"
10208 "FSTP_D $dst\t# D-round" %} 10033 "FSTP_D $dst\t# D-round" %}
10209 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ 10034 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10210 ins_encode( Push_Reg_D(src1), 10035 ins_encode( Push_Reg_DPR(src1),
10211 OpcP, RegOpc(src2), Pop_Mem_D(dst) ); 10036 OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
10212 ins_pipe( fpu_mem_reg_reg ); 10037 ins_pipe( fpu_mem_reg_reg );
10213 %} 10038 %}
10214 10039
10215 10040
10216 instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{ 10041 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
10217 predicate(UseSSE<=1); 10042 predicate(UseSSE<=1);
10218 match(Set dst (ModD dst src)); 10043 match(Set dst (ModD dst src));
10219 effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS 10044 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10220 10045
10221 format %{ "DMOD $dst,$src" %} 10046 format %{ "DMOD $dst,$src" %}
10222 ins_cost(250); 10047 ins_cost(250);
10223 ins_encode(Push_Reg_Mod_D(dst, src), 10048 ins_encode(Push_Reg_Mod_DPR(dst, src),
10224 emitModD(), 10049 emitModDPR(),
10225 Push_Result_Mod_D(src), 10050 Push_Result_Mod_DPR(src),
10226 Pop_Reg_D(dst)); 10051 Pop_Reg_DPR(dst));
10227 ins_pipe( pipe_slow ); 10052 ins_pipe( pipe_slow );
10228 %} 10053 %}
10229 10054
10230 instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{ 10055 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
10231 predicate(UseSSE>=2); 10056 predicate(UseSSE>=2);
10232 match(Set dst (ModD src0 src1)); 10057 match(Set dst (ModD src0 src1));
10233 effect(KILL rax, KILL cr); 10058 effect(KILL rax, KILL cr);
10234 10059
10235 format %{ "SUB ESP,8\t # DMOD\n" 10060 format %{ "SUB ESP,8\t # DMOD\n"
10246 "\tMOVSD $dst,[ESP+0]\n" 10071 "\tMOVSD $dst,[ESP+0]\n"
10247 "\tADD ESP,8\n" 10072 "\tADD ESP,8\n"
10248 "\tFSTP ST0\t # Restore FPU Stack" 10073 "\tFSTP ST0\t # Restore FPU Stack"
10249 %} 10074 %}
10250 ins_cost(250); 10075 ins_cost(250);
10251 ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU); 10076 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
10252 ins_pipe( pipe_slow ); 10077 ins_pipe( pipe_slow );
10253 %} 10078 %}
10254 10079
10255 instruct sinD_reg(regDPR1 dst, regDPR1 src) %{ 10080 instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{
10256 predicate (UseSSE<=1); 10081 predicate (UseSSE<=1);
10257 match(Set dst (SinD src)); 10082 match(Set dst (SinD src));
10258 ins_cost(1800); 10083 ins_cost(1800);
10259 format %{ "DSIN $dst" %} 10084 format %{ "DSIN $dst" %}
10260 opcode(0xD9, 0xFE); 10085 opcode(0xD9, 0xFE);
10261 ins_encode( OpcP, OpcS ); 10086 ins_encode( OpcP, OpcS );
10262 ins_pipe( pipe_slow ); 10087 ins_pipe( pipe_slow );
10263 %} 10088 %}
10264 10089
10265 instruct sinXD_reg(regXD dst, eFlagsReg cr) %{ 10090 instruct sinD_reg(regD dst, eFlagsReg cr) %{
10266 predicate (UseSSE>=2); 10091 predicate (UseSSE>=2);
10267 match(Set dst (SinD dst)); 10092 match(Set dst (SinD dst));
10268 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 10093 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10269 ins_cost(1800); 10094 ins_cost(1800);
10270 format %{ "DSIN $dst" %} 10095 format %{ "DSIN $dst" %}
10271 opcode(0xD9, 0xFE); 10096 opcode(0xD9, 0xFE);
10272 ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); 10097 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
10273 ins_pipe( pipe_slow ); 10098 ins_pipe( pipe_slow );
10274 %} 10099 %}
10275 10100
10276 instruct cosD_reg(regDPR1 dst, regDPR1 src) %{ 10101 instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{
10277 predicate (UseSSE<=1); 10102 predicate (UseSSE<=1);
10278 match(Set dst (CosD src)); 10103 match(Set dst (CosD src));
10279 ins_cost(1800); 10104 ins_cost(1800);
10280 format %{ "DCOS $dst" %} 10105 format %{ "DCOS $dst" %}
10281 opcode(0xD9, 0xFF); 10106 opcode(0xD9, 0xFF);
10282 ins_encode( OpcP, OpcS ); 10107 ins_encode( OpcP, OpcS );
10283 ins_pipe( pipe_slow ); 10108 ins_pipe( pipe_slow );
10284 %} 10109 %}
10285 10110
10286 instruct cosXD_reg(regXD dst, eFlagsReg cr) %{ 10111 instruct cosD_reg(regD dst, eFlagsReg cr) %{
10287 predicate (UseSSE>=2); 10112 predicate (UseSSE>=2);
10288 match(Set dst (CosD dst)); 10113 match(Set dst (CosD dst));
10289 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 10114 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10290 ins_cost(1800); 10115 ins_cost(1800);
10291 format %{ "DCOS $dst" %} 10116 format %{ "DCOS $dst" %}
10292 opcode(0xD9, 0xFF); 10117 opcode(0xD9, 0xFF);
10293 ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); 10118 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
10294 ins_pipe( pipe_slow ); 10119 ins_pipe( pipe_slow );
10295 %} 10120 %}
10296 10121
10297 instruct tanD_reg(regDPR1 dst, regDPR1 src) %{ 10122 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
10298 predicate (UseSSE<=1); 10123 predicate (UseSSE<=1);
10299 match(Set dst(TanD src)); 10124 match(Set dst(TanD src));
10300 format %{ "DTAN $dst" %} 10125 format %{ "DTAN $dst" %}
10301 ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan 10126 ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan
10302 Opcode(0xDD), Opcode(0xD8)); // fstp st 10127 Opcode(0xDD), Opcode(0xD8)); // fstp st
10303 ins_pipe( pipe_slow ); 10128 ins_pipe( pipe_slow );
10304 %} 10129 %}
10305 10130
10306 instruct tanXD_reg(regXD dst, eFlagsReg cr) %{ 10131 instruct tanD_reg(regD dst, eFlagsReg cr) %{
10307 predicate (UseSSE>=2); 10132 predicate (UseSSE>=2);
10308 match(Set dst(TanD dst)); 10133 match(Set dst(TanD dst));
10309 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 10134 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10310 format %{ "DTAN $dst" %} 10135 format %{ "DTAN $dst" %}
10311 ins_encode( Push_SrcXD(dst), 10136 ins_encode( Push_SrcD(dst),
10312 Opcode(0xD9), Opcode(0xF2), // fptan 10137 Opcode(0xD9), Opcode(0xF2), // fptan
10313 Opcode(0xDD), Opcode(0xD8), // fstp st 10138 Opcode(0xDD), Opcode(0xD8), // fstp st
10314 Push_ResultXD(dst) ); 10139 Push_ResultD(dst) );
10315 ins_pipe( pipe_slow ); 10140 ins_pipe( pipe_slow );
10316 %} 10141 %}
10317 10142
10318 instruct atanD_reg(regD dst, regD src) %{ 10143 instruct atanDPR_reg(regDPR dst, regDPR src) %{
10319 predicate (UseSSE<=1); 10144 predicate (UseSSE<=1);
10320 match(Set dst(AtanD dst src)); 10145 match(Set dst(AtanD dst src));
10321 format %{ "DATA $dst,$src" %} 10146 format %{ "DATA $dst,$src" %}
10322 opcode(0xD9, 0xF3); 10147 opcode(0xD9, 0xF3);
10323 ins_encode( Push_Reg_D(src), 10148 ins_encode( Push_Reg_DPR(src),
10324 OpcP, OpcS, RegOpc(dst) ); 10149 OpcP, OpcS, RegOpc(dst) );
10325 ins_pipe( pipe_slow ); 10150 ins_pipe( pipe_slow );
10326 %} 10151 %}
10327 10152
10328 instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{ 10153 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10329 predicate (UseSSE>=2); 10154 predicate (UseSSE>=2);
10330 match(Set dst(AtanD dst src)); 10155 match(Set dst(AtanD dst src));
10331 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 10156 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10332 format %{ "DATA $dst,$src" %} 10157 format %{ "DATA $dst,$src" %}
10333 opcode(0xD9, 0xF3); 10158 opcode(0xD9, 0xF3);
10334 ins_encode( Push_SrcXD(src), 10159 ins_encode( Push_SrcD(src),
10335 OpcP, OpcS, Push_ResultXD(dst) ); 10160 OpcP, OpcS, Push_ResultD(dst) );
10336 ins_pipe( pipe_slow ); 10161 ins_pipe( pipe_slow );
10337 %} 10162 %}
10338 10163
10339 instruct sqrtD_reg(regD dst, regD src) %{ 10164 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10340 predicate (UseSSE<=1); 10165 predicate (UseSSE<=1);
10341 match(Set dst (SqrtD src)); 10166 match(Set dst (SqrtD src));
10342 format %{ "DSQRT $dst,$src" %} 10167 format %{ "DSQRT $dst,$src" %}
10343 opcode(0xFA, 0xD9); 10168 opcode(0xFA, 0xD9);
10344 ins_encode( Push_Reg_D(src), 10169 ins_encode( Push_Reg_DPR(src),
10345 OpcS, OpcP, Pop_Reg_D(dst) ); 10170 OpcS, OpcP, Pop_Reg_DPR(dst) );
10346 ins_pipe( pipe_slow ); 10171 ins_pipe( pipe_slow );
10347 %} 10172 %}
10348 10173
10349 instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 10174 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10350 predicate (UseSSE<=1); 10175 predicate (UseSSE<=1);
10351 match(Set Y (PowD X Y)); // Raise X to the Yth power 10176 match(Set Y (PowD X Y)); // Raise X to the Yth power
10352 effect(KILL rax, KILL rbx, KILL rcx); 10177 effect(KILL rax, KILL rbx, KILL rcx);
10353 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t" 10178 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t"
10354 "FLD_D $X\n\t" 10179 "FLD_D $X\n\t"
10373 "FMUL ST(0),[ESP+0]\t# Scale\n\t" 10198 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10374 10199
10375 "ADD ESP,8" 10200 "ADD ESP,8"
10376 %} 10201 %}
10377 ins_encode( push_stack_temp_qword, 10202 ins_encode( push_stack_temp_qword,
10378 Push_Reg_D(X), 10203 Push_Reg_DPR(X),
10379 Opcode(0xD9), Opcode(0xF1), // fyl2x 10204 Opcode(0xD9), Opcode(0xF1), // fyl2x
10380 pow_exp_core_encoding, 10205 pow_exp_core_encoding,
10381 pop_stack_temp_qword); 10206 pop_stack_temp_qword);
10382 ins_pipe( pipe_slow ); 10207 ins_pipe( pipe_slow );
10383 %} 10208 %}
10384 10209
10385 instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{ 10210 instruct powD_reg(regD dst, regD src0, regD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
10386 predicate (UseSSE>=2); 10211 predicate (UseSSE>=2);
10387 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power 10212 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power
10388 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx ); 10213 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx );
10389 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t" 10214 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t"
10390 "MOVSD [ESP],$src1\n\t" 10215 "MOVSD [ESP],$src1\n\t"
10418 ins_encode( push_stack_temp_qword, 10243 ins_encode( push_stack_temp_qword,
10419 push_xmm_to_fpr1(src1), 10244 push_xmm_to_fpr1(src1),
10420 push_xmm_to_fpr1(src0), 10245 push_xmm_to_fpr1(src0),
10421 Opcode(0xD9), Opcode(0xF1), // fyl2x 10246 Opcode(0xD9), Opcode(0xF1), // fyl2x
10422 pow_exp_core_encoding, 10247 pow_exp_core_encoding,
10423 Push_ResultXD(dst) ); 10248 Push_ResultD(dst) );
10424 ins_pipe( pipe_slow ); 10249 ins_pipe( pipe_slow );
10425 %} 10250 %}
10426 10251
10427 10252
10428 instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 10253 instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10429 predicate (UseSSE<=1); 10254 predicate (UseSSE<=1);
10430 match(Set dpr1 (ExpD dpr1)); 10255 match(Set dpr1 (ExpD dpr1));
10431 effect(KILL rax, KILL rbx, KILL rcx); 10256 effect(KILL rax, KILL rbx, KILL rcx);
10432 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding" 10257 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding"
10433 "FLDL2E \t\t\t# Ld log2(e) X\n\t" 10258 "FLDL2E \t\t\t# Ld log2(e) X\n\t"
10459 pow_exp_core_encoding, 10284 pow_exp_core_encoding,
10460 pop_stack_temp_qword); 10285 pop_stack_temp_qword);
10461 ins_pipe( pipe_slow ); 10286 ins_pipe( pipe_slow );
10462 %} 10287 %}
10463 10288
10464 instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 10289 instruct expD_reg(regD dst, regD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10465 predicate (UseSSE>=2); 10290 predicate (UseSSE>=2);
10466 match(Set dst (ExpD src)); 10291 match(Set dst (ExpD src));
10467 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx); 10292 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx);
10468 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t" 10293 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t"
10469 "MOVSD [ESP],$src\n\t" 10294 "MOVSD [ESP],$src\n\t"
10490 10315
10491 "FST_D [ESP]\n\t" 10316 "FST_D [ESP]\n\t"
10492 "MOVSD $dst,[ESP]\n\t" 10317 "MOVSD $dst,[ESP]\n\t"
10493 "ADD ESP,8" 10318 "ADD ESP,8"
10494 %} 10319 %}
10495 ins_encode( Push_SrcXD(src), 10320 ins_encode( Push_SrcD(src),
10496 Opcode(0xD9), Opcode(0xEA), // fldl2e 10321 Opcode(0xD9), Opcode(0xEA), // fldl2e
10497 Opcode(0xDE), Opcode(0xC9), // fmulp 10322 Opcode(0xDE), Opcode(0xC9), // fmulp
10498 pow_exp_core_encoding, 10323 pow_exp_core_encoding,
10499 Push_ResultXD(dst) ); 10324 Push_ResultD(dst) );
10500 ins_pipe( pipe_slow ); 10325 ins_pipe( pipe_slow );
10501 %} 10326 %}
10502 10327
10503 10328
10504 10329
10505 instruct log10D_reg(regDPR1 dst, regDPR1 src) %{ 10330 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
10506 predicate (UseSSE<=1); 10331 predicate (UseSSE<=1);
10507 // The source Double operand on FPU stack 10332 // The source Double operand on FPU stack
10508 match(Set dst (Log10D src)); 10333 match(Set dst (Log10D src));
10509 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 10334 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
10510 // fxch ; swap ST(0) with ST(1) 10335 // fxch ; swap ST(0) with ST(1)
10518 Opcode(0xD9), Opcode(0xF1)); // fyl2x 10343 Opcode(0xD9), Opcode(0xF1)); // fyl2x
10519 10344
10520 ins_pipe( pipe_slow ); 10345 ins_pipe( pipe_slow );
10521 %} 10346 %}
10522 10347
10523 instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{ 10348 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
10524 predicate (UseSSE>=2); 10349 predicate (UseSSE>=2);
10525 effect(KILL cr); 10350 effect(KILL cr);
10526 match(Set dst (Log10D src)); 10351 match(Set dst (Log10D src));
10527 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 10352 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
10528 // fyl2x ; compute log_10(2) * log_2(x) 10353 // fyl2x ; compute log_10(2) * log_2(x)
10529 format %{ "FLDLG2 \t\t\t#Log10\n\t" 10354 format %{ "FLDLG2 \t\t\t#Log10\n\t"
10530 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 10355 "FYL2X \t\t\t# Q=Log10*Log_2(x)"
10531 %} 10356 %}
10532 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 10357 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
10533 Push_SrcXD(src), 10358 Push_SrcD(src),
10534 Opcode(0xD9), Opcode(0xF1), // fyl2x 10359 Opcode(0xD9), Opcode(0xF1), // fyl2x
10535 Push_ResultXD(dst)); 10360 Push_ResultD(dst));
10536 10361
10537 ins_pipe( pipe_slow ); 10362 ins_pipe( pipe_slow );
10538 %} 10363 %}
10539 10364
10540 instruct logD_reg(regDPR1 dst, regDPR1 src) %{ 10365 instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{
10541 predicate (UseSSE<=1); 10366 predicate (UseSSE<=1);
10542 // The source Double operand on FPU stack 10367 // The source Double operand on FPU stack
10543 match(Set dst (LogD src)); 10368 match(Set dst (LogD src));
10544 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number 10369 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
10545 // fxch ; swap ST(0) with ST(1) 10370 // fxch ; swap ST(0) with ST(1)
10553 Opcode(0xD9), Opcode(0xF1)); // fyl2x 10378 Opcode(0xD9), Opcode(0xF1)); // fyl2x
10554 10379
10555 ins_pipe( pipe_slow ); 10380 ins_pipe( pipe_slow );
10556 %} 10381 %}
10557 10382
10558 instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{ 10383 instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{
10559 predicate (UseSSE>=2); 10384 predicate (UseSSE>=2);
10560 effect(KILL cr); 10385 effect(KILL cr);
10561 // The source and result Double operands in XMM registers 10386 // The source and result Double operands in XMM registers
10562 match(Set dst (LogD src)); 10387 match(Set dst (LogD src));
10563 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number 10388 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
10564 // fyl2x ; compute log_e(2) * log_2(x) 10389 // fyl2x ; compute log_e(2) * log_2(x)
10565 format %{ "FLDLN2 \t\t\t#Log_e\n\t" 10390 format %{ "FLDLN2 \t\t\t#Log_e\n\t"
10566 "FYL2X \t\t\t# Q=Log_e*Log_2(x)" 10391 "FYL2X \t\t\t# Q=Log_e*Log_2(x)"
10567 %} 10392 %}
10568 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 10393 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2
10569 Push_SrcXD(src), 10394 Push_SrcD(src),
10570 Opcode(0xD9), Opcode(0xF1), // fyl2x 10395 Opcode(0xD9), Opcode(0xF1), // fyl2x
10571 Push_ResultXD(dst)); 10396 Push_ResultD(dst));
10572 ins_pipe( pipe_slow ); 10397 ins_pipe( pipe_slow );
10573 %} 10398 %}
10574 10399
10575 //-------------Float Instructions------------------------------- 10400 //-------------Float Instructions-------------------------------
10576 // Float Math 10401 // Float Math
10587 // jcc(Assembler::equal, exit); 10412 // jcc(Assembler::equal, exit);
10588 // movl(dst, greater_result); 10413 // movl(dst, greater_result);
10589 // exit: 10414 // exit:
10590 10415
10591 // P6 version of float compare, sets condition codes in EFLAGS 10416 // P6 version of float compare, sets condition codes in EFLAGS
10592 instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{ 10417 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10593 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10418 predicate(VM_Version::supports_cmov() && UseSSE == 0);
10594 match(Set cr (CmpF src1 src2)); 10419 match(Set cr (CmpF src1 src2));
10595 effect(KILL rax); 10420 effect(KILL rax);
10596 ins_cost(150); 10421 ins_cost(150);
10597 format %{ "FLD $src1\n\t" 10422 format %{ "FLD $src1\n\t"
10599 "JNP exit\n\t" 10424 "JNP exit\n\t"
10600 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 10425 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
10601 "SAHF\n" 10426 "SAHF\n"
10602 "exit:\tNOP // avoid branch to branch" %} 10427 "exit:\tNOP // avoid branch to branch" %}
10603 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10428 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10604 ins_encode( Push_Reg_D(src1), 10429 ins_encode( Push_Reg_DPR(src1),
10605 OpcP, RegOpc(src2), 10430 OpcP, RegOpc(src2),
10606 cmpF_P6_fixup ); 10431 cmpF_P6_fixup );
10607 ins_pipe( pipe_slow ); 10432 ins_pipe( pipe_slow );
10608 %} 10433 %}
10609 10434
10610 instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10435 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10611 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10436 predicate(VM_Version::supports_cmov() && UseSSE == 0);
10612 match(Set cr (CmpF src1 src2)); 10437 match(Set cr (CmpF src1 src2));
10613 ins_cost(100); 10438 ins_cost(100);
10614 format %{ "FLD $src1\n\t" 10439 format %{ "FLD $src1\n\t"
10615 "FUCOMIP ST,$src2 // P6 instruction" %} 10440 "FUCOMIP ST,$src2 // P6 instruction" %}
10616 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10441 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10617 ins_encode( Push_Reg_D(src1), 10442 ins_encode( Push_Reg_DPR(src1),
10618 OpcP, RegOpc(src2)); 10443 OpcP, RegOpc(src2));
10619 ins_pipe( pipe_slow ); 10444 ins_pipe( pipe_slow );
10620 %} 10445 %}
10621 10446
10622 10447
10623 // Compare & branch 10448 // Compare & branch
10624 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{ 10449 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10625 predicate(UseSSE == 0); 10450 predicate(UseSSE == 0);
10626 match(Set cr (CmpF src1 src2)); 10451 match(Set cr (CmpF src1 src2));
10627 effect(KILL rax); 10452 effect(KILL rax);
10628 ins_cost(200); 10453 ins_cost(200);
10629 format %{ "FLD $src1\n\t" 10454 format %{ "FLD $src1\n\t"
10632 "TEST AX,0x400\n\t" 10457 "TEST AX,0x400\n\t"
10633 "JZ,s flags\n\t" 10458 "JZ,s flags\n\t"
10634 "MOV AH,1\t# unordered treat as LT\n" 10459 "MOV AH,1\t# unordered treat as LT\n"
10635 "flags:\tSAHF" %} 10460 "flags:\tSAHF" %}
10636 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10461 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10637 ins_encode( Push_Reg_D(src1), 10462 ins_encode( Push_Reg_DPR(src1),
10638 OpcP, RegOpc(src2), 10463 OpcP, RegOpc(src2),
10639 fpu_flags); 10464 fpu_flags);
10640 ins_pipe( pipe_slow ); 10465 ins_pipe( pipe_slow );
10641 %} 10466 %}
10642 10467
10643 // Compare vs zero into -1,0,1 10468 // Compare vs zero into -1,0,1
10644 instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{ 10469 instruct cmpFPR_0(eRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10645 predicate(UseSSE == 0); 10470 predicate(UseSSE == 0);
10646 match(Set dst (CmpF3 src1 zero)); 10471 match(Set dst (CmpF3 src1 zero));
10647 effect(KILL cr, KILL rax); 10472 effect(KILL cr, KILL rax);
10648 ins_cost(280); 10473 ins_cost(280);
10649 format %{ "FTSTF $dst,$src1" %} 10474 format %{ "FTSTF $dst,$src1" %}
10650 opcode(0xE4, 0xD9); 10475 opcode(0xE4, 0xD9);
10651 ins_encode( Push_Reg_D(src1), 10476 ins_encode( Push_Reg_DPR(src1),
10652 OpcS, OpcP, PopFPU, 10477 OpcS, OpcP, PopFPU,
10653 CmpF_Result(dst)); 10478 CmpF_Result(dst));
10654 ins_pipe( pipe_slow ); 10479 ins_pipe( pipe_slow );
10655 %} 10480 %}
10656 10481
10657 // Compare into -1,0,1 10482 // Compare into -1,0,1
10658 instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{ 10483 instruct cmpFPR_reg(eRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10659 predicate(UseSSE == 0); 10484 predicate(UseSSE == 0);
10660 match(Set dst (CmpF3 src1 src2)); 10485 match(Set dst (CmpF3 src1 src2));
10661 effect(KILL cr, KILL rax); 10486 effect(KILL cr, KILL rax);
10662 ins_cost(300); 10487 ins_cost(300);
10663 format %{ "FCMPF $dst,$src1,$src2" %} 10488 format %{ "FCMPF $dst,$src1,$src2" %}
10664 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10489 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10665 ins_encode( Push_Reg_D(src1), 10490 ins_encode( Push_Reg_DPR(src1),
10666 OpcP, RegOpc(src2), 10491 OpcP, RegOpc(src2),
10667 CmpF_Result(dst)); 10492 CmpF_Result(dst));
10668 ins_pipe( pipe_slow ); 10493 ins_pipe( pipe_slow );
10669 %} 10494 %}
10670 10495
10671 // float compare and set condition codes in EFLAGS by XMM regs 10496 // float compare and set condition codes in EFLAGS by XMM regs
10672 instruct cmpX_cc(eFlagsRegU cr, regX src1, regX src2) %{ 10497 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10673 predicate(UseSSE>=1); 10498 predicate(UseSSE>=1);
10674 match(Set cr (CmpF src1 src2)); 10499 match(Set cr (CmpF src1 src2));
10675 ins_cost(145); 10500 ins_cost(145);
10676 format %{ "UCOMISS $src1,$src2\n\t" 10501 format %{ "UCOMISS $src1,$src2\n\t"
10677 "JNP,s exit\n\t" 10502 "JNP,s exit\n\t"
10684 emit_cmpfp_fixup(_masm); 10509 emit_cmpfp_fixup(_masm);
10685 %} 10510 %}
10686 ins_pipe( pipe_slow ); 10511 ins_pipe( pipe_slow );
10687 %} 10512 %}
10688 10513
10689 instruct cmpX_ccCF(eFlagsRegUCF cr, regX src1, regX src2) %{ 10514 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10690 predicate(UseSSE>=1); 10515 predicate(UseSSE>=1);
10691 match(Set cr (CmpF src1 src2)); 10516 match(Set cr (CmpF src1 src2));
10692 ins_cost(100); 10517 ins_cost(100);
10693 format %{ "UCOMISS $src1,$src2" %} 10518 format %{ "UCOMISS $src1,$src2" %}
10694 ins_encode %{ 10519 ins_encode %{
10696 %} 10521 %}
10697 ins_pipe( pipe_slow ); 10522 ins_pipe( pipe_slow );
10698 %} 10523 %}
10699 10524
10700 // float compare and set condition codes in EFLAGS by XMM regs 10525 // float compare and set condition codes in EFLAGS by XMM regs
10701 instruct cmpX_ccmem(eFlagsRegU cr, regX src1, memory src2) %{ 10526 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10702 predicate(UseSSE>=1); 10527 predicate(UseSSE>=1);
10703 match(Set cr (CmpF src1 (LoadF src2))); 10528 match(Set cr (CmpF src1 (LoadF src2)));
10704 ins_cost(165); 10529 ins_cost(165);
10705 format %{ "UCOMISS $src1,$src2\n\t" 10530 format %{ "UCOMISS $src1,$src2\n\t"
10706 "JNP,s exit\n\t" 10531 "JNP,s exit\n\t"
10713 emit_cmpfp_fixup(_masm); 10538 emit_cmpfp_fixup(_masm);
10714 %} 10539 %}
10715 ins_pipe( pipe_slow ); 10540 ins_pipe( pipe_slow );
10716 %} 10541 %}
10717 10542
10718 instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX src1, memory src2) %{ 10543 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10719 predicate(UseSSE>=1); 10544 predicate(UseSSE>=1);
10720 match(Set cr (CmpF src1 (LoadF src2))); 10545 match(Set cr (CmpF src1 (LoadF src2)));
10721 ins_cost(100); 10546 ins_cost(100);
10722 format %{ "UCOMISS $src1,$src2" %} 10547 format %{ "UCOMISS $src1,$src2" %}
10723 ins_encode %{ 10548 ins_encode %{
10725 %} 10550 %}
10726 ins_pipe( pipe_slow ); 10551 ins_pipe( pipe_slow );
10727 %} 10552 %}
10728 10553
10729 // Compare into -1,0,1 in XMM 10554 // Compare into -1,0,1 in XMM
10730 instruct cmpX_reg(xRegI dst, regX src1, regX src2, eFlagsReg cr) %{ 10555 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10731 predicate(UseSSE>=1); 10556 predicate(UseSSE>=1);
10732 match(Set dst (CmpF3 src1 src2)); 10557 match(Set dst (CmpF3 src1 src2));
10733 effect(KILL cr); 10558 effect(KILL cr);
10734 ins_cost(255); 10559 ins_cost(255);
10735 format %{ "UCOMISS $src1, $src2\n\t" 10560 format %{ "UCOMISS $src1, $src2\n\t"
10745 %} 10570 %}
10746 ins_pipe( pipe_slow ); 10571 ins_pipe( pipe_slow );
10747 %} 10572 %}
10748 10573
10749 // Compare into -1,0,1 in XMM and memory 10574 // Compare into -1,0,1 in XMM and memory
10750 instruct cmpX_regmem(xRegI dst, regX src1, memory src2, eFlagsReg cr) %{ 10575 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10751 predicate(UseSSE>=1); 10576 predicate(UseSSE>=1);
10752 match(Set dst (CmpF3 src1 (LoadF src2))); 10577 match(Set dst (CmpF3 src1 (LoadF src2)));
10753 effect(KILL cr); 10578 effect(KILL cr);
10754 ins_cost(275); 10579 ins_cost(275);
10755 format %{ "UCOMISS $src1, $src2\n\t" 10580 format %{ "UCOMISS $src1, $src2\n\t"
10765 %} 10590 %}
10766 ins_pipe( pipe_slow ); 10591 ins_pipe( pipe_slow );
10767 %} 10592 %}
10768 10593
10769 // Spill to obtain 24-bit precision 10594 // Spill to obtain 24-bit precision
10770 instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{ 10595 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10771 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10596 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10772 match(Set dst (SubF src1 src2)); 10597 match(Set dst (SubF src1 src2));
10773 10598
10774 format %{ "FSUB $dst,$src1 - $src2" %} 10599 format %{ "FSUB $dst,$src1 - $src2" %}
10775 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10600 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10776 ins_encode( Push_Reg_F(src1), 10601 ins_encode( Push_Reg_FPR(src1),
10777 OpcReg_F(src2), 10602 OpcReg_FPR(src2),
10778 Pop_Mem_F(dst) ); 10603 Pop_Mem_FPR(dst) );
10779 ins_pipe( fpu_mem_reg_reg ); 10604 ins_pipe( fpu_mem_reg_reg );
10780 %} 10605 %}
10781 // 10606 //
10782 // This instruction does not round to 24-bits 10607 // This instruction does not round to 24-bits
10783 instruct subF_reg(regF dst, regF src) %{ 10608 instruct subFPR_reg(regFPR dst, regFPR src) %{
10784 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10609 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10785 match(Set dst (SubF dst src)); 10610 match(Set dst (SubF dst src));
10786 10611
10787 format %{ "FSUB $dst,$src" %} 10612 format %{ "FSUB $dst,$src" %}
10788 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10613 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
10789 ins_encode( Push_Reg_F(src), 10614 ins_encode( Push_Reg_FPR(src),
10790 OpcP, RegOpc(dst) ); 10615 OpcP, RegOpc(dst) );
10791 ins_pipe( fpu_reg_reg ); 10616 ins_pipe( fpu_reg_reg );
10792 %} 10617 %}
10793 10618
10794 // Spill to obtain 24-bit precision 10619 // Spill to obtain 24-bit precision
10795 instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{ 10620 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10796 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10621 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10797 match(Set dst (AddF src1 src2)); 10622 match(Set dst (AddF src1 src2));
10798 10623
10799 format %{ "FADD $dst,$src1,$src2" %} 10624 format %{ "FADD $dst,$src1,$src2" %}
10800 opcode(0xD8, 0x0); /* D8 C0+i */ 10625 opcode(0xD8, 0x0); /* D8 C0+i */
10801 ins_encode( Push_Reg_F(src2), 10626 ins_encode( Push_Reg_FPR(src2),
10802 OpcReg_F(src1), 10627 OpcReg_FPR(src1),
10803 Pop_Mem_F(dst) ); 10628 Pop_Mem_FPR(dst) );
10804 ins_pipe( fpu_mem_reg_reg ); 10629 ins_pipe( fpu_mem_reg_reg );
10805 %} 10630 %}
10806 // 10631 //
10807 // This instruction does not round to 24-bits 10632 // This instruction does not round to 24-bits
10808 instruct addF_reg(regF dst, regF src) %{ 10633 instruct addFPR_reg(regFPR dst, regFPR src) %{
10809 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10634 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10810 match(Set dst (AddF dst src)); 10635 match(Set dst (AddF dst src));
10811 10636
10812 format %{ "FLD $src\n\t" 10637 format %{ "FLD $src\n\t"
10813 "FADDp $dst,ST" %} 10638 "FADDp $dst,ST" %}
10814 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 10639 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10815 ins_encode( Push_Reg_F(src), 10640 ins_encode( Push_Reg_FPR(src),
10816 OpcP, RegOpc(dst) ); 10641 OpcP, RegOpc(dst) );
10817 ins_pipe( fpu_reg_reg ); 10642 ins_pipe( fpu_reg_reg );
10818 %} 10643 %}
10819 10644
10820 // Add two single precision floating point values in xmm 10645 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10821 instruct addX_reg(regX dst, regX src) %{
10822 predicate(UseSSE>=1);
10823 match(Set dst (AddF dst src));
10824 format %{ "ADDSS $dst,$src" %}
10825 ins_encode %{
10826 __ addss($dst$$XMMRegister, $src$$XMMRegister);
10827 %}
10828 ins_pipe( pipe_slow );
10829 %}
10830
10831 instruct addX_imm(regX dst, immXF con) %{
10832 predicate(UseSSE>=1);
10833 match(Set dst (AddF dst con));
10834 format %{ "ADDSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
10835 ins_encode %{
10836 __ addss($dst$$XMMRegister, $constantaddress($con));
10837 %}
10838 ins_pipe(pipe_slow);
10839 %}
10840
10841 instruct addX_mem(regX dst, memory mem) %{
10842 predicate(UseSSE>=1);
10843 match(Set dst (AddF dst (LoadF mem)));
10844 format %{ "ADDSS $dst,$mem" %}
10845 ins_encode %{
10846 __ addss($dst$$XMMRegister, $mem$$Address);
10847 %}
10848 ins_pipe( pipe_slow );
10849 %}
10850
10851 // Subtract two single precision floating point values in xmm
10852 instruct subX_reg(regX dst, regX src) %{
10853 predicate(UseSSE>=1);
10854 match(Set dst (SubF dst src));
10855 ins_cost(150);
10856 format %{ "SUBSS $dst,$src" %}
10857 ins_encode %{
10858 __ subss($dst$$XMMRegister, $src$$XMMRegister);
10859 %}
10860 ins_pipe( pipe_slow );
10861 %}
10862
10863 instruct subX_imm(regX dst, immXF con) %{
10864 predicate(UseSSE>=1);
10865 match(Set dst (SubF dst con));
10866 ins_cost(150);
10867 format %{ "SUBSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
10868 ins_encode %{
10869 __ subss($dst$$XMMRegister, $constantaddress($con));
10870 %}
10871 ins_pipe(pipe_slow);
10872 %}
10873
10874 instruct subX_mem(regX dst, memory mem) %{
10875 predicate(UseSSE>=1);
10876 match(Set dst (SubF dst (LoadF mem)));
10877 ins_cost(150);
10878 format %{ "SUBSS $dst,$mem" %}
10879 ins_encode %{
10880 __ subss($dst$$XMMRegister, $mem$$Address);
10881 %}
10882 ins_pipe( pipe_slow );
10883 %}
10884
10885 // Multiply two single precision floating point values in xmm
10886 instruct mulX_reg(regX dst, regX src) %{
10887 predicate(UseSSE>=1);
10888 match(Set dst (MulF dst src));
10889 format %{ "MULSS $dst,$src" %}
10890 ins_encode %{
10891 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
10892 %}
10893 ins_pipe( pipe_slow );
10894 %}
10895
10896 instruct mulX_imm(regX dst, immXF con) %{
10897 predicate(UseSSE>=1);
10898 match(Set dst (MulF dst con));
10899 format %{ "MULSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
10900 ins_encode %{
10901 __ mulss($dst$$XMMRegister, $constantaddress($con));
10902 %}
10903 ins_pipe(pipe_slow);
10904 %}
10905
10906 instruct mulX_mem(regX dst, memory mem) %{
10907 predicate(UseSSE>=1);
10908 match(Set dst (MulF dst (LoadF mem)));
10909 format %{ "MULSS $dst,$mem" %}
10910 ins_encode %{
10911 __ mulss($dst$$XMMRegister, $mem$$Address);
10912 %}
10913 ins_pipe( pipe_slow );
10914 %}
10915
10916 // Divide two single precision floating point values in xmm
10917 instruct divX_reg(regX dst, regX src) %{
10918 predicate(UseSSE>=1);
10919 match(Set dst (DivF dst src));
10920 format %{ "DIVSS $dst,$src" %}
10921 ins_encode %{
10922 __ divss($dst$$XMMRegister, $src$$XMMRegister);
10923 %}
10924 ins_pipe( pipe_slow );
10925 %}
10926
10927 instruct divX_imm(regX dst, immXF con) %{
10928 predicate(UseSSE>=1);
10929 match(Set dst (DivF dst con));
10930 format %{ "DIVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
10931 ins_encode %{
10932 __ divss($dst$$XMMRegister, $constantaddress($con));
10933 %}
10934 ins_pipe(pipe_slow);
10935 %}
10936
10937 instruct divX_mem(regX dst, memory mem) %{
10938 predicate(UseSSE>=1);
10939 match(Set dst (DivF dst (LoadF mem)));
10940 format %{ "DIVSS $dst,$mem" %}
10941 ins_encode %{
10942 __ divss($dst$$XMMRegister, $mem$$Address);
10943 %}
10944 ins_pipe( pipe_slow );
10945 %}
10946
10947 // Get the square root of a single precision floating point values in xmm
10948 instruct sqrtX_reg(regX dst, regX src) %{
10949 predicate(UseSSE>=1);
10950 match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10951 ins_cost(150);
10952 format %{ "SQRTSS $dst,$src" %}
10953 ins_encode %{
10954 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
10955 %}
10956 ins_pipe( pipe_slow );
10957 %}
10958
10959 instruct sqrtX_mem(regX dst, memory mem) %{
10960 predicate(UseSSE>=1);
10961 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem)))));
10962 ins_cost(150);
10963 format %{ "SQRTSS $dst,$mem" %}
10964 ins_encode %{
10965 __ sqrtss($dst$$XMMRegister, $mem$$Address);
10966 %}
10967 ins_pipe( pipe_slow );
10968 %}
10969
10970 // Get the square root of a double precision floating point values in xmm
10971 instruct sqrtXD_reg(regXD dst, regXD src) %{
10972 predicate(UseSSE>=2);
10973 match(Set dst (SqrtD src));
10974 ins_cost(150);
10975 format %{ "SQRTSD $dst,$src" %}
10976 ins_encode %{
10977 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
10978 %}
10979 ins_pipe( pipe_slow );
10980 %}
10981
10982 instruct sqrtXD_mem(regXD dst, memory mem) %{
10983 predicate(UseSSE>=2);
10984 match(Set dst (SqrtD (LoadD mem)));
10985 ins_cost(150);
10986 format %{ "SQRTSD $dst,$mem" %}
10987 ins_encode %{
10988 __ sqrtsd($dst$$XMMRegister, $mem$$Address);
10989 %}
10990 ins_pipe( pipe_slow );
10991 %}
10992
10993 instruct absF_reg(regFPR1 dst, regFPR1 src) %{
10994 predicate(UseSSE==0); 10646 predicate(UseSSE==0);
10995 match(Set dst (AbsF src)); 10647 match(Set dst (AbsF src));
10996 ins_cost(100); 10648 ins_cost(100);
10997 format %{ "FABS" %} 10649 format %{ "FABS" %}
10998 opcode(0xE1, 0xD9); 10650 opcode(0xE1, 0xD9);
10999 ins_encode( OpcS, OpcP ); 10651 ins_encode( OpcS, OpcP );
11000 ins_pipe( fpu_reg_reg ); 10652 ins_pipe( fpu_reg_reg );
11001 %} 10653 %}
11002 10654
11003 instruct absX_reg(regX dst ) %{ 10655 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
11004 predicate(UseSSE>=1);
11005 match(Set dst (AbsF dst));
11006 ins_cost(150);
11007 format %{ "ANDPS $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
11008 ins_encode %{
11009 __ andps($dst$$XMMRegister,
11010 ExternalAddress((address)float_signmask_pool));
11011 %}
11012 ins_pipe( pipe_slow );
11013 %}
11014
11015 instruct negF_reg(regFPR1 dst, regFPR1 src) %{
11016 predicate(UseSSE==0); 10656 predicate(UseSSE==0);
11017 match(Set dst (NegF src)); 10657 match(Set dst (NegF src));
11018 ins_cost(100); 10658 ins_cost(100);
11019 format %{ "FCHS" %} 10659 format %{ "FCHS" %}
11020 opcode(0xE0, 0xD9); 10660 opcode(0xE0, 0xD9);
11021 ins_encode( OpcS, OpcP ); 10661 ins_encode( OpcS, OpcP );
11022 ins_pipe( fpu_reg_reg ); 10662 ins_pipe( fpu_reg_reg );
11023 %} 10663 %}
11024 10664
11025 instruct negX_reg( regX dst ) %{ 10665 // Cisc-alternate to addFPR_reg
11026 predicate(UseSSE>=1);
11027 match(Set dst (NegF dst));
11028 ins_cost(150);
11029 format %{ "XORPS $dst,[0x80000000]\t# CHS F by sign flipping" %}
11030 ins_encode %{
11031 __ xorps($dst$$XMMRegister,
11032 ExternalAddress((address)float_signflip_pool));
11033 %}
11034 ins_pipe( pipe_slow );
11035 %}
11036
11037 // Cisc-alternate to addF_reg
11038 // Spill to obtain 24-bit precision 10666 // Spill to obtain 24-bit precision
11039 instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{ 10667 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
11040 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10668 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11041 match(Set dst (AddF src1 (LoadF src2))); 10669 match(Set dst (AddF src1 (LoadF src2)));
11042 10670
11043 format %{ "FLD $src2\n\t" 10671 format %{ "FLD $src2\n\t"
11044 "FADD ST,$src1\n\t" 10672 "FADD ST,$src1\n\t"
11045 "FSTP_S $dst" %} 10673 "FSTP_S $dst" %}
11046 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10674 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
11047 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10675 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11048 OpcReg_F(src1), 10676 OpcReg_FPR(src1),
11049 Pop_Mem_F(dst) ); 10677 Pop_Mem_FPR(dst) );
11050 ins_pipe( fpu_mem_reg_mem ); 10678 ins_pipe( fpu_mem_reg_mem );
11051 %} 10679 %}
11052 // 10680 //
11053 // Cisc-alternate to addF_reg 10681 // Cisc-alternate to addFPR_reg
11054 // This instruction does not round to 24-bits 10682 // This instruction does not round to 24-bits
11055 instruct addF_reg_mem(regF dst, memory src) %{ 10683 instruct addFPR_reg_mem(regFPR dst, memory src) %{
11056 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10684 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11057 match(Set dst (AddF dst (LoadF src))); 10685 match(Set dst (AddF dst (LoadF src)));
11058 10686
11059 format %{ "FADD $dst,$src" %} 10687 format %{ "FADD $dst,$src" %}
11060 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ 10688 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
11063 ins_pipe( fpu_reg_mem ); 10691 ins_pipe( fpu_reg_mem );
11064 %} 10692 %}
11065 10693
11066 // // Following two instructions for _222_mpegaudio 10694 // // Following two instructions for _222_mpegaudio
11067 // Spill to obtain 24-bit precision 10695 // Spill to obtain 24-bit precision
11068 instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{ 10696 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
11069 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10697 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11070 match(Set dst (AddF src1 src2)); 10698 match(Set dst (AddF src1 src2));
11071 10699
11072 format %{ "FADD $dst,$src1,$src2" %} 10700 format %{ "FADD $dst,$src1,$src2" %}
11073 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10701 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
11074 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), 10702 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
11075 OpcReg_F(src2), 10703 OpcReg_FPR(src2),
11076 Pop_Mem_F(dst) ); 10704 Pop_Mem_FPR(dst) );
11077 ins_pipe( fpu_mem_reg_mem ); 10705 ins_pipe( fpu_mem_reg_mem );
11078 %} 10706 %}
11079 10707
11080 // Cisc-spill variant 10708 // Cisc-spill variant
11081 // Spill to obtain 24-bit precision 10709 // Spill to obtain 24-bit precision
11082 instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 10710 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
11083 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10711 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11084 match(Set dst (AddF src1 (LoadF src2))); 10712 match(Set dst (AddF src1 (LoadF src2)));
11085 10713
11086 format %{ "FADD $dst,$src1,$src2 cisc" %} 10714 format %{ "FADD $dst,$src1,$src2 cisc" %}
11087 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10715 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
11088 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10716 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11089 set_instruction_start, 10717 set_instruction_start,
11090 OpcP, RMopc_Mem(secondary,src1), 10718 OpcP, RMopc_Mem(secondary,src1),
11091 Pop_Mem_F(dst) ); 10719 Pop_Mem_FPR(dst) );
11092 ins_pipe( fpu_mem_mem_mem ); 10720 ins_pipe( fpu_mem_mem_mem );
11093 %} 10721 %}
11094 10722
11095 // Spill to obtain 24-bit precision 10723 // Spill to obtain 24-bit precision
11096 instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10724 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
11097 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10725 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11098 match(Set dst (AddF src1 src2)); 10726 match(Set dst (AddF src1 src2));
11099 10727
11100 format %{ "FADD $dst,$src1,$src2" %} 10728 format %{ "FADD $dst,$src1,$src2" %}
11101 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ 10729 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
11102 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10730 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11103 set_instruction_start, 10731 set_instruction_start,
11104 OpcP, RMopc_Mem(secondary,src1), 10732 OpcP, RMopc_Mem(secondary,src1),
11105 Pop_Mem_F(dst) ); 10733 Pop_Mem_FPR(dst) );
11106 ins_pipe( fpu_mem_mem_mem ); 10734 ins_pipe( fpu_mem_mem_mem );
11107 %} 10735 %}
11108 10736
11109 10737
11110 // Spill to obtain 24-bit precision 10738 // Spill to obtain 24-bit precision
11111 instruct addF24_reg_imm(stackSlotF dst, regF src, immF con) %{ 10739 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
11112 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10740 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11113 match(Set dst (AddF src con)); 10741 match(Set dst (AddF src con));
11114 format %{ "FLD $src\n\t" 10742 format %{ "FLD $src\n\t"
11115 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10743 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11116 "FSTP_S $dst" %} 10744 "FSTP_S $dst" %}
11121 %} 10749 %}
11122 ins_pipe(fpu_mem_reg_con); 10750 ins_pipe(fpu_mem_reg_con);
11123 %} 10751 %}
11124 // 10752 //
11125 // This instruction does not round to 24-bits 10753 // This instruction does not round to 24-bits
11126 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 10754 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
11127 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10755 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11128 match(Set dst (AddF src con)); 10756 match(Set dst (AddF src con));
11129 format %{ "FLD $src\n\t" 10757 format %{ "FLD $src\n\t"
11130 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10758 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11131 "FSTP $dst" %} 10759 "FSTP $dst" %}
11136 %} 10764 %}
11137 ins_pipe(fpu_reg_reg_con); 10765 ins_pipe(fpu_reg_reg_con);
11138 %} 10766 %}
11139 10767
11140 // Spill to obtain 24-bit precision 10768 // Spill to obtain 24-bit precision
11141 instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{ 10769 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
11142 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10770 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11143 match(Set dst (MulF src1 src2)); 10771 match(Set dst (MulF src1 src2));
11144 10772
11145 format %{ "FLD $src1\n\t" 10773 format %{ "FLD $src1\n\t"
11146 "FMUL $src2\n\t" 10774 "FMUL $src2\n\t"
11147 "FSTP_S $dst" %} 10775 "FSTP_S $dst" %}
11148 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 10776 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
11149 ins_encode( Push_Reg_F(src1), 10777 ins_encode( Push_Reg_FPR(src1),
11150 OpcReg_F(src2), 10778 OpcReg_FPR(src2),
11151 Pop_Mem_F(dst) ); 10779 Pop_Mem_FPR(dst) );
11152 ins_pipe( fpu_mem_reg_reg ); 10780 ins_pipe( fpu_mem_reg_reg );
11153 %} 10781 %}
11154 // 10782 //
11155 // This instruction does not round to 24-bits 10783 // This instruction does not round to 24-bits
11156 instruct mulF_reg(regF dst, regF src1, regF src2) %{ 10784 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
11157 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10785 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11158 match(Set dst (MulF src1 src2)); 10786 match(Set dst (MulF src1 src2));
11159 10787
11160 format %{ "FLD $src1\n\t" 10788 format %{ "FLD $src1\n\t"
11161 "FMUL $src2\n\t" 10789 "FMUL $src2\n\t"
11162 "FSTP_S $dst" %} 10790 "FSTP_S $dst" %}
11163 opcode(0xD8, 0x1); /* D8 C8+i */ 10791 opcode(0xD8, 0x1); /* D8 C8+i */
11164 ins_encode( Push_Reg_F(src2), 10792 ins_encode( Push_Reg_FPR(src2),
11165 OpcReg_F(src1), 10793 OpcReg_FPR(src1),
11166 Pop_Reg_F(dst) ); 10794 Pop_Reg_FPR(dst) );
11167 ins_pipe( fpu_reg_reg_reg ); 10795 ins_pipe( fpu_reg_reg_reg );
11168 %} 10796 %}
11169 10797
11170 10798
11171 // Spill to obtain 24-bit precision 10799 // Spill to obtain 24-bit precision
11172 // Cisc-alternate to reg-reg multiply 10800 // Cisc-alternate to reg-reg multiply
11173 instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{ 10801 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
11174 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10802 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11175 match(Set dst (MulF src1 (LoadF src2))); 10803 match(Set dst (MulF src1 (LoadF src2)));
11176 10804
11177 format %{ "FLD_S $src2\n\t" 10805 format %{ "FLD_S $src2\n\t"
11178 "FMUL $src1\n\t" 10806 "FMUL $src1\n\t"
11179 "FSTP_S $dst" %} 10807 "FSTP_S $dst" %}
11180 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ 10808 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
11181 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10809 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11182 OpcReg_F(src1), 10810 OpcReg_FPR(src1),
11183 Pop_Mem_F(dst) ); 10811 Pop_Mem_FPR(dst) );
11184 ins_pipe( fpu_mem_reg_mem ); 10812 ins_pipe( fpu_mem_reg_mem );
11185 %} 10813 %}
11186 // 10814 //
11187 // This instruction does not round to 24-bits 10815 // This instruction does not round to 24-bits
11188 // Cisc-alternate to reg-reg multiply 10816 // Cisc-alternate to reg-reg multiply
11189 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 10817 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
11190 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10818 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11191 match(Set dst (MulF src1 (LoadF src2))); 10819 match(Set dst (MulF src1 (LoadF src2)));
11192 10820
11193 format %{ "FMUL $dst,$src1,$src2" %} 10821 format %{ "FMUL $dst,$src1,$src2" %}
11194 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 10822 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
11195 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10823 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11196 OpcReg_F(src1), 10824 OpcReg_FPR(src1),
11197 Pop_Reg_F(dst) ); 10825 Pop_Reg_FPR(dst) );
11198 ins_pipe( fpu_reg_reg_mem ); 10826 ins_pipe( fpu_reg_reg_mem );
11199 %} 10827 %}
11200 10828
11201 // Spill to obtain 24-bit precision 10829 // Spill to obtain 24-bit precision
11202 instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10830 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
11203 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10831 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11204 match(Set dst (MulF src1 src2)); 10832 match(Set dst (MulF src1 src2));
11205 10833
11206 format %{ "FMUL $dst,$src1,$src2" %} 10834 format %{ "FMUL $dst,$src1,$src2" %}
11207 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ 10835 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
11208 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10836 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11209 set_instruction_start, 10837 set_instruction_start,
11210 OpcP, RMopc_Mem(secondary,src1), 10838 OpcP, RMopc_Mem(secondary,src1),
11211 Pop_Mem_F(dst) ); 10839 Pop_Mem_FPR(dst) );
11212 ins_pipe( fpu_mem_mem_mem ); 10840 ins_pipe( fpu_mem_mem_mem );
11213 %} 10841 %}
11214 10842
11215 // Spill to obtain 24-bit precision 10843 // Spill to obtain 24-bit precision
11216 instruct mulF24_reg_imm(stackSlotF dst, regF src, immF con) %{ 10844 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
11217 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10845 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11218 match(Set dst (MulF src con)); 10846 match(Set dst (MulF src con));
11219 10847
11220 format %{ "FLD $src\n\t" 10848 format %{ "FLD $src\n\t"
11221 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10849 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11227 %} 10855 %}
11228 ins_pipe(fpu_mem_reg_con); 10856 ins_pipe(fpu_mem_reg_con);
11229 %} 10857 %}
11230 // 10858 //
11231 // This instruction does not round to 24-bits 10859 // This instruction does not round to 24-bits
11232 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 10860 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
11233 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10861 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11234 match(Set dst (MulF src con)); 10862 match(Set dst (MulF src con));
11235 10863
11236 format %{ "FLD $src\n\t" 10864 format %{ "FLD $src\n\t"
11237 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10865 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11244 ins_pipe(fpu_reg_reg_con); 10872 ins_pipe(fpu_reg_reg_con);
11245 %} 10873 %}
11246 10874
11247 10875
11248 // 10876 //
11249 // MACRO1 -- subsume unshared load into mulF 10877 // MACRO1 -- subsume unshared load into mulFPR
11250 // This instruction does not round to 24-bits 10878 // This instruction does not round to 24-bits
11251 instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{ 10879 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
11252 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10880 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11253 match(Set dst (MulF (LoadF mem1) src)); 10881 match(Set dst (MulF (LoadF mem1) src));
11254 10882
11255 format %{ "FLD $mem1 ===MACRO1===\n\t" 10883 format %{ "FLD $mem1 ===MACRO1===\n\t"
11256 "FMUL ST,$src\n\t" 10884 "FMUL ST,$src\n\t"
11257 "FSTP $dst" %} 10885 "FSTP $dst" %}
11258 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ 10886 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
11259 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), 10887 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
11260 OpcReg_F(src), 10888 OpcReg_FPR(src),
11261 Pop_Reg_F(dst) ); 10889 Pop_Reg_FPR(dst) );
11262 ins_pipe( fpu_reg_reg_mem ); 10890 ins_pipe( fpu_reg_reg_mem );
11263 %} 10891 %}
11264 // 10892 //
11265 // MACRO2 -- addF a mulF which subsumed an unshared load 10893 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
11266 // This instruction does not round to 24-bits 10894 // This instruction does not round to 24-bits
11267 instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{ 10895 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
11268 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10896 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11269 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); 10897 match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
11270 ins_cost(95); 10898 ins_cost(95);
11271 10899
11272 format %{ "FLD $mem1 ===MACRO2===\n\t" 10900 format %{ "FLD $mem1 ===MACRO2===\n\t"
11273 "FMUL ST,$src1 subsume mulF left load\n\t" 10901 "FMUL ST,$src1 subsume mulFPR left load\n\t"
11274 "FADD ST,$src2\n\t" 10902 "FADD ST,$src2\n\t"
11275 "FSTP $dst" %} 10903 "FSTP $dst" %}
11276 opcode(0xD9); /* LoadF D9 /0 */ 10904 opcode(0xD9); /* LoadF D9 /0 */
11277 ins_encode( OpcP, RMopc_Mem(0x00,mem1), 10905 ins_encode( OpcP, RMopc_Mem(0x00,mem1),
11278 FMul_ST_reg(src1), 10906 FMul_ST_reg(src1),
11279 FAdd_ST_reg(src2), 10907 FAdd_ST_reg(src2),
11280 Pop_Reg_F(dst) ); 10908 Pop_Reg_FPR(dst) );
11281 ins_pipe( fpu_reg_mem_reg_reg ); 10909 ins_pipe( fpu_reg_mem_reg_reg );
11282 %} 10910 %}
11283 10911
11284 // MACRO3 -- addF a mulF 10912 // MACRO3 -- addFPR a mulFPR
11285 // This instruction does not round to 24-bits. It is a '2-address' 10913 // This instruction does not round to 24-bits. It is a '2-address'
11286 // instruction in that the result goes back to src2. This eliminates 10914 // instruction in that the result goes back to src2. This eliminates
11287 // a move from the macro; possibly the register allocator will have 10915 // a move from the macro; possibly the register allocator will have
11288 // to add it back (and maybe not). 10916 // to add it back (and maybe not).
11289 instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{ 10917 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
11290 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10918 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11291 match(Set src2 (AddF (MulF src0 src1) src2)); 10919 match(Set src2 (AddF (MulF src0 src1) src2));
11292 10920
11293 format %{ "FLD $src0 ===MACRO3===\n\t" 10921 format %{ "FLD $src0 ===MACRO3===\n\t"
11294 "FMUL ST,$src1\n\t" 10922 "FMUL ST,$src1\n\t"
11295 "FADDP $src2,ST" %} 10923 "FADDP $src2,ST" %}
11296 opcode(0xD9); /* LoadF D9 /0 */ 10924 opcode(0xD9); /* LoadF D9 /0 */
11297 ins_encode( Push_Reg_F(src0), 10925 ins_encode( Push_Reg_FPR(src0),
11298 FMul_ST_reg(src1), 10926 FMul_ST_reg(src1),
11299 FAddP_reg_ST(src2) ); 10927 FAddP_reg_ST(src2) );
11300 ins_pipe( fpu_reg_reg_reg ); 10928 ins_pipe( fpu_reg_reg_reg );
11301 %} 10929 %}
11302 10930
11303 // MACRO4 -- divF subF 10931 // MACRO4 -- divFPR subFPR
11304 // This instruction does not round to 24-bits 10932 // This instruction does not round to 24-bits
11305 instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{ 10933 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
11306 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10934 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11307 match(Set dst (DivF (SubF src2 src1) src3)); 10935 match(Set dst (DivF (SubF src2 src1) src3));
11308 10936
11309 format %{ "FLD $src2 ===MACRO4===\n\t" 10937 format %{ "FLD $src2 ===MACRO4===\n\t"
11310 "FSUB ST,$src1\n\t" 10938 "FSUB ST,$src1\n\t"
11311 "FDIV ST,$src3\n\t" 10939 "FDIV ST,$src3\n\t"
11312 "FSTP $dst" %} 10940 "FSTP $dst" %}
11313 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10941 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
11314 ins_encode( Push_Reg_F(src2), 10942 ins_encode( Push_Reg_FPR(src2),
11315 subF_divF_encode(src1,src3), 10943 subFPR_divFPR_encode(src1,src3),
11316 Pop_Reg_F(dst) ); 10944 Pop_Reg_FPR(dst) );
11317 ins_pipe( fpu_reg_reg_reg_reg ); 10945 ins_pipe( fpu_reg_reg_reg_reg );
11318 %} 10946 %}
11319 10947
11320 // Spill to obtain 24-bit precision 10948 // Spill to obtain 24-bit precision
11321 instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{ 10949 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
11322 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10950 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11323 match(Set dst (DivF src1 src2)); 10951 match(Set dst (DivF src1 src2));
11324 10952
11325 format %{ "FDIV $dst,$src1,$src2" %} 10953 format %{ "FDIV $dst,$src1,$src2" %}
11326 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/ 10954 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
11327 ins_encode( Push_Reg_F(src1), 10955 ins_encode( Push_Reg_FPR(src1),
11328 OpcReg_F(src2), 10956 OpcReg_FPR(src2),
11329 Pop_Mem_F(dst) ); 10957 Pop_Mem_FPR(dst) );
11330 ins_pipe( fpu_mem_reg_reg ); 10958 ins_pipe( fpu_mem_reg_reg );
11331 %} 10959 %}
11332 // 10960 //
11333 // This instruction does not round to 24-bits 10961 // This instruction does not round to 24-bits
11334 instruct divF_reg(regF dst, regF src) %{ 10962 instruct divFPR_reg(regFPR dst, regFPR src) %{
11335 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10963 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11336 match(Set dst (DivF dst src)); 10964 match(Set dst (DivF dst src));
11337 10965
11338 format %{ "FDIV $dst,$src" %} 10966 format %{ "FDIV $dst,$src" %}
11339 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10967 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
11340 ins_encode( Push_Reg_F(src), 10968 ins_encode( Push_Reg_FPR(src),
11341 OpcP, RegOpc(dst) ); 10969 OpcP, RegOpc(dst) );
11342 ins_pipe( fpu_reg_reg ); 10970 ins_pipe( fpu_reg_reg );
11343 %} 10971 %}
11344 10972
11345 10973
11346 // Spill to obtain 24-bit precision 10974 // Spill to obtain 24-bit precision
11347 instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{ 10975 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
11348 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 10976 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11349 match(Set dst (ModF src1 src2)); 10977 match(Set dst (ModF src1 src2));
11350 effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS 10978 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
11351 10979
11352 format %{ "FMOD $dst,$src1,$src2" %} 10980 format %{ "FMOD $dst,$src1,$src2" %}
11353 ins_encode( Push_Reg_Mod_D(src1, src2), 10981 ins_encode( Push_Reg_Mod_DPR(src1, src2),
11354 emitModD(), 10982 emitModDPR(),
11355 Push_Result_Mod_D(src2), 10983 Push_Result_Mod_DPR(src2),
11356 Pop_Mem_F(dst)); 10984 Pop_Mem_FPR(dst));
11357 ins_pipe( pipe_slow ); 10985 ins_pipe( pipe_slow );
11358 %} 10986 %}
11359 // 10987 //
11360 // This instruction does not round to 24-bits 10988 // This instruction does not round to 24-bits
11361 instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{ 10989 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
11362 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10990 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11363 match(Set dst (ModF dst src)); 10991 match(Set dst (ModF dst src));
11364 effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS 10992 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
11365 10993
11366 format %{ "FMOD $dst,$src" %} 10994 format %{ "FMOD $dst,$src" %}
11367 ins_encode(Push_Reg_Mod_D(dst, src), 10995 ins_encode(Push_Reg_Mod_DPR(dst, src),
11368 emitModD(), 10996 emitModDPR(),
11369 Push_Result_Mod_D(src), 10997 Push_Result_Mod_DPR(src),
11370 Pop_Reg_F(dst)); 10998 Pop_Reg_FPR(dst));
11371 ins_pipe( pipe_slow ); 10999 ins_pipe( pipe_slow );
11372 %} 11000 %}
11373 11001
11374 instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{ 11002 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
11375 predicate(UseSSE>=1); 11003 predicate(UseSSE>=1);
11376 match(Set dst (ModF src0 src1)); 11004 match(Set dst (ModF src0 src1));
11377 effect(KILL rax, KILL cr); 11005 effect(KILL rax, KILL cr);
11378 format %{ "SUB ESP,4\t # FMOD\n" 11006 format %{ "SUB ESP,4\t # FMOD\n"
11379 "\tMOVSS [ESP+0],$src1\n" 11007 "\tMOVSS [ESP+0],$src1\n"
11389 "\tMOVSS $dst,[ESP+0]\n" 11017 "\tMOVSS $dst,[ESP+0]\n"
11390 "\tADD ESP,4\n" 11018 "\tADD ESP,4\n"
11391 "\tFSTP ST0\t # Restore FPU Stack" 11019 "\tFSTP ST0\t # Restore FPU Stack"
11392 %} 11020 %}
11393 ins_cost(250); 11021 ins_cost(250);
11394 ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU); 11022 ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
11395 ins_pipe( pipe_slow ); 11023 ins_pipe( pipe_slow );
11396 %} 11024 %}
11397 11025
11398 11026
11399 //----------Arithmetic Conversion Instructions--------------------------------- 11027 //----------Arithmetic Conversion Instructions---------------------------------
11400 // The conversions operations are all Alpha sorted. Please keep it that way! 11028 // The conversions operations are all Alpha sorted. Please keep it that way!
11401 11029
11402 instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{ 11030 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
11403 predicate(UseSSE==0); 11031 predicate(UseSSE==0);
11404 match(Set dst (RoundFloat src)); 11032 match(Set dst (RoundFloat src));
11405 ins_cost(125); 11033 ins_cost(125);
11406 format %{ "FST_S $dst,$src\t# F-round" %} 11034 format %{ "FST_S $dst,$src\t# F-round" %}
11407 ins_encode( Pop_Mem_Reg_F(dst, src) ); 11035 ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11408 ins_pipe( fpu_mem_reg ); 11036 ins_pipe( fpu_mem_reg );
11409 %} 11037 %}
11410 11038
11411 instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{ 11039 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
11412 predicate(UseSSE<=1); 11040 predicate(UseSSE<=1);
11413 match(Set dst (RoundDouble src)); 11041 match(Set dst (RoundDouble src));
11414 ins_cost(125); 11042 ins_cost(125);
11415 format %{ "FST_D $dst,$src\t# D-round" %} 11043 format %{ "FST_D $dst,$src\t# D-round" %}
11416 ins_encode( Pop_Mem_Reg_D(dst, src) ); 11044 ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11417 ins_pipe( fpu_mem_reg ); 11045 ins_pipe( fpu_mem_reg );
11418 %} 11046 %}
11419 11047
11420 // Force rounding to 24-bit precision and 6-bit exponent 11048 // Force rounding to 24-bit precision and 6-bit exponent
11421 instruct convD2F_reg(stackSlotF dst, regD src) %{ 11049 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
11422 predicate(UseSSE==0); 11050 predicate(UseSSE==0);
11423 match(Set dst (ConvD2F src)); 11051 match(Set dst (ConvD2F src));
11424 format %{ "FST_S $dst,$src\t# F-round" %} 11052 format %{ "FST_S $dst,$src\t# F-round" %}
11425 expand %{ 11053 expand %{
11426 roundFloat_mem_reg(dst,src); 11054 roundFloat_mem_reg(dst,src);
11427 %} 11055 %}
11428 %} 11056 %}
11429 11057
11430 // Force rounding to 24-bit precision and 6-bit exponent 11058 // Force rounding to 24-bit precision and 6-bit exponent
11431 instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{ 11059 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
11432 predicate(UseSSE==1); 11060 predicate(UseSSE==1);
11433 match(Set dst (ConvD2F src)); 11061 match(Set dst (ConvD2F src));
11434 effect( KILL cr ); 11062 effect( KILL cr );
11435 format %{ "SUB ESP,4\n\t" 11063 format %{ "SUB ESP,4\n\t"
11436 "FST_S [ESP],$src\t# F-round\n\t" 11064 "FST_S [ESP],$src\t# F-round\n\t"
11449 %} 11077 %}
11450 ins_pipe( pipe_slow ); 11078 ins_pipe( pipe_slow );
11451 %} 11079 %}
11452 11080
11453 // Force rounding double precision to single precision 11081 // Force rounding double precision to single precision
11454 instruct convXD2X_reg(regX dst, regXD src) %{ 11082 instruct convD2F_reg(regF dst, regD src) %{
11455 predicate(UseSSE>=2); 11083 predicate(UseSSE>=2);
11456 match(Set dst (ConvD2F src)); 11084 match(Set dst (ConvD2F src));
11457 format %{ "CVTSD2SS $dst,$src\t# F-round" %} 11085 format %{ "CVTSD2SS $dst,$src\t# F-round" %}
11458 ins_encode %{ 11086 ins_encode %{
11459 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister); 11087 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
11460 %} 11088 %}
11461 ins_pipe( pipe_slow ); 11089 ins_pipe( pipe_slow );
11462 %} 11090 %}
11463 11091
11464 instruct convF2D_reg_reg(regD dst, regF src) %{ 11092 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
11465 predicate(UseSSE==0); 11093 predicate(UseSSE==0);
11466 match(Set dst (ConvF2D src)); 11094 match(Set dst (ConvF2D src));
11467 format %{ "FST_S $dst,$src\t# D-round" %} 11095 format %{ "FST_S $dst,$src\t# D-round" %}
11468 ins_encode( Pop_Reg_Reg_D(dst, src)); 11096 ins_encode( Pop_Reg_Reg_DPR(dst, src));
11469 ins_pipe( fpu_reg_reg ); 11097 ins_pipe( fpu_reg_reg );
11470 %} 11098 %}
11471 11099
11472 instruct convF2D_reg(stackSlotD dst, regF src) %{ 11100 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
11473 predicate(UseSSE==1); 11101 predicate(UseSSE==1);
11474 match(Set dst (ConvF2D src)); 11102 match(Set dst (ConvF2D src));
11475 format %{ "FST_D $dst,$src\t# D-round" %} 11103 format %{ "FST_D $dst,$src\t# D-round" %}
11476 expand %{ 11104 expand %{
11477 roundDouble_mem_reg(dst,src); 11105 roundDouble_mem_reg(dst,src);
11478 %} 11106 %}
11479 %} 11107 %}
11480 11108
11481 instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{ 11109 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
11482 predicate(UseSSE==1); 11110 predicate(UseSSE==1);
11483 match(Set dst (ConvF2D src)); 11111 match(Set dst (ConvF2D src));
11484 effect( KILL cr ); 11112 effect( KILL cr );
11485 format %{ "SUB ESP,4\n\t" 11113 format %{ "SUB ESP,4\n\t"
11486 "MOVSS [ESP] $src\n\t" 11114 "MOVSS [ESP] $src\n\t"
11495 __ fstp_d($dst$$reg); 11123 __ fstp_d($dst$$reg);
11496 %} 11124 %}
11497 ins_pipe( pipe_slow ); 11125 ins_pipe( pipe_slow );
11498 %} 11126 %}
11499 11127
11500 instruct convX2XD_reg(regXD dst, regX src) %{ 11128 instruct convF2D_reg(regD dst, regF src) %{
11501 predicate(UseSSE>=2); 11129 predicate(UseSSE>=2);
11502 match(Set dst (ConvF2D src)); 11130 match(Set dst (ConvF2D src));
11503 format %{ "CVTSS2SD $dst,$src\t# D-round" %} 11131 format %{ "CVTSS2SD $dst,$src\t# D-round" %}
11504 ins_encode %{ 11132 ins_encode %{
11505 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister); 11133 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
11506 %} 11134 %}
11507 ins_pipe( pipe_slow ); 11135 ins_pipe( pipe_slow );
11508 %} 11136 %}
11509 11137
11510 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 11138 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
11511 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ 11139 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
11512 predicate(UseSSE<=1); 11140 predicate(UseSSE<=1);
11513 match(Set dst (ConvD2I src)); 11141 match(Set dst (ConvD2I src));
11514 effect( KILL tmp, KILL cr ); 11142 effect( KILL tmp, KILL cr );
11515 format %{ "FLD $src\t# Convert double to int \n\t" 11143 format %{ "FLD $src\t# Convert double to int \n\t"
11516 "FLDCW trunc mode\n\t" 11144 "FLDCW trunc mode\n\t"
11521 "CMP EAX,0x80000000\n\t" 11149 "CMP EAX,0x80000000\n\t"
11522 "JNE,s fast\n\t" 11150 "JNE,s fast\n\t"
11523 "FLD_D $src\n\t" 11151 "FLD_D $src\n\t"
11524 "CALL d2i_wrapper\n" 11152 "CALL d2i_wrapper\n"
11525 "fast:" %} 11153 "fast:" %}
11526 ins_encode( Push_Reg_D(src), D2I_encoding(src) ); 11154 ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
11527 ins_pipe( pipe_slow ); 11155 ins_pipe( pipe_slow );
11528 %} 11156 %}
11529 11157
11530 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 11158 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
11531 instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{ 11159 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
11532 predicate(UseSSE>=2); 11160 predicate(UseSSE>=2);
11533 match(Set dst (ConvD2I src)); 11161 match(Set dst (ConvD2I src));
11534 effect( KILL tmp, KILL cr ); 11162 effect( KILL tmp, KILL cr );
11535 format %{ "CVTTSD2SI $dst, $src\n\t" 11163 format %{ "CVTTSD2SI $dst, $src\n\t"
11536 "CMP $dst,0x80000000\n\t" 11164 "CMP $dst,0x80000000\n\t"
11554 __ bind(fast); 11182 __ bind(fast);
11555 %} 11183 %}
11556 ins_pipe( pipe_slow ); 11184 ins_pipe( pipe_slow );
11557 %} 11185 %}
11558 11186
11559 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ 11187 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
11560 predicate(UseSSE<=1); 11188 predicate(UseSSE<=1);
11561 match(Set dst (ConvD2L src)); 11189 match(Set dst (ConvD2L src));
11562 effect( KILL cr ); 11190 effect( KILL cr );
11563 format %{ "FLD $src\t# Convert double to long\n\t" 11191 format %{ "FLD $src\t# Convert double to long\n\t"
11564 "FLDCW trunc mode\n\t" 11192 "FLDCW trunc mode\n\t"
11572 "TEST EAX,EAX\n\t" 11200 "TEST EAX,EAX\n\t"
11573 "JNE,s fast\n\t" 11201 "JNE,s fast\n\t"
11574 "FLD $src\n\t" 11202 "FLD $src\n\t"
11575 "CALL d2l_wrapper\n" 11203 "CALL d2l_wrapper\n"
11576 "fast:" %} 11204 "fast:" %}
11577 ins_encode( Push_Reg_D(src), D2L_encoding(src) ); 11205 ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
11578 ins_pipe( pipe_slow ); 11206 ins_pipe( pipe_slow );
11579 %} 11207 %}
11580 11208
11581 // XMM lacks a float/double->long conversion, so use the old FPU stack. 11209 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11582 instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{ 11210 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
11583 predicate (UseSSE>=2); 11211 predicate (UseSSE>=2);
11584 match(Set dst (ConvD2L src)); 11212 match(Set dst (ConvD2L src));
11585 effect( KILL cr ); 11213 effect( KILL cr );
11586 format %{ "SUB ESP,8\t# Convert double to long\n\t" 11214 format %{ "SUB ESP,8\t# Convert double to long\n\t"
11587 "MOVSD [ESP],$src\n\t" 11215 "MOVSD [ESP],$src\n\t"
11635 // manglations in the corner cases. So we set the rounding mode to 11263 // manglations in the corner cases. So we set the rounding mode to
11636 // 'zero', store the darned double down as an int, and reset the 11264 // 'zero', store the darned double down as an int, and reset the
11637 // rounding mode to 'nearest'. The hardware stores a flag value down 11265 // rounding mode to 'nearest'. The hardware stores a flag value down
11638 // if we would overflow or converted a NAN; we check for this and 11266 // if we would overflow or converted a NAN; we check for this and
11639 // and go the slow path if needed. 11267 // and go the slow path if needed.
11640 instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ 11268 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
11641 predicate(UseSSE==0); 11269 predicate(UseSSE==0);
11642 match(Set dst (ConvF2I src)); 11270 match(Set dst (ConvF2I src));
11643 effect( KILL tmp, KILL cr ); 11271 effect( KILL tmp, KILL cr );
11644 format %{ "FLD $src\t# Convert float to int \n\t" 11272 format %{ "FLD $src\t# Convert float to int \n\t"
11645 "FLDCW trunc mode\n\t" 11273 "FLDCW trunc mode\n\t"
11650 "CMP EAX,0x80000000\n\t" 11278 "CMP EAX,0x80000000\n\t"
11651 "JNE,s fast\n\t" 11279 "JNE,s fast\n\t"
11652 "FLD $src\n\t" 11280 "FLD $src\n\t"
11653 "CALL d2i_wrapper\n" 11281 "CALL d2i_wrapper\n"
11654 "fast:" %} 11282 "fast:" %}
11655 // D2I_encoding works for F2I 11283 // DPR2I_encoding works for FPR2I
11656 ins_encode( Push_Reg_F(src), D2I_encoding(src) ); 11284 ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
11657 ins_pipe( pipe_slow ); 11285 ins_pipe( pipe_slow );
11658 %} 11286 %}
11659 11287
11660 // Convert a float in xmm to an int reg. 11288 // Convert a float in xmm to an int reg.
11661 instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{ 11289 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
11662 predicate(UseSSE>=1); 11290 predicate(UseSSE>=1);
11663 match(Set dst (ConvF2I src)); 11291 match(Set dst (ConvF2I src));
11664 effect( KILL tmp, KILL cr ); 11292 effect( KILL tmp, KILL cr );
11665 format %{ "CVTTSS2SI $dst, $src\n\t" 11293 format %{ "CVTTSS2SI $dst, $src\n\t"
11666 "CMP $dst,0x80000000\n\t" 11294 "CMP $dst,0x80000000\n\t"
11684 __ bind(fast); 11312 __ bind(fast);
11685 %} 11313 %}
11686 ins_pipe( pipe_slow ); 11314 ins_pipe( pipe_slow );
11687 %} 11315 %}
11688 11316
11689 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ 11317 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
11690 predicate(UseSSE==0); 11318 predicate(UseSSE==0);
11691 match(Set dst (ConvF2L src)); 11319 match(Set dst (ConvF2L src));
11692 effect( KILL cr ); 11320 effect( KILL cr );
11693 format %{ "FLD $src\t# Convert float to long\n\t" 11321 format %{ "FLD $src\t# Convert float to long\n\t"
11694 "FLDCW trunc mode\n\t" 11322 "FLDCW trunc mode\n\t"
11702 "TEST EAX,EAX\n\t" 11330 "TEST EAX,EAX\n\t"
11703 "JNE,s fast\n\t" 11331 "JNE,s fast\n\t"
11704 "FLD $src\n\t" 11332 "FLD $src\n\t"
11705 "CALL d2l_wrapper\n" 11333 "CALL d2l_wrapper\n"
11706 "fast:" %} 11334 "fast:" %}
11707 // D2L_encoding works for F2L 11335 // DPR2L_encoding works for FPR2L
11708 ins_encode( Push_Reg_F(src), D2L_encoding(src) ); 11336 ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
11709 ins_pipe( pipe_slow ); 11337 ins_pipe( pipe_slow );
11710 %} 11338 %}
11711 11339
11712 // XMM lacks a float/double->long conversion, so use the old FPU stack. 11340 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11713 instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{ 11341 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
11714 predicate (UseSSE>=1); 11342 predicate (UseSSE>=1);
11715 match(Set dst (ConvF2L src)); 11343 match(Set dst (ConvF2L src));
11716 effect( KILL cr ); 11344 effect( KILL cr );
11717 format %{ "SUB ESP,8\t# Convert float to long\n\t" 11345 format %{ "SUB ESP,8\t# Convert float to long\n\t"
11718 "MOVSS [ESP],$src\n\t" 11346 "MOVSS [ESP],$src\n\t"
11760 __ bind(fast); 11388 __ bind(fast);
11761 %} 11389 %}
11762 ins_pipe( pipe_slow ); 11390 ins_pipe( pipe_slow );
11763 %} 11391 %}
11764 11392
11765 instruct convI2D_reg(regD dst, stackSlotI src) %{ 11393 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11766 predicate( UseSSE<=1 ); 11394 predicate( UseSSE<=1 );
11767 match(Set dst (ConvI2D src)); 11395 match(Set dst (ConvI2D src));
11768 format %{ "FILD $src\n\t" 11396 format %{ "FILD $src\n\t"
11769 "FSTP $dst" %} 11397 "FSTP $dst" %}
11770 opcode(0xDB, 0x0); /* DB /0 */ 11398 opcode(0xDB, 0x0); /* DB /0 */
11771 ins_encode(Push_Mem_I(src), Pop_Reg_D(dst)); 11399 ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11772 ins_pipe( fpu_reg_mem ); 11400 ins_pipe( fpu_reg_mem );
11773 %} 11401 %}
11774 11402
11775 instruct convI2XD_reg(regXD dst, eRegI src) %{ 11403 instruct convI2D_reg(regD dst, eRegI src) %{
11776 predicate( UseSSE>=2 && !UseXmmI2D ); 11404 predicate( UseSSE>=2 && !UseXmmI2D );
11777 match(Set dst (ConvI2D src)); 11405 match(Set dst (ConvI2D src));
11778 format %{ "CVTSI2SD $dst,$src" %} 11406 format %{ "CVTSI2SD $dst,$src" %}
11779 ins_encode %{ 11407 ins_encode %{
11780 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register); 11408 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11781 %} 11409 %}
11782 ins_pipe( pipe_slow ); 11410 ins_pipe( pipe_slow );
11783 %} 11411 %}
11784 11412
11785 instruct convI2XD_mem(regXD dst, memory mem) %{ 11413 instruct convI2D_mem(regD dst, memory mem) %{
11786 predicate( UseSSE>=2 ); 11414 predicate( UseSSE>=2 );
11787 match(Set dst (ConvI2D (LoadI mem))); 11415 match(Set dst (ConvI2D (LoadI mem)));
11788 format %{ "CVTSI2SD $dst,$mem" %} 11416 format %{ "CVTSI2SD $dst,$mem" %}
11789 ins_encode %{ 11417 ins_encode %{
11790 __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address); 11418 __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11791 %} 11419 %}
11792 ins_pipe( pipe_slow ); 11420 ins_pipe( pipe_slow );
11793 %} 11421 %}
11794 11422
11795 instruct convXI2XD_reg(regXD dst, eRegI src) 11423 instruct convXI2D_reg(regD dst, eRegI src)
11796 %{ 11424 %{
11797 predicate( UseSSE>=2 && UseXmmI2D ); 11425 predicate( UseSSE>=2 && UseXmmI2D );
11798 match(Set dst (ConvI2D src)); 11426 match(Set dst (ConvI2D src));
11799 11427
11800 format %{ "MOVD $dst,$src\n\t" 11428 format %{ "MOVD $dst,$src\n\t"
11804 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); 11432 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11805 %} 11433 %}
11806 ins_pipe(pipe_slow); // XXX 11434 ins_pipe(pipe_slow); // XXX
11807 %} 11435 %}
11808 11436
11809 instruct convI2D_mem(regD dst, memory mem) %{ 11437 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11810 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); 11438 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11811 match(Set dst (ConvI2D (LoadI mem))); 11439 match(Set dst (ConvI2D (LoadI mem)));
11812 format %{ "FILD $mem\n\t" 11440 format %{ "FILD $mem\n\t"
11813 "FSTP $dst" %} 11441 "FSTP $dst" %}
11814 opcode(0xDB); /* DB /0 */ 11442 opcode(0xDB); /* DB /0 */
11815 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11443 ins_encode( OpcP, RMopc_Mem(0x00,mem),
11816 Pop_Reg_D(dst)); 11444 Pop_Reg_DPR(dst));
11817 ins_pipe( fpu_reg_mem ); 11445 ins_pipe( fpu_reg_mem );
11818 %} 11446 %}
11819 11447
11820 // Convert a byte to a float; no rounding step needed. 11448 // Convert a byte to a float; no rounding step needed.
11821 instruct conv24I2F_reg(regF dst, stackSlotI src) %{ 11449 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11822 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 ); 11450 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11823 match(Set dst (ConvI2F src)); 11451 match(Set dst (ConvI2F src));
11824 format %{ "FILD $src\n\t" 11452 format %{ "FILD $src\n\t"
11825 "FSTP $dst" %} 11453 "FSTP $dst" %}
11826 11454
11827 opcode(0xDB, 0x0); /* DB /0 */ 11455 opcode(0xDB, 0x0); /* DB /0 */
11828 ins_encode(Push_Mem_I(src), Pop_Reg_F(dst)); 11456 ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11829 ins_pipe( fpu_reg_mem ); 11457 ins_pipe( fpu_reg_mem );
11830 %} 11458 %}
11831 11459
11832 // In 24-bit mode, force exponent rounding by storing back out 11460 // In 24-bit mode, force exponent rounding by storing back out
11833 instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{ 11461 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11834 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 11462 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11835 match(Set dst (ConvI2F src)); 11463 match(Set dst (ConvI2F src));
11836 ins_cost(200); 11464 ins_cost(200);
11837 format %{ "FILD $src\n\t" 11465 format %{ "FILD $src\n\t"
11838 "FSTP_S $dst" %} 11466 "FSTP_S $dst" %}
11839 opcode(0xDB, 0x0); /* DB /0 */ 11467 opcode(0xDB, 0x0); /* DB /0 */
11840 ins_encode( Push_Mem_I(src), 11468 ins_encode( Push_Mem_I(src),
11841 Pop_Mem_F(dst)); 11469 Pop_Mem_FPR(dst));
11842 ins_pipe( fpu_mem_mem ); 11470 ins_pipe( fpu_mem_mem );
11843 %} 11471 %}
11844 11472
11845 // In 24-bit mode, force exponent rounding by storing back out 11473 // In 24-bit mode, force exponent rounding by storing back out
11846 instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{ 11474 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11847 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 11475 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11848 match(Set dst (ConvI2F (LoadI mem))); 11476 match(Set dst (ConvI2F (LoadI mem)));
11849 ins_cost(200); 11477 ins_cost(200);
11850 format %{ "FILD $mem\n\t" 11478 format %{ "FILD $mem\n\t"
11851 "FSTP_S $dst" %} 11479 "FSTP_S $dst" %}
11852 opcode(0xDB); /* DB /0 */ 11480 opcode(0xDB); /* DB /0 */
11853 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11481 ins_encode( OpcP, RMopc_Mem(0x00,mem),
11854 Pop_Mem_F(dst)); 11482 Pop_Mem_FPR(dst));
11855 ins_pipe( fpu_mem_mem ); 11483 ins_pipe( fpu_mem_mem );
11856 %} 11484 %}
11857 11485
11858 // This instruction does not round to 24-bits 11486 // This instruction does not round to 24-bits
11859 instruct convI2F_reg(regF dst, stackSlotI src) %{ 11487 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11860 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11488 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11861 match(Set dst (ConvI2F src)); 11489 match(Set dst (ConvI2F src));
11862 format %{ "FILD $src\n\t" 11490 format %{ "FILD $src\n\t"
11863 "FSTP $dst" %} 11491 "FSTP $dst" %}
11864 opcode(0xDB, 0x0); /* DB /0 */ 11492 opcode(0xDB, 0x0); /* DB /0 */
11865 ins_encode( Push_Mem_I(src), 11493 ins_encode( Push_Mem_I(src),
11866 Pop_Reg_F(dst)); 11494 Pop_Reg_FPR(dst));
11867 ins_pipe( fpu_reg_mem ); 11495 ins_pipe( fpu_reg_mem );
11868 %} 11496 %}
11869 11497
11870 // This instruction does not round to 24-bits 11498 // This instruction does not round to 24-bits
11871 instruct convI2F_mem(regF dst, memory mem) %{ 11499 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11872 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11500 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11873 match(Set dst (ConvI2F (LoadI mem))); 11501 match(Set dst (ConvI2F (LoadI mem)));
11874 format %{ "FILD $mem\n\t" 11502 format %{ "FILD $mem\n\t"
11875 "FSTP $dst" %} 11503 "FSTP $dst" %}
11876 opcode(0xDB); /* DB /0 */ 11504 opcode(0xDB); /* DB /0 */
11877 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11505 ins_encode( OpcP, RMopc_Mem(0x00,mem),
11878 Pop_Reg_F(dst)); 11506 Pop_Reg_FPR(dst));
11879 ins_pipe( fpu_reg_mem ); 11507 ins_pipe( fpu_reg_mem );
11880 %} 11508 %}
11881 11509
11882 // Convert an int to a float in xmm; no rounding step needed. 11510 // Convert an int to a float in xmm; no rounding step needed.
11883 instruct convI2X_reg(regX dst, eRegI src) %{ 11511 instruct convI2F_reg(regF dst, eRegI src) %{
11884 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F ); 11512 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11885 match(Set dst (ConvI2F src)); 11513 match(Set dst (ConvI2F src));
11886 format %{ "CVTSI2SS $dst, $src" %} 11514 format %{ "CVTSI2SS $dst, $src" %}
11887 ins_encode %{ 11515 ins_encode %{
11888 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register); 11516 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11889 %} 11517 %}
11890 ins_pipe( pipe_slow ); 11518 ins_pipe( pipe_slow );
11891 %} 11519 %}
11892 11520
11893 instruct convXI2X_reg(regX dst, eRegI src) 11521 instruct convXI2F_reg(regF dst, eRegI src)
11894 %{ 11522 %{
11895 predicate( UseSSE>=2 && UseXmmI2F ); 11523 predicate( UseSSE>=2 && UseXmmI2F );
11896 match(Set dst (ConvI2F src)); 11524 match(Set dst (ConvI2F src));
11897 11525
11898 format %{ "MOVD $dst,$src\n\t" 11526 format %{ "MOVD $dst,$src\n\t"
11937 opcode(0x33); // XOR 11565 opcode(0x33); // XOR
11938 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); 11566 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11939 ins_pipe( ialu_reg_reg_long ); 11567 ins_pipe( ialu_reg_reg_long );
11940 %} 11568 %}
11941 11569
11942 instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{ 11570 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11943 predicate (UseSSE<=1); 11571 predicate (UseSSE<=1);
11944 match(Set dst (ConvL2D src)); 11572 match(Set dst (ConvL2D src));
11945 effect( KILL cr ); 11573 effect( KILL cr );
11946 format %{ "PUSH $src.hi\t# Convert long to double\n\t" 11574 format %{ "PUSH $src.hi\t# Convert long to double\n\t"
11947 "PUSH $src.lo\n\t" 11575 "PUSH $src.lo\n\t"
11948 "FILD ST,[ESP + #0]\n\t" 11576 "FILD ST,[ESP + #0]\n\t"
11949 "ADD ESP,8\n\t" 11577 "ADD ESP,8\n\t"
11950 "FSTP_D $dst\t# D-round" %} 11578 "FSTP_D $dst\t# D-round" %}
11951 opcode(0xDF, 0x5); /* DF /5 */ 11579 opcode(0xDF, 0x5); /* DF /5 */
11952 ins_encode(convert_long_double(src), Pop_Mem_D(dst)); 11580 ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11953 ins_pipe( pipe_slow ); 11581 ins_pipe( pipe_slow );
11954 %} 11582 %}
11955 11583
// convL2D_reg (left column: previous name convL2XD_reg).
// ConvL2D rule selected when UseSSE>=2; the result is a regD operand.
// Per the format text: both halves of $src are pushed, the value goes
// through FILD_D / FSTP_D on [ESP], is loaded into $dst with MOVSD, and
// ESP is then restored by 8.  The flags register operand cr is killed.
11956 instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{ 11584 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11957 predicate (UseSSE>=2); 11585 predicate (UseSSE>=2);
11958 match(Set dst (ConvL2D src)); 11586 match(Set dst (ConvL2D src));
11959 effect( KILL cr ); 11587 effect( KILL cr );
11960 format %{ "PUSH $src.hi\t# Convert long to double\n\t" 11588 format %{ "PUSH $src.hi\t# Convert long to double\n\t"
11961 "PUSH $src.lo\n\t" 11589 "PUSH $src.lo\n\t"
11962 "FILD_D [ESP]\n\t" 11590 "FILD_D [ESP]\n\t"
11963 "FSTP_D [ESP]\n\t" 11591 "FSTP_D [ESP]\n\t"
11964 "MOVSD $dst,[ESP]\n\t" 11592 "MOVSD $dst,[ESP]\n\t"
11965 "ADD ESP,8" %} 11593 "ADD ESP,8" %}
11966 opcode(0xDF, 0x5); /* DF /5 */ 11594 opcode(0xDF, 0x5); /* DF /5 */
11967 ins_encode(convert_long_double2(src), Push_ResultXD(dst)); 11595 ins_encode(convert_long_double2(src), Push_ResultD(dst));
11968 ins_pipe( pipe_slow ); 11596 ins_pipe( pipe_slow );
11969 %} 11597 %}
11970 11598
// convL2F_reg (left column: previous name convL2X_reg).
// ConvL2F rule selected when UseSSE>=1; the result is a regF operand.
// Per the format text: both halves of $src are pushed, the value goes
// through FILD_D / FSTP_S on [ESP], is loaded into $dst with MOVSS, and
// ESP is then restored by 8.  The flags register operand cr is killed.
// NOTE(review): the meaning of the 0x8 argument to Push_ResultF is not
// visible here; presumably the stack adjustment -- confirm in the enc_class.
11971 instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{ 11599 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11972 predicate (UseSSE>=1); 11600 predicate (UseSSE>=1);
11973 match(Set dst (ConvL2F src)); 11601 match(Set dst (ConvL2F src));
11974 effect( KILL cr ); 11602 effect( KILL cr );
11975 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" 11603 format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
11976 "PUSH $src.lo\n\t" 11604 "PUSH $src.lo\n\t"
11977 "FILD_D [ESP]\n\t" 11605 "FILD_D [ESP]\n\t"
11978 "FSTP_S [ESP]\n\t" 11606 "FSTP_S [ESP]\n\t"
11979 "MOVSS $dst,[ESP]\n\t" 11607 "MOVSS $dst,[ESP]\n\t"
11980 "ADD ESP,8" %} 11608 "ADD ESP,8" %}
11981 opcode(0xDF, 0x5); /* DF /5 */ 11609 opcode(0xDF, 0x5); /* DF /5 */
11982 ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8)); 11610 ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11983 ins_pipe( pipe_slow ); 11611 ins_pipe( pipe_slow );
11984 %} 11612 %}
11985 11613
// convL2FPR_reg (left column: previous name convL2F_reg).
// ConvL2F rule whose result is a float stack slot.
// Per the format text: both halves of $src are pushed, loaded with FILD,
// ESP is restored by 8, and the value is stored to $dst with FSTP_S.
// The flags register operand cr is declared as killed.
// NOTE(review): unlike the neighbouring conversion rules this one carries no
// predicate() clause -- confirm that is intentional.
11986 instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{ 11614 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11987 match(Set dst (ConvL2F src)); 11615 match(Set dst (ConvL2F src));
11988 effect( KILL cr ); 11616 effect( KILL cr );
11989 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" 11617 format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
11990 "PUSH $src.lo\n\t" 11618 "PUSH $src.lo\n\t"
11991 "FILD ST,[ESP + #0]\n\t" 11619 "FILD ST,[ESP + #0]\n\t"
11992 "ADD ESP,8\n\t" 11620 "ADD ESP,8\n\t"
11993 "FSTP_S $dst\t# F-round" %} 11621 "FSTP_S $dst\t# F-round" %}
11994 opcode(0xDF, 0x5); /* DF /5 */ 11622 opcode(0xDF, 0x5); /* DF /5 */
11995 ins_encode(convert_long_double(src), Pop_Mem_F(dst)); 11623 ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11996 ins_pipe( pipe_slow ); 11624 ins_pipe( pipe_slow );
11997 %} 11625 %}
11998 11626
11999 instruct convL2I_reg( eRegI dst, eRegL src ) %{ 11627 instruct convL2I_reg( eRegI dst, eRegL src ) %{
12000 match(Set dst (ConvL2I src)); 11628 match(Set dst (ConvL2I src));
12014 __ movl($dst$$Register, Address(rsp, $src$$disp)); 11642 __ movl($dst$$Register, Address(rsp, $src$$disp));
12015 %} 11643 %}
12016 ins_pipe( ialu_reg_mem ); 11644 ins_pipe( ialu_reg_mem );
12017 %} 11645 %}
12018 11646
// MoveFPR2I_reg_stack (left column: previous name MoveF2I_reg_stack).
// MoveF2I rule selected when UseSSE==0: stores the regFPR source into an
// int stack slot (format text: FST_S $dst,$src).  Cost 125; dst is a DEF
// and src a USE.
12019 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{ 11647 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
12020 predicate(UseSSE==0); 11648 predicate(UseSSE==0);
12021 match(Set dst (MoveF2I src)); 11649 match(Set dst (MoveF2I src));
12022 effect( DEF dst, USE src ); 11650 effect( DEF dst, USE src );
12023 11651
12024 ins_cost(125); 11652 ins_cost(125);
12025 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %} 11653 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
12026 ins_encode( Pop_Mem_Reg_F(dst, src) ); 11654 ins_encode( Pop_Mem_Reg_FPR(dst, src) );
12027 ins_pipe( fpu_mem_reg ); 11655 ins_pipe( fpu_mem_reg );
12028 %} 11656 %}
12029 11657
12030 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{ 11658 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
12031 predicate(UseSSE>=1); 11659 predicate(UseSSE>=1);
12032 match(Set dst (MoveF2I src)); 11660 match(Set dst (MoveF2I src));
12033 effect( DEF dst, USE src ); 11661 effect( DEF dst, USE src );
12034 11662
12035 ins_cost(95); 11663 ins_cost(95);
12038 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister); 11666 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
12039 %} 11667 %}
12040 ins_pipe( pipe_slow ); 11668 ins_pipe( pipe_slow );
12041 %} 11669 %}
12042 11670
12043 instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{ 11671 instruct MoveF2I_reg_reg_sse(eRegI dst, regF src) %{
12044 predicate(UseSSE>=2); 11672 predicate(UseSSE>=2);
12045 match(Set dst (MoveF2I src)); 11673 match(Set dst (MoveF2I src));
12046 effect( DEF dst, USE src ); 11674 effect( DEF dst, USE src );
12047 ins_cost(85); 11675 ins_cost(85);
12048 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %} 11676 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
12063 %} 11691 %}
12064 ins_pipe( ialu_mem_reg ); 11692 ins_pipe( ialu_mem_reg );
12065 %} 11693 %}
12066 11694
12067 11695
// MoveI2FPR_stack_reg (left column: previous name MoveI2F_stack_reg).
// MoveI2F rule selected when UseSSE==0: loads an int stack slot as a
// single-precision value (FLD_S) and pops it into the regFPR destination
// (FSTP).  Primary opcode 0xD9 with /0 in the ModRM byte; cost 125.
12068 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{ 11696 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
12069 predicate(UseSSE==0); 11697 predicate(UseSSE==0);
12070 match(Set dst (MoveI2F src)); 11698 match(Set dst (MoveI2F src));
12071 effect(DEF dst, USE src); 11699 effect(DEF dst, USE src);
12072 11700
12073 ins_cost(125); 11701 ins_cost(125);
12074 format %{ "FLD_S $src\n\t" 11702 format %{ "FLD_S $src\n\t"
12075 "FSTP $dst\t# MoveI2F_stack_reg" %} 11703 "FSTP $dst\t# MoveI2F_stack_reg" %}
12076 opcode(0xD9); /* D9 /0, FLD m32real */ 11704 opcode(0xD9); /* D9 /0, FLD m32real */
12077 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11705 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
12078 Pop_Reg_F(dst) ); 11706 Pop_Reg_FPR(dst) );
12079 ins_pipe( fpu_reg_mem ); 11707 ins_pipe( fpu_reg_mem );
12080 %} 11708 %}
12081 11709
12082 instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{ 11710 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
12083 predicate(UseSSE>=1); 11711 predicate(UseSSE>=1);
12084 match(Set dst (MoveI2F src)); 11712 match(Set dst (MoveI2F src));
12085 effect( DEF dst, USE src ); 11713 effect( DEF dst, USE src );
12086 11714
12087 ins_cost(95); 11715 ins_cost(95);
12090 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); 11718 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
12091 %} 11719 %}
12092 ins_pipe( pipe_slow ); 11720 ins_pipe( pipe_slow );
12093 %} 11721 %}
12094 11722
12095 instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{ 11723 instruct MoveI2F_reg_reg_sse(regF dst, eRegI src) %{
12096 predicate(UseSSE>=2); 11724 predicate(UseSSE>=2);
12097 match(Set dst (MoveI2F src)); 11725 match(Set dst (MoveI2F src));
12098 effect( DEF dst, USE src ); 11726 effect( DEF dst, USE src );
12099 11727
12100 ins_cost(85); 11728 ins_cost(85);
12115 opcode(0x8B, 0x8B); 11743 opcode(0x8B, 0x8B);
12116 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src)); 11744 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
12117 ins_pipe( ialu_mem_long_reg ); 11745 ins_pipe( ialu_mem_long_reg );
12118 %} 11746 %}
12119 11747
// MoveDPR2L_reg_stack (left column: previous name MoveD2L_reg_stack).
// MoveD2L rule selected when UseSSE<=1: stores the regDPR source into a
// long stack slot (format text: FST_D $dst,$src).  Cost 125; dst is a DEF
// and src a USE.
12120 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{ 11748 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
12121 predicate(UseSSE<=1); 11749 predicate(UseSSE<=1);
12122 match(Set dst (MoveD2L src)); 11750 match(Set dst (MoveD2L src));
12123 effect(DEF dst, USE src); 11751 effect(DEF dst, USE src);
12124 11752
12125 ins_cost(125); 11753 ins_cost(125);
12126 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} 11754 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
12127 ins_encode( Pop_Mem_Reg_D(dst, src) ); 11755 ins_encode( Pop_Mem_Reg_DPR(dst, src) );
12128 ins_pipe( fpu_mem_reg ); 11756 ins_pipe( fpu_mem_reg );
12129 %} 11757 %}
12130 11758
12131 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{ 11759 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
12132 predicate(UseSSE>=2); 11760 predicate(UseSSE>=2);
12133 match(Set dst (MoveD2L src)); 11761 match(Set dst (MoveD2L src));
12134 effect(DEF dst, USE src); 11762 effect(DEF dst, USE src);
12135 ins_cost(95); 11763 ins_cost(95);
12136 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 11764 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
12138 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 11766 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
12139 %} 11767 %}
12140 ins_pipe( pipe_slow ); 11768 ins_pipe( pipe_slow );
12141 %} 11769 %}
12142 11770
12143 instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{ 11771 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
12144 predicate(UseSSE>=2); 11772 predicate(UseSSE>=2);
12145 match(Set dst (MoveD2L src)); 11773 match(Set dst (MoveD2L src));
12146 effect(DEF dst, USE src, TEMP tmp); 11774 effect(DEF dst, USE src, TEMP tmp);
12147 ins_cost(85); 11775 ins_cost(85);
12148 format %{ "MOVD $dst.lo,$src\n\t" 11776 format %{ "MOVD $dst.lo,$src\n\t"
12167 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 11795 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
12168 ins_pipe( ialu_mem_long_reg ); 11796 ins_pipe( ialu_mem_long_reg );
12169 %} 11797 %}
12170 11798
12171 11799
// MoveL2DPR_stack_reg (left column: previous name MoveL2D_stack_reg).
// MoveL2D rule selected when UseSSE<=1: loads a long stack slot as a
// double-precision value (FLD_D) and pops it into the regDPR destination
// (FSTP).  Primary opcode 0xDD with /0 in the ModRM byte; cost 125.
12172 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{ 11800 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
12173 predicate(UseSSE<=1); 11801 predicate(UseSSE<=1);
12174 match(Set dst (MoveL2D src)); 11802 match(Set dst (MoveL2D src));
12175 effect(DEF dst, USE src); 11803 effect(DEF dst, USE src);
12176 ins_cost(125); 11804 ins_cost(125);
12177 11805
12178 format %{ "FLD_D $src\n\t" 11806 format %{ "FLD_D $src\n\t"
12179 "FSTP $dst\t# MoveL2D_stack_reg" %} 11807 "FSTP $dst\t# MoveL2D_stack_reg" %}
12180 opcode(0xDD); /* DD /0, FLD m64real */ 11808 opcode(0xDD); /* DD /0, FLD m64real */
12181 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11809 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
12182 Pop_Reg_D(dst) ); 11810 Pop_Reg_DPR(dst) );
12183 ins_pipe( fpu_reg_mem ); 11811 ins_pipe( fpu_reg_mem );
12184 %} 11812 %}
12185 11813
12186 11814
12187 instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{ 11815 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
12188 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 11816 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
12189 match(Set dst (MoveL2D src)); 11817 match(Set dst (MoveL2D src));
12190 effect(DEF dst, USE src); 11818 effect(DEF dst, USE src);
12191 11819
12192 ins_cost(95); 11820 ins_cost(95);
12195 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11823 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
12196 %} 11824 %}
12197 ins_pipe( pipe_slow ); 11825 ins_pipe( pipe_slow );
12198 %} 11826 %}
12199 11827
12200 instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{ 11828 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
12201 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11829 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
12202 match(Set dst (MoveL2D src)); 11830 match(Set dst (MoveL2D src));
12203 effect(DEF dst, USE src); 11831 effect(DEF dst, USE src);
12204 11832
12205 ins_cost(95); 11833 ins_cost(95);
12208 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11836 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
12209 %} 11837 %}
12210 ins_pipe( pipe_slow ); 11838 ins_pipe( pipe_slow );
12211 %} 11839 %}
12212 11840
12213 instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{ 11841 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
12214 predicate(UseSSE>=2); 11842 predicate(UseSSE>=2);
12215 match(Set dst (MoveL2D src)); 11843 match(Set dst (MoveL2D src));
12216 effect(TEMP dst, USE src, TEMP tmp); 11844 effect(TEMP dst, USE src, TEMP tmp);
12217 ins_cost(85); 11845 ins_cost(85);
12218 format %{ "MOVD $dst,$src.lo\n\t" 11846 format %{ "MOVD $dst,$src.lo\n\t"
12225 %} 11853 %}
12226 ins_pipe( pipe_slow ); 11854 ins_pipe( pipe_slow );
12227 %} 11855 %}
12228 11856
12229 // Replicate scalar to packed byte (1 byte) values in xmm 11857 // Replicate scalar to packed byte (1 byte) values in xmm
12230 instruct Repl8B_reg(regXD dst, regXD src) %{ 11858 instruct Repl8B_reg(regD dst, regD src) %{
12231 predicate(UseSSE>=2); 11859 predicate(UseSSE>=2);
12232 match(Set dst (Replicate8B src)); 11860 match(Set dst (Replicate8B src));
12233 format %{ "MOVDQA $dst,$src\n\t" 11861 format %{ "MOVDQA $dst,$src\n\t"
12234 "PUNPCKLBW $dst,$dst\n\t" 11862 "PUNPCKLBW $dst,$dst\n\t"
12235 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %} 11863 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12242 %} 11870 %}
12243 ins_pipe( pipe_slow ); 11871 ins_pipe( pipe_slow );
12244 %} 11872 %}
12245 11873
12246 // Replicate scalar to packed byte (1 byte) values in xmm 11874 // Replicate scalar to packed byte (1 byte) values in xmm
12247 instruct Repl8B_eRegI(regXD dst, eRegI src) %{ 11875 instruct Repl8B_eRegI(regD dst, eRegI src) %{
12248 predicate(UseSSE>=2); 11876 predicate(UseSSE>=2);
12249 match(Set dst (Replicate8B src)); 11877 match(Set dst (Replicate8B src));
12250 format %{ "MOVD $dst,$src\n\t" 11878 format %{ "MOVD $dst,$src\n\t"
12251 "PUNPCKLBW $dst,$dst\n\t" 11879 "PUNPCKLBW $dst,$dst\n\t"
12252 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %} 11880 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12257 %} 11885 %}
12258 ins_pipe( pipe_slow ); 11886 ins_pipe( pipe_slow );
12259 %} 11887 %}
12260 11888
12261 // Replicate scalar zero to packed byte (1 byte) values in xmm 11889 // Replicate scalar zero to packed byte (1 byte) values in xmm
// Repl8B_immI0: matches Replicate8B of an immI0 operand when UseSSE>=2 and
// emits a single PXOR of the destination XMM register with itself.
12262 instruct Repl8B_immI0(regXD dst, immI0 zero) %{ 11890 instruct Repl8B_immI0(regD dst, immI0 zero) %{
12263 predicate(UseSSE>=2); 11891 predicate(UseSSE>=2);
12264 match(Set dst (Replicate8B zero)); 11892 match(Set dst (Replicate8B zero));
12265 format %{ "PXOR $dst,$dst\t! replicate8B" %} 11893 format %{ "PXOR $dst,$dst\t! replicate8B" %}
12266 ins_encode %{ 11894 ins_encode %{
12267 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 11895 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
12268 %} 11896 %}
12269 ins_pipe( fpu_reg_reg ); 11897 ins_pipe( fpu_reg_reg );
12270 %} 11898 %}
12271 11899
12272 // Replicate scalar to packed short (2 byte) values in xmm 11900 // Replicate scalar to packed short (2 byte) values in xmm
// Repl4S_reg: matches Replicate4S with an XMM-register source when
// UseSSE>=2 and emits one PSHUFLW $dst,$src with shuffle immediate 0x00.
12273 instruct Repl4S_reg(regXD dst, regXD src) %{ 11901 instruct Repl4S_reg(regD dst, regD src) %{
12274 predicate(UseSSE>=2); 11902 predicate(UseSSE>=2);
12275 match(Set dst (Replicate4S src)); 11903 match(Set dst (Replicate4S src));
12276 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %} 11904 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
12277 ins_encode %{ 11905 ins_encode %{
12278 __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00); 11906 __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
12279 %} 11907 %}
12280 ins_pipe( fpu_reg_reg ); 11908 ins_pipe( fpu_reg_reg );
12281 %} 11909 %}
12282 11910
12283 // Replicate scalar to packed short (2 byte) values in xmm 11911 // Replicate scalar to packed short (2 byte) values in xmm
12284 instruct Repl4S_eRegI(regXD dst, eRegI src) %{ 11912 instruct Repl4S_eRegI(regD dst, eRegI src) %{
12285 predicate(UseSSE>=2); 11913 predicate(UseSSE>=2);
12286 match(Set dst (Replicate4S src)); 11914 match(Set dst (Replicate4S src));
12287 format %{ "MOVD $dst,$src\n\t" 11915 format %{ "MOVD $dst,$src\n\t"
12288 "PSHUFLW $dst,$dst,0x00\t! replicate4S" %} 11916 "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
12289 ins_encode %{ 11917 ins_encode %{
12292 %} 11920 %}
12293 ins_pipe( fpu_reg_reg ); 11921 ins_pipe( fpu_reg_reg );
12294 %} 11922 %}
12295 11923
12296 // Replicate scalar zero to packed short (2 byte) values in xmm 11924 // Replicate scalar zero to packed short (2 byte) values in xmm
// Repl4S_immI0: matches Replicate4S of an immI0 operand when UseSSE>=2 and
// emits a single PXOR of the destination XMM register with itself.
12297 instruct Repl4S_immI0(regXD dst, immI0 zero) %{ 11925 instruct Repl4S_immI0(regD dst, immI0 zero) %{
12298 predicate(UseSSE>=2); 11926 predicate(UseSSE>=2);
12299 match(Set dst (Replicate4S zero)); 11927 match(Set dst (Replicate4S zero));
12300 format %{ "PXOR $dst,$dst\t! replicate4S" %} 11928 format %{ "PXOR $dst,$dst\t! replicate4S" %}
12301 ins_encode %{ 11929 ins_encode %{
12302 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 11930 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
12303 %} 11931 %}
12304 ins_pipe( fpu_reg_reg ); 11932 ins_pipe( fpu_reg_reg );
12305 %} 11933 %}
12306 11934
12307 // Replicate scalar to packed char (2 byte) values in xmm 11935 // Replicate scalar to packed char (2 byte) values in xmm
// Repl4C_reg: matches Replicate4C with an XMM-register source when
// UseSSE>=2 and emits one PSHUFLW $dst,$src with shuffle immediate 0x00.
12308 instruct Repl4C_reg(regXD dst, regXD src) %{ 11936 instruct Repl4C_reg(regD dst, regD src) %{
12309 predicate(UseSSE>=2); 11937 predicate(UseSSE>=2);
12310 match(Set dst (Replicate4C src)); 11938 match(Set dst (Replicate4C src));
12311 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %} 11939 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
12312 ins_encode %{ 11940 ins_encode %{
12313 __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00); 11941 __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
12314 %} 11942 %}
12315 ins_pipe( fpu_reg_reg ); 11943 ins_pipe( fpu_reg_reg );
12316 %} 11944 %}
12317 11945
12318 // Replicate scalar to packed char (2 byte) values in xmm 11946 // Replicate scalar to packed char (2 byte) values in xmm
12319 instruct Repl4C_eRegI(regXD dst, eRegI src) %{ 11947 instruct Repl4C_eRegI(regD dst, eRegI src) %{
12320 predicate(UseSSE>=2); 11948 predicate(UseSSE>=2);
12321 match(Set dst (Replicate4C src)); 11949 match(Set dst (Replicate4C src));
12322 format %{ "MOVD $dst,$src\n\t" 11950 format %{ "MOVD $dst,$src\n\t"
12323 "PSHUFLW $dst,$dst,0x00\t! replicate4C" %} 11951 "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
12324 ins_encode %{ 11952 ins_encode %{
12327 %} 11955 %}
12328 ins_pipe( fpu_reg_reg ); 11956 ins_pipe( fpu_reg_reg );
12329 %} 11957 %}
12330 11958
12331 // Replicate scalar zero to packed char (2 byte) values in xmm 11959 // Replicate scalar zero to packed char (2 byte) values in xmm
// Repl4C_immI0: matches Replicate4C of an immI0 operand when UseSSE>=2 and
// emits a single PXOR of the destination XMM register with itself.
12332 instruct Repl4C_immI0(regXD dst, immI0 zero) %{ 11960 instruct Repl4C_immI0(regD dst, immI0 zero) %{
12333 predicate(UseSSE>=2); 11961 predicate(UseSSE>=2);
12334 match(Set dst (Replicate4C zero)); 11962 match(Set dst (Replicate4C zero));
12335 format %{ "PXOR $dst,$dst\t! replicate4C" %} 11963 format %{ "PXOR $dst,$dst\t! replicate4C" %}
12336 ins_encode %{ 11964 ins_encode %{
12337 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 11965 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
12338 %} 11966 %}
12339 ins_pipe( fpu_reg_reg ); 11967 ins_pipe( fpu_reg_reg );
12340 %} 11968 %}
12341 11969
12342 // Replicate scalar to packed integer (4 byte) values in xmm 11970 // Replicate scalar to packed integer (4 byte) values in xmm
// Repl2I_reg: matches Replicate2I with an XMM-register source when
// UseSSE>=2 and emits one PSHUFD $dst,$src with shuffle immediate 0x00.
12343 instruct Repl2I_reg(regXD dst, regXD src) %{ 11971 instruct Repl2I_reg(regD dst, regD src) %{
12344 predicate(UseSSE>=2); 11972 predicate(UseSSE>=2);
12345 match(Set dst (Replicate2I src)); 11973 match(Set dst (Replicate2I src));
12346 format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %} 11974 format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
12347 ins_encode %{ 11975 ins_encode %{
12348 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 11976 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
12349 %} 11977 %}
12350 ins_pipe( fpu_reg_reg ); 11978 ins_pipe( fpu_reg_reg );
12351 %} 11979 %}
12352 11980
12353 // Replicate scalar to packed integer (4 byte) values in xmm 11981 // Replicate scalar to packed integer (4 byte) values in xmm
12354 instruct Repl2I_eRegI(regXD dst, eRegI src) %{ 11982 instruct Repl2I_eRegI(regD dst, eRegI src) %{
12355 predicate(UseSSE>=2); 11983 predicate(UseSSE>=2);
12356 match(Set dst (Replicate2I src)); 11984 match(Set dst (Replicate2I src));
12357 format %{ "MOVD $dst,$src\n\t" 11985 format %{ "MOVD $dst,$src\n\t"
12358 "PSHUFD $dst,$dst,0x00\t! replicate2I" %} 11986 "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
12359 ins_encode %{ 11987 ins_encode %{
12362 %} 11990 %}
12363 ins_pipe( fpu_reg_reg ); 11991 ins_pipe( fpu_reg_reg );
12364 %} 11992 %}
12365 11993
12366 // Replicate scalar zero to packed integer (4 byte) values in xmm 11994 // Replicate scalar zero to packed integer (4 byte) values in xmm
// Repl2I_immI0: matches Replicate2I of an immI0 operand when UseSSE>=2 and
// emits a single PXOR of the destination XMM register with itself.
12367 instruct Repl2I_immI0(regXD dst, immI0 zero) %{ 11995 instruct Repl2I_immI0(regD dst, immI0 zero) %{
12368 predicate(UseSSE>=2); 11996 predicate(UseSSE>=2);
12369 match(Set dst (Replicate2I zero)); 11997 match(Set dst (Replicate2I zero));
12370 format %{ "PXOR $dst,$dst\t! replicate2I" %} 11998 format %{ "PXOR $dst,$dst\t! replicate2I" %}
12371 ins_encode %{ 11999 ins_encode %{
12372 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 12000 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
12373 %} 12001 %}
12374 ins_pipe( fpu_reg_reg ); 12002 ins_pipe( fpu_reg_reg );
12375 %} 12003 %}
12376 12004
12377 // Replicate scalar to packed single precision floating point values in xmm 12005 // Replicate scalar to packed single precision floating point values in xmm
// Repl2F_reg: matches Replicate2F when the source is a regD operand and
// UseSSE>=2; emits one PSHUFD $dst,$src with shuffle immediate 0xe0.
12378 instruct Repl2F_reg(regXD dst, regXD src) %{ 12006 instruct Repl2F_reg(regD dst, regD src) %{
12379 predicate(UseSSE>=2); 12007 predicate(UseSSE>=2);
12380 match(Set dst (Replicate2F src)); 12008 match(Set dst (Replicate2F src));
12381 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} 12009 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12382 ins_encode %{ 12010 ins_encode %{
12383 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0); 12011 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
12384 %} 12012 %}
12385 ins_pipe( fpu_reg_reg ); 12013 ins_pipe( fpu_reg_reg );
12386 %} 12014 %}
12387 12015
12388 // Replicate scalar to packed single precision floating point values in xmm 12016 // Replicate scalar to packed single precision floating point values in xmm
// Repl2F_regF (left column: previous name Repl2F_regX).
// Same match and encoding as Repl2F_reg, but the source operand class is
// regF rather than regD: Replicate2F under UseSSE>=2, one PSHUFD with
// shuffle immediate 0xe0.
12389 instruct Repl2F_regX(regXD dst, regX src) %{ 12017 instruct Repl2F_regF(regD dst, regF src) %{
12390 predicate(UseSSE>=2); 12018 predicate(UseSSE>=2);
12391 match(Set dst (Replicate2F src)); 12019 match(Set dst (Replicate2F src));
12392 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} 12020 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12393 ins_encode %{ 12021 ins_encode %{
12394 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0); 12022 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
12395 %} 12023 %}
12396 ins_pipe( fpu_reg_reg ); 12024 ins_pipe( fpu_reg_reg );
12397 %} 12025 %}
12398 12026
12399 // Replicate scalar zero to packed single precision floating point values in xmm 12027 // Replicate scalar zero to packed single precision floating point values in xmm
12400 instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{ 12028 instruct Repl2F_immF0(regD dst, immF0 zero) %{
12401 predicate(UseSSE>=2); 12029 predicate(UseSSE>=2);
12402 match(Set dst (Replicate2F zero)); 12030 match(Set dst (Replicate2F zero));
12403 format %{ "PXOR $dst,$dst\t! replicate2F" %} 12031 format %{ "PXOR $dst,$dst\t! replicate2F" %}
12404 ins_encode %{ 12032 ins_encode %{
12405 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 12033 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
12421 Opcode(0xF3), Opcode(0xAB) ); 12049 Opcode(0xF3), Opcode(0xAB) );
12422 ins_pipe( pipe_slow ); 12050 ins_pipe( pipe_slow );
12423 %} 12051 %}
12424 12052
12425 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 12053 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
12426 eAXRegI result, regXD tmp1, eFlagsReg cr) %{ 12054 eAXRegI result, regD tmp1, eFlagsReg cr) %{
12427 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 12055 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12428 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 12056 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12429 12057
12430 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 12058 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
12431 ins_encode %{ 12059 ins_encode %{
12436 ins_pipe( pipe_slow ); 12064 ins_pipe( pipe_slow );
12437 %} 12065 %}
12438 12066
12439 // fast string equals 12067 // fast string equals
12440 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 12068 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
12441 regXD tmp1, regXD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 12069 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
12442 match(Set result (StrEquals (Binary str1 str2) cnt)); 12070 match(Set result (StrEquals (Binary str1 str2) cnt));
12443 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 12071 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
12444 12072
12445 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} 12073 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
12446 ins_encode %{ 12074 ins_encode %{
12451 ins_pipe( pipe_slow ); 12079 ins_pipe( pipe_slow );
12452 %} 12080 %}
12453 12081
12454 // fast search of substring with known size. 12082 // fast search of substring with known size.
12455 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 12083 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
12456 eBXRegI result, regXD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 12084 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
12457 predicate(UseSSE42Intrinsics); 12085 predicate(UseSSE42Intrinsics);
12458 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 12086 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12459 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 12087 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12460 12088
12461 format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 12089 format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
12478 %} 12106 %}
12479 ins_pipe( pipe_slow ); 12107 ins_pipe( pipe_slow );
12480 %} 12108 %}
12481 12109
12482 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 12110 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12483 eBXRegI result, regXD vec, eCXRegI tmp, eFlagsReg cr) %{ 12111 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
12484 predicate(UseSSE42Intrinsics); 12112 predicate(UseSSE42Intrinsics);
12485 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 12113 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12486 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 12114 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12487 12115
12488 format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 12116 format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
12495 ins_pipe( pipe_slow ); 12123 ins_pipe( pipe_slow );
12496 %} 12124 %}
12497 12125
12498 // fast array equals 12126 // fast array equals
12499 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 12127 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12500 regXD tmp1, regXD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 12128 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12501 %{ 12129 %{
12502 match(Set result (AryEq ary1 ary2)); 12130 match(Set result (AryEq ary1 ary2));
12503 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 12131 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12504 //ins_cost(300); 12132 //ins_cost(300);
12505 12133
13321 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12949 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13322 ins_pipe( pipe_cmov_reg ); 12950 ins_pipe( pipe_cmov_reg );
13323 %} 12951 %}
13324 12952
13325 // Compare 2 longs and CMOVE doubles 12953 // Compare 2 longs and CMOVE doubles
// cmovDDPR_reg_LTGE (left column: previous name cmovDD_reg_LTGE).
// CMoveD on regDPR operands driven by a flagsReg_long_LTGE result; expands
// to fcmovDPR_regS.  Intended only for UseSSE<=1 with an lt or ge test.
// The two BoolTest checks are parenthesised because && binds tighter than
// ||: written flat, the ge case was accepted regardless of UseSSE.
13326 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{ 12954 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
13327 predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) ); 12955 predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13328 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 12956 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13329 ins_cost(200); 12957 ins_cost(200);
13330 expand %{ 12958 expand %{
13331 fcmovD_regS(cmp,flags,dst,src); 12959 fcmovDPR_regS(cmp,flags,dst,src);
13332 %} 12960 %}
13333 %} 12961 %}
13334 12962
13335 // Compare 2 longs and CMOVE doubles 12963 // Compare 2 longs and CMOVE doubles
// cmovDD_reg_LTGE (left column: previous name cmovXDD_reg_LTGE).
// CMoveD on regD operands driven by a flagsReg_long_LTGE result; expands to
// fcmovD_regS.  Intended only for UseSSE>=2 with an lt or ge test.
// The two BoolTest checks are parenthesised because && binds tighter than
// ||: written flat, the ge case was accepted regardless of UseSSE.
13336 instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{ 12964 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
13337 predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) ); 12965 predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13338 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 12966 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13339 ins_cost(200); 12967 ins_cost(200);
13340 expand %{ 12968 expand %{
13341 fcmovXD_regS(cmp,flags,dst,src); 12969 fcmovD_regS(cmp,flags,dst,src);
13342 %} 12970 %}
13343 %} 12971 %}
13344 12972
// cmovFFPR_reg_LTGE (left column: previous name cmovFF_reg_LTGE).
// CMoveF on regFPR operands driven by a flagsReg_long_LTGE result; expands
// to fcmovFPR_regS.  Intended only for UseSSE==0 with an lt or ge test.
// The two BoolTest checks are parenthesised because && binds tighter than
// ||: written flat, the ge case was accepted regardless of UseSSE.
13345 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{ 12973 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
13346 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) ); 12974 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13347 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 12975 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13348 ins_cost(200); 12976 ins_cost(200);
13349 expand %{ 12977 expand %{
13350 fcmovF_regS(cmp,flags,dst,src); 12978 fcmovFPR_regS(cmp,flags,dst,src);
13351 %} 12979 %}
13352 %} 12980 %}
13353 12981
// cmovFF_reg_LTGE (left column: previous name cmovXX_reg_LTGE).
// CMoveF on regF operands driven by a flagsReg_long_LTGE result; expands to
// fcmovF_regS.  Intended only for UseSSE>=1 with an lt or ge test.
// The two BoolTest checks are parenthesised because && binds tighter than
// ||: written flat, the ge case was accepted regardless of UseSSE.
13354 instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{ 12982 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
13355 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) ); 12983 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13356 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 12984 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13357 ins_cost(200); 12985 ins_cost(200);
13358 expand %{ 12986 expand %{
13359 fcmovX_regS(cmp,flags,dst,src); 12987 fcmovF_regS(cmp,flags,dst,src);
13360 %} 12988 %}
13361 %} 12989 %}
13362 12990
13363 //====== 12991 //======
13364 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares. 12992 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
13449 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 13077 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13450 ins_pipe( pipe_cmov_reg ); 13078 ins_pipe( pipe_cmov_reg );
13451 %} 13079 %}
13452 13080
13453 // Compare 2 longs and CMOVE doubles 13081 // Compare 2 longs and CMOVE doubles
// cmovDDPR_reg_EQNE (left column: previous name cmovDD_reg_EQNE).
// CMoveD on regDPR operands driven by a flagsReg_long_EQNE result; expands
// to fcmovDPR_regS.  Intended only for UseSSE<=1 with an eq or ne test.
// The two BoolTest checks are parenthesised because && binds tighter than
// ||: written flat, the ne case was accepted regardless of UseSSE.
13454 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{ 13082 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
13455 predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) ); 13083 predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13456 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 13084 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13457 ins_cost(200); 13085 ins_cost(200);
13458 expand %{ 13086 expand %{
13459 fcmovD_regS(cmp,flags,dst,src); 13087 fcmovDPR_regS(cmp,flags,dst,src);
13460 %} 13088 %}
13461 %} 13089 %}
13462 13090
13463 // Compare 2 longs and CMOVE doubles 13091 // Compare 2 longs and CMOVE doubles
// Diff rows below: old line number + old text, then new line number + new text.
// Rename in this changeset: cmovXDD_reg_EQNE -> cmovDD_reg_EQNE, operand class
// regXD -> regD, expanded rule fcmovXD_regS -> fcmovD_regS. Predicate, match
// tree and ins_cost are identical on both sides.
// Rule: matches Set dst (CMoveD (Binary cmp flags) (Binary dst src)) with a
// flagsReg_long_EQNE flags operand and expands to fcmovD_regS(cmp,flags,dst,src).
// NOTE(review): under C++ precedence the predicate parses as
// (UseSSE>=2 && test==eq) || test==ne, so the UseSSE guard does not cover the
// ne case. Presumably UseSSE>=2 && (eq || ne) was intended -- confirm.
13464 instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{ 13092 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13465 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 13093 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13466 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 13094 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13467 ins_cost(200); 13095 ins_cost(200);
13468 expand %{ 13096 expand %{
13469 fcmovXD_regS(cmp,flags,dst,src); 13097 fcmovD_regS(cmp,flags,dst,src);
13470 %} 13098 %}
13471 %} 13099 %}
13472 13100
// Diff rows below: old line number + old text, then new line number + new text.
// Rename in this changeset: cmovFF_reg_EQNE -> cmovFFPR_reg_EQNE, operand class
// regF -> regFPR, expanded rule fcmovF_regS -> fcmovFPR_regS. Predicate, match
// tree and ins_cost are identical on both sides.
// Rule: matches Set dst (CMoveF (Binary cmp flags) (Binary dst src)) with a
// flagsReg_long_EQNE flags operand and expands to fcmovFPR_regS(cmp,flags,dst,src).
// NOTE(review): under C++ precedence the predicate parses as
// (UseSSE==0 && test==eq) || test==ne, so the UseSSE guard does not cover the
// ne case. Presumably UseSSE==0 && (eq || ne) was intended -- confirm.
13473 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{ 13101 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
13474 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 13102 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13475 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 13103 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13476 ins_cost(200); 13104 ins_cost(200);
13477 expand %{ 13105 expand %{
13478 fcmovF_regS(cmp,flags,dst,src); 13106 fcmovFPR_regS(cmp,flags,dst,src);
13479 %} 13107 %}
13480 %} 13108 %}
13481 13109
// Diff rows below: old line number + old text, then new line number + new text.
// Rename in this changeset: cmovXX_reg_EQNE -> cmovFF_reg_EQNE, operand class
// regX -> regF, expanded rule fcmovX_regS -> fcmovF_regS. Predicate, match tree
// and ins_cost are identical on both sides.
// Rule: matches Set dst (CMoveF (Binary cmp flags) (Binary dst src)) with a
// flagsReg_long_EQNE flags operand and expands to fcmovF_regS(cmp,flags,dst,src).
// NOTE(review): under C++ precedence the predicate parses as
// (UseSSE>=1 && test==eq) || test==ne, so the UseSSE guard does not cover the
// ne case. Presumably UseSSE>=1 && (eq || ne) was intended -- confirm.
13482 instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{ 13110 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13483 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 13111 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13484 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 13112 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13485 ins_cost(200); 13113 ins_cost(200);
13486 expand %{ 13114 expand %{
13487 fcmovX_regS(cmp,flags,dst,src); 13115 fcmovF_regS(cmp,flags,dst,src);
13488 %} 13116 %}
13489 %} 13117 %}
13490 13118
13491 //====== 13119 //======
13492 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares. 13120 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13582 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 13210 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13583 ins_pipe( pipe_cmov_reg ); 13211 ins_pipe( pipe_cmov_reg );
13584 %} 13212 %}
13585 13213
13586 // Compare 2 longs and CMOVE doubles 13214 // Compare 2 longs and CMOVE doubles
// Diff rows below: old line number + old text, then new line number + new text.
// Rename in this changeset: cmovDD_reg_LEGT -> cmovDDPR_reg_LEGT, operand class
// regD -> regDPR, expanded rule fcmovD_regS -> fcmovDPR_regS. Predicate, match
// tree and ins_cost are identical on both sides.
// Rule: matches Set dst (CMoveD (Binary cmp flags) (Binary dst src)) with a
// flagsReg_long_LEGT flags operand and a cmpOp_commute (not cmpOp) compare
// operand, and expands to fcmovDPR_regS(cmp,flags,dst,src).
// NOTE(review): under C++ precedence the predicate parses as
// (UseSSE<=1 && test==le) || test==gt, so the UseSSE guard does not cover the
// gt case. Presumably UseSSE<=1 && (le || gt) was intended -- confirm.
13587 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{ 13215 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13588 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); 13216 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
13589 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 13217 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13590 ins_cost(200); 13218 ins_cost(200);
13591 expand %{ 13219 expand %{
13592 fcmovD_regS(cmp,flags,dst,src); 13220 fcmovDPR_regS(cmp,flags,dst,src);
13593 %} 13221 %}
13594 %} 13222 %}
13595 13223
13596 // Compare 2 longs and CMOVE doubles 13224 // Compare 2 longs and CMOVE doubles
// Diff rows below: old line number + old text, then new line number + new text.
// Rename in this changeset: cmovXDD_reg_LEGT -> cmovDD_reg_LEGT, operand class
// regXD -> regD, expanded rule fcmovXD_regS -> fcmovD_regS. Predicate, match
// tree and ins_cost are identical on both sides.
// Rule: matches Set dst (CMoveD (Binary cmp flags) (Binary dst src)) with a
// flagsReg_long_LEGT flags operand and a cmpOp_commute (not cmpOp) compare
// operand, and expands to fcmovD_regS(cmp,flags,dst,src).
// NOTE(review): under C++ precedence the predicate parses as
// (UseSSE>=2 && test==le) || test==gt, so the UseSSE guard does not cover the
// gt case. Presumably UseSSE>=2 && (le || gt) was intended -- confirm.
13597 instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{ 13225 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13598 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); 13226 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
13599 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 13227 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13600 ins_cost(200); 13228 ins_cost(200);
13601 expand %{ 13229 expand %{
13602 fcmovXD_regS(cmp,flags,dst,src); 13230 fcmovD_regS(cmp,flags,dst,src);
13603 %} 13231 %}
13604 %} 13232 %}
13605 13233
// Diff rows below: old line number + old text, then new line number + new text.
// Rename in this changeset: cmovFF_reg_LEGT -> cmovFFPR_reg_LEGT, operand class
// regF -> regFPR, expanded rule fcmovF_regS -> fcmovFPR_regS. Predicate, match
// tree and ins_cost are identical on both sides.
// Rule: matches Set dst (CMoveF (Binary cmp flags) (Binary dst src)) with a
// flagsReg_long_LEGT flags operand and a cmpOp_commute (not cmpOp) compare
// operand, and expands to fcmovFPR_regS(cmp,flags,dst,src).
// NOTE(review): under C++ precedence the predicate parses as
// (UseSSE==0 && test==le) || test==gt, so the UseSSE guard does not cover the
// gt case. Presumably UseSSE==0 && (le || gt) was intended -- confirm.
13606 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{ 13234 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13607 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); 13235 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
13608 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 13236 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13609 ins_cost(200); 13237 ins_cost(200);
13610 expand %{ 13238 expand %{
13611 fcmovF_regS(cmp,flags,dst,src); 13239 fcmovFPR_regS(cmp,flags,dst,src);
13612 %} 13240 %}
13613 %} 13241 %}
13614 13242
13615 13243
// Diff rows below: old line number + old text, then new line number + new text.
// Rename in this changeset: cmovXX_reg_LEGT -> cmovFF_reg_LEGT, operand class
// regX -> regF, expanded rule fcmovX_regS -> fcmovF_regS. Predicate, match tree
// and ins_cost are identical on both sides.
// Rule: matches Set dst (CMoveF (Binary cmp flags) (Binary dst src)) with a
// flagsReg_long_LEGT flags operand and a cmpOp_commute (not cmpOp) compare
// operand, and expands to fcmovF_regS(cmp,flags,dst,src).
// NOTE(review): under C++ precedence the predicate parses as
// (UseSSE>=1 && test==le) || test==gt, so the UseSSE guard does not cover the
// gt case. Presumably UseSSE>=1 && (le || gt) was intended -- confirm.
13616 instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{ 13244 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13617 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); 13245 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
13618 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 13246 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13619 ins_cost(200); 13247 ins_cost(200);
13620 expand %{ 13248 expand %{
13621 fcmovX_regS(cmp,flags,dst,src); 13249 fcmovF_regS(cmp,flags,dst,src);
13622 %} 13250 %}
13623 %} 13251 %}
13624 13252
13625 13253
13626 // ============================================================================ 13254 // ============================================================================