diff src/cpu/x86/vm/x86_64.ad @ 4759:127b3692c168

7116452: Add support for AVX instructions Summary: Added support for AVX extension to the x86 instruction set. Reviewed-by: never
author kvn
date Wed, 14 Dec 2011 14:54:38 -0800
parents db2e64ca2d5a
children 65149e74c706
line wrap: on
line diff
--- a/src/cpu/x86/vm/x86_64.ad	Tue Dec 13 17:10:52 2011 -0800
+++ b/src/cpu/x86/vm/x86_64.ad	Wed Dec 14 14:54:38 2011 -0800
@@ -552,7 +552,7 @@
 #define __ _masm.
 
 static int preserve_SP_size() {
-  return LP64_ONLY(1 +) 2;  // [rex,] op, rm(reg/reg)
+  return 3;  // rex.w, op, rm(reg/reg)
 }
 
 // !!!!! Special hack to get all types of calls to specify the byte offset
@@ -797,48 +797,35 @@
   }
 }
 
-void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
-{
-  if (dstenc != srcenc) {
-    if (dstenc < 8) {
-      if (srcenc >= 8) {
-        emit_opcode(cbuf, Assembler::REX_B);
-        srcenc -= 8;
-      }
-    } else {
-      if (srcenc < 8) {
-        emit_opcode(cbuf, Assembler::REX_R);
-      } else {
-        emit_opcode(cbuf, Assembler::REX_RB);
-        srcenc -= 8;
-      }
-      dstenc -= 8;
-    }
-
-    emit_opcode(cbuf, 0x8B);
-    emit_rm(cbuf, 0x3, dstenc, srcenc);
-  }
-}
-
-void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
-  if( dst_encoding == src_encoding ) {
-    // reg-reg copy, use an empty encoding
-  } else {
-    MacroAssembler _masm(&cbuf);
-
-    __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
-  }
-}
-
 // This could be in MacroAssembler but it's fairly C2 specific
 void emit_cmpfp_fixup(MacroAssembler& _masm) {
   Label exit;
   __ jccb(Assembler::noParity, exit);
   __ pushf();
+  //
+  // comiss/ucomiss instructions set ZF,PF,CF flags and
+  // zero OF,AF,SF for NaN values.
+  // Fixup flags by zeroing ZF,PF so that compare of NaN
+  // values returns 'less than' result (CF is set).
+  // Leave the rest of flags unchanged.
+  //
+  //    7 6 5 4 3 2 1 0
+  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
+  //    0 0 1 0 1 0 1 1   (0x2B)
+  //
   __ andq(Address(rsp, 0), 0xffffff2b);
   __ popf();
   __ bind(exit);
-  __ nop(); // (target for branch to avoid branch to branch)
+}
+
+void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
+  Label done;
+  __ movl(dst, -1);
+  __ jcc(Assembler::parity, done);
+  __ jcc(Assembler::below, done);
+  __ setb(Assembler::notEqual, dst);
+  __ movzbl(dst, dst);
+  __ bind(done);
 }
 
 
@@ -1274,16 +1261,8 @@
         // 64-bit
         int offset = ra_->reg2offset(src_first);
         if (cbuf) {
-          emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
-          if (Matcher::_regEncode[dst_first] >= 8) {
-            emit_opcode(*cbuf, Assembler::REX_R);
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
-          encode_RegMem(*cbuf,
-                        Matcher::_regEncode[dst_first],
-                        RSP_enc, 0x4, 0, offset,
-                        false);
+          MacroAssembler _masm(cbuf);
+          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("%s  %s, [rsp + #%d]\t# spill",
@@ -1294,25 +1273,17 @@
         }
         return
           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
-          ((Matcher::_regEncode[dst_first] < 8)
-           ? 5
-           : 6); // REX
+          ((Matcher::_regEncode[dst_first] >= 8)
+           ? 6
+           : (5 + ((UseAVX>0)?1:0))); // REX
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         int offset = ra_->reg2offset(src_first);
         if (cbuf) {
-          emit_opcode(*cbuf, 0xF3);
-          if (Matcher::_regEncode[dst_first] >= 8) {
-            emit_opcode(*cbuf, Assembler::REX_R);
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, 0x10);
-          encode_RegMem(*cbuf,
-                        Matcher::_regEncode[dst_first],
-                        RSP_enc, 0x4, 0, offset,
-                        false);
+          MacroAssembler _masm(cbuf);
+          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movss   %s, [rsp + #%d]\t# spill",
@@ -1322,9 +1293,9 @@
         }
         return
           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
-          ((Matcher::_regEncode[dst_first] < 8)
-           ? 5
-           : 6); // REX
+          ((Matcher::_regEncode[dst_first] >= 8)
+           ? 6
+           : (5 + ((UseAVX>0)?1:0))); // REX
       }
     }
   } else if (src_first_rc == rc_int) {
@@ -1450,25 +1421,8 @@
           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
         // 64-bit
         if (cbuf) {
-          emit_opcode(*cbuf, 0x66);
-          if (Matcher::_regEncode[dst_first] < 8) {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_W);
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_WB);
-            }
-          } else {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_WR);
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_WRB);
-            }
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, 0x6E);
-          emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[dst_first] & 7,
-                  Matcher::_regEncode[src_first] & 7);
+          MacroAssembler _masm(cbuf);
+          __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movdq   %s, %s\t# spill",
@@ -1482,23 +1436,8 @@
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         if (cbuf) {
-          emit_opcode(*cbuf, 0x66);
-          if (Matcher::_regEncode[dst_first] < 8) {
-            if (Matcher::_regEncode[src_first] >= 8) {
-              emit_opcode(*cbuf, Assembler::REX_B);
-            }
-          } else {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_R);
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_RB);
-            }
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, 0x6E);
-          emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[dst_first] & 7,
-                  Matcher::_regEncode[src_first] & 7);
+          MacroAssembler _masm(cbuf);
+          __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movdl   %s, %s\t# spill",
@@ -1507,9 +1446,9 @@
 #endif
         }
         return
-          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
-          ? 4
-          : 5; // REX
+          (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
+          ? 5
+          : (4 + ((UseAVX>0)?1:0)); // REX
       }
     }
   } else if (src_first_rc == rc_float) {
@@ -1521,16 +1460,8 @@
         // 64-bit
         int offset = ra_->reg2offset(dst_first);
         if (cbuf) {
-          emit_opcode(*cbuf, 0xF2);
-          if (Matcher::_regEncode[src_first] >= 8) {
-              emit_opcode(*cbuf, Assembler::REX_R);
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, 0x11);
-          encode_RegMem(*cbuf,
-                        Matcher::_regEncode[src_first],
-                        RSP_enc, 0x4, 0, offset,
-                        false);
+          MacroAssembler _masm(cbuf);
+          __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movsd   [rsp + #%d], %s\t# spill",
@@ -1540,25 +1471,17 @@
         }
         return
           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
-          ((Matcher::_regEncode[src_first] < 8)
-           ? 5
-           : 6); // REX
+          ((Matcher::_regEncode[src_first] >= 8)
+           ? 6
+           : (5 + ((UseAVX>0)?1:0))); // REX
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         int offset = ra_->reg2offset(dst_first);
         if (cbuf) {
-          emit_opcode(*cbuf, 0xF3);
-          if (Matcher::_regEncode[src_first] >= 8) {
-              emit_opcode(*cbuf, Assembler::REX_R);
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, 0x11);
-          encode_RegMem(*cbuf,
-                        Matcher::_regEncode[src_first],
-                        RSP_enc, 0x4, 0, offset,
-                        false);
+          MacroAssembler _masm(cbuf);
+          __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movss   [rsp + #%d], %s\t# spill",
@@ -1568,9 +1491,9 @@
         }
         return
           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
-          ((Matcher::_regEncode[src_first] < 8)
-           ? 5
-           : 6); // REX
+          ((Matcher::_regEncode[src_first] >=8)
+           ? 6
+           : (5 + ((UseAVX>0)?1:0))); // REX
       }
     } else if (dst_first_rc == rc_int) {
       // xmm -> gpr
@@ -1578,25 +1501,8 @@
           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
         // 64-bit
         if (cbuf) {
-          emit_opcode(*cbuf, 0x66);
-          if (Matcher::_regEncode[dst_first] < 8) {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_W);
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_WR); // attention!
-            }
-          } else {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_WB); // attention!
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_WRB);
-            }
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, 0x7E);
-          emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[src_first] & 7,
-                  Matcher::_regEncode[dst_first] & 7);
+          MacroAssembler _masm(cbuf);
+          __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movdq   %s, %s\t# spill",
@@ -1610,23 +1516,8 @@
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         if (cbuf) {
-          emit_opcode(*cbuf, 0x66);
-          if (Matcher::_regEncode[dst_first] < 8) {
-            if (Matcher::_regEncode[src_first] >= 8) {
-              emit_opcode(*cbuf, Assembler::REX_R); // attention!
-            }
-          } else {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_B); // attention!
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_RB);
-            }
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, 0x7E);
-          emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[src_first] & 7,
-                  Matcher::_regEncode[dst_first] & 7);
+          MacroAssembler _masm(cbuf);
+          __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movdl   %s, %s\t# spill",
@@ -1635,9 +1526,9 @@
 #endif
         }
         return
-          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
-          ? 4
-          : 5; // REX
+          (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
+          ? 5
+          : (4 + ((UseAVX>0)?1:0)); // REX
       }
     } else if (dst_first_rc == rc_float) {
       // xmm -> xmm
@@ -1645,23 +1536,8 @@
           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
         // 64-bit
         if (cbuf) {
-          emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
-          if (Matcher::_regEncode[dst_first] < 8) {
-            if (Matcher::_regEncode[src_first] >= 8) {
-              emit_opcode(*cbuf, Assembler::REX_B);
-            }
-          } else {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_R);
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_RB);
-            }
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
-          emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[dst_first] & 7,
-                  Matcher::_regEncode[src_first] & 7);
+          MacroAssembler _masm(cbuf);
+          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("%s  %s, %s\t# spill",
@@ -1671,32 +1547,16 @@
 #endif
         }
         return
-          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
-          ? 4
-          : 5; // REX
+          (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
+          ? 5
+          : (4 + ((UseAVX>0)?1:0)); // REX
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         if (cbuf) {
-          if (!UseXmmRegToRegMoveAll)
-            emit_opcode(*cbuf, 0xF3);
-          if (Matcher::_regEncode[dst_first] < 8) {
-            if (Matcher::_regEncode[src_first] >= 8) {
-              emit_opcode(*cbuf, Assembler::REX_B);
-            }
-          } else {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_R);
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_RB);
-            }
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
-          emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[dst_first] & 7,
-                  Matcher::_regEncode[src_first] & 7);
+          MacroAssembler _masm(cbuf);
+          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("%s  %s, %s\t# spill",
@@ -1705,10 +1565,10 @@
                      Matcher::regName[src_first]);
 #endif
         }
-        return
-          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
-          ? (UseXmmRegToRegMoveAll ? 3 : 4)
-          : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
+        return ((UseAVX>0) ? 5:
+          ((Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
+           ? (UseXmmRegToRegMoveAll ? 4 : 5)
+           : (UseXmmRegToRegMoveAll ? 3 : 4))); // REX
       }
     }
   }
@@ -2205,47 +2065,6 @@
     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
   %}
 
-  enc_class cmpfp_fixup() %{
-      MacroAssembler _masm(&cbuf);
-      emit_cmpfp_fixup(_masm);
-  %}
-
-  enc_class cmpfp3(rRegI dst)
-  %{
-    int dstenc = $dst$$reg;
-
-    // movl $dst, -1
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_B);
-    }
-    emit_opcode(cbuf, 0xB8 | (dstenc & 7));
-    emit_d32(cbuf, -1);
-
-    // jp,s done
-    emit_opcode(cbuf, 0x7A);
-    emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);
-
-    // jb,s done
-    emit_opcode(cbuf, 0x72);
-    emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
-
-    // setne $dst
-    if (dstenc >= 4) {
-      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x95);
-    emit_opcode(cbuf, 0xC0 | (dstenc & 7));
-
-    // movzbl $dst, $dst
-    if (dstenc >= 4) {
-      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0xB6);
-    emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
-  %}
-
   enc_class cdql_enc(no_rax_rdx_RegI div)
   %{
     // Full implementation of Java idiv and irem; checks for
@@ -2472,55 +2291,6 @@
     emit_cc(cbuf, $secondary, $cop$$cmpcode);
   %}
 
-  enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
-  %{
-    // Invert sense of branch from sense of cmov
-    emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
-    emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
-                  ? (UseXmmRegToRegMoveAll ? 3 : 4)
-                  : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
-    // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
-    if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
-    if ($dst$$reg < 8) {
-      if ($src$$reg >= 8) {
-        emit_opcode(cbuf, Assembler::REX_B);
-      }
-    } else {
-      if ($src$$reg < 8) {
-        emit_opcode(cbuf, Assembler::REX_R);
-      } else {
-        emit_opcode(cbuf, Assembler::REX_RB);
-      }
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
-    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
-  %}
-
-  enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
-  %{
-    // Invert sense of branch from sense of cmov
-    emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
-    emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX
-
-    //  UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
-    emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
-    if ($dst$$reg < 8) {
-      if ($src$$reg >= 8) {
-        emit_opcode(cbuf, Assembler::REX_B);
-      }
-    } else {
-      if ($src$$reg < 8) {
-        emit_opcode(cbuf, Assembler::REX_R);
-      } else {
-        emit_opcode(cbuf, Assembler::REX_RB);
-      }
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
-    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
-  %}
-
   enc_class enc_PartialSubtypeCheck()
   %{
     Register Rrdi = as_Register(RDI_enc); // result register
@@ -2751,68 +2521,6 @@
     }
   %}
 
-  // Encode a reg-reg copy.  If it is useless, then empty encoding.
-  enc_class enc_copy(rRegI dst, rRegI src)
-  %{
-    encode_copy(cbuf, $dst$$reg, $src$$reg);
-  %}
-
-  // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
-  enc_class enc_CopyXD( RegD dst, RegD src ) %{
-    encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
-  %}
-
-  enc_class enc_copy_always(rRegI dst, rRegI src)
-  %{
-    int srcenc = $src$$reg;
-    int dstenc = $dst$$reg;
-
-    if (dstenc < 8) {
-      if (srcenc >= 8) {
-        emit_opcode(cbuf, Assembler::REX_B);
-        srcenc -= 8;
-      }
-    } else {
-      if (srcenc < 8) {
-        emit_opcode(cbuf, Assembler::REX_R);
-      } else {
-        emit_opcode(cbuf, Assembler::REX_RB);
-        srcenc -= 8;
-      }
-      dstenc -= 8;
-    }
-
-    emit_opcode(cbuf, 0x8B);
-    emit_rm(cbuf, 0x3, dstenc, srcenc);
-  %}
-
-  enc_class enc_copy_wide(rRegL dst, rRegL src)
-  %{
-    int srcenc = $src$$reg;
-    int dstenc = $dst$$reg;
-
-    if (dstenc != srcenc) {
-      if (dstenc < 8) {
-        if (srcenc < 8) {
-          emit_opcode(cbuf, Assembler::REX_W);
-        } else {
-          emit_opcode(cbuf, Assembler::REX_WB);
-          srcenc -= 8;
-        }
-      } else {
-        if (srcenc < 8) {
-          emit_opcode(cbuf, Assembler::REX_WR);
-        } else {
-          emit_opcode(cbuf, Assembler::REX_WRB);
-          srcenc -= 8;
-        }
-        dstenc -= 8;
-      }
-      emit_opcode(cbuf, 0x8B);
-      emit_rm(cbuf, 0x3, dstenc, srcenc);
-    }
-  %}
-
   enc_class Con32(immI src)
   %{
     // Output immediate
@@ -3212,92 +2920,19 @@
   %}
 
   enc_class Push_ResultXD(regD dst) %{
-    int dstenc = $dst$$reg;
-
-    store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
-
-    // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
-    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-    }
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
-    encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);
-
-    // add rsp,8
-    emit_opcode(cbuf, Assembler::REX_W);
-    emit_opcode(cbuf,0x83);
-    emit_rm(cbuf,0x3, 0x0, RSP_enc);
-    emit_d8(cbuf,0x08);
+    MacroAssembler _masm(&cbuf);
+    __ fstp_d(Address(rsp, 0));
+    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
+    __ addptr(rsp, 8);
   %}
 
   enc_class Push_SrcXD(regD src) %{
-    int srcenc = $src$$reg;
-
-    // subq rsp,#8
-    emit_opcode(cbuf, Assembler::REX_W);
-    emit_opcode(cbuf, 0x83);
-    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
-    emit_d8(cbuf, 0x8);
-
-    // movsd [rsp],src
-    emit_opcode(cbuf, 0xF2);
-    if (srcenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x11);
-    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);
-
-    // fldd [rsp]
-    emit_opcode(cbuf, 0x66);
-    emit_opcode(cbuf, 0xDD);
-    encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
-  %}
-
-
-  enc_class movq_ld(regD dst, memory mem) %{
     MacroAssembler _masm(&cbuf);
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-
-  enc_class movq_st(memory mem, regD src) %{
-    MacroAssembler _masm(&cbuf);
-    __ movq($mem$$Address, $src$$XMMRegister);
-  %}
-
-  enc_class pshufd_8x8(regF dst, regF src) %{
-    MacroAssembler _masm(&cbuf);
-
-    encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
-    __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
-    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
-  %}
-
-  enc_class pshufd_4x16(regF dst, regF src) %{
-    MacroAssembler _masm(&cbuf);
-
-    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
-  %}
-
-  enc_class pshufd(regD dst, regD src, int mode) %{
-    MacroAssembler _masm(&cbuf);
-
-    __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
-  %}
-
-  enc_class pxor(regD dst, regD src) %{
-    MacroAssembler _masm(&cbuf);
-
-    __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
-  %}
-
-  enc_class mov_i2x(regD dst, rRegI src) %{
-    MacroAssembler _masm(&cbuf);
-
-    __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
-  %}
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+    __ fld_d(Address(rsp, 0));
+  %}
+
 
   // obj: object to lock
   // box: box address (header location) -- killed
@@ -3534,303 +3169,6 @@
                    RELOC_DISP32);
   %}
 
-  enc_class absF_encoding(regF dst)
-  %{
-    int dstenc = $dst$$reg;
-    address signmask_address = (address) StubRoutines::x86::float_sign_mask();
-
-    cbuf.set_insts_mark();
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-      dstenc -= 8;
-    }
-    // XXX reg_mem doesn't support RIP-relative addressing yet
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x54);
-    emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
-    emit_d32_reloc(cbuf, signmask_address);
-  %}
-
-  enc_class absD_encoding(regD dst)
-  %{
-    int dstenc = $dst$$reg;
-    address signmask_address = (address) StubRoutines::x86::double_sign_mask();
-
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, 0x66);
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-      dstenc -= 8;
-    }
-    // XXX reg_mem doesn't support RIP-relative addressing yet
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x54);
-    emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
-    emit_d32_reloc(cbuf, signmask_address);
-  %}
-
-  enc_class negF_encoding(regF dst)
-  %{
-    int dstenc = $dst$$reg;
-    address signflip_address = (address) StubRoutines::x86::float_sign_flip();
-
-    cbuf.set_insts_mark();
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-      dstenc -= 8;
-    }
-    // XXX reg_mem doesn't support RIP-relative addressing yet
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x57);
-    emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
-    emit_d32_reloc(cbuf, signflip_address);
-  %}
-
-  enc_class negD_encoding(regD dst)
-  %{
-    int dstenc = $dst$$reg;
-    address signflip_address = (address) StubRoutines::x86::double_sign_flip();
-
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, 0x66);
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-      dstenc -= 8;
-    }
-    // XXX reg_mem doesn't support RIP-relative addressing yet
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x57);
-    emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
-    emit_d32_reloc(cbuf, signflip_address);
-  %}
-
-  enc_class f2i_fixup(rRegI dst, regF src)
-  %{
-    int dstenc = $dst$$reg;
-    int srcenc = $src$$reg;
-
-    // cmpl $dst, #0x80000000
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_B);
-    }
-    emit_opcode(cbuf, 0x81);
-    emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
-    emit_d32(cbuf, 0x80000000);
-
-    // jne,s done
-    emit_opcode(cbuf, 0x75);
-    if (srcenc < 8 && dstenc < 8) {
-      emit_d8(cbuf, 0xF);
-    } else if (srcenc >= 8 && dstenc >= 8) {
-      emit_d8(cbuf, 0x11);
-    } else {
-      emit_d8(cbuf, 0x10);
-    }
-
-    // subq rsp, #8
-    emit_opcode(cbuf, Assembler::REX_W);
-    emit_opcode(cbuf, 0x83);
-    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
-    emit_d8(cbuf, 8);
-
-    // movss [rsp], $src
-    emit_opcode(cbuf, 0xF3);
-    if (srcenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x11);
-    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
-
-    // call f2i_fixup
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, 0xE8);
-    emit_d32_reloc(cbuf,
-                   (int)
-                   (StubRoutines::x86::f2i_fixup() - cbuf.insts_end() - 4),
-                   runtime_call_Relocation::spec(),
-                   RELOC_DISP32);
-
-    // popq $dst
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_B);
-    }
-    emit_opcode(cbuf, 0x58 | (dstenc & 7));
-
-    // done:
-  %}
-
-  enc_class f2l_fixup(rRegL dst, regF src)
-  %{
-    int dstenc = $dst$$reg;
-    int srcenc = $src$$reg;
-    address const_address = (address) StubRoutines::x86::double_sign_flip();
-
-    // cmpq $dst, [0x8000000000000000]
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
-    emit_opcode(cbuf, 0x39);
-    // XXX reg_mem doesn't support RIP-relative addressing yet
-    emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
-    emit_d32_reloc(cbuf, const_address);
-
-
-    // jne,s done
-    emit_opcode(cbuf, 0x75);
-    if (srcenc < 8 && dstenc < 8) {
-      emit_d8(cbuf, 0xF);
-    } else if (srcenc >= 8 && dstenc >= 8) {
-      emit_d8(cbuf, 0x11);
-    } else {
-      emit_d8(cbuf, 0x10);
-    }
-
-    // subq rsp, #8
-    emit_opcode(cbuf, Assembler::REX_W);
-    emit_opcode(cbuf, 0x83);
-    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
-    emit_d8(cbuf, 8);
-
-    // movss [rsp], $src
-    emit_opcode(cbuf, 0xF3);
-    if (srcenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x11);
-    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
-
-    // call f2l_fixup
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, 0xE8);
-    emit_d32_reloc(cbuf,
-                   (int)
-                   (StubRoutines::x86::f2l_fixup() - cbuf.insts_end() - 4),
-                   runtime_call_Relocation::spec(),
-                   RELOC_DISP32);
-
-    // popq $dst
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_B);
-    }
-    emit_opcode(cbuf, 0x58 | (dstenc & 7));
-
-    // done:
-  %}
-
-  enc_class d2i_fixup(rRegI dst, regD src)
-  %{
-    int dstenc = $dst$$reg;
-    int srcenc = $src$$reg;
-
-    // cmpl $dst, #0x80000000
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_B);
-    }
-    emit_opcode(cbuf, 0x81);
-    emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
-    emit_d32(cbuf, 0x80000000);
-
-    // jne,s done
-    emit_opcode(cbuf, 0x75);
-    if (srcenc < 8 && dstenc < 8) {
-      emit_d8(cbuf, 0xF);
-    } else if (srcenc >= 8 && dstenc >= 8) {
-      emit_d8(cbuf, 0x11);
-    } else {
-      emit_d8(cbuf, 0x10);
-    }
-
-    // subq rsp, #8
-    emit_opcode(cbuf, Assembler::REX_W);
-    emit_opcode(cbuf, 0x83);
-    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
-    emit_d8(cbuf, 8);
-
-    // movsd [rsp], $src
-    emit_opcode(cbuf, 0xF2);
-    if (srcenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x11);
-    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
-
-    // call d2i_fixup
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, 0xE8);
-    emit_d32_reloc(cbuf,
-                   (int)
-                   (StubRoutines::x86::d2i_fixup() - cbuf.insts_end() - 4),
-                   runtime_call_Relocation::spec(),
-                   RELOC_DISP32);
-
-    // popq $dst
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_B);
-    }
-    emit_opcode(cbuf, 0x58 | (dstenc & 7));
-
-    // done:
-  %}
-
-  enc_class d2l_fixup(rRegL dst, regD src)
-  %{
-    int dstenc = $dst$$reg;
-    int srcenc = $src$$reg;
-    address const_address = (address) StubRoutines::x86::double_sign_flip();
-
-    // cmpq $dst, [0x8000000000000000]
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
-    emit_opcode(cbuf, 0x39);
-    // XXX reg_mem doesn't support RIP-relative addressing yet
-    emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
-    emit_d32_reloc(cbuf, const_address);
-
-
-    // jne,s done
-    emit_opcode(cbuf, 0x75);
-    if (srcenc < 8 && dstenc < 8) {
-      emit_d8(cbuf, 0xF);
-    } else if (srcenc >= 8 && dstenc >= 8) {
-      emit_d8(cbuf, 0x11);
-    } else {
-      emit_d8(cbuf, 0x10);
-    }
-
-    // subq rsp, #8
-    emit_opcode(cbuf, Assembler::REX_W);
-    emit_opcode(cbuf, 0x83);
-    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
-    emit_d8(cbuf, 8);
-
-    // movsd [rsp], $src
-    emit_opcode(cbuf, 0xF2);
-    if (srcenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x11);
-    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
-
-    // call d2l_fixup
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, 0xE8);
-    emit_d32_reloc(cbuf,
-                   (int)
-                   (StubRoutines::x86::d2l_fixup() - cbuf.insts_end() - 4),
-                   runtime_call_Relocation::spec(),
-                   RELOC_DISP32);
-
-    // popq $dst
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_B);
-    }
-    emit_opcode(cbuf, 0x58 | (dstenc & 7));
-
-    // done:
-  %}
 %}
 
 
@@ -6156,8 +5494,9 @@
 
   ins_cost(145); // XXX
   format %{ "movss   $dst, $mem\t# float" %}
-  opcode(0xF3, 0x0F, 0x10);
-  ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
+  ins_encode %{
+    __ movflt($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -6169,8 +5508,9 @@
 
   ins_cost(145); // XXX
   format %{ "movlpd  $dst, $mem\t# double" %}
-  opcode(0x66, 0x0F, 0x12);
-  ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -6181,8 +5521,9 @@
 
   ins_cost(145); // XXX
   format %{ "movsd   $dst, $mem\t# double" %}
-  opcode(0xF2, 0x0F, 0x10);
-  ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -6191,7 +5532,9 @@
   match(Set dst (Load8B mem));
   ins_cost(125);
   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -6200,7 +5543,9 @@
   match(Set dst (Load4S mem));
   ins_cost(125);
   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -6209,7 +5554,9 @@
   match(Set dst (Load4C mem));
   ins_cost(125);
   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -6218,16 +5565,20 @@
   match(Set dst (Load2I mem));
   ins_cost(125);
   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Load Aligned Packed Single to XMM
 instruct loadA2F(regD dst, memory mem) %{
   match(Set dst (Load2F mem));
-  ins_cost(145);
+  ins_cost(125);
   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -6540,8 +5891,9 @@
   ins_cost(100);
 
   format %{ "xorps   $dst, $dst\t# float 0.0" %}
-  opcode(0x0F, 0x57);
-  ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
+  ins_encode %{
+    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -6562,8 +5914,9 @@
   ins_cost(100);
 
   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
-  opcode(0x66, 0x0F, 0x57);
-  ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
+  ins_encode %{
+    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -6606,8 +5959,9 @@
 
   ins_cost(125);
   format %{ "movss   $dst, $src\t# float stk" %}
-  opcode(0xF3, 0x0F, 0x10);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -6972,7 +6326,9 @@
   match(Set mem (Store8B mem src));
   ins_cost(145);
   format %{ "MOVQ  $mem,$src\t! packed8B" %}
-  ins_encode( movq_st(mem, src));
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -6981,7 +6337,9 @@
   match(Set mem (Store4C mem src));
   ins_cost(145);
   format %{ "MOVQ  $mem,$src\t! packed4C" %}
-  ins_encode( movq_st(mem, src));
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -6990,7 +6348,9 @@
   match(Set mem (Store2I mem src));
   ins_cost(145);
   format %{ "MOVQ  $mem,$src\t! packed2I" %}
-  ins_encode( movq_st(mem, src));
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -7024,7 +6384,9 @@
   match(Set mem (Store2F mem src));
   ins_cost(145);
   format %{ "MOVQ  $mem,$src\t! packed2F" %}
-  ins_encode( movq_st(mem, src));
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -7035,8 +6397,9 @@
 
   ins_cost(95); // XXX
   format %{ "movss   $mem, $src\t# float" %}
-  opcode(0xF3, 0x0F, 0x11);
-  ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
+  ins_encode %{
+    __ movflt($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -7072,8 +6435,9 @@
 
   ins_cost(95); // XXX
   format %{ "movsd   $mem, $src\t# double" %}
-  opcode(0xF2, 0x0F, 0x11);
-  ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
+  ins_encode %{
+    __ movdbl($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -7142,8 +6506,9 @@
 
   ins_cost(95); // XXX
   format %{ "movss   $dst, $src\t# float stk" %}
-  opcode(0xF3, 0x0F, 0x11);
-  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
+  ins_encode %{
+    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -7153,8 +6518,9 @@
 
   ins_cost(95); // XXX
   format %{ "movsd   $dst, $src\t# double stk" %}
-  opcode(0xF2, 0x0F, 0x11);
-  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
+  ins_encode %{
+    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -7451,7 +6817,11 @@
   match(Set dst (CastX2P src));
 
   format %{ "movq    $dst, $src\t# long->ptr" %}
-  ins_encode(enc_copy_wide(dst, src));
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ movptr($dst$$Register, $src$$Register);
+    }
+  %}
   ins_pipe(ialu_reg_reg); // XXX
 %}
 
@@ -7460,7 +6830,11 @@
   match(Set dst (CastP2X src));
 
   format %{ "movq    $dst, $src\t# ptr -> long" %}
-  ins_encode(enc_copy_wide(dst, src));
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ movptr($dst$$Register, $src$$Register);
+    }
+  %}
   ins_pipe(ialu_reg_reg); // XXX
 %}
 
@@ -7813,7 +7187,13 @@
   format %{ "jn$cop    skip\t# signed cmove float\n\t"
             "movss     $dst, $src\n"
     "skip:" %}
-  ins_encode(enc_cmovf_branch(cop, dst, src));
+  ins_encode %{
+    Label Lskip;
+    // Invert sense of branch from sense of CMOV
+    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+    __ bind(Lskip);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -7837,7 +7217,13 @@
   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
             "movss     $dst, $src\n"
     "skip:" %}
-  ins_encode(enc_cmovf_branch(cop, dst, src));
+  ins_encode %{
+    Label Lskip;
+    // Invert sense of branch from sense of CMOV
+    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+    __ bind(Lskip);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -7857,7 +7243,13 @@
   format %{ "jn$cop    skip\t# signed cmove double\n\t"
             "movsd     $dst, $src\n"
     "skip:" %}
-  ins_encode(enc_cmovd_branch(cop, dst, src));
+  ins_encode %{
+    Label Lskip;
+    // Invert sense of branch from sense of CMOV
+    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+    __ bind(Lskip);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -7869,7 +7261,13 @@
   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
             "movsd     $dst, $src\n"
     "skip:" %}
-  ins_encode(enc_cmovd_branch(cop, dst, src));
+  ins_encode %{
+    Label Lskip;
+    // Invert sense of branch from sense of CMOV
+    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+    __ bind(Lskip);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10191,17 +9589,18 @@
             "pushfq\t# saw NaN, set CF\n\t"
             "andq    [rsp], #0xffffff2b\n\t"
             "popfq\n"
-    "exit:   nop\t# avoid branch to branch" %}
-  opcode(0x0F, 0x2E);
-  ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
-             cmpfp_fixup);
+    "exit:" %}
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
+    emit_cmpfp_fixup(_masm);
+  %}
   ins_pipe(pipe_slow);
 %}
 
 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
   match(Set cr (CmpF src1 src2));
 
-  ins_cost(145);
+  ins_cost(100);
   format %{ "ucomiss $src1, $src2" %}
   ins_encode %{
     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
@@ -10219,10 +9618,11 @@
             "pushfq\t# saw NaN, set CF\n\t"
             "andq    [rsp], #0xffffff2b\n\t"
             "popfq\n"
-    "exit:   nop\t# avoid branch to branch" %}
-  opcode(0x0F, 0x2E);
-  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
-             cmpfp_fixup);
+    "exit:" %}
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$Address);
+    emit_cmpfp_fixup(_masm);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10231,8 +9631,9 @@
 
   ins_cost(100);
   format %{ "ucomiss $src1, $src2" %}
-  opcode(0x0F, 0x2E);
-  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2));
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10245,7 +9646,7 @@
             "pushfq\t# saw NaN, set CF\n\t"
             "andq    [rsp], #0xffffff2b\n\t"
             "popfq\n"
-    "exit:   nop\t# avoid branch to branch" %}
+    "exit:" %}
   ins_encode %{
     __ ucomiss($src$$XMMRegister, $constantaddress($con));
     emit_cmpfp_fixup(_masm);
@@ -10273,10 +9674,11 @@
             "pushfq\t# saw NaN, set CF\n\t"
             "andq    [rsp], #0xffffff2b\n\t"
             "popfq\n"
-    "exit:   nop\t# avoid branch to branch" %}
-  opcode(0x66, 0x0F, 0x2E);
-  ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
-             cmpfp_fixup);
+    "exit:" %}
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
+    emit_cmpfp_fixup(_masm);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10301,10 +9703,11 @@
             "pushfq\t# saw NaN, set CF\n\t"
             "andq    [rsp], #0xffffff2b\n\t"
             "popfq\n"
-    "exit:   nop\t# avoid branch to branch" %}
-  opcode(0x66, 0x0F, 0x2E);
-  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
-             cmpfp_fixup);
+    "exit:" %}
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$Address);
+    emit_cmpfp_fixup(_masm);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10313,8 +9716,9 @@
 
   ins_cost(100);
   format %{ "ucomisd $src1, $src2" %}
-  opcode(0x66, 0x0F, 0x2E);
-  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2));
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10327,7 +9731,7 @@
             "pushfq\t# saw NaN, set CF\n\t"
             "andq    [rsp], #0xffffff2b\n\t"
             "popfq\n"
-    "exit:   nop\t# avoid branch to branch" %}
+    "exit:" %}
   ins_encode %{
     __ ucomisd($src$$XMMRegister, $constantaddress($con));
     emit_cmpfp_fixup(_masm);
@@ -10359,10 +9763,10 @@
             "setne   $dst\n\t"
             "movzbl  $dst, $dst\n"
     "done:" %}
-
-  opcode(0x0F, 0x2E);
-  ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
-             cmpfp3(dst));
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10380,10 +9784,10 @@
             "setne   $dst\n\t"
             "movzbl  $dst, $dst\n"
     "done:" %}
-
-  opcode(0x0F, 0x2E);
-  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
-             cmpfp3(dst));
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$Address);
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10401,15 +9805,8 @@
             "movzbl  $dst, $dst\n"
     "done:" %}
   ins_encode %{
-    Label L_done;
-    Register Rdst = $dst$$Register;
     __ ucomiss($src$$XMMRegister, $constantaddress($con));
-    __ movl(Rdst, -1);
-    __ jcc(Assembler::parity, L_done);
-    __ jcc(Assembler::below, L_done);
-    __ setb(Assembler::notEqual, Rdst);
-    __ movzbl(Rdst, Rdst);
-    __ bind(L_done);
+    emit_cmpfp3(_masm, $dst$$Register);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -10428,10 +9825,10 @@
             "setne   $dst\n\t"
             "movzbl  $dst, $dst\n"
     "done:" %}
-
-  opcode(0x66, 0x0F, 0x2E);
-  ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
-             cmpfp3(dst));
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10449,10 +9846,10 @@
             "setne   $dst\n\t"
             "movzbl  $dst, $dst\n"
     "done:" %}
-
-  opcode(0x66, 0x0F, 0x2E);
-  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
-             cmpfp3(dst));
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$Address);
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10470,15 +9867,8 @@
             "movzbl  $dst, $dst\n"
     "done:" %}
   ins_encode %{
-    Register Rdst = $dst$$Register;
-    Label L_done;
     __ ucomisd($src$$XMMRegister, $constantaddress($con));
-    __ movl(Rdst, -1);
-    __ jcc(Assembler::parity, L_done);
-    __ jcc(Assembler::below, L_done);
-    __ setb(Assembler::notEqual, Rdst);
-    __ movzbl(Rdst, Rdst);
-    __ bind(L_done);
+    emit_cmpfp3(_masm, $dst$$Register);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -10489,8 +9879,9 @@
 
   format %{ "addss   $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x58);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ addss($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10500,8 +9891,9 @@
 
   format %{ "addss   $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x58);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ addss($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10521,8 +9913,9 @@
 
   format %{ "addsd   $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x58);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10532,8 +9925,9 @@
 
   format %{ "addsd   $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x58);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ addsd($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10553,8 +9947,9 @@
 
   format %{ "subss   $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x5C);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ subss($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10564,8 +9959,9 @@
 
   format %{ "subss   $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x5C);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ subss($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10585,8 +9981,9 @@
 
   format %{ "subsd   $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x5C);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10596,8 +9993,9 @@
 
   format %{ "subsd   $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x5C);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ subsd($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10617,8 +10015,9 @@
 
   format %{ "mulss   $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x59);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10628,8 +10027,9 @@
 
   format %{ "mulss   $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x59);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ mulss($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10649,8 +10049,9 @@
 
   format %{ "mulsd   $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x59);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10660,8 +10061,9 @@
 
   format %{ "mulsd   $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x59);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ mulsd($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10681,8 +10083,9 @@
 
   format %{ "divss   $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x5E);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ divss($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10692,8 +10095,9 @@
 
   format %{ "divss   $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x5E);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ divss($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10713,8 +10117,9 @@
 
   format %{ "divsd   $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x5E);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10724,8 +10129,9 @@
 
   format %{ "divsd   $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x5E);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ divsd($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10745,8 +10151,9 @@
 
   format %{ "sqrtss  $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x51);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10756,8 +10163,9 @@
 
   format %{ "sqrtss  $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x51);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ sqrtss($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10777,8 +10185,9 @@
 
   format %{ "sqrtsd  $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x51);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10788,8 +10197,9 @@
 
   format %{ "sqrtsd  $dst, $src" %}
   ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x51);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ sqrtsd($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10806,38 +10216,50 @@
 instruct absF_reg(regF dst)
 %{
   match(Set dst (AbsF dst));
-
+  ins_cost(150); // XXX
   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
-  ins_encode(absF_encoding(dst));
+  ins_encode %{
+    __ andps($dst$$XMMRegister,
+             ExternalAddress((address) StubRoutines::x86::float_sign_mask()));
+  %}
   ins_pipe(pipe_slow);
 %}
 
 instruct absD_reg(regD dst)
 %{
   match(Set dst (AbsD dst));
-
+  ins_cost(150); // XXX
   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
             "# abs double by sign masking" %}
-  ins_encode(absD_encoding(dst));
+  ins_encode %{
+    __ andpd($dst$$XMMRegister,
+             ExternalAddress((address) StubRoutines::x86::double_sign_mask()));
+  %}
   ins_pipe(pipe_slow);
 %}
 
 instruct negF_reg(regF dst)
 %{
   match(Set dst (NegF dst));
-
+  ins_cost(150); // XXX
   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
-  ins_encode(negF_encoding(dst));
+  ins_encode %{
+    __ xorps($dst$$XMMRegister,
+             ExternalAddress((address) StubRoutines::x86::float_sign_flip()));
+  %}
   ins_pipe(pipe_slow);
 %}
 
 instruct negD_reg(regD dst)
 %{
   match(Set dst (NegD dst));
-
+  ins_cost(150); // XXX
   format %{ "xorpd   $dst, [0x8000000000000000]\t"
             "# neg double by sign flipping" %}
-  ins_encode(negD_encoding(dst));
+  ins_encode %{
+    __ xorpd($dst$$XMMRegister,
+             ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10929,8 +10351,9 @@
   match(Set dst (ConvF2D src));
 
   format %{ "cvtss2sd $dst, $src" %}
-  opcode(0xF3, 0x0F, 0x5A);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -10939,8 +10362,9 @@
   match(Set dst (ConvF2D (LoadF src)));
 
   format %{ "cvtss2sd $dst, $src" %}
-  opcode(0xF3, 0x0F, 0x5A);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -10949,8 +10373,9 @@
   match(Set dst (ConvD2F src));
 
   format %{ "cvtsd2ss $dst, $src" %}
-  opcode(0xF2, 0x0F, 0x5A);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -10959,8 +10384,9 @@
   match(Set dst (ConvD2F (LoadD src)));
 
   format %{ "cvtsd2ss $dst, $src" %}
-  opcode(0xF2, 0x0F, 0x5A);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -10978,9 +10404,17 @@
             "call    f2i_fixup\n\t"
             "popq    $dst\n"
     "done:   "%}
-  opcode(0xF3, 0x0F, 0x2C);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
-             f2i_fixup(dst, src));
+  ins_encode %{
+    Label done;
+    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
+    __ cmpl($dst$$Register, 0x80000000);
+    __ jccb(Assembler::notEqual, done);
+    __ subptr(rsp, 8);
+    __ movflt(Address(rsp, 0), $src$$XMMRegister);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup())));
+    __ pop($dst$$Register);
+    __ bind(done);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10997,9 +10431,18 @@
             "call    f2l_fixup\n\t"
             "popq    $dst\n"
     "done:   "%}
-  opcode(0xF3, 0x0F, 0x2C);
-  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
-             f2l_fixup(dst, src));
+  ins_encode %{
+    Label done;
+    __ cvttss2siq($dst$$Register, $src$$XMMRegister);
+    __ cmp64($dst$$Register,
+             ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
+    __ jccb(Assembler::notEqual, done);
+    __ subptr(rsp, 8);
+    __ movflt(Address(rsp, 0), $src$$XMMRegister);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup())));
+    __ pop($dst$$Register);
+    __ bind(done);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -11016,9 +10459,17 @@
             "call    d2i_fixup\n\t"
             "popq    $dst\n"
     "done:   "%}
-  opcode(0xF2, 0x0F, 0x2C);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
-             d2i_fixup(dst, src));
+  ins_encode %{
+    Label done;
+    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
+    __ cmpl($dst$$Register, 0x80000000);
+    __ jccb(Assembler::notEqual, done);
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup())));
+    __ pop($dst$$Register);
+    __ bind(done);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -11035,9 +10486,18 @@
             "call    d2l_fixup\n\t"
             "popq    $dst\n"
     "done:   "%}
-  opcode(0xF2, 0x0F, 0x2C);
-  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
-             d2l_fixup(dst, src));
+  ins_encode %{
+    Label done;
+    __ cvttsd2siq($dst$$Register, $src$$XMMRegister);
+    __ cmp64($dst$$Register,
+             ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
+    __ jccb(Assembler::notEqual, done);
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup())));
+    __ pop($dst$$Register);
+    __ bind(done);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -11047,8 +10507,9 @@
   match(Set dst (ConvI2F src));
 
   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
-  opcode(0xF3, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -11057,8 +10518,9 @@
   match(Set dst (ConvI2F (LoadI src)));
 
   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
-  opcode(0xF3, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -11068,8 +10530,9 @@
   match(Set dst (ConvI2D src));
 
   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
-  opcode(0xF2, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -11078,8 +10541,9 @@
   match(Set dst (ConvI2D (LoadI src)));
 
   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
-  opcode(0xF2, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -11116,8 +10580,9 @@
   match(Set dst (ConvL2F src));
 
   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
-  opcode(0xF3, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -11126,8 +10591,9 @@
   match(Set dst (ConvL2F (LoadL src)));
 
   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
-  opcode(0xF3, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -11136,8 +10602,9 @@
   match(Set dst (ConvL2D src));
 
   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
-  opcode(0xF2, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -11146,8 +10613,9 @@
   match(Set dst (ConvL2D (LoadL src)));
 
   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
-  opcode(0xF2, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -11186,7 +10654,11 @@
   match(Set dst (AndL (ConvI2L src) mask));
 
   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
-  ins_encode(enc_copy(dst, src));
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ movl($dst$$Register, $src$$Register);
+    }
+  %}
   ins_pipe(ialu_reg_reg);
 %}
 
@@ -11196,8 +10668,9 @@
   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
 
   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
-  opcode(0x8B);
-  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
+  ins_encode %{
+    __ movl($dst$$Register, $src$$Address);
+  %}
   ins_pipe(ialu_reg_mem);
 %}
 
@@ -11206,7 +10679,9 @@
   match(Set dst (AndL src mask));
 
   format %{ "movl    $dst, $src\t# zero-extend long" %}
-  ins_encode(enc_copy_always(dst, src));
+  ins_encode %{
+    __ movl($dst$$Register, $src$$Register);
+  %}
   ins_pipe(ialu_reg_reg);
 %}
 
@@ -11215,7 +10690,9 @@
   match(Set dst (ConvL2I src));
 
   format %{ "movl    $dst, $src\t# l2i" %}
-  ins_encode(enc_copy_always(dst, src));
+  ins_encode %{
+    __ movl($dst$$Register, $src$$Register);
+  %}
   ins_pipe(ialu_reg_reg);
 %}
 
@@ -11226,8 +10703,9 @@
 
   ins_cost(125);
   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
-  opcode(0x8B);
-  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
+  ins_encode %{
+    __ movl($dst$$Register, Address(rsp, $src$$disp));
+  %}
   ins_pipe(ialu_reg_mem);
 %}
 
@@ -11237,8 +10715,9 @@
 
   ins_cost(125);
   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
-  opcode(0xF3, 0x0F, 0x10);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -11248,8 +10727,9 @@
 
   ins_cost(125);
   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
-  opcode(0x8B);
-  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
+  ins_encode %{
+    __ movq($dst$$Register, Address(rsp, $src$$disp));
+  %}
   ins_pipe(ialu_reg_mem);
 %}
 
@@ -11260,8 +10740,9 @@
 
   ins_cost(125);
   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
-  opcode(0x66, 0x0F, 0x12);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -11272,8 +10753,9 @@
 
   ins_cost(125);
   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
-  opcode(0xF2, 0x0F, 0x10);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -11284,8 +10766,9 @@
 
   ins_cost(95); // XXX
   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
-  opcode(0xF3, 0x0F, 0x11);
-  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
+  ins_encode %{
+    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -11295,8 +10778,9 @@
 
   ins_cost(100);
   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
-  opcode(0x89);
-  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
+  ins_encode %{
+    __ movl(Address(rsp, $dst$$disp), $src$$Register);
+  %}
   ins_pipe( ialu_mem_reg );
 %}
 
@@ -11306,8 +10790,9 @@
 
   ins_cost(95); // XXX
   format %{ "movsd   $dst, $src\t# MoveL2D_reg_stack" %}
-  opcode(0xF2, 0x0F, 0x11);
-  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
+  ins_encode %{
+    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -11317,8 +10802,9 @@
 
   ins_cost(100);
   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
-  opcode(0x89);
-  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
+  ins_encode %{
+    __ movq(Address(rsp, $dst$$disp), $src$$Register);
+  %}
   ins_pipe(ialu_mem_reg);
 %}
 
@@ -11327,7 +10813,9 @@
   effect(DEF dst, USE src);
   ins_cost(85);
   format %{ "movd    $dst,$src\t# MoveF2I" %}
-  ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
+  ins_encode %{
+    __ movdl($dst$$Register, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -11336,7 +10824,9 @@
   effect(DEF dst, USE src);
   ins_cost(85);
   format %{ "movd    $dst,$src\t# MoveD2L" %}
-  ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
+  ins_encode %{
+    __ movdq($dst$$Register, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -11346,7 +10836,9 @@
   effect(DEF dst, USE src);
   ins_cost(300);
   format %{ "movd    $dst,$src\t# MoveI2F" %}
-  ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -11355,7 +10847,9 @@
   effect(DEF dst, USE src);
   ins_cost(300);
   format %{ "movd    $dst,$src\t# MoveL2D" %}
-  ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
+  ins_encode %{
+     __ movdq($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -11365,7 +10859,13 @@
   format %{ "MOVDQA  $dst,$src\n\t"
             "PUNPCKLBW $dst,$dst\n\t"
             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
-  ins_encode( pshufd_8x8(dst, src));
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
+    }
+    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -11375,7 +10875,11 @@
   format %{ "MOVD    $dst,$src\n\t"
             "PUNPCKLBW $dst,$dst\n\t"
             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
-  ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -11383,7 +10887,9 @@
 instruct Repl8B_immI0(regD dst, immI0 zero) %{
   match(Set dst (Replicate8B zero));
   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11391,7 +10897,9 @@
 instruct Repl4S_reg(regD dst, regD src) %{
   match(Set dst (Replicate4S src));
   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
-  ins_encode( pshufd_4x16(dst, src));
+  ins_encode %{
+    __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11400,7 +10908,10 @@
   match(Set dst (Replicate4S src));
   format %{ "MOVD    $dst,$src\n\t"
             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
-  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11408,7 +10919,9 @@
 instruct Repl4S_immI0(regD dst, immI0 zero) %{
   match(Set dst (Replicate4S zero));
   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11416,7 +10929,9 @@
 instruct Repl4C_reg(regD dst, regD src) %{
   match(Set dst (Replicate4C src));
   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
-  ins_encode( pshufd_4x16(dst, src));
+  ins_encode %{
+    __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11425,7 +10940,10 @@
   match(Set dst (Replicate4C src));
   format %{ "MOVD    $dst,$src\n\t"
             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
-  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11433,7 +10951,9 @@
 instruct Repl4C_immI0(regD dst, immI0 zero) %{
   match(Set dst (Replicate4C zero));
   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11441,7 +10961,9 @@
 instruct Repl2I_reg(regD dst, regD src) %{
   match(Set dst (Replicate2I src));
   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
-  ins_encode( pshufd(dst, src, 0x00));
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11450,7 +10972,10 @@
   match(Set dst (Replicate2I src));
   format %{ "MOVD   $dst,$src\n\t"
             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
-  ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11458,7 +10983,9 @@
 instruct Repl2I_immI0(regD dst, immI0 zero) %{
   match(Set dst (Replicate2I zero));
   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11466,7 +10993,9 @@
 instruct Repl2F_reg(regD dst, regD src) %{
   match(Set dst (Replicate2F src));
   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
-  ins_encode( pshufd(dst, src, 0xe0));
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11474,7 +11003,9 @@
 instruct Repl2F_regF(regD dst, regF src) %{
   match(Set dst (Replicate2F src));
   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
-  ins_encode( pshufd(dst, src, 0xe0));
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11482,7 +11013,9 @@
 instruct Repl2F_immF0(regD dst, immF0 zero) %{
   match(Set dst (Replicate2F zero));
   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}