[AVX-512] Split the 3 different usages of the X86ISD::FSETCC opcode into 3 different opcodes.

It turns out isel is really not robust against having different type profiles for the same opcode. It turns out that if you put an illegal rounding mode(i.e. not CUR_DIRECTION or NO_EXC) on a comiss intrinsic we would generate the FSETCC form with the rounding mode added, but then pattern match to an instruction with ROUND_CUR_DIRECTION.

We can probably get away with just one FSETCCM opcode that always contains the rounding mode and explicitly put ROUND_CUR_DIRECTION in the pattern, but I'll leave that for future work.

With this change the clang tests for the comiss intrinsics that used an incorrect rounding mode of 3 properly fail isel instead of silently doing the wrong thing. Those clang tests will be fixed in a follow up commit and I also plan to add rounding mode checking to clang.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@282055 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 19e010a..9477f4d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -16014,8 +16014,8 @@
 
     if (SSECC != 8) {
       if (Subtarget.hasAVX512()) {
-        SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CondOp0, CondOp1,
-                                  DAG.getConstant(SSECC, DL, MVT::i8));
+        SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::i1, CondOp0,
+                                  CondOp1, DAG.getConstant(SSECC, DL, MVT::i8));
         return DAG.getNode(X86ISD::SELECT, DL, VT, Cmp, Op1, Op2);
       }
 
@@ -17541,7 +17541,8 @@
   // The mask should be of type MVT::i1
   SDValue IMask = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Mask);
 
-  if (Op.getOpcode() == X86ISD::FSETCC)
+  if (Op.getOpcode() == X86ISD::FSETCCM ||
+      Op.getOpcode() == X86ISD::FSETCCM_RND)
     return DAG.getNode(ISD::AND, dl, VT, Op, IMask);
   if (Op.getOpcode() == X86ISD::VFPCLASS ||
       Op.getOpcode() == X86ISD::VFPCLASSS)
@@ -18108,10 +18109,10 @@
       SDValue FCmp;
       if (cast<ConstantSDNode>(Sae)->getZExtValue() ==
           X86::STATIC_ROUNDING::CUR_DIRECTION)
-        FCmp = DAG.getNode(X86ISD::FSETCC, dl, MVT::i1, LHS, RHS,
+        FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::i1, LHS, RHS,
                                   DAG.getConstant(CondVal, dl, MVT::i8));
       else
-        FCmp = DAG.getNode(X86ISD::FSETCC, dl, MVT::i1, LHS, RHS,
+        FCmp = DAG.getNode(X86ISD::FSETCCM_RND, dl, MVT::i1, LHS, RHS,
                                   DAG.getConstant(CondVal, dl, MVT::i8), Sae);
       // AnyExt just uses KMOVW %kreg, %r32; ZeroExt emits "and $1, %reg"
       return DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, FCmp);
@@ -22437,6 +22438,8 @@
   case X86ISD::SETCC:              return "X86ISD::SETCC";
   case X86ISD::SETCC_CARRY:        return "X86ISD::SETCC_CARRY";
   case X86ISD::FSETCC:             return "X86ISD::FSETCC";
+  case X86ISD::FSETCCM:            return "X86ISD::FSETCCM";
+  case X86ISD::FSETCCM_RND:        return "X86ISD::FSETCCM_RND";
   case X86ISD::CMOV:               return "X86ISD::CMOV";
   case X86ISD::BRCOND:             return "X86ISD::BRCOND";
   case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
@@ -28483,7 +28486,7 @@
           // See X86ATTInstPrinter.cpp:printSSECC().
           unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
           if (Subtarget.hasAVX512()) {
-            SDValue FSetCC = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CMP00,
+            SDValue FSetCC = DAG.getNode(X86ISD::FSETCCM, DL, MVT::i1, CMP00,
                                          CMP01,
                                          DAG.getConstant(x86cc, DL, MVT::i8));
             if (N->getValueType(0) != MVT::i1)
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 3750281..ab32487 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -106,6 +106,10 @@
       /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
       FSETCC,
 
+      /// X86 FP SETCC, similar to above, but with output as an i1 mask and
+      /// with optional rounding mode.
+      FSETCCM, FSETCCM_RND,
+
       /// X86 conditional moves. Operand 0 and operand 1 are the two values
       /// to select from. Operand 2 is the condition code, and operand 3 is the
       /// flag operand produced by a CMP or TEST instruction. It also writes a
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index f8001fb..17c7e35 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -199,8 +199,8 @@
 def X86cmpm     : SDNode<"X86ISD::CMPM",     X86CmpMaskCC>;
 def X86cmpmRnd  : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>;
 def X86cmpmu    : SDNode<"X86ISD::CMPMU",    X86CmpMaskCC>;
-def X86cmpms    : SDNode<"X86ISD::FSETCC",   X86CmpMaskCCScalar>;
-def X86cmpmsRnd : SDNode<"X86ISD::FSETCC",   X86CmpMaskCCScalarRound>;
+def X86cmpms    : SDNode<"X86ISD::FSETCCM",   X86CmpMaskCCScalar>;
+def X86cmpmsRnd : SDNode<"X86ISD::FSETCCM_RND",   X86CmpMaskCCScalarRound>;
 
 def X86vshl    : SDNode<"X86ISD::VSHL",
                         SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index 6ab562c..ac0e1e8 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -436,10 +436,10 @@
   X86_INTRINSIC_DATA(avx512_mask_cmp_q_128,  CMP_MASK_CC, X86ISD::CMPM, 0),
   X86_INTRINSIC_DATA(avx512_mask_cmp_q_256,  CMP_MASK_CC, X86ISD::CMPM, 0),
   X86_INTRINSIC_DATA(avx512_mask_cmp_q_512,  CMP_MASK_CC, X86ISD::CMPM, 0),
-  X86_INTRINSIC_DATA(avx512_mask_cmp_sd,     CMP_MASK_SCALAR_CC, X86ISD::FSETCC,
-                     X86ISD::FSETCC),
-  X86_INTRINSIC_DATA(avx512_mask_cmp_ss,     CMP_MASK_SCALAR_CC, X86ISD::FSETCC,
-                     X86ISD::FSETCC),
+  X86_INTRINSIC_DATA(avx512_mask_cmp_sd,     CMP_MASK_SCALAR_CC,
+                     X86ISD::FSETCCM, X86ISD::FSETCCM_RND),
+  X86_INTRINSIC_DATA(avx512_mask_cmp_ss,     CMP_MASK_SCALAR_CC,
+                     X86ISD::FSETCCM, X86ISD::FSETCCM_RND),
   X86_INTRINSIC_DATA(avx512_mask_cmp_w_128,  CMP_MASK_CC, X86ISD::CMPM, 0),
   X86_INTRINSIC_DATA(avx512_mask_cmp_w_256,  CMP_MASK_CC, X86ISD::CMPM, 0),
   X86_INTRINSIC_DATA(avx512_mask_cmp_w_512,  CMP_MASK_CC, X86ISD::CMPM, 0),