[X86] Auto upgrade old style VPCOM/VPCOMU intrinsics to generic integer comparisons

We were upgrading these to the new style VPCOM/VPCOMU intrinsics (which includes the condition code immediate), but we'll be getting rid of those shortly, so convert these to generics first.

This causes a couple of changes in the upgrade tests as signed/unsigned eq/ne are equivalent and we constant fold true/false codes, these changes are the same as what we already do for avx512 cmp/ucmp.

Noticed while cleaning up vector integer comparison costs for PR40376.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@351690 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp
index 321c8c3..b28e9f5 100644
--- a/lib/IR/AutoUpgrade.cpp
+++ b/lib/IR/AutoUpgrade.cpp
@@ -1052,6 +1052,45 @@
   return Res;
 }
 
+static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
+                              bool IsSigned) {
+  Type *Ty = CI.getType();
+  Value *LHS = CI.getArgOperand(0);
+  Value *RHS = CI.getArgOperand(1);
+
+  CmpInst::Predicate Pred;
+  switch (Imm) {
+  case 0x0:
+    Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+    break;
+  case 0x1:
+    Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+    break;
+  case 0x2:
+    Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+    break;
+  case 0x3:
+    Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
+    break;
+  case 0x4:
+    Pred = ICmpInst::ICMP_EQ;
+    break;
+  case 0x5:
+    Pred = ICmpInst::ICMP_NE;
+    break;
+  case 0x6:
+    return Constant::getNullValue(Ty); // FALSE
+  case 0x7:
+    return Constant::getAllOnesValue(Ty); // TRUE
+  default:
+    llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
+  }
+
+  Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
+  Value *Ext = Builder.CreateSExt(Cmp, Ty);
+  return Ext;
+}
+
 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
                                     bool IsShiftRight, bool ZeroMask) {
   Type *Ty = CI.getType();
@@ -1989,23 +2028,13 @@
                                                 ResultTy);
       Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
     } else if (IsX86 && Name.startswith("xop.vpcom")) {
-      Intrinsic::ID intID;
-      if (Name.endswith("ub"))
-        intID = Intrinsic::x86_xop_vpcomub;
-      else if (Name.endswith("uw"))
-        intID = Intrinsic::x86_xop_vpcomuw;
-      else if (Name.endswith("ud"))
-        intID = Intrinsic::x86_xop_vpcomud;
-      else if (Name.endswith("uq"))
-        intID = Intrinsic::x86_xop_vpcomuq;
-      else if (Name.endswith("b"))
-        intID = Intrinsic::x86_xop_vpcomb;
-      else if (Name.endswith("w"))
-        intID = Intrinsic::x86_xop_vpcomw;
-      else if (Name.endswith("d"))
-        intID = Intrinsic::x86_xop_vpcomd;
-      else if (Name.endswith("q"))
-        intID = Intrinsic::x86_xop_vpcomq;
+      bool IsSigned;
+      if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
+          Name.endswith("uq"))
+        IsSigned = false;
+      else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
+               Name.endswith("q"))
+        IsSigned = true;
       else
         llvm_unreachable("Unknown suffix");
 
@@ -2029,11 +2058,7 @@
         Imm = 7;
       else
         llvm_unreachable("Unknown condition");
-
-      Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
-      Rep =
-          Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
-                                     Builder.getInt8(Imm)});
+      Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
     } else if (IsX86 && Name.startswith("xop.vpcmov")) {
       Value *Sel = CI->getArgOperand(2);
       Value *NotSel = Builder.CreateNot(Sel);
diff --git a/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll b/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll
index 0f34836..a9f237a 100644
--- a/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll
+++ b/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll
@@ -129,7 +129,7 @@
 define <16 x i8> @test_int_x86_xop_vpcomequb(<16 x i8> %a0, <16 x i8> %a1) {
 ; CHECK-LABEL: test_int_x86_xop_vpcomequb:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomequb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpcomeqb %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8> %a0, <16 x i8> %a1) ;
   ret <16 x i8> %res
@@ -139,7 +139,7 @@
 define <4 x i32> @test_int_x86_xop_vpcomequd(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: test_int_x86_xop_vpcomequd:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomequd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpcomeqd %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32> %a0, <4 x i32> %a1) ;
   ret <4 x i32> %res
@@ -149,7 +149,7 @@
 define <2 x i64> @test_int_x86_xop_vpcomequq(<2 x i64> %a0, <2 x i64> %a1) {
 ; CHECK-LABEL: test_int_x86_xop_vpcomequq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomequq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpcomeqq %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64> %a0, <2 x i64> %a1) ;
   ret <2 x i64> %res
@@ -159,7 +159,7 @@
 define <8 x i16> @test_int_x86_xop_vpcomequw(<8 x i16> %a0, <8 x i16> %a1) {
 ; CHECK-LABEL: test_int_x86_xop_vpcomequw:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomequw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpcomeqw %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a0, <8 x i16> %a1) ;
   ret <8 x i16> %res
@@ -169,7 +169,7 @@
 define <16 x i8> @test_int_x86_xop_vpcomfalseb(<16 x i8> %a0, <16 x i8> %a1) {
 ; CHECK-LABEL: test_int_x86_xop_vpcomfalseb:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomfalseb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a0, <16 x i8> %a1) ;
   ret <16 x i8> %res
@@ -179,7 +179,7 @@
 define <4 x i32> @test_int_x86_xop_vpcomfalsed(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: test_int_x86_xop_vpcomfalsed:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomfalsed %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32> %a0, <4 x i32> %a1) ;
   ret <4 x i32> %res
@@ -189,7 +189,7 @@
 define <2 x i64> @test_int_x86_xop_vpcomfalseq(<2 x i64> %a0, <2 x i64> %a1) {
 ; CHECK-LABEL: test_int_x86_xop_vpcomfalseq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomfalseq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64> %a0, <2 x i64> %a1) ;
   ret <2 x i64> %res
@@ -199,7 +199,7 @@
 define <16 x i8> @test_int_x86_xop_vpcomfalseub(<16 x i8> %a0, <16 x i8> %a1) {
 ; CHECK-LABEL: test_int_x86_xop_vpcomfalseub:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomfalseub %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a0, <16 x i8> %a1) ;
   ret <16 x i8> %res
@@ -209,7 +209,7 @@
 define <4 x i32> @test_int_x86_xop_vpcomfalseud(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: test_int_x86_xop_vpcomfalseud:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomfalseud %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32> %a0, <4 x i32> %a1) ;
   ret <4 x i32> %res
@@ -219,7 +219,7 @@
 define <2 x i64> @test_int_x86_xop_vpcomfalseuq(<2 x i64> %a0, <2 x i64> %a1) {
 ; CHECK-LABEL: test_int_x86_xop_vpcomfalseuq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomfalseuq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64> %a0, <2 x i64> %a1) ;
   ret <2 x i64> %res
@@ -229,7 +229,7 @@
 define <8 x i16> @test_int_x86_xop_vpcomfalseuw(<8 x i16> %a0, <8 x i16> %a1) {
 ; CHECK-LABEL: test_int_x86_xop_vpcomfalseuw:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomfalseuw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16> %a0, <8 x i16> %a1) ;
   ret <8 x i16> %res
@@ -239,7 +239,7 @@
 define <8 x i16> @test_int_x86_xop_vpcomfalsew(<8 x i16> %a0, <8 x i16> %a1) {
 ; CHECK-LABEL: test_int_x86_xop_vpcomfalsew:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomfalsew %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16> %a0, <8 x i16> %a1) ;
   ret <8 x i16> %res
@@ -599,7 +599,7 @@
 define <16 x i8> @test_int_x86_xop_vpcomneub(<16 x i8> %a0, <16 x i8> %a1) {
 ; CHECK-LABEL: test_int_x86_xop_vpcomneub:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomnequb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpcomneqb %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8> %a0, <16 x i8> %a1) ;
   ret <16 x i8> %res
@@ -609,7 +609,7 @@
 define <4 x i32> @test_int_x86_xop_vpcomneud(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: test_int_x86_xop_vpcomneud:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomnequd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpcomneqd %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32> %a0, <4 x i32> %a1) ;
   ret <4 x i32> %res
@@ -619,7 +619,7 @@
 define <2 x i64> @test_int_x86_xop_vpcomneuq(<2 x i64> %a0, <2 x i64> %a1) {
 ; CHECK-LABEL: test_int_x86_xop_vpcomneuq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomnequq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpcomneqq %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64> %a0, <2 x i64> %a1) ;
   ret <2 x i64> %res
@@ -629,7 +629,7 @@
 define <8 x i16> @test_int_x86_xop_vpcomneuw(<8 x i16> %a0, <8 x i16> %a1) {
 ; CHECK-LABEL: test_int_x86_xop_vpcomneuw:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomnequw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpcomneqw %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a0, <8 x i16> %a1) ;
   ret <8 x i16> %res
@@ -649,7 +649,7 @@
 define <16 x i8> @test_int_x86_xop_vpcomtrueb(<16 x i8> %a0, <16 x i8> %a1) {
 ; CHECK-LABEL: test_int_x86_xop_vpcomtrueb:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomtrueb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a0, <16 x i8> %a1) ;
   ret <16 x i8> %res
@@ -659,7 +659,7 @@
 define <4 x i32> @test_int_x86_xop_vpcomtrued(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: test_int_x86_xop_vpcomtrued:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomtrued %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32> %a0, <4 x i32> %a1) ;
   ret <4 x i32> %res
@@ -669,7 +669,7 @@
 define <2 x i64> @test_int_x86_xop_vpcomtrueq(<2 x i64> %a0, <2 x i64> %a1) {
 ; CHECK-LABEL: test_int_x86_xop_vpcomtrueq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomtrueq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64> %a0, <2 x i64> %a1) ;
   ret <2 x i64> %res
@@ -679,7 +679,7 @@
 define <16 x i8> @test_int_x86_xop_vpcomtrueub(<16 x i8> %a0, <16 x i8> %a1) {
 ; CHECK-LABEL: test_int_x86_xop_vpcomtrueub:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomtrueub %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a0, <16 x i8> %a1) ;
   ret <16 x i8> %res
@@ -689,7 +689,7 @@
 define <4 x i32> @test_int_x86_xop_vpcomtrueud(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: test_int_x86_xop_vpcomtrueud:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomtrueud %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32> %a0, <4 x i32> %a1) ;
   ret <4 x i32> %res
@@ -699,7 +699,7 @@
 define <2 x i64> @test_int_x86_xop_vpcomtrueuq(<2 x i64> %a0, <2 x i64> %a1) {
 ; CHECK-LABEL: test_int_x86_xop_vpcomtrueuq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomtrueuq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64> %a0, <2 x i64> %a1) ;
   ret <2 x i64> %res
@@ -709,7 +709,7 @@
 define <8 x i16> @test_int_x86_xop_vpcomtrueuw(<8 x i16> %a0, <8 x i16> %a1) {
 ; CHECK-LABEL: test_int_x86_xop_vpcomtrueuw:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomtrueuw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16> %a0, <8 x i16> %a1) ;
   ret <8 x i16> %res
@@ -719,7 +719,7 @@
 define <8 x i16> @test_int_x86_xop_vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) {
 ; CHECK-LABEL: test_int_x86_xop_vpcomtruew:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpcomtruew %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) ;
   ret <8 x i16> %res