[X86] Directly emit VPTERNLOG from canonicalizeBitSelect when possible.

This seems to produce better results on some rotate tests and is
neutral for the other tests.
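
For reference only (this block is not part of the patch): the VPTERNLOG
immediate is an 8-bit truth table, where each result bit is
imm[(a << 2) | (b << 1) | c] for the corresponding bits a/b/c of the three
sources. A minimal scalar model of that encoding, with ternlog as an
illustrative helper name:

  #include <cassert>
  #include <cstdint>

  // Scalar model of VPTERNLOG: at each bit position the three source bits
  // form a 3-bit index (A=bit2, B=bit1, C=bit0) into the truth table Imm.
  uint64_t ternlog(uint64_t A, uint64_t B, uint64_t C, uint8_t Imm) {
    uint64_t R = 0;
    for (int I = 0; I < 64; ++I) {
      unsigned Idx =
          (((A >> I) & 1) << 2) | (((B >> I) & 1) << 1) | ((C >> I) & 1);
      R |= (uint64_t)((Imm >> Idx) & 1) << I;
    }
    return R;
  }

  int main() {
    uint64_t M = 0x00FF00FF00FF00FF, X = 0x0123456789ABCDEF, Y = ~X;
    // 0xCA encodes A ? B : C, i.e. (A & B) | (~A & C) -- the and/andn/or
    // bit-select pattern that canonicalizeBitSelect matches.
    assert(ternlog(M, X, Y, 0xCA) == ((M & X) | (~M & Y)));
    return 0;
  }

In the updated tests below, $216 (0xD8) is the commuted form C ? B : A of the
same select; reordering VPTERNLOG's sources just permutes the truth-table bits.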
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0b84635..1146b61 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -42782,6 +42782,16 @@
   }
 
   SDLoc DL(N);
+
+  if (UseVPTERNLOG) {
+    // Emit a VPTERNLOG node directly - 0xCA is the imm code for A?B:C.
+    SDValue A = DAG.getBitcast(VT, N0.getOperand(1));
+    SDValue B = DAG.getBitcast(VT, N0.getOperand(0));
+    SDValue C = DAG.getBitcast(VT, N1.getOperand(0));
+    SDValue Imm = DAG.getTargetConstant(0xCA, DL, MVT::i8);
+    return DAG.getNode(X86ISD::VPTERNLOG, DL, VT, A, B, C, Imm);
+  }
+
   SDValue X = N->getOperand(0);
   SDValue Y =
       DAG.getNode(X86ISD::ANDNP, DL, VT, DAG.getBitcast(VT, N0.getOperand(1)),
diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
index 1807646..2980a16 100644
--- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll
+++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
@@ -1786,12 +1786,10 @@
 define <32 x i8> @splatconstant_rotate_mask_v32i8(<32 x i8> %a) nounwind "min-legal-vector-width"="256" {
 ; CHECK-LABEL: splatconstant_rotate_mask_v32i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovdqa {{.*#+}} ymm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; CHECK-NEXT:    vpsllw $4, %ymm0, %ymm2
-; CHECK-NEXT:    vpand %ymm1, %ymm2, %ymm2
+; CHECK-NEXT:    vpsllw $4, %ymm0, %ymm1
 ; CHECK-NEXT:    vpsrlw $4, %ymm0, %ymm0
-; CHECK-NEXT:    vpandn %ymm0, %ymm1, %ymm0
-; CHECK-NEXT:    vpternlogq $168, {{.*}}(%rip), %ymm2, %ymm0
+; CHECK-NEXT:    vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0
+; CHECK-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
 ; CHECK-NEXT:    retq
   %shl = shl <32 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
   %lshr = lshr <32 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll
index d27d398..31fe575 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-256.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll
@@ -1802,11 +1802,9 @@
 ; AVX512VL-LABEL: splatconstant_rotate_mask_v32i8:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm1
-; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT:    vpand %ymm2, %ymm1, %ymm1
 ; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpandn %ymm0, %ymm2, %ymm0
-; AVX512VL-NEXT:    vpternlogq $168, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT:    vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: splatconstant_rotate_mask_v32i8: