[DAGCombine] Simplify funnel shifts with undef/zero args to bitshifts

Now that we have SimplifyDemandedBits support for funnel shifts (rL353539), we need to simplify funnel shifts back to plain bitshifts in cases where either operand has been folded to undef or zero: since the undef/zero half contributes no bits to the result, fshl/fshr reduces to a single shl or lshr, with the constant shift amount adjusted to BitWidth - C where the surviving operand sits on the far side of the concatenation.
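
A quick sanity check of the constant-amount folds (a standalone C sketch, not part of the patch; fshl32/fshr32 are hand-rolled reference implementations of the intrinsic semantics for BitWidth == 32):

  #include <assert.h>
  #include <stdint.h>

  /* fshl concatenates hi:lo and returns the high 32 bits after
     shifting left by c (c taken modulo the bit width). */
  static uint32_t fshl32(uint32_t hi, uint32_t lo, unsigned c) {
    c &= 31;
    return c ? (hi << c) | (lo >> (32 - c)) : hi;
  }

  /* fshr concatenates hi:lo and returns the low 32 bits after
     shifting right by c (c taken modulo the bit width). */
  static uint32_t fshr32(uint32_t hi, uint32_t lo, unsigned c) {
    c &= 31;
    return c ? (hi << (32 - c)) | (lo >> c) : lo;
  }

  int main(void) {
    uint32_t x = 0x12345678u;
    assert(fshl32(0, x, 9) == x >> 23); /* fshl(zero, N1, C) -> lshr(N1, BW-C) */
    assert(fshr32(0, x, 9) == x >> 9);  /* fshr(zero, N1, C) -> lshr(N1, C)    */
    assert(fshl32(x, 0, 9) == x << 9);  /* fshl(N0, zero, C) -> shl(N0, C)     */
    assert(fshr32(x, 0, 9) == x << 23); /* fshr(N0, zero, C) -> shl(N0, BW-C)  */
    return 0;
  }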

Differential Revision: https://reviews.llvm.org/D58009

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@353645 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 96b6aec..be867b2 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7127,13 +7127,52 @@
             N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
       return IsFSHL ? N0 : N1;
 
-  // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
+  auto IsUndefOrZero = [](SDValue V) {
+    if (V.isUndef())
+      return true;
+    if (ConstantSDNode *Cst = isConstOrConstSplat(V, /*AllowUndefs*/true))
+      return Cst->getAPIntValue() == 0;
+    return false;
+  };
+
   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
+    EVT ShAmtTy = N2.getValueType();
+
+    // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
     if (Cst->getAPIntValue().uge(BitWidth)) {
       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
-                         DAG.getConstant(RotAmt, SDLoc(N), N2.getValueType()));
+                         DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
     }
+
+    unsigned ShAmt = Cst->getZExtValue();
+    if (ShAmt == 0)
+      return IsFSHL ? N0 : N1;
+
+    // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
+    // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
+    // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
+    // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
+    if (IsUndefOrZero(N0))
+      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
+                         DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
+                                         SDLoc(N), ShAmtTy));
+    if (IsUndefOrZero(N1))
+      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
+                         DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
+                                         SDLoc(N), ShAmtTy));
+  }
+
+  // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
+  // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
+  // iff we know the shift amount is in range.
+  // TODO: when is it worth doing SUB(BW, N2) as well?
+  if (isPowerOf2_32(BitWidth)) {
+    APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
+    if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
+      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
+    if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
+      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
   }
 
   // fold (fshl N0, N0, N2) -> (rotl N0, N2)
diff --git a/test/CodeGen/X86/funnel-shift.ll b/test/CodeGen/X86/funnel-shift.ll
index 531885d..517880f 100644
--- a/test/CodeGen/X86/funnel-shift.ll
+++ b/test/CodeGen/X86/funnel-shift.ll
@@ -404,12 +404,13 @@
 ; X32-SSE2-LABEL: fshl_i32_undef0_cst:
 ; X32-SSE2:       # %bb.0:
 ; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT:    shldl $9, %eax, %eax
+; X32-SSE2-NEXT:    shrl $23, %eax
 ; X32-SSE2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: fshl_i32_undef0_cst:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    shldl $9, %edi, %eax
+; X64-AVX2-NEXT:    movl %edi, %eax
+; X64-AVX2-NEXT:    shrl $23, %eax
 ; X64-AVX2-NEXT:    retq
   %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 9)
   ret i32 %res
@@ -438,19 +439,18 @@
 ; X32-SSE2-LABEL: fshl_i32_undef1_msk:
 ; X32-SSE2:       # %bb.0:
 ; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT:    andl $7, %ecx
-; X32-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X32-SSE2-NEXT:    shldl %cl, %eax, %eax
+; X32-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X32-SSE2-NEXT:    andb $7, %cl
+; X32-SSE2-NEXT:    shll %cl, %eax
 ; X32-SSE2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: fshl_i32_undef1_msk:
 ; X64-AVX2:       # %bb.0:
 ; X64-AVX2-NEXT:    movl %esi, %ecx
 ; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    andl $7, %ecx
+; X64-AVX2-NEXT:    andb $7, %cl
 ; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shldl %cl, %eax, %eax
+; X64-AVX2-NEXT:    shll %cl, %eax
 ; X64-AVX2-NEXT:    retq
   %m = and i32 %a1, 7
   %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 %m)
@@ -461,13 +461,13 @@
 ; X32-SSE2-LABEL: fshl_i32_undef1_cst:
 ; X32-SSE2:       # %bb.0:
 ; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT:    shldl $9, %eax, %eax
+; X32-SSE2-NEXT:    shll $9, %eax
 ; X32-SSE2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: fshl_i32_undef1_cst:
 ; X64-AVX2:       # %bb.0:
 ; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    shldl $9, %eax, %eax
+; X64-AVX2-NEXT:    shll $9, %eax
 ; X64-AVX2-NEXT:    retq
   %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 9)
   ret i32 %res
@@ -513,19 +513,18 @@
 ; X32-SSE2-LABEL: fshr_i32_undef0_msk:
 ; X32-SSE2:       # %bb.0:
 ; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT:    andl $7, %ecx
-; X32-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X32-SSE2-NEXT:    shrdl %cl, %eax, %eax
+; X32-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X32-SSE2-NEXT:    andb $7, %cl
+; X32-SSE2-NEXT:    shrl %cl, %eax
 ; X32-SSE2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: fshr_i32_undef0_msk:
 ; X64-AVX2:       # %bb.0:
 ; X64-AVX2-NEXT:    movl %esi, %ecx
 ; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    andl $7, %ecx
+; X64-AVX2-NEXT:    andb $7, %cl
 ; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shrdl %cl, %eax, %eax
+; X64-AVX2-NEXT:    shrl %cl, %eax
 ; X64-AVX2-NEXT:    retq
   %m = and i32 %a1, 7
   %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 %m)
@@ -536,13 +535,13 @@
 ; X32-SSE2-LABEL: fshr_i32_undef0_cst:
 ; X32-SSE2:       # %bb.0:
 ; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT:    shrdl $9, %eax, %eax
+; X32-SSE2-NEXT:    shrl $9, %eax
 ; X32-SSE2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: fshr_i32_undef0_cst:
 ; X64-AVX2:       # %bb.0:
 ; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    shrdl $9, %eax, %eax
+; X64-AVX2-NEXT:    shrl $9, %eax
 ; X64-AVX2-NEXT:    retq
   %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 9)
   ret i32 %res
@@ -592,12 +591,13 @@
 ; X32-SSE2-LABEL: fshr_i32_undef1_cst:
 ; X32-SSE2:       # %bb.0:
 ; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT:    shrdl $9, %eax, %eax
+; X32-SSE2-NEXT:    shll $23, %eax
 ; X32-SSE2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: fshr_i32_undef1_cst:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    shrdl $9, %edi, %eax
+; X64-AVX2-NEXT:    movl %edi, %eax
+; X64-AVX2-NEXT:    shll $23, %eax
 ; X64-AVX2-NEXT:    retq
   %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 9)
   ret i32 %res
@@ -645,15 +645,14 @@
 define i32 @fshl_i32_zero0_cst(i32 %a0) nounwind {
 ; X32-SSE2-LABEL: fshl_i32_zero0_cst:
 ; X32-SSE2:       # %bb.0:
-; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT:    xorl %eax, %eax
-; X32-SSE2-NEXT:    shldl $9, %ecx, %eax
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    shrl $23, %eax
 ; X32-SSE2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: fshl_i32_zero0_cst:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    xorl %eax, %eax
-; X64-AVX2-NEXT:    shldl $9, %edi, %eax
+; X64-AVX2-NEXT:    movl %edi, %eax
+; X64-AVX2-NEXT:    shrl $23, %eax
 ; X64-AVX2-NEXT:    retq
   %res = call i32 @llvm.fshl.i32(i32 0, i32 %a0, i32 9)
   ret i32 %res
@@ -683,15 +682,14 @@
 define i32 @fshl_i32_zero1_cst(i32 %a0) nounwind {
 ; X32-SSE2-LABEL: fshl_i32_zero1_cst:
 ; X32-SSE2:       # %bb.0:
-; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT:    xorl %eax, %eax
-; X32-SSE2-NEXT:    shrdl $23, %ecx, %eax
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    shll $9, %eax
 ; X32-SSE2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: fshl_i32_zero1_cst:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    xorl %eax, %eax
-; X64-AVX2-NEXT:    shrdl $23, %edi, %eax
+; X64-AVX2-NEXT:    movl %edi, %eax
+; X64-AVX2-NEXT:    shll $9, %eax
 ; X64-AVX2-NEXT:    retq
   %res = call i32 @llvm.fshl.i32(i32 %a0, i32 0, i32 9)
   ret i32 %res
@@ -721,15 +719,14 @@
 define i32 @fshr_i32_zero0_cst(i32 %a0) nounwind {
 ; X32-SSE2-LABEL: fshr_i32_zero0_cst:
 ; X32-SSE2:       # %bb.0:
-; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT:    xorl %eax, %eax
-; X32-SSE2-NEXT:    shldl $23, %ecx, %eax
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    shrl $9, %eax
 ; X32-SSE2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: fshr_i32_zero0_cst:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    xorl %eax, %eax
-; X64-AVX2-NEXT:    shldl $23, %edi, %eax
+; X64-AVX2-NEXT:    movl %edi, %eax
+; X64-AVX2-NEXT:    shrl $9, %eax
 ; X64-AVX2-NEXT:    retq
   %res = call i32 @llvm.fshr.i32(i32 0, i32 %a0, i32 9)
   ret i32 %res
@@ -758,15 +755,14 @@
 define i32 @fshr_i32_zero1_cst(i32 %a0) nounwind {
 ; X32-SSE2-LABEL: fshr_i32_zero1_cst:
 ; X32-SSE2:       # %bb.0:
-; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT:    xorl %eax, %eax
-; X32-SSE2-NEXT:    shrdl $9, %ecx, %eax
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    shll $23, %eax
 ; X32-SSE2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: fshr_i32_zero1_cst:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    xorl %eax, %eax
-; X64-AVX2-NEXT:    shrdl $9, %edi, %eax
+; X64-AVX2-NEXT:    movl %edi, %eax
+; X64-AVX2-NEXT:    shll $23, %eax
 ; X64-AVX2-NEXT:    retq
   %res = call i32 @llvm.fshr.i32(i32 %a0, i32 0, i32 9)
   ret i32 %res