[InstCombine] Try to narrow expr if trunc cannot be removed.

Narrowing an input expression of a truncate to a type larger than the
result of the truncate won't allow removing the truncate, but it may
enable further optimizations, e.g. allowing for larger vectorization
factors.

For now this is intentionally limited to integer types only, to avoid
producing new vector ops that might not be suitable for the target.

If we know that the only user is a trunc, we can also be allow more
cases, e.g. also shortening expressions with some additional shifts.

I would appreciate feedback on the best place to do such a narrowing.

This fixes PR43580.

Reviewers: spatel, RKSimon, lebedev.ri, xbolva00

Reviewed By: lebedev.ri

Differential Revision: https://reviews.llvm.org/D82973
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 7b3c503..3639edb 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -715,6 +715,25 @@
     return replaceInstUsesWith(Trunc, Res);
   }
 
+  // For integer types, check if we can shorten the entire input expression to
+  // DestWidth * 2, which won't allow removing the truncate, but reducing the
+  // width may enable further optimizations, e.g. allowing for larger
+  // vectorization factors.
+  if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
+    if (DestWidth * 2 < SrcWidth) {
+      auto *NewDestTy = DestITy->getExtendedType();
+      if (shouldChangeType(SrcTy, NewDestTy) &&
+          canEvaluateTruncated(Src, NewDestTy, *this, &Trunc)) {
+        LLVM_DEBUG(
+            dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+                      " to reduce the width of operand of"
+                   << Trunc << '\n');
+        Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
+        return new TruncInst(Res, DestTy);
+      }
+    }
+  }
+
   // Test if the trunc is the user of a select which is part of a
   // minimum or maximum operation. If so, don't do any more simplification.
   // Even simplifying demanded bits can break the canonical form of a
diff --git a/llvm/test/Transforms/InstCombine/trunc-binop-ext.ll b/llvm/test/Transforms/InstCombine/trunc-binop-ext.ll
index dfc4160..5701531 100644
--- a/llvm/test/Transforms/InstCombine/trunc-binop-ext.ll
+++ b/llvm/test/Transforms/InstCombine/trunc-binop-ext.ll
@@ -318,11 +318,11 @@
 ; Test cases for PR43580
 define i8 @narrow_zext_ashr_keep_trunc(i8 %i1, i8 %i2) {
 ; CHECK-LABEL: @narrow_zext_ashr_keep_trunc(
-; CHECK-NEXT:    [[I1_EXT:%.*]] = sext i8 [[I1:%.*]] to i32
-; CHECK-NEXT:    [[I2_EXT:%.*]] = sext i8 [[I2:%.*]] to i32
-; CHECK-NEXT:    [[SUB:%.*]] = add nsw i32 [[I1_EXT]], [[I2_EXT]]
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[SUB]], 1
-; CHECK-NEXT:    [[T:%.*]] = trunc i32 [[TMP1]] to i8
+; CHECK-NEXT:    [[I1_EXT:%.*]] = sext i8 [[I1:%.*]] to i16
+; CHECK-NEXT:    [[I2_EXT:%.*]] = sext i8 [[I2:%.*]] to i16
+; CHECK-NEXT:    [[SUB:%.*]] = add nsw i16 [[I1_EXT]], [[I2_EXT]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i16 [[SUB]], 1
+; CHECK-NEXT:    [[T:%.*]] = trunc i16 [[TMP1]] to i8
 ; CHECK-NEXT:    ret i8 [[T]]
 ;
   %i1.ext = sext i8 %i1 to i32
@@ -335,11 +335,11 @@
 
 define i8 @narrow_zext_ashr_keep_trunc2(i9 %i1, i9 %i2) {
 ; CHECK-LABEL: @narrow_zext_ashr_keep_trunc2(
-; CHECK-NEXT:    [[I1_EXT1:%.*]] = zext i9 [[I1:%.*]] to i64
-; CHECK-NEXT:    [[I2_EXT2:%.*]] = zext i9 [[I2:%.*]] to i64
-; CHECK-NEXT:    [[SUB:%.*]] = add nuw nsw i64 [[I1_EXT1]], [[I2_EXT2]]
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[SUB]], 1
-; CHECK-NEXT:    [[T:%.*]] = trunc i64 [[TMP1]] to i8
+; CHECK-NEXT:    [[I1_EXT1:%.*]] = zext i9 [[I1:%.*]] to i16
+; CHECK-NEXT:    [[I2_EXT2:%.*]] = zext i9 [[I2:%.*]] to i16
+; CHECK-NEXT:    [[SUB:%.*]] = add nuw nsw i16 [[I1_EXT1]], [[I2_EXT2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i16 [[SUB]], 1
+; CHECK-NEXT:    [[T:%.*]] = trunc i16 [[TMP1]] to i8
 ; CHECK-NEXT:    ret i8 [[T]]
 ;
   %i1.ext = sext i9 %i1 to i64
@@ -352,11 +352,11 @@
 
 define i7 @narrow_zext_ashr_keep_trunc3(i8 %i1, i8 %i2) {
 ; CHECK-LABEL: @narrow_zext_ashr_keep_trunc3(
-; CHECK-NEXT:    [[I1_EXT1:%.*]] = zext i8 [[I1:%.*]] to i64
-; CHECK-NEXT:    [[I2_EXT2:%.*]] = zext i8 [[I2:%.*]] to i64
-; CHECK-NEXT:    [[SUB:%.*]] = add nuw nsw i64 [[I1_EXT1]], [[I2_EXT2]]
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[SUB]], 1
-; CHECK-NEXT:    [[T:%.*]] = trunc i64 [[TMP1]] to i7
+; CHECK-NEXT:    [[I1_EXT1:%.*]] = zext i8 [[I1:%.*]] to i14
+; CHECK-NEXT:    [[I2_EXT2:%.*]] = zext i8 [[I2:%.*]] to i14
+; CHECK-NEXT:    [[SUB:%.*]] = add nuw nsw i14 [[I1_EXT1]], [[I2_EXT2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i14 [[SUB]], 1
+; CHECK-NEXT:    [[T:%.*]] = trunc i14 [[TMP1]] to i7
 ; CHECK-NEXT:    ret i7 [[T]]
 ;
   %i1.ext = sext i8 %i1 to i64