[SCCP] Correct range calculation for get.vector.length to use getUnsignedMax instead of getUpper. (#176493)
getUpper returns 1 more than the maximum value included in the range.
This may be 0. We should not use this in a umin. Instead we should
get the maximum value included in the range and use that for the umin.
Then convert that to Upper for the new range by adding 1.
The test was manually reduced from a downstream failure, but I couldn't
get it to behave exactly the same way without more instructions. It should
be enough to show an incorrect range being calculated.
Fixes #176471
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index 90ee55b..0ac9941 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -2121,9 +2121,10 @@
MaxLanes.multiply(getVScaleRange(II->getFunction(), BitWidth));
// The result is always less than both Count and MaxLanes.
- ConstantRange Result(
+ ConstantRange Result = ConstantRange::getNonEmpty(
APInt::getZero(BitWidth),
- APIntOps::umin(Count.getUpper(), MaxLanes.getUpper()));
+ APIntOps::umin(Count.getUnsignedMax(), MaxLanes.getUnsignedMax()) +
+ 1);
// If Count <= MaxLanes, getvectorlength(Count, MaxLanes) = Count
if (Count.icmp(CmpInst::ICMP_ULE, MaxLanes))
diff --git a/llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll b/llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll
index d074116..002cf9b 100644
--- a/llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll
+++ b/llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll
@@ -109,7 +109,7 @@
; Can't simplify because %iv isn't guaranteed <= max lanes.
define i32 @count_le_max_lanes_scalable_unknown() {
-; CHECK-LABEL: define range(i32 0, -1) i32 @count_le_max_lanes_scalable_unknown() {
+; CHECK-LABEL: define i32 @count_le_max_lanes_scalable_unknown() {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
@@ -145,3 +145,42 @@
%res = icmp ule i32 %x, 3
ret i1 %res
}
+
+; This test case was manually reduced from a downstream failure where the
+; intrinsic call was constant folded to 4. Reproducing this exactly requires
+; very specific visitation order. The reduction here was only able to show an
+; incorrect result range of [4, 13) being calculated. The correct result range
+; must contain [4, 4097).
+define i32 @incorrect_result_range(i32 %x) vscale_range(16, 1024) {
+; CHECK-LABEL: define range(i32 0, 4097) i32 @incorrect_result_range(
+; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[OFFSET:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[OFFSET_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[OFFSET]], 3
+; CHECK-NEXT: [[LEN:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[ADD]], i32 4, i1 true)
+; CHECK-NEXT: [[OFFSET_NEXT]] = add nuw nsw i32 [[OFFSET]], 4
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[X]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret i32 [[LEN]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [0, %entry], [%iv.next, %loop]
+ %offset = phi i32 [1, %entry], [%offset.next, %loop]
+ %add = add nuw nsw i32 %offset, 3
+ %len = call i32 @llvm.experimental.get.vector.length(i32 %add, i32 4, i1 true)
+ %offset.next = add nuw nsw i32 %offset, 4
+ %iv.next = add nuw nsw i32 %iv, 4
+ %ec = icmp eq i32 %iv.next, %x
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret i32 %len
+}