; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph:" --version 6
; RUN: opt -passes=loop-vectorize -enable-mem-access-versioning -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck %s
; Check that we version this loop by speculating the value 1 for the symbolic
; strides. This also checks that the symbolic stride information is correctly
; propagated to the memcheck generation. Without this, the loop wouldn't
; vectorize because we couldn't determine the array bounds for the required
; memchecks.
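;
; As a rough C-level sketch (illustrative only; not necessarily the exact
; source this IR was generated from), the loop below corresponds to:
;
;   void test(int *restrict A, long AStride, int *restrict B, int BStride,
;             int *restrict C, long CStride) {
;     for (long i = 0; i < 100; i++)
;       A[i * AStride] = C[i * CStride] * B[(unsigned)((int)i * BStride)];
;   }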
define void @test(ptr noalias %A, i64 %AStride, ptr noalias %B, i32 %BStride, ptr noalias %C, i64 %CStride) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ptr noalias [[A:%.*]], i64 [[ASTRIDE:%.*]], ptr noalias [[B:%.*]], i32 [[BSTRIDE:%.*]], ptr noalias [[C:%.*]], i64 [[CSTRIDE:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
; CHECK-NEXT: [[IDENT_CHECK1:%.*]] = icmp ne i32 [[BSTRIDE]], 1
; CHECK-NEXT: [[IDENT_CHECK2:%.*]] = icmp ne i64 [[CSTRIDE]], 1
; CHECK-NEXT: [[DIFF_CHECK6:%.*]] = icmp ne i64 [[ASTRIDE]], 1
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = or i1 [[IDENT_CHECK1]], [[IDENT_CHECK2]]
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK6]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INDEX]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP5]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = mul nsw <2 x i32> [[WIDE_LOAD7]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: store <2 x i32> [[TMP8]], ptr [[TMP9]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br [[EXIT:label %.*]]
; CHECK: [[SCALAR_PH]]:
;
entry:
br label %loop
loop:
%iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
%iv.trunc = trunc i64 %iv to i32
%mul = mul i32 %iv.trunc, %BStride
%mul64 = zext i32 %mul to i64
%gep.x = getelementptr inbounds i32, ptr %B, i64 %mul64
%0 = load i32, ptr %gep.x, align 4
%mul2 = mul nsw i64 %iv, %CStride
%gep.c = getelementptr inbounds i32, ptr %C, i64 %mul2
%1 = load i32, ptr %gep.c, align 4
%mul4 = mul nsw i32 %1, %0
%mul3 = mul nsw i64 %iv, %AStride
%gep.a = getelementptr inbounds i32, ptr %A, i64 %mul3
store i32 %mul4, ptr %gep.a, align 4
%iv.next = add nuw nsw i64 %iv, 1
%ec = icmp eq i64 %iv.next, 100
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; We used to crash on this function because we removed the fptosi cast when
; replacing the symbolic stride '%conv'.
; PR18480
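;
; Rough C-level sketch of the function below (illustrative only); the symbolic
; stride being speculated is '%conv', the result of the fptosi cast:
;
;   void fn1(double *restrict x, double *restrict c, double a) {
;     int conv = (int)a;
;     for (int i = 0; i < conv + 4; i++)
;       c[i] = x[i * conv];
;   }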
define void @fn1(ptr noalias %x, ptr noalias %c, double %a) {
; CHECK-LABEL: define void @fn1(
; CHECK-SAME: ptr noalias [[X:%.*]], ptr noalias [[C:%.*]], double [[A:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CONV:%.*]] = fptosi double [[A]] to i32
; CHECK-NEXT: [[CONV2:%.*]] = add i32 [[CONV]], 4
; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV2]], 0
; CHECK-NEXT: br i1 [[CMP8]], label %[[LOOP_PREHEADER:.*]], [[EXIT:label %.*]]
; CHECK: [[LOOP_PREHEADER]]:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[CONV2]] to i64
; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[CONV]], 1
; CHECK-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDEX]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP2]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP3]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[C]], i64 [[INDEX]]
; CHECK-NEXT: store <2 x double> [[WIDE_LOAD]], ptr [[TMP4]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
;
entry:
%conv = fptosi double %a to i32
%conv2 = add i32 %conv, 4
%cmp8 = icmp sgt i32 %conv2, 0
br i1 %cmp8, label %loop, label %exit
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%0 = trunc i64 %iv to i32
%mul = mul nsw i32 %0, %conv
%mul.ext = sext i32 %mul to i64
%gep.x = getelementptr inbounds double, ptr %x, i64 %mul.ext
%1 = load double, ptr %gep.x, align 8
%gep.c = getelementptr inbounds double, ptr %c, i64 %iv
store double %1, ptr %gep.c, align 8
%iv.next = add nuw nsw i64 %iv, 1
%iv.trunc = trunc i64 %iv.next to i32
%ec = icmp eq i32 %iv.trunc, %conv2
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Make sure we do not crash when the stride is poison.
; Test for https://github.com/llvm/llvm-project/issues/162922.
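; The induction step below is literally poison ('%iv.next = add nuw nsw i64 %iv, poison'),
; so stride speculation must bail out gracefully instead of crashing.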
define void @stride_poison(ptr %dst) mustprogress {
; CHECK-LABEL: define void @stride_poison(
; CHECK-SAME: ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 poison, i64 1)
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 99, [[UMAX]]
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[N_VEC]], poison
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], poison
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], poison
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], poison
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]]
; CHECK-NEXT: store i8 0, ptr [[TMP5]], align 1
; CHECK-NEXT: store i8 0, ptr [[TMP6]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.dst = getelementptr i8, ptr %dst, i64 %iv
store i8 0, ptr %gep.dst, align 1
%iv.next = add nuw nsw i64 %iv, poison
%ec = icmp samesign ult i64 %iv, 100
br i1 %ec, label %loop, label %exit
exit:
ret void
}