; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
; blob: f5b4577cbfe0e1795976707b6214054a24864b50
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-target-supports-masked-memory-ops \
; RUN: -tail-folding-policy=must-fold-tail \
; RUN: -force-tail-folding-style=data-without-lane-mask -S %s | FileCheck %s
define void @tc_17_without_runtime_check(ptr noalias %A, ptr noalias %B) {
; CHECK-LABEL: define void @tc_17_without_runtime_check(
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 16)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i32 [[INDEX]]
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VEC_IND]], ptr align 4 [[TMP1]], <4 x i1> [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 17)
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i32 [[INDEX]]
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP2]], ptr align 4 [[TMP3]], <4 x i1> [[TMP0]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%gep.A = getelementptr i32, ptr %A, i32 %iv
store i32 %iv, ptr %gep.A
%v = add i32 %iv, 17
%q = getelementptr i32, ptr %B, i32 %iv
store i32 %v, ptr %q
%iv.next = add i32 %iv, 1
%ec = icmp eq i32 %iv.next, 17
br i1 %ec, label %exit, label %loop
exit:
ret void
}
define void @tc_17_with_runtime_check(ptr %A, ptr %B) {
; CHECK-LABEL: define void @tc_17_with_runtime_check(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A2:%.*]] = ptrtoaddr ptr [[A]] to i64
; CHECK-NEXT: [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[B1]], [[A2]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 16)
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A]], i32 [[INDEX]]
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VEC_IND]], ptr align 4 [[TMP2]], <4 x i1> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 17)
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[B]], i32 [[INDEX]]
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP3]], ptr align 4 [[TMP4]], <4 x i1> [[TMP1]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i32 [[IV]]
; CHECK-NEXT: store i32 [[IV]], ptr [[GEP_A]], align 4
; CHECK-NEXT: [[V:%.*]] = add i32 [[IV]], 17
; CHECK-NEXT: [[Q:%.*]] = getelementptr i32, ptr [[B]], i32 [[IV]]
; CHECK-NEXT: store i32 [[V]], ptr [[Q]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 17
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%gep.A = getelementptr i32, ptr %A, i32 %iv
store i32 %iv, ptr %gep.A
%v = add i32 %iv, 17
%q = getelementptr i32, ptr %B, i32 %iv
store i32 %v, ptr %q
%iv.next = add i32 %iv, 1
%ec = icmp eq i32 %iv.next, 17
br i1 %ec, label %exit, label %loop
exit:
ret void
}
define void @tc_20_without_runtime_checks(ptr noalias %A, ptr noalias %B) {
; CHECK-LABEL: define void @tc_20_without_runtime_checks(
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i32 [[INDEX]]
; CHECK-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP0]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 17)
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[B]], i32 [[INDEX]]
; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP2]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20
; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%gep.A = getelementptr i32, ptr %A, i32 %iv
store i32 %iv, ptr %gep.A
%v = add i32 %iv, 17
%q = getelementptr i32, ptr %B, i32 %iv
store i32 %v, ptr %q
%iv.next = add i32 %iv, 1
%ec = icmp eq i32 %iv.next, 20
br i1 %ec, label %exit, label %loop
exit:
ret void
}
define void @tc_20_with_runtime_checks(ptr %A, ptr %B) {
; CHECK-LABEL: define void @tc_20_with_runtime_checks(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A2:%.*]] = ptrtoaddr ptr [[A]] to i64
; CHECK-NEXT: [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[B1]], [[A2]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i32 [[INDEX]]
; CHECK-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 17)
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i32 [[INDEX]]
; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i32 [[IV]]
; CHECK-NEXT: store i32 [[IV]], ptr [[GEP_A]], align 4
; CHECK-NEXT: [[V:%.*]] = add i32 [[IV]], 17
; CHECK-NEXT: [[Q:%.*]] = getelementptr i32, ptr [[B]], i32 [[IV]]
; CHECK-NEXT: store i32 [[V]], ptr [[Q]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 20
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%gep.A = getelementptr i32, ptr %A, i32 %iv
store i32 %iv, ptr %gep.A
%v = add i32 %iv, 17
%q = getelementptr i32, ptr %B, i32 %iv
store i32 %v, ptr %q
%iv.next = add i32 %iv, 1
%ec = icmp eq i32 %iv.next, 20
br i1 %ec, label %exit, label %loop
exit:
ret void
}
define void @tc_23_without_runtime_checks(ptr noalias %A, ptr noalias %B) {
; CHECK-LABEL: define void @tc_23_without_runtime_checks(
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 22)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i32 [[INDEX]]
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VEC_IND]], ptr align 4 [[TMP1]], <4 x i1> [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 17)
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i32 [[INDEX]]
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP2]], ptr align 4 [[TMP3]], <4 x i1> [[TMP0]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 24
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%gep.A = getelementptr i32, ptr %A, i32 %iv
store i32 %iv, ptr %gep.A
%v = add i32 %iv, 17
%q = getelementptr i32, ptr %B, i32 %iv
store i32 %v, ptr %q
%iv.next = add i32 %iv, 1
%ec = icmp eq i32 %iv.next, 23
br i1 %ec, label %exit, label %loop
exit:
ret void
}
define void @tc_23_with_runtime_checks(ptr %A, ptr %B) {
; CHECK-LABEL: define void @tc_23_with_runtime_checks(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A2:%.*]] = ptrtoaddr ptr [[A]] to i64
; CHECK-NEXT: [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[B1]], [[A2]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 22)
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A]], i32 [[INDEX]]
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VEC_IND]], ptr align 4 [[TMP2]], <4 x i1> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 17)
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[B]], i32 [[INDEX]]
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP3]], ptr align 4 [[TMP4]], <4 x i1> [[TMP1]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 24
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i32 [[IV]]
; CHECK-NEXT: store i32 [[IV]], ptr [[GEP_A]], align 4
; CHECK-NEXT: [[V:%.*]] = add i32 [[IV]], 17
; CHECK-NEXT: [[Q:%.*]] = getelementptr i32, ptr [[B]], i32 [[IV]]
; CHECK-NEXT: store i32 [[V]], ptr [[Q]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 23
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%gep.A = getelementptr i32, ptr %A, i32 %iv
store i32 %iv, ptr %gep.A
%v = add i32 %iv, 17
%q = getelementptr i32, ptr %B, i32 %iv
store i32 %v, ptr %q
%iv.next = add i32 %iv, 1
%ec = icmp eq i32 %iv.next, 23
br i1 %ec, label %exit, label %loop
exit:
ret void
}