; blob: 1e63271cf26d51f09abb6537526e200cf0a1558d [file] [log] [blame] [edit]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; @test: a loop with an i64 induction variable whose latch truncates the
; incremented value to i32 and compares it against %tc. The exit block holds
; a single-incoming phi of the loop-invariant argument %p and then ends in
; unreachable. The assertions below expect the loop to be vectorized
; (vector.body plus a scalar remainder loop) and the exit phi to gain a second
; incoming value from middle.block.
define void @test(i32 %tc, ptr %p) {
; CHECK-LABEL: @test(
; CHECK-NEXT: br label [[FOR_BODY_LR_PH_I_I_I:%.*]]
; CHECK: for.body.lr.ph.i.i.i:
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TC:%.*]], -1
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_I_I_I:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH_I_I_I]] ]
; CHECK-NEXT: br label [[FOR_BODY_I_I_I:%.*]]
; CHECK: for.body.i.i.i:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC_I_I_I:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: br label [[FOR_INC_I_I_I]]
; CHECK: for.inc.i.i.i:
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[TC]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY_I_I_I]], label [[FOR_END_I_I_I]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: for.end.i.i.i:
; CHECK-NEXT: [[LCSSA:%.*]] = phi ptr [ [[P:%.*]], [[FOR_INC_I_I_I]] ], [ [[P]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: unreachable
;
; Unnamed entry block: jumps straight to the loop preheader.
br label %for.body.lr.ph.i.i.i
for.body.lr.ph.i.i.i:
br label %for.body.i.i.i
; Loop header: carries only the i64 induction variable, starting at 0.
for.body.i.i.i:
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc.i.i.i ], [ 0, %for.body.lr.ph.i.i.i ]
br label %for.inc.i.i.i
; Latch: the loop continues while trunc(iv + 1) differs from %tc.
for.inc.i.i.i:
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp ne i32 %lftr.wideiv, %tc
br i1 %exitcond, label %for.body.i.i.i, label %for.end.i.i.i
; Exit block: single-incoming phi of the loop-invariant %p; the result is
; unused and the block terminates in unreachable.
for.end.i.i.i:
%lcssa = phi ptr [ %p, %for.inc.i.i.i ]
unreachable
}
; PR16139
; @test2: the entry block ends in an indirectbr whose possible targets are L0
; (a block that branches only to itself) and L1 (which returns). The
; assertions below expect the function to come out of the pass unchanged.
define void @test2(ptr %x) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: indirectbr ptr [[X:%.*]], [label [[L0:%.*]], label [[L1:%.*]]]
; CHECK: L0:
; CHECK-NEXT: br label [[L0]]
; CHECK: L1:
; CHECK-NEXT: ret void
;
entry:
indirectbr ptr %x, [ label %L0, label %L1 ]
; L0 is a single-block loop entered only through the indirectbr above.
L0:
br label %L0
L1:
ret void
}
; This loop has different uniform instructions before and after LCSSA.
; @test3: the loop stores a zero byte to %p indexed by an i64 index that is
; the zero-extension of an i32 counter; both start from 3 + 3. The loop exits
; once the incremented counter has its two low bits clear. %inc46 is used
; after the loop (by %add58) directly, i.e. the input has no phi for it in
; while.end. The assertions below expect a single predicated vector iteration
; in which only the first two lanes store, with the live-out value extracted
; from the last active lane in middle.block.
define void @test3(ptr %p) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD41:%.*]] = add i32 3, 3
; CHECK-NEXT: [[IDXPROM4736:%.*]] = zext i32 [[ADD41]] to i64
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY1:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 6, 1
; CHECK-NEXT: [[TMP1:%.*]] = add i32 7, 1
; CHECK-NEXT: [[TMP2:%.*]] = add i32 8, 1
; CHECK-NEXT: [[TMP3:%.*]] = add i32 9, 1
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP1]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP2]], i32 2
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP3]], i32 3
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P:%.*]], i64 0, i64 6
; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX48]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]]
; CHECK: pred.store.if1:
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P]], i64 0, i64 7
; CHECK-NEXT: store i8 0, ptr [[TMP15]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE3]]
; CHECK: pred.store.continue2:
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
; CHECK: pred.store.if3:
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P]], i64 0, i64 8
; CHECK-NEXT: store i8 0, ptr [[TMP17]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE5]]
; CHECK: pred.store.continue4:
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]]
; CHECK: pred.store.if5:
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P]], i64 0, i64 9
; CHECK-NEXT: store i8 0, ptr [[TMP19]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE7]]
; CHECK: pred.store.continue6:
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[FIRST_INACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> <i1 false, i1 false, i1 true, i1 true>, i1 false)
; CHECK-NEXT: [[LAST_ACTIVE_LANE:%.*]] = sub i64 [[FIRST_INACTIVE_LANE]], 1
; CHECK-NEXT: [[INC46_LCSSA:%.*]] = extractelement <4 x i32> [[TMP11]], i64 [[LAST_ACTIVE_LANE]]
; CHECK-NEXT: br label [[WHILE_END:%.*]]
; CHECK: while.end:
; CHECK-NEXT: [[ADD58:%.*]] = add i32 [[INC46_LCSSA]], 4
; CHECK-NEXT: ret void
;
entry:
; Start value for both recurrences: the i32 counter and its i64 zero-extension.
%add41 = add i32 3, 3
%idxprom4736 = zext i32 %add41 to i64
br label %while.body
while.body:
; %idxprom4738 is the i64 store index; %pos.337 is the i32 counter it is
; derived from (see %idxprom47 below).
%idxprom4738 = phi i64 [ %idxprom47, %while.body ], [ %idxprom4736, %entry ]
%pos.337 = phi i32 [ %inc46, %while.body ], [ %add41, %entry ]
%inc46 = add i32 %pos.337, 1
%arrayidx48 = getelementptr inbounds [1024 x i8], ptr %p, i64 0, i64 %idxprom4738
store i8 0, ptr %arrayidx48, align 1
; Exit once the incremented counter is a multiple of 4.
%and43 = and i32 %inc46, 3
%cmp44 = icmp eq i32 %and43, 0
%idxprom47 = zext i32 %inc46 to i64
br i1 %cmp44, label %while.end, label %while.body
while.end:
; Direct use of the loop-defined %inc46 outside the loop; the result is unused.
%add58 = add i32 %inc46, 4
ret void
}
; Make sure LV doesn't crash on IR where some LCSSA uses are unreachable.
; @pr57508: the loop advances an i64 induction variable and an i32 counter
; (%local) in lock step and exits to loop.exit, which ends in unreachable,
; once %iv equals 2000. The only use of %local outside the loop is in block
; bb, which no branch in the function targets. The assertions below expect
; the loop to be vectorized and the use in bb to be rewritten to poison.
define i32 @pr57508(ptr %src) {
; CHECK-LABEL: @pr57508(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2000
; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 2000, [[SCALAR_PH]] ]
; CHECK-NEXT: [[LOCAL:%.*]] = phi i32 [ [[LOCAL_NEXT:%.*]], [[LOOP]] ], [ 2000, [[SCALAR_PH]] ]
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[LOCAL_NEXT]] = add i32 [[LOCAL]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 2000
; CHECK-NEXT: br i1 [[EC]], label [[LOOP_EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: loop.exit:
; CHECK-NEXT: unreachable
; CHECK: bb:
; CHECK-NEXT: [[LOCAL_USE:%.*]] = add i32 poison, 1
; CHECK-NEXT: ret i32 [[LOCAL_USE]]
;
entry:
br label %loop
loop:
%iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
%local = phi i32 [ %local.next, %loop ], [ 0, %entry ]
%iv.next = add nuw nsw i64 %iv, 1
%local.next = add i32 %local, 1
; The exit test compares %iv (not %iv.next) against 2000.
%ec = icmp eq i64 %iv, 2000
br i1 %ec, label %loop.exit, label %loop
loop.exit:
unreachable
; bb has no predecessors, yet it uses the loop-defined value %local.
bb:
%local.use = add i32 %local, 1
ret i32 %local.use
}
; Test that exit phi extracts are inserted after the definition of the value
; being extracted. This used to crash due to dominance violation when the sunk
; select was generated after the extractelement for the exit phi.
;
; @exit_phi_sunk_def: each iteration loads from the loop-invariant address
; %src, selects 0 or 2 depending on whether the loaded value is zero, and
; stores the incremented i32 induction value to %dst. The loop continues while
; %iv is below 2, and %sel is used only by the return in the exit block. The
; assertions below expect the select to appear in middle.block, immediately
; before the extractelement that feeds the return value.
define i32 @exit_phi_sunk_def(ptr noalias %src, ptr noalias %dst) {
; CHECK-LABEL: @exit_phi_sunk_def(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: store i32 1, ptr [[DST:%.*]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
; CHECK: pred.store.if1:
; CHECK-NEXT: store i32 2, ptr [[DST]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
; CHECK: pred.store.continue2:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; CHECK: pred.store.if3:
; CHECK-NEXT: store i32 3, ptr [[DST]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
; CHECK: pred.store.continue4:
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
; CHECK: pred.store.if5:
; CHECK-NEXT: store i32 4, ptr [[DST]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.continue6:
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP1]], <4 x i32> zeroinitializer, <4 x i32> splat (i32 2)
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x i32> [[SEL]], i32 0
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret i32 [[EXT]]
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; The load address %src does not change between iterations.
%ld = load i32, ptr %src, align 4
%cmp = icmp eq i32 %ld, 0
; %sel has no user inside the loop; its only use is the ret in the exit block.
%sel = select i1 %cmp, i32 0, i32 2
%iv.next = add nuw nsw i32 %iv, 1
store i32 %iv.next, ptr %dst, align 4
; The test uses %iv (not %iv.next), so the body runs for %iv = 0, 1 and 2.
%exit.cond = icmp ult i32 %iv, 2
br i1 %exit.cond, label %loop, label %exit
exit:
ret i32 %sel
}