| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S %s | FileCheck %s |
| |
| target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" |
| |
| ; The loop in this function performs no memory operations: a 64-bit counter |
| ; starts at 0 and the loop exits once the incremented counter, truncated to |
| ; i32, equals %tc. The exit block holds a single-entry phi of the |
| ; loop-invariant %p and ends in `unreachable`. |
| ; The assertions expect a vector loop whose index advances by 4 and a second |
| ; incoming edge (from middle.block) on the exit phi. |
| define void @test(i32 %tc, ptr %p) { |
| ; CHECK-LABEL: @test( |
| ; CHECK-NEXT: br label [[FOR_BODY_LR_PH_I_I_I:%.*]] |
| ; CHECK: for.body.lr.ph.i.i.i: |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TC:%.*]], -1 |
| ; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 |
| ; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 |
| ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4 |
| ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] |
| ; CHECK: vector.ph: |
| ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4 |
| ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] |
| ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] |
| ; CHECK: vector.body: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] |
| ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] |
| ; CHECK: middle.block: |
| ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] |
| ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_I_I_I:%.*]], label [[SCALAR_PH]] |
| ; CHECK: scalar.ph: |
| ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH_I_I_I]] ] |
| ; CHECK-NEXT: br label [[FOR_BODY_I_I_I:%.*]] |
| ; CHECK: for.body.i.i.i: |
| ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC_I_I_I:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] |
| ; CHECK-NEXT: br label [[FOR_INC_I_I_I]] |
| ; CHECK: for.inc.i.i.i: |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 |
| ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 |
| ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[TC]] |
| ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY_I_I_I]], label [[FOR_END_I_I_I]], !llvm.loop [[LOOP3:![0-9]+]] |
| ; CHECK: for.end.i.i.i: |
| ; CHECK-NEXT: [[LCSSA:%.*]] = phi ptr [ [[P:%.*]], [[FOR_INC_I_I_I]] ], [ [[P]], [[MIDDLE_BLOCK]] ] |
| ; CHECK-NEXT: unreachable |
| ; |
| br label %for.body.lr.ph.i.i.i |
| |
| for.body.lr.ph.i.i.i: |
| br label %for.body.i.i.i |
| |
| ; Loop header: only the induction phi lives here. |
| for.body.i.i.i: |
| %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc.i.i.i ], [ 0, %for.body.lr.ph.i.i.i ] |
| br label %for.inc.i.i.i |
| |
| ; Latch: increment the 64-bit counter and compare its low 32 bits with %tc; |
| ; the loop continues while they differ. |
| for.inc.i.i.i: |
| %indvars.iv.next = add i64 %indvars.iv, 1 |
| %lftr.wideiv = trunc i64 %indvars.iv.next to i32 |
| %exitcond = icmp ne i32 %lftr.wideiv, %tc |
| br i1 %exitcond, label %for.body.i.i.i, label %for.end.i.i.i |
| |
| ; Exit block: phi of the loop-invariant %p with the latch as its only |
| ; predecessor, followed by `unreachable`. |
| for.end.i.i.i: |
| %lcssa = phi ptr [ %p, %for.inc.i.i.i ] |
| unreachable |
| } |
| |
| ; PR16139 |
| ; The only cycle in this function (L0 branching to itself) is reached through |
| ; an indirectbr. The assertions below match the input IR exactly, i.e. the |
| ; function is expected to come out of the pass unchanged. |
| ; Presumably the referenced PR was a crash on this shape (unconfirmed here). |
| define void @test2(ptr %x) { |
| ; CHECK-LABEL: @test2( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: indirectbr ptr [[X:%.*]], [label [[L0:%.*]], label [[L1:%.*]]] |
| ; CHECK: L0: |
| ; CHECK-NEXT: br label [[L0]] |
| ; CHECK: L1: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| indirectbr ptr %x, [ label %L0, label %L1 ] |
| |
| ; Self-loop with no exit edge and no instructions other than the branch. |
| L0: |
| br label %L0 |
| |
| L1: |
| ret void |
| } |
| |
| ; This loop has different uniform instructions before and after LCSSA. |
| ; Both loop recurrences start from `add i32 3, 3`; each iteration stores a zero |
| ; byte at the current index into %p and exits once the incremented position is |
| ; a multiple of 4. The incremented position %inc46 is also used after the loop. |
| ; The assertions expect no vector loop back-edge: the stores appear as four |
| ; predicated blocks with constant conditions (true, true, false, false) and |
| ; the value used after the loop is extracted from the last active lane. |
| define void @test3(ptr %p) { |
| ; CHECK-LABEL: @test3( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[ADD41:%.*]] = add i32 3, 3 |
| ; CHECK-NEXT: [[IDXPROM4736:%.*]] = zext i32 [[ADD41]] to i64 |
| ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] |
| ; CHECK: vector.ph: |
| ; CHECK-NEXT: br label [[VECTOR_BODY1:%.*]] |
| ; CHECK: vector.body: |
| ; CHECK-NEXT: [[TMP0:%.*]] = add i32 6, 1 |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 7, 1 |
| ; CHECK-NEXT: [[TMP2:%.*]] = add i32 8, 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = add i32 9, 1 |
| ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0 |
| ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP1]], i32 1 |
| ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP2]], i32 2 |
| ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP3]], i32 3 |
| ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] |
| ; CHECK: pred.store.if: |
| ; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P:%.*]], i64 0, i64 6 |
| ; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX48]], align 1 |
| ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] |
| ; CHECK: pred.store.continue: |
| ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]] |
| ; CHECK: pred.store.if1: |
| ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P]], i64 0, i64 7 |
| ; CHECK-NEXT: store i8 0, ptr [[TMP15]], align 1 |
| ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE3]] |
| ; CHECK: pred.store.continue2: |
| ; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] |
| ; CHECK: pred.store.if3: |
| ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P]], i64 0, i64 8 |
| ; CHECK-NEXT: store i8 0, ptr [[TMP17]], align 1 |
| ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE5]] |
| ; CHECK: pred.store.continue4: |
| ; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]] |
| ; CHECK: pred.store.if5: |
| ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P]], i64 0, i64 9 |
| ; CHECK-NEXT: store i8 0, ptr [[TMP19]], align 1 |
| ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE7]] |
| ; CHECK: pred.store.continue6: |
| ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] |
| ; CHECK: middle.block: |
| ; CHECK-NEXT: [[FIRST_INACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> <i1 false, i1 false, i1 true, i1 true>, i1 false) |
| ; CHECK-NEXT: [[LAST_ACTIVE_LANE:%.*]] = sub i64 [[FIRST_INACTIVE_LANE]], 1 |
| ; CHECK-NEXT: [[INC46_LCSSA:%.*]] = extractelement <4 x i32> [[TMP11]], i64 [[LAST_ACTIVE_LANE]] |
| ; CHECK-NEXT: br label [[WHILE_END:%.*]] |
| ; CHECK: while.end: |
| ; CHECK-NEXT: [[ADD58:%.*]] = add i32 [[INC46_LCSSA]], 4 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %add41 = add i32 3, 3 |
| %idxprom4736 = zext i32 %add41 to i64 |
| br label %while.body |
| |
| while.body: |
| ; %idxprom4738 is the 64-bit index used for the store; %pos.337 is the 32-bit |
| ; position. Both enter the loop with the value computed by %add41. |
| %idxprom4738 = phi i64 [ %idxprom47, %while.body ], [ %idxprom4736, %entry ] |
| %pos.337 = phi i32 [ %inc46, %while.body ], [ %add41, %entry ] |
| %inc46 = add i32 %pos.337, 1 |
| %arrayidx48 = getelementptr inbounds [1024 x i8], ptr %p, i64 0, i64 %idxprom4738 |
| store i8 0, ptr %arrayidx48, align 1 |
| ; Leave the loop when the low two bits of the incremented position are zero. |
| %and43 = and i32 %inc46, 3 |
| %cmp44 = icmp eq i32 %and43, 0 |
| %idxprom47 = zext i32 %inc46 to i64 |
| br i1 %cmp44, label %while.end, label %while.body |
| |
| while.end: |
| ; Direct use of the in-loop value %inc46 outside the loop (no exit phi in the |
| ; input); the result itself is unused. |
| %add58 = add i32 %inc46, 4 |
| ret void |
| } |
| |
| ; Make sure LV doesn't crash on IR where some LCSSA uses are unreachable. |
| ; The loop runs %iv from 0 and leaves for %loop.exit when %iv equals 2000. |
| ; %local is used only in %bb, a block that no branch in this function targets. |
| ; The assertions expect a vector loop ending at index 2000, a scalar remainder |
| ; loop resuming from 2000, and the use in %bb rewritten to poison. |
| define i32 @pr57508(ptr %src) { |
| ; CHECK-LABEL: @pr57508( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] |
| ; CHECK: vector.ph: |
| ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] |
| ; CHECK: vector.body: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2000 |
| ; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] |
| ; CHECK: middle.block: |
| ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] |
| ; CHECK: scalar.ph: |
| ; CHECK-NEXT: br label [[LOOP:%.*]] |
| ; CHECK: loop: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 2000, [[SCALAR_PH]] ] |
| ; CHECK-NEXT: [[LOCAL:%.*]] = phi i32 [ [[LOCAL_NEXT:%.*]], [[LOOP]] ], [ 2000, [[SCALAR_PH]] ] |
| ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 |
| ; CHECK-NEXT: [[LOCAL_NEXT]] = add i32 [[LOCAL]], 1 |
| ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 2000 |
| ; CHECK-NEXT: br i1 [[EC]], label [[LOOP_EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] |
| ; CHECK: loop.exit: |
| ; CHECK-NEXT: unreachable |
| ; CHECK: bb: |
| ; CHECK-NEXT: [[LOCAL_USE:%.*]] = add i32 poison, 1 |
| ; CHECK-NEXT: ret i32 [[LOCAL_USE]] |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| ; Two counters advance in lockstep: the i64 %iv controls the exit, the i32 |
| ; %local is the value referenced from %bb. |
| %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] |
| %local = phi i32 [ %local.next, %loop ], [ 0, %entry ] |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %local.next = add i32 %local, 1 |
| ; The comparison uses %iv (the value before the increment). |
| %ec = icmp eq i64 %iv, 2000 |
| br i1 %ec, label %loop.exit, label %loop |
| |
| loop.exit: |
| unreachable |
| |
| ; No predecessors: this block is the only user of %local outside the loop. |
| bb: |
| %local.use = add i32 %local, 1 |
| ret i32 %local.use |
| } |
| |
| ; Test that exit phi extracts are inserted after the definition of the value |
| ; being extracted. This used to crash due to dominance violation when the sunk |
| ; select was generated after the extractelement for the exit phi. |
| ; The loop loads from the loop-invariant address %src, selects 0 or 2 based on |
| ; the loaded value, and stores the incremented counter to %dst; it continues |
| ; while %iv is below 2. %sel is used only by the return in %exit. |
| ; The assertions expect four predicated stores (conditions true, true, true, |
| ; false) and the select placed in middle.block immediately before the |
| ; extractelement that feeds the return. |
| define i32 @exit_phi_sunk_def(ptr noalias %src, ptr noalias %dst) { |
| ; CHECK-LABEL: @exit_phi_sunk_def( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] |
| ; CHECK: vector.ph: |
| ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] |
| ; CHECK: vector.body: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC:%.*]], align 4 |
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 |
| ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] |
| ; CHECK: pred.store.if: |
| ; CHECK-NEXT: store i32 1, ptr [[DST:%.*]], align 4 |
| ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] |
| ; CHECK: pred.store.continue: |
| ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]] |
| ; CHECK: pred.store.if1: |
| ; CHECK-NEXT: store i32 2, ptr [[DST]], align 4 |
| ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] |
| ; CHECK: pred.store.continue2: |
| ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] |
| ; CHECK: pred.store.if3: |
| ; CHECK-NEXT: store i32 3, ptr [[DST]], align 4 |
| ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] |
| ; CHECK: pred.store.continue4: |
| ; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] |
| ; CHECK: pred.store.if5: |
| ; CHECK-NEXT: store i32 4, ptr [[DST]], align 4 |
| ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] |
| ; CHECK: pred.store.continue6: |
| ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] |
| ; CHECK: middle.block: |
| ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP1]], <4 x i32> zeroinitializer, <4 x i32> splat (i32 2) |
| ; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x i32> [[SEL]], i32 0 |
| ; CHECK-NEXT: br label [[EXIT:%.*]] |
| ; CHECK: exit: |
| ; CHECK-NEXT: ret i32 [[EXT]] |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] |
| %ld = load i32, ptr %src, align 4 |
| %cmp = icmp eq i32 %ld, 0 |
| ; %sel has no user inside the loop; its only use is the return in %exit. |
| %sel = select i1 %cmp, i32 0, i32 2 |
| %iv.next = add nuw nsw i32 %iv, 1 |
| store i32 %iv.next, ptr %dst, align 4 |
| ; Continue while the pre-increment counter is 0 or 1. |
| %exit.cond = icmp ult i32 %iv, 2 |
| br i1 %exit.cond, label %loop, label %exit |
| |
| exit: |
| ; Direct use of the in-loop value %sel (no exit phi in the input). |
| ret i32 %sel |
| } |