blob: 60f39fe3d38649f357c494f934d88b4e1fc9af64 [file] [log] [blame] [edit]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6
; This test verifies that the loop vectorizer will not vectorizes low trip count
; loops that require runtime checks (Trip count is computed with profile info).
; REQUIRES: asserts
; RUN: opt < %s -passes=loop-vectorize -loop-vectorize-with-block-frequency -S | FileCheck %s
target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S128"
@tab = common global [32 x i8] zeroinitializer, align 1
;.
; CHECK: @tab = common global [32 x i8] zeroinitializer, align 1
;.
define i32 @foo_low_trip_count1(i32 %bound) {
; Simple loop with low tripcount. Should not be vectorized.
; CHECK-LABEL: define i32 @foo_low_trip_count1(
; CHECK-SAME: i32 [[BOUND:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], [[BOUND]]
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !prof [[PROF0:![0-9]+]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret i32 0
;
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
%0 = load i8, ptr %arrayidx, align 1
%cmp1 = icmp eq i8 %0, 0
%. = select i1 %cmp1, i8 2, i8 1
store i8 %., ptr %arrayidx, align 1
%inc = add nsw i32 %i.08, 1
%exitcond = icmp eq i32 %i.08, %bound
br i1 %exitcond, label %for.end, label %for.body, !prof !1
for.end: ; preds = %for.body
ret i32 0
}
define i32 @foo_low_trip_count2(i32 %bound) !prof !0 {
; The loop has a same invocation count with the function, but has a low
; trip_count per invocation and not worth to vectorize.
; CHECK-LABEL: define i32 @foo_low_trip_count2(
; CHECK-SAME: i32 [[BOUND:%.*]]) !prof [[PROF1:![0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], [[BOUND]]
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !prof [[PROF0]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret i32 0
;
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
%0 = load i8, ptr %arrayidx, align 1
%cmp1 = icmp eq i8 %0, 0
%. = select i1 %cmp1, i8 2, i8 1
store i8 %., ptr %arrayidx, align 1
%inc = add nsw i32 %i.08, 1
%exitcond = icmp eq i32 %i.08, %bound
br i1 %exitcond, label %for.end, label %for.body, !prof !1
for.end: ; preds = %for.body
ret i32 0
}
define i32 @foo_low_trip_count3(i1 %cond, i32 %bound) !prof !0 {
; The loop has low invocation count compare to the function invocation count,
; but has a high trip count per invocation. Vectorize it.
; The original loop has latchExitWeight=10 and backedgeTakenWeight=10,000,
; therefore estimatedBackedgeTakenCount=1,000 and estimatedTripCount=1,001.
; Vectorizing by 4 produces estimatedTripCounts of 1,001/4=250 and 1,001%4=1
; for vectorized and remainder loops, respectively, therefore their
; estimatedBackedgeTakenCounts are 249 and 0, and so the weights recorded with
; loop invocation weights of 10 are the above {10, 2490} and {10, 0}. This
; explains the values for PROF4 and PROF10
; CHECK-LABEL: define i32 @foo_low_trip_count3(
; CHECK-SAME: i1 [[COND:%.*]], i32 [[BOUND:%.*]]) !prof [[PROF1]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br i1 [[COND]], label %[[FOR_PREHEADER:.*]], label %[[FOR_END:.*]], !prof [[PROF2:![0-9]+]]
; CHECK: [[FOR_PREHEADER]]:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BOUND]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF3:![0-9]+]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i8> splat (i8 2), <4 x i8> splat (i8 1)
; CHECK-NEXT: store <4 x i8> [[TMP3]], ptr [[TMP1]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF4:![0-9]+]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]], !prof [[PROF9:![0-9]+]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_PREHEADER]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP5]], 0
; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], [[BOUND]]
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT: br label %[[FOR_END]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret i32 0
;
entry:
br i1 %cond, label %for.preheader, label %for.end, !prof !2
for.preheader:
br label %for.body
for.body: ; preds = %for.body, %entry
%i.08 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
%0 = load i8, ptr %arrayidx, align 1
%cmp1 = icmp eq i8 %0, 0
%. = select i1 %cmp1, i8 2, i8 1
store i8 %., ptr %arrayidx, align 1
%inc = add nsw i32 %i.08, 1
%exitcond = icmp eq i32 %i.08, %bound
br i1 %exitcond, label %for.end, label %for.body, !prof !3
for.end: ; preds = %for.body
ret i32 0
}
define i32 @foo_low_trip_count_icmp_sgt(i32 %bound) {
; Simple loop with low tripcount and inequality test for exit.
; Should not be vectorized.
; CHECK-LABEL: define i32 @foo_low_trip_count_icmp_sgt(
; CHECK-SAME: i32 [[BOUND:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i32 [[I_08]], [[BOUND]]
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !prof [[PROF0]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret i32 0
;
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
%0 = load i8, ptr %arrayidx, align 1
%cmp1 = icmp eq i8 %0, 0
%. = select i1 %cmp1, i8 2, i8 1
store i8 %., ptr %arrayidx, align 1
%inc = add nsw i32 %i.08, 1
%exitcond = icmp sgt i32 %i.08, %bound
br i1 %exitcond, label %for.end, label %for.body, !prof !1
for.end: ; preds = %for.body
ret i32 0
}
define i32 @const_low_trip_count() {
; Simple loop with constant, small trip count and no profiling info.
; CHECK-LABEL: define i32 @const_low_trip_count() {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I_08]], 2
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret i32 0
;
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
%0 = load i8, ptr %arrayidx, align 1
%cmp1 = icmp eq i8 %0, 0
%. = select i1 %cmp1, i8 2, i8 1
store i8 %., ptr %arrayidx, align 1
%inc = add nsw i32 %i.08, 1
%exitcond = icmp slt i32 %i.08, 2
br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret i32 0
}
define i32 @const_large_trip_count() {
; Simple loop with constant large trip count and no profiling info.
; CHECK-LABEL: define i32 @const_large_trip_count() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> splat (i8 2), <4 x i8> splat (i8 1)
; CHECK-NEXT: store <4 x i8> [[TMP2]], ptr [[TMP0]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 1000, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP4]], 0
; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I_08]], 1000
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END:.*]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret i32 0
;
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
%0 = load i8, ptr %arrayidx, align 1
%cmp1 = icmp eq i8 %0, 0
%. = select i1 %cmp1, i8 2, i8 1
store i8 %., ptr %arrayidx, align 1
%inc = add nsw i32 %i.08, 1
%exitcond = icmp slt i32 %i.08, 1000
br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret i32 0
}
define i32 @const_small_trip_count_step() {
; Simple loop with static, small trip count and no profiling info.
; CHECK-LABEL: define i32 @const_small_trip_count_step() {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 5
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I_08]], 10
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret i32 0
;
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
%0 = load i8, ptr %arrayidx, align 1
%cmp1 = icmp eq i8 %0, 0
%. = select i1 %cmp1, i8 2, i8 1
store i8 %., ptr %arrayidx, align 1
%inc = add nsw i32 %i.08, 5
%exitcond = icmp slt i32 %i.08, 10
br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret i32 0
}
define i32 @const_trip_over_profile() !prof !0 {
; constant trip count takes precedence over profile data
; CHECK-LABEL: define i32 @const_trip_over_profile(
; CHECK-SAME: ) !prof [[PROF1]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> splat (i8 2), <4 x i8> splat (i8 1)
; CHECK-NEXT: store <4 x i8> [[TMP2]], ptr [[TMP0]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF15:![0-9]+]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 1000, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP4]], 0
; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I_08]], 1000
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END:.*]], !prof [[PROF17:![0-9]+]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret i32 0
;
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
%0 = load i8, ptr %arrayidx, align 1
%cmp1 = icmp eq i8 %0, 0
%. = select i1 %cmp1, i8 2, i8 1
store i8 %., ptr %arrayidx, align 1
%inc = add nsw i32 %i.08, 1
%exitcond = icmp slt i32 %i.08, 1000
br i1 %exitcond, label %for.body, label %for.end, !prof !1
for.end: ; preds = %for.body
ret i32 0
}
!0 = !{!"function_entry_count", i64 100}
!1 = !{!"branch_weights", i32 100, i32 0}
!2 = !{!"branch_weights", i32 10, i32 90}
!3 = !{!"branch_weights", i32 10, i32 10000}
;.
; CHECK: [[PROF0]] = !{!"branch_weights", i32 100, i32 0}
; CHECK: [[PROF1]] = !{!"function_entry_count", i64 100}
; CHECK: [[PROF2]] = !{!"branch_weights", i32 10, i32 90}
; CHECK: [[PROF3]] = !{!"branch_weights", i32 1, i32 127}
; CHECK: [[PROF4]] = !{!"branch_weights", i32 10, i32 2490}
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META6:![0-9]+]], [[META7:![0-9]+]], [[META8:![0-9]+]]}
; CHECK: [[META6]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META7]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[META8]] = !{!"llvm.loop.estimated_trip_count", i32 250}
; CHECK: [[PROF9]] = !{!"branch_weights", i32 1, i32 3}
; CHECK: [[PROF10]] = !{!"branch_weights", i32 10, i32 0}
; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META7]], [[META6]], [[META12:![0-9]+]]}
; CHECK: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 1}
; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META6]], [[META7]]}
; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META7]], [[META6]]}
; CHECK: [[PROF15]] = !{!"branch_weights", i32 1, i32 249}
; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META6]], [[META7]], [[META8]]}
; CHECK: [[PROF17]] = !{!"branch_weights", i32 0, i32 1}
; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META7]], [[META6]], [[META12]]}
;.