| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6 |
| ; This test verifies that the loop vectorizer will not vectorize low trip count |
| ; loops that require runtime checks (trip count is computed with profile info). |
| ; REQUIRES: asserts |
| ; RUN: opt < %s -passes=loop-vectorize -loop-vectorize-with-block-frequency -S | FileCheck %s |
| |
| ; Big-endian layout ("E") with 32-bit pointers and 32 as the only native integer width ("n32"). |
| target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S128" |
| |
| ; Zero-initialized 32-byte array that the loops in the functions below load from and store to. |
| @tab = common global [32 x i8] zeroinitializer, align 1 |
| |
| ;. |
| ; CHECK: @tab = common global [32 x i8] zeroinitializer, align 1 |
| ;. |
| define i32 @foo_low_trip_count1(i32 %bound) { |
| ; Simple loop with a low trip count: the !prof !1 weights on the latch branch favor the exit edge. Should not be vectorized. |
| ; CHECK-LABEL: define i32 @foo_low_trip_count1( |
| ; CHECK-SAME: i32 [[BOUND:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[FOR_BODY:.*]] |
| ; CHECK: [[FOR_BODY]]: |
| ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] |
| ; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 |
| ; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0 |
| ; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 |
| ; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 |
| ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 |
| ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], [[BOUND]] |
| ; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !prof [[PROF0:![0-9]+]] |
| ; CHECK: [[FOR_END]]: |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| |
| entry: |
| br label %for.body |
| |
| for.body: ; preds = %for.body, %entry |
| %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08 |
| %0 = load i8, ptr %arrayidx, align 1 |
| %cmp1 = icmp eq i8 %0, 0 |
| %. = select i1 %cmp1, i8 2, i8 1 |
| store i8 %., ptr %arrayidx, align 1 |
| %inc = add nsw i32 %i.08, 1 |
| %exitcond = icmp eq i32 %i.08, %bound |
| br i1 %exitcond, label %for.end, label %for.body, !prof !1 |
| |
| for.end: ; preds = %for.body |
| ret i32 0 |
| } |
| |
| define i32 @foo_low_trip_count2(i32 %bound) !prof !0 { |
| ; The loop has the same invocation count as the function, but has a low |
| ; trip count per invocation and is not worth vectorizing. |
| ; CHECK-LABEL: define i32 @foo_low_trip_count2( |
| ; CHECK-SAME: i32 [[BOUND:%.*]]) !prof [[PROF1:![0-9]+]] { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[FOR_BODY:.*]] |
| ; CHECK: [[FOR_BODY]]: |
| ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] |
| ; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 |
| ; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0 |
| ; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 |
| ; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 |
| ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 |
| ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], [[BOUND]] |
| ; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !prof [[PROF0]] |
| ; CHECK: [[FOR_END]]: |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| |
| entry: |
| br label %for.body |
| |
| for.body: ; preds = %for.body, %entry |
| %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08 |
| %0 = load i8, ptr %arrayidx, align 1 |
| %cmp1 = icmp eq i8 %0, 0 |
| %. = select i1 %cmp1, i8 2, i8 1 |
| store i8 %., ptr %arrayidx, align 1 |
| %inc = add nsw i32 %i.08, 1 |
| %exitcond = icmp eq i32 %i.08, %bound |
| br i1 %exitcond, label %for.end, label %for.body, !prof !1 |
| |
| for.end: ; preds = %for.body |
| ret i32 0 |
| } |
| |
| define i32 @foo_low_trip_count3(i1 %cond, i32 %bound) !prof !0 { |
| ; The loop has a low invocation count compared to the function invocation count, |
| ; but has a high trip count per invocation. Vectorize it. |
| ; The original loop has latchExitWeight=10 and backedgeTakenWeight=10,000, |
| ; therefore estimatedBackedgeTakenCount=1,000 and estimatedTripCount=1,001. |
| ; Vectorizing by 4 produces estimatedTripCounts of 1,001/4=250 and 1,001%4=1 |
| ; for the vectorized and remainder loops, respectively, therefore their |
| ; estimatedBackedgeTakenCounts are 249 and 0. With a loop invocation weight of |
| ; 10, the recorded branch weights are {10, 2490} and {10, 0}, which explains |
| ; the values of PROF4 and PROF10. |
| ; CHECK-LABEL: define i32 @foo_low_trip_count3( |
| ; CHECK-SAME: i1 [[COND:%.*]], i32 [[BOUND:%.*]]) !prof [[PROF1]] { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br i1 [[COND]], label %[[FOR_PREHEADER:.*]], label %[[FOR_END:.*]], !prof [[PROF2:![0-9]+]] |
| ; CHECK: [[FOR_PREHEADER]]: |
| ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BOUND]], 1 |
| ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4 |
| ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF3:![0-9]+]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4 |
| ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[INDEX]] |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 |
| ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer |
| ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i8> splat (i8 2), <4 x i8> splat (i8 1) |
| ; CHECK-NEXT: store <4 x i8> [[TMP3]], ptr [[TMP1]], align 1 |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 |
| ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] |
| ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF4:![0-9]+]], !llvm.loop [[LOOP5:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] |
| ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]], !prof [[PROF9:![0-9]+]] |
| ; CHECK: [[SCALAR_PH]]: |
| ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_PREHEADER]] ] |
| ; CHECK-NEXT: br label %[[FOR_BODY:.*]] |
| ; CHECK: [[FOR_BODY]]: |
| ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 |
| ; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP5]], 0 |
| ; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 |
| ; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 |
| ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 |
| ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], [[BOUND]] |
| ; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]] |
| ; CHECK: [[FOR_END_LOOPEXIT]]: |
| ; CHECK-NEXT: br label %[[FOR_END]] |
| ; CHECK: [[FOR_END]]: |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| entry: |
| br i1 %cond, label %for.preheader, label %for.end, !prof !2 |
| |
| for.preheader: |
| br label %for.body |
| |
| for.body: ; preds = %for.body, %for.preheader |
| %i.08 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ] |
| %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08 |
| %0 = load i8, ptr %arrayidx, align 1 |
| %cmp1 = icmp eq i8 %0, 0 |
| %. = select i1 %cmp1, i8 2, i8 1 |
| store i8 %., ptr %arrayidx, align 1 |
| %inc = add nsw i32 %i.08, 1 |
| %exitcond = icmp eq i32 %i.08, %bound |
| br i1 %exitcond, label %for.end, label %for.body, !prof !3 |
| |
| for.end: ; preds = %for.body, %entry |
| ret i32 0 |
| } |
| |
| define i32 @foo_low_trip_count_icmp_sgt(i32 %bound) { |
| ; Simple loop with a low trip count and an inequality (icmp sgt) exit test. |
| ; Should not be vectorized. |
| ; CHECK-LABEL: define i32 @foo_low_trip_count_icmp_sgt( |
| ; CHECK-SAME: i32 [[BOUND:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[FOR_BODY:.*]] |
| ; CHECK: [[FOR_BODY]]: |
| ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] |
| ; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 |
| ; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0 |
| ; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 |
| ; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 |
| ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 |
| ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i32 [[I_08]], [[BOUND]] |
| ; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !prof [[PROF0]] |
| ; CHECK: [[FOR_END]]: |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| |
| entry: |
| br label %for.body |
| |
| for.body: ; preds = %for.body, %entry |
| %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08 |
| %0 = load i8, ptr %arrayidx, align 1 |
| %cmp1 = icmp eq i8 %0, 0 |
| %. = select i1 %cmp1, i8 2, i8 1 |
| store i8 %., ptr %arrayidx, align 1 |
| %inc = add nsw i32 %i.08, 1 |
| %exitcond = icmp sgt i32 %i.08, %bound |
| br i1 %exitcond, label %for.end, label %for.body, !prof !1 |
| |
| for.end: ; preds = %for.body |
| ret i32 0 |
| } |
| |
| define i32 @const_low_trip_count() { |
| ; Simple loop with a constant, small trip count (3 iterations: %i.08 = 0, 1, 2) and no profiling info. Expected to stay scalar. |
| ; CHECK-LABEL: define i32 @const_low_trip_count() { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[FOR_BODY:.*]] |
| ; CHECK: [[FOR_BODY]]: |
| ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] |
| ; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 |
| ; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0 |
| ; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 |
| ; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 |
| ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 |
| ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I_08]], 2 |
| ; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END:.*]] |
| ; CHECK: [[FOR_END]]: |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| |
| entry: |
| br label %for.body |
| |
| for.body: ; preds = %for.body, %entry |
| %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08 |
| %0 = load i8, ptr %arrayidx, align 1 |
| %cmp1 = icmp eq i8 %0, 0 |
| %. = select i1 %cmp1, i8 2, i8 1 |
| store i8 %., ptr %arrayidx, align 1 |
| %inc = add nsw i32 %i.08, 1 |
| %exitcond = icmp slt i32 %i.08, 2 |
| br i1 %exitcond, label %for.body, label %for.end |
| |
| for.end: ; preds = %for.body |
| ret i32 0 |
| } |
| |
| define i32 @const_large_trip_count() { |
| ; Simple loop with a constant, large trip count (1001 iterations) and no profiling info. Expected to be vectorized with <4 x i8> operations. |
| ; CHECK-LABEL: define i32 @const_large_trip_count() { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[INDEX]] |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 |
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer |
| ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> splat (i8 2), <4 x i8> splat (i8 1) |
| ; CHECK-NEXT: store <4 x i8> [[TMP2]], ptr [[TMP0]], align 1 |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 |
| ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 |
| ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: br label %[[SCALAR_PH:.*]] |
| ; CHECK: [[SCALAR_PH]]: |
| ; CHECK-NEXT: br label %[[FOR_BODY:.*]] |
| ; CHECK: [[FOR_BODY]]: |
| ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 1000, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 |
| ; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP4]], 0 |
| ; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 |
| ; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 |
| ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 |
| ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I_08]], 1000 |
| ; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END:.*]], !llvm.loop [[LOOP14:![0-9]+]] |
| ; CHECK: [[FOR_END]]: |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| |
| entry: |
| br label %for.body |
| |
| for.body: ; preds = %for.body, %entry |
| %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08 |
| %0 = load i8, ptr %arrayidx, align 1 |
| %cmp1 = icmp eq i8 %0, 0 |
| %. = select i1 %cmp1, i8 2, i8 1 |
| store i8 %., ptr %arrayidx, align 1 |
| %inc = add nsw i32 %i.08, 1 |
| %exitcond = icmp slt i32 %i.08, 1000 |
| br i1 %exitcond, label %for.body, label %for.end |
| |
| for.end: ; preds = %for.body |
| ret i32 0 |
| } |
| |
| define i32 @const_small_trip_count_step() { |
| ; Simple loop with a static, small trip count (step of 5: %i.08 = 0, 5, 10) and no profiling info. Expected to stay scalar. |
| ; CHECK-LABEL: define i32 @const_small_trip_count_step() { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[FOR_BODY:.*]] |
| ; CHECK: [[FOR_BODY]]: |
| ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] |
| ; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 |
| ; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0 |
| ; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 |
| ; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 |
| ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 5 |
| ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I_08]], 10 |
| ; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END:.*]] |
| ; CHECK: [[FOR_END]]: |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| |
| entry: |
| br label %for.body |
| |
| for.body: ; preds = %for.body, %entry |
| %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08 |
| %0 = load i8, ptr %arrayidx, align 1 |
| %cmp1 = icmp eq i8 %0, 0 |
| %. = select i1 %cmp1, i8 2, i8 1 |
| store i8 %., ptr %arrayidx, align 1 |
| %inc = add nsw i32 %i.08, 5 |
| %exitcond = icmp slt i32 %i.08, 10 |
| br i1 %exitcond, label %for.body, label %for.end |
| |
| for.end: ; preds = %for.body |
| ret i32 0 |
| } |
| |
| define i32 @const_trip_over_profile() !prof !0 { |
| ; The constant trip count takes precedence over profile data. NOTE(review): here !1 weights the backedge (100) over the exit (0) - confirm this is intended. |
| ; CHECK-LABEL: define i32 @const_trip_over_profile( |
| ; CHECK-SAME: ) !prof [[PROF1]] { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[INDEX]] |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 |
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer |
| ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> splat (i8 2), <4 x i8> splat (i8 1) |
| ; CHECK-NEXT: store <4 x i8> [[TMP2]], ptr [[TMP0]], align 1 |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 |
| ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 |
| ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF15:![0-9]+]], !llvm.loop [[LOOP16:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: br label %[[SCALAR_PH:.*]] |
| ; CHECK: [[SCALAR_PH]]: |
| ; CHECK-NEXT: br label %[[FOR_BODY:.*]] |
| ; CHECK: [[FOR_BODY]]: |
| ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 1000, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 |
| ; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP4]], 0 |
| ; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 |
| ; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 |
| ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 |
| ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I_08]], 1000 |
| ; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END:.*]], !prof [[PROF17:![0-9]+]], !llvm.loop [[LOOP18:![0-9]+]] |
| ; CHECK: [[FOR_END]]: |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| |
| entry: |
| br label %for.body |
| |
| for.body: ; preds = %for.body, %entry |
| %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08 |
| %0 = load i8, ptr %arrayidx, align 1 |
| %cmp1 = icmp eq i8 %0, 0 |
| %. = select i1 %cmp1, i8 2, i8 1 |
| store i8 %., ptr %arrayidx, align 1 |
| %inc = add nsw i32 %i.08, 1 |
| %exitcond = icmp slt i32 %i.08, 1000 |
| br i1 %exitcond, label %for.body, label %for.end, !prof !1 |
| |
| for.end: ; preds = %for.body |
| ret i32 0 |
| } |
| |
| ; Profile metadata referenced by the functions above: |
| ; !0 - function entry count on foo_low_trip_count2/3 and const_trip_over_profile. |
| ; !1 - weights on the loop branches that use "!prof !1" (listed in successor order). |
| ; !2 - weights on the %cond guard branch in foo_low_trip_count3. |
| ; !3 - weights on the loop branch in foo_low_trip_count3 (exit edge 10, backedge 10000). |
| !0 = !{!"function_entry_count", i64 100} |
| !1 = !{!"branch_weights", i32 100, i32 0} |
| !2 = !{!"branch_weights", i32 10, i32 90} |
| !3 = !{!"branch_weights", i32 10, i32 10000} |
| ;. |
| ; CHECK: [[PROF0]] = !{!"branch_weights", i32 100, i32 0} |
| ; CHECK: [[PROF1]] = !{!"function_entry_count", i64 100} |
| ; CHECK: [[PROF2]] = !{!"branch_weights", i32 10, i32 90} |
| ; CHECK: [[PROF3]] = !{!"branch_weights", i32 1, i32 127} |
| ; CHECK: [[PROF4]] = !{!"branch_weights", i32 10, i32 2490} |
| ; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META6:![0-9]+]], [[META7:![0-9]+]], [[META8:![0-9]+]]} |
| ; CHECK: [[META6]] = !{!"llvm.loop.isvectorized", i32 1} |
| ; CHECK: [[META7]] = !{!"llvm.loop.unroll.runtime.disable"} |
| ; CHECK: [[META8]] = !{!"llvm.loop.estimated_trip_count", i32 250} |
| ; CHECK: [[PROF9]] = !{!"branch_weights", i32 1, i32 3} |
| ; CHECK: [[PROF10]] = !{!"branch_weights", i32 10, i32 0} |
| ; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META7]], [[META6]], [[META12:![0-9]+]]} |
| ; CHECK: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 1} |
| ; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META6]], [[META7]]} |
| ; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META7]], [[META6]]} |
| ; CHECK: [[PROF15]] = !{!"branch_weights", i32 1, i32 249} |
| ; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META6]], [[META7]], [[META8]]} |
| ; CHECK: [[PROF17]] = !{!"branch_weights", i32 0, i32 1} |
| ; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META7]], [[META6]], [[META12]]} |
| ;. |