| ; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace |
| ; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -unroll-allow-partial < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=PARTIAL-ALLOW |
| ; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -unroll-count=4 < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=USER-COUNT |
| ; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -unroll-count=9999 < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=USER-COUNT-EXCEED |
| ; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -unroll-peel-count=2 < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=EXPLICIT-PEEL |
| ; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -unroll-threshold=0 < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=ZERO-THRESH |
| ; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -unroll-full-max-count=2 < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=MAX-COUNT |
| ; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -unroll-allow-partial -unroll-partial-threshold=4 < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=PARTIAL-NOPROFIT |
| ; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -unroll-allow-remainder=false < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=PRAGMA-NOREMAINDER |
| |
| ; REQUIRES: asserts |
| |
| ; CHECK-LABEL:Loop Unroll: F[pragma_full_unroll_unknown_tc] Loop %for.body (depth=1) |
| ; CHECK-NEXT:Loop Size = 6 |
| ; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=2147483647, TripMultiple=1 |
| ; CHECK-NEXT: Explicit unroll requested: pragma-full |
| ; CHECK-NEXT: Trying pragma unroll... |
| ; CHECK-NEXT: Not fully unrolling: unknown trip count. |
| ; CHECK-NEXT: Trying full unroll... |
| ; CHECK-NEXT: Trying upper-bound unroll... |
| ; CHECK-NEXT: Trying loop peeling... |
| ; CHECK-NEXT: Trying partial unroll... |
| ; CHECK-NEXT: Trying runtime unroll... |
| ; CHECK-NEXT: Will not try to unroll loop with runtime trip count because -unroll-runtime not given |
| ; CHECK-NEXT: Not unrolling: no viable strategy found. |
| |
; Sums A[0..n-1] with a single-block loop whose bound %n is only known at run
; time; the entry guard bypasses the loop when n <= 0 and the exit phi then
; yields 0. The latch carries !llvm.loop !0, whose definition lies outside
; this view (the check lines above expect it to be reported as pragma-full).
| define i32 @pragma_full_unroll_unknown_tc(ptr %A, i32 %n) { |
| entry: |
| %cmp.entry = icmp sgt i32 %n, 0 |
| br i1 %cmp.entry, label %for.body, label %exit |
| |
| for.body: |
| %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i |
| %load = load i32, ptr %arrayidx |
| %add = add i32 %sum, %load |
| %inc = add i32 %i, 1 |
; Signed compare against the run-time bound: no constant trip count exists.
| %cmp = icmp slt i32 %inc, %n |
| br i1 %cmp, label %for.body, label %exit, !llvm.loop !0 |
| |
| exit: |
| %result = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| ret i32 %result |
| } |
| |
| ; CHECK-LABEL:Loop Unroll: F[full_unroll_cost_exceeds] Loop %for.body (depth=1) |
| ; CHECK-NEXT:Loop Size = 6 |
| ; CHECK-NEXT: Computing unroll count: TripCount=100, MaxTripCount=0, TripMultiple=100 |
| ; CHECK-NEXT: Trying pragma unroll... |
| ; CHECK-NEXT: Trying full unroll... |
| ; CHECK-NEXT: Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit. |
| ; CHECK-NEXT: Not analyzing loop cost: trip count too large. |
| ; CHECK-NEXT: Trying upper-bound unroll... |
| ; CHECK-NEXT: Trying loop peeling... |
| ; CHECK-NEXT: Trying partial unroll... |
| ; CHECK-NEXT: Will not try to unroll partially because -unroll-allow-partial not given |
| ; CHECK-NEXT: Not unrolling: no viable strategy found. |
| |
; Sums A[0..99]: the latch compares the incremented index against the constant
; 100, so the loop runs exactly 100 times. No !llvm.loop metadata is attached.
| define i32 @full_unroll_cost_exceeds(ptr %A) { |
| entry: |
| br label %for.body |
| |
| for.body: |
| %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i |
| %load = load i32, ptr %arrayidx |
| %add = add i32 %sum, %load |
| %inc = add i32 %i, 1 |
| %cmp = icmp ult i32 %inc, 100 |
| br i1 %cmp, label %for.body, label %exit |
| |
| exit: |
| ret i32 %add |
| } |
| |
| ; CHECK-LABEL:Loop Unroll: F[extended_convergence] Loop %for.body (depth=1) |
| ; CHECK-NEXT: Not unrolling: contains convergent operations. |
| |
; Declarations needed by @extended_convergence: a convergent callee and the
; convergence-control anchor intrinsic.
| declare void @convergent_func() convergent |
| declare token @llvm.experimental.convergence.anchor() |
| |
; The loop body creates a convergence token via the anchor intrinsic and the
; exit block consumes it in a "convergencectrl" bundle, so the token is used
; outside the loop that defines it. The check lines above expect the pass to
; refuse to unroll because of the convergent operations.
; Loop metadata is attached to the latch branch only; the entry branch is not
; a loop latch, so it carries none.
| define i32 @extended_convergence(ptr %A, i32 %n) { |
| entry: |
| br label %for.body |
| |
| for.body: |
| %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] |
; Token defined inside the loop ...
| %tok = call token @llvm.experimental.convergence.anchor() |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i |
| %load = load i32, ptr %arrayidx |
| %add = add i32 %sum, %load |
| %inc = add i32 %i, 1 |
| %cmp = icmp slt i32 %inc, %n |
| br i1 %cmp, label %for.body, label %exit, !llvm.loop !1 |
| |
| exit: |
; ... and consumed here, after the loop has exited.
| call void @convergent_func() [ "convergencectrl"(token %tok) ] |
| ret i32 %add |
| } |
| |
| ; CHECK-LABEL:Loop Unroll: F[noduplicate_prevents_unroll] Loop %for.body (depth=1) |
| ; CHECK-NEXT: Not unrolling: contains non-duplicatable instructions. |
| |
; Callee marked noduplicate; it is called from the loop body below.
| declare void @noduplicate_func() noduplicate |
| |
; Sums A[0..7] (constant bound 8) and calls @noduplicate_func once per
; iteration. The check lines above expect the pass to reject the loop for
; containing non-duplicatable instructions. The latch carries !llvm.loop !1,
; whose definition lies outside this view.
| define i32 @noduplicate_prevents_unroll(ptr %A) { |
| entry: |
| br label %for.body |
| |
| for.body: |
| %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i |
| %load = load i32, ptr %arrayidx |
| call void @noduplicate_func() |
| %add = add i32 %sum, %load |
| %inc = add i32 %i, 1 |
| %cmp = icmp ult i32 %inc, 8 |
| br i1 %cmp, label %for.body, label %exit, !llvm.loop !1 |
| |
| exit: |
| ret i32 %add |
| } |
| |
| ; CHECK-LABEL:Loop Unroll: F[indirectbr_loop] Loop %for.body (depth=1) |
| ; CHECK-NEXT: Not unrolling loop which is not in loop-simplify form. |
| |
; The loop header is reached from the entry block through an indirectbr on
; %target (possible destinations: %for.body and %exit). The loop itself sums
; A[0..9] (constant bound 10). The check lines above expect the pass to report
; that the loop is not in loop-simplify form. The latch carries !llvm.loop !1,
; whose definition lies outside this view.
| define i32 @indirectbr_loop(ptr %A, ptr %target) { |
| entry: |
| indirectbr ptr %target, [label %for.body, label %exit] |
| |
| for.body: |
| %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i |
| %load = load i32, ptr %arrayidx |
| %add = add i32 %sum, %load |
| %inc = add i32 %i, 1 |
| %cmp = icmp ult i32 %inc, 10 |
| br i1 %cmp, label %for.body, label %exit, !llvm.loop !1 |
| |
| exit: |
; Yields 0 when the indirectbr jumps straight to %exit.
| %result = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| ret i32 %result |
| } |
| |
| ; CHECK-LABEL:Loop Unroll: F[inline_prevents_unroll] Loop %for.body (depth=1) |
| ; CHECK-NEXT:Loop Size = 8 |
| ; CHECK-NEXT: Not unrolling loop with inlinable calls. |
| |
; Internal helper returning %x + 42. Within this view its only call site is
; the loop body of @inline_prevents_unroll.
| define internal i32 @single_use_helper(i32 %x) { |
| %add = add i32 %x, 42 |
| ret i32 %add |
| } |
| |
; Sums single_use_helper(A[i]) for i in 0..9 (constant bound 10). The call to
; the internal helper sits inside the loop body; the check lines above expect
; the pass to decline because the loop contains inlinable calls. The latch
; carries !llvm.loop !2, whose definition lies outside this view.
| define i32 @inline_prevents_unroll(ptr %A) { |
| entry: |
| br label %for.body |
| |
| for.body: |
| %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i |
| %load = load i32, ptr %arrayidx |
| %helper_result = call i32 @single_use_helper(i32 %load) |
| %add = add i32 %sum, %helper_result |
| %inc = add i32 %i, 1 |
| %cmp = icmp ult i32 %inc, 10 |
| br i1 %cmp, label %for.body, label %exit, !llvm.loop !2 |
| |
| exit: |
| ret i32 %add |
| } |
| |
| ; CHECK-LABEL:Loop Unroll: F[full_unroll_profitability_analysis] Loop %for.body (depth=1) |
| ; CHECK-NEXT:Loop Size = {{[0-9]+}} |
| ; CHECK-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10 |
| ; CHECK-NEXT: Trying pragma unroll... |
| ; CHECK-NEXT: Trying full unroll... |
| ; CHECK-NEXT: Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit. |
| ; CHECK-NEXT: Starting LoopUnroll profitability analysis... |
| ; CHECK-NEXT: Analyzing iteration 0 |
| ; CHECK-NEXT: Analyzing iteration 1 |
| ; CHECK-NEXT: Analyzing iteration 2 |
| ; CHECK-NEXT: Analyzing iteration 3 |
| ; CHECK-NEXT: Analyzing iteration 4 |
| ; CHECK-NEXT: Analyzing iteration 5 |
| ; CHECK-NEXT: Analyzing iteration 6 |
| ; CHECK-NEXT: Analyzing iteration 7 |
| ; CHECK-NEXT: Analyzing iteration 8 |
| ; CHECK-NEXT: Analyzing iteration 9 |
| ; CHECK: Analysis finished: |
| ; CHECK-NEXT: UnrolledCost: {{[0-9]+}}, RolledDynamicCost: {{[0-9]+}} |
| ; CHECK-NEXT: Profitable after cost analysis. |
| ; CHECK-NEXT: Exiting block %for.body: TripCount=10, TripMultiple=0, BreakoutTrip=0 |
| ; CHECK-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 10! |
| |
; Ten-iteration loop (constant bound 10) with a deliberately heavier body:
; each iteration loads A[i] and B[i], pushes them through a chain of nine
; dependent arithmetic/logic operations and accumulates the result. The check
; lines above expect the unrolled size to exceed the plain threshold, the
; per-iteration cost analysis to run for iterations 0..9, and the loop to be
; completely unrolled afterwards. No !llvm.loop metadata is attached.
| define i32 @full_unroll_profitability_analysis(ptr %A, ptr %B) { |
| entry: |
| br label %for.body |
| |
| for.body: |
| %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %sum = phi i32 [ 0, %entry ], [ %result, %for.body ] |
| %idxA = getelementptr inbounds i32, ptr %A, i32 %i |
| %loadA = load i32, ptr %idxA |
| %idxB = getelementptr inbounds i32, ptr %B, i32 %i |
| %loadB = load i32, ptr %idxB |
; Dependent chain: every operation consumes the previous result.
| %mul1 = mul i32 %loadA, %loadB |
| %add1 = add i32 %mul1, %loadA |
| %mul2 = mul i32 %add1, %loadB |
| %sub1 = sub i32 %mul2, %loadA |
| %add2 = add i32 %sub1, %loadB |
| %mul3 = mul i32 %add2, %loadA |
| %sub2 = sub i32 %mul3, %loadB |
| %xor1 = xor i32 %sub2, %loadA |
| %or1 = or i32 %xor1, %loadB |
| %result = add i32 %sum, %or1 |
| %inc = add i32 %i, 1 |
| %cmp = icmp ult i32 %inc, 10 |
| br i1 %cmp, label %for.body, label %exit |
| |
| exit: |
| ret i32 %result |
| } |
| |
| ; CHECK-LABEL:Loop Unroll: F[cost_exceed_boosted_threshold] Loop %for.body (depth=1) |
| ; CHECK-NEXT:Loop Size = {{[0-9]+}} |
| ; CHECK-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10 |
| ; CHECK-NEXT: Trying pragma unroll... |
| ; CHECK-NEXT: Trying full unroll... |
| ; CHECK-NEXT: Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit. |
| ; CHECK-NEXT: Starting LoopUnroll profitability analysis... |
| ; CHECK-NEXT: Analyzing iteration 0 |
| ; CHECK-NEXT: Analyzing iteration 1 |
| ; CHECK-NEXT: Analyzing iteration 2 |
| ; CHECK-NEXT: Analyzing iteration 3 |
| ; CHECK-NEXT: Analyzing iteration 4 |
| ; CHECK-NEXT: Analyzing iteration 5 |
| ; CHECK-NEXT: Analyzing iteration 6 |
| ; CHECK-NEXT: Analyzing iteration 7 |
| ; CHECK-NEXT: Analyzing iteration 8 |
| ; CHECK-NEXT: Analyzing iteration 9 |
| ; CHECK: Analysis finished: |
| ; CHECK-NEXT: UnrolledCost: {{[0-9]+}}, RolledDynamicCost: {{[0-9]+}} |
| ; CHECK-NEXT: Not unrolling: cost {{[0-9]+}} >= boosted threshold {{[0-9]+}}. |
| ; CHECK-NEXT: Trying upper-bound unroll... |
| ; CHECK-NEXT: Trying loop peeling... |
| ; CHECK-NEXT: Trying partial unroll... |
| ; CHECK-NEXT: Will not try to unroll partially because -unroll-allow-partial not given |
| ; CHECK-NEXT: Not unrolling: no viable strategy found. |
| |
; Same shape as @full_unroll_profitability_analysis (ten iterations, loads of
; A[i] and B[i]) but the dependent chain is extended by six more operations
; (and, shl, ashr, mul, add, xor). The check lines above expect the cost
; analysis to run and then reject full unrolling because the cost is not below
; the boosted threshold. No !llvm.loop metadata is attached.
| define i32 @cost_exceed_boosted_threshold(ptr %A, ptr %B) { |
| entry: |
| br label %for.body |
| |
| for.body: |
| %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %sum = phi i32 [ 0, %entry ], [ %result, %for.body ] |
| %idxA = getelementptr inbounds i32, ptr %A, i32 %i |
| %loadA = load i32, ptr %idxA |
| %idxB = getelementptr inbounds i32, ptr %B, i32 %i |
| %loadB = load i32, ptr %idxB |
| %mul1 = mul i32 %loadA, %loadB |
| %add1 = add i32 %mul1, %loadA |
| %mul2 = mul i32 %add1, %loadB |
| %sub1 = sub i32 %mul2, %loadA |
| %add2 = add i32 %sub1, %loadB |
| %mul3 = mul i32 %add2, %loadA |
| %sub2 = sub i32 %mul3, %loadB |
| %xor1 = xor i32 %sub2, %loadA |
| %or1 = or i32 %xor1, %loadB |
; Extra operations beyond the profitable variant start here.
| %and1 = and i32 %or1, %loadA |
| %shl1 = shl i32 %and1, 2 |
| %ashr1 = ashr i32 %shl1, 1 |
| %mul4 = mul i32 %ashr1, %loadB |
| %add3 = add i32 %mul4, %loadA |
| %xor2 = xor i32 %add3, %loadB |
| %result = add i32 %sum, %xor2 |
| %inc = add i32 %i, 1 |
| %cmp = icmp ult i32 %inc, 10 |
| br i1 %cmp, label %for.body, label %exit |
| |
| exit: |
| ret i32 %result |
| } |
| |
| ; CHECK-LABEL:Loop Unroll: F[full_unroll_size_under_threshold] Loop %for.body (depth=1) |
| ; CHECK-NEXT:Loop Size = 6 |
| ; CHECK-NEXT: Computing unroll count: TripCount=4, MaxTripCount=0, TripMultiple=4 |
| ; CHECK-NEXT: Trying pragma unroll... |
| ; CHECK-NEXT: Trying full unroll... |
| ; CHECK-NEXT: Unrolling: size {{[0-9]+}} < threshold {{[0-9]+}}. |
| ; CHECK-NEXT: Exiting block %for.body: TripCount=4, TripMultiple=0, BreakoutTrip=0 |
| ; CHECK-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 4! |
| |
; Sums A[0..3] (constant bound 4). No !llvm.loop metadata is attached; the
; check lines above expect a complete unroll because the unrolled size stays
; below the threshold.
| define i32 @full_unroll_size_under_threshold(ptr %A) { |
| entry: |
| br label %for.body |
| |
| for.body: |
| %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i |
| %load = load i32, ptr %arrayidx |
| %add = add i32 %sum, %load |
| %inc = add i32 %i, 1 |
| %cmp = icmp ult i32 %inc, 4 |
| br i1 %cmp, label %for.body, label %exit |
| |
| exit: |
| ret i32 %add |
| } |
| |
| ; CHECK-LABEL:Loop Unroll: F[pragma_full_known_tc] Loop %for.body (depth=1) |
| ; CHECK-NEXT:Loop Size = 6 |
| ; CHECK-NEXT: Computing unroll count: TripCount=6, MaxTripCount=0, TripMultiple=6 |
| ; CHECK-NEXT: Explicit unroll requested: pragma-full |
| ; CHECK-NEXT: Trying pragma unroll... |
| ; CHECK-NEXT: Fully unrolling with trip count: 6. |
| ; CHECK-NEXT: Exiting block %for.body: TripCount=6, TripMultiple=0, BreakoutTrip=0 |
| ; CHECK-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 6! |
| |
; Sums A[0..5] (constant bound 6). The latch carries !llvm.loop !0, whose
; definition lies outside this view; the check lines above expect it to be
; reported as pragma-full and the loop to be fully unrolled with trip count 6.
| define i32 @pragma_full_known_tc(ptr %A) { |
| entry: |
| br label %for.body |
| |
| for.body: |
| %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i |
| %load = load i32, ptr %arrayidx |
| %add = add i32 %sum, %load |
| %inc = add i32 %i, 1 |
| %cmp = icmp ult i32 %inc, 6 |
| br i1 %cmp, label %for.body, label %exit, !llvm.loop !0 |
| |
| exit: |
| ret i32 %add |
| } |
| |
| ; CHECK-LABEL:Loop Unroll: F[pragma_count_unroll] Loop %for.body (depth=1) |
| ; CHECK-NEXT:Loop Size = 6 |
| ; CHECK-NEXT: Computing unroll count: TripCount=12, MaxTripCount=0, TripMultiple=12 |
| ; CHECK-NEXT: Explicit unroll requested: pragma-count(3) |
| ; CHECK-NEXT: Trying pragma unroll... |
| ; CHECK-NEXT: Unrolling with pragma count: 3. |
| ; CHECK-NEXT: Exiting block %for.body: TripCount=12, TripMultiple=0, BreakoutTrip=0 |
| ; CHECK-NEXT:UNROLLING loop %for.body by 3! |
| |
; Sums A[0..11] (constant bound 12). The latch carries !llvm.loop !5, whose
; definition lies outside this view; the check lines above expect it to be
; reported as pragma-count(3) and the loop to be unrolled by 3.
| define i32 @pragma_count_unroll(ptr %A) { |
| entry: |
| br label %for.body |
| |
| for.body: |
| %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i |
| %load = load i32, ptr %arrayidx |
| %add = add i32 %sum, %load |
| %inc = add i32 %i, 1 |
| %cmp = icmp ult i32 %inc, 12 |
| br i1 %cmp, label %for.body, label %exit, !llvm.loop !5 |
| |
| exit: |
| ret i32 %add |
| } |
| |
| ; CHECK-LABEL:Loop Unroll: F[no_viable_strategy] Loop %for.body (depth=1) |
| ; CHECK-NEXT:Loop Size = 6 |
| ; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=2147483647, TripMultiple=1 |
| ; CHECK-NEXT: Trying pragma unroll... |
| ; CHECK-NEXT: Trying full unroll... |
| ; CHECK-NEXT: Trying upper-bound unroll... |
| ; CHECK-NEXT: Trying loop peeling... |
| ; CHECK-NEXT: Trying partial unroll... |
| ; CHECK-NEXT: Trying runtime unroll... |
| ; CHECK-NEXT: Will not try to unroll loop with runtime trip count because -unroll-runtime not given |
| ; CHECK-NEXT: Not unrolling: no viable strategy found. |
| |
; Sums A[0..n-1] with a run-time bound %n; the entry guard bypasses the loop
; when n <= 0. No !llvm.loop metadata is attached, and the check lines above
; expect every strategy to be tried and none to apply.
| define i32 @no_viable_strategy(ptr %A, i32 %n) { |
| entry: |
| %cmp.entry = icmp sgt i32 %n, 0 |
| br i1 %cmp.entry, label %for.body, label %exit |
| |
| for.body: |
| %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i |
| %load = load i32, ptr %arrayidx |
| %add = add i32 %sum, %load |
| %inc = add i32 %i, 1 |
| %cmp = icmp slt i32 %inc, %n |
| br i1 %cmp, label %for.body, label %exit |
| |
| exit: |
| %result = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| ret i32 %result |
| } |
| |
| ; CHECK-LABEL:Loop Unroll: F[disabled_by_metadata] Loop %for.body (depth=1) |
| ; CHECK-NEXT: Not unrolling: transformation disabled by metadata. |
| |
; Sums A[0..7] (constant bound 8). The latch carries !llvm.loop !7, whose
; definition lies outside this view; the check lines above expect the pass to
; report that the transformation is disabled by metadata.
| define i32 @disabled_by_metadata(ptr %A) { |
| entry: |
| br label %for.body |
| |
| for.body: |
| %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i |
| %load = load i32, ptr %arrayidx |
| %add = add i32 %sum, %load |
| %inc = add i32 %i, 1 |
| %cmp = icmp ult i32 %inc, 8 |
| br i1 %cmp, label %for.body, label %exit, !llvm.loop !7 |
| |
| exit: |
| ret i32 %add |
| } |
| |
| ; CHECK-LABEL:Loop Unroll: F[upper_bound_unroll] Loop %for.body (depth=1) |
| ; CHECK-NEXT:Loop Size = 6 |
| ; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=3, TripMultiple=1 |
| ; CHECK-NEXT: Explicit unroll requested: pragma-enable |
| ; CHECK-NEXT: Trying pragma unroll... |
| ; CHECK-NEXT: Unrolling with max trip count: 3. |
| ; CHECK-NEXT: Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1 |
| ; CHECK-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 3! |
| |
; Sums A[0..masked-1] where masked = n & 3, so the bound is a run-time value
; limited to 0..3; the entry guard bypasses the loop when masked is 0. The
; latch carries !llvm.loop !9, whose definition lies outside this view; the
; check lines above expect it to be reported as pragma-enable and the loop to
; be unrolled using the maximum trip count 3.
| define i32 @upper_bound_unroll(ptr %A, i32 %n) { |
| entry: |
; Masking with 3 is what bounds the trip count from above.
| %masked = and i32 %n, 3 |
| %cmp.entry = icmp sgt i32 %masked, 0 |
| br i1 %cmp.entry, label %for.body, label %exit |
| |
| for.body: |
| %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i |
| %load = load i32, ptr %arrayidx |
| %add = add i32 %sum, %load |
| %inc = add i32 %i, 1 |
| %cmp = icmp slt i32 %inc, %masked |
| br i1 %cmp, label %for.body, label %exit, !llvm.loop !9 |
| |
| exit: |
| %result = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| ret i32 %result |
| } |
| |
| ; CHECK-LABEL:Loop Unroll: F[runtime_small_max_tc] Loop %for.body (depth=1) |
| ; CHECK-NEXT:Loop Size = 6 |
| ; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=3, TripMultiple=1 |
| ; CHECK-NEXT: Trying pragma unroll... |
| ; CHECK-NEXT: Trying full unroll... |
| ; CHECK-NEXT: Trying upper-bound unroll... |
| ; CHECK-NEXT: Trying loop peeling... |
| ; CHECK-NEXT: Trying partial unroll... |
| ; CHECK-NEXT: Trying runtime unroll... |
| ; CHECK-NEXT: Not runtime unrolling: max trip count 3 is small (< 8) and not forced. |
| ; CHECK-NEXT: Not unrolling: no viable strategy found. |
| |
; Same loop as @upper_bound_unroll (bound masked = n & 3, guarded entry) but
; with no !llvm.loop metadata on the latch. The check lines above expect
; runtime unrolling to be declined because the maximum trip count 3 is small.
| define i32 @runtime_small_max_tc(ptr %A, i32 %n) { |
| entry: |
| %masked = and i32 %n, 3 |
| %cmp.entry = icmp sgt i32 %masked, 0 |
| br i1 %cmp.entry, label %for.body, label %exit |
| |
| for.body: |
| %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i |
| %load = load i32, ptr %arrayidx |
| %add = add i32 %sum, %load |
| %inc = add i32 %i, 1 |
| %cmp = icmp slt i32 %inc, %masked |
| br i1 %cmp, label %for.body, label %exit |
| |
| exit: |
| %result = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| ret i32 %result |
| } |
| |
| ; CHECK-LABEL:Loop Unroll: F[runtime_unroll_disabled_pragma] Loop %for.body (depth=1) |
| ; CHECK-NEXT:Loop Size = 6 |
| ; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=2147483647, TripMultiple=1 |
| ; CHECK-NEXT: Trying pragma unroll... |
| ; CHECK-NEXT: Trying full unroll... |
| ; CHECK-NEXT: Trying upper-bound unroll... |
| ; CHECK-NEXT: Trying loop peeling... |
| ; CHECK-NEXT: Trying partial unroll... |
| ; CHECK-NEXT: Trying runtime unroll... |
| ; CHECK-NEXT: Not runtime unrolling: disabled by pragma. |
| ; CHECK-NEXT: Not unrolling: no viable strategy found. |
| |
; Sums A[0..n-1] with a run-time bound %n and a guarded entry. The latch
; carries !llvm.loop !11, whose definition lies outside this view; the check
; lines above expect runtime unrolling to be reported as disabled by pragma.
| define i32 @runtime_unroll_disabled_pragma(ptr %A, i32 %n) { |
| entry: |
| %cmp.entry = icmp sgt i32 %n, 0 |
| br i1 %cmp.entry, label %for.body, label %exit |
| |
| for.body: |
| %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i |
| %load = load i32, ptr %arrayidx |
| %add = add i32 %sum, %load |
| %inc = add i32 %i, 1 |
| %cmp = icmp slt i32 %inc, %n |
| br i1 %cmp, label %for.body, label %exit, !llvm.loop !11 |
| |
| exit: |
| %result = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| ret i32 %result |
| } |
| |
| ; CHECK-LABEL:Loop Unroll: F[heuristic_peel] Loop %for.header (depth=1) |
| ; CHECK-NEXT:Loop Size = 9 |
| ; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=2147483647, TripMultiple=1 |
| ; CHECK-NEXT: Trying pragma unroll... |
| ; CHECK-NEXT: Trying full unroll... |
| ; CHECK-NEXT: Trying upper-bound unroll... |
| ; CHECK-NEXT: Trying loop peeling... |
| ; CHECK-NEXT: Peeling with count: 1. |
| ; CHECK-NEXT:PEELING loop %for.header with iteration count 1! |
| |
; Opaque callee invoked from the first-iteration-only branch below.
| declare void @foo() |
| |
; Three-block loop (header, conditional %then, latch) with a run-time bound
; %n and a guarded entry. The header tests whether the induction variable is
; 0, so @foo is called only on the first iteration. The check lines above
; expect the pass to peel one iteration. No !llvm.loop metadata is attached.
| define void @heuristic_peel(ptr %A, i32 %n) { |
| entry: |
| %cmp.entry = icmp sgt i32 %n, 0 |
| br i1 %cmp.entry, label %for.header, label %exit |
| |
| for.header: |
| %i = phi i32 [ 0, %entry ], [ %inc, %for.latch ] |
; True only while %i is still its initial value.
| %cmp.zero = icmp eq i32 %i, 0 |
| br i1 %cmp.zero, label %then, label %for.latch |
| |
| then: |
| call void @foo() |
| br label %for.latch |
| |
| for.latch: |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i |
; The loaded value has no users in this function.
| %load = load i32, ptr %arrayidx |
| %inc = add i32 %i, 1 |
| %cmp = icmp slt i32 %inc, %n |
| br i1 %cmp, label %for.header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; CHECK-LABEL:Loop Unroll: F[runtime_unroll_simple] Loop %for.body (depth=1) |
| ; CHECK-NEXT:Loop Size = 6 |
| ; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=2147483647, TripMultiple=1 |
| ; CHECK-NEXT: Explicit unroll requested: pragma-enable |
| ; CHECK-NEXT: Trying pragma unroll... |
| ; CHECK-NEXT: Trying full unroll... |
| ; CHECK-NEXT: Trying upper-bound unroll... |
| ; CHECK-NEXT: Trying loop peeling... |
| ; CHECK-NEXT: Trying partial unroll... |
| ; CHECK-NEXT: Trying runtime unroll... |
| ; CHECK-NEXT: Runtime unrolling with count: 8 |
| ; CHECK-NEXT: Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1 |
| ; CHECK:UNROLLING loop %for.body by 8 with run-time trip count! |
| |
; Sums A[0..n-1] with a run-time bound %n and a guarded entry. The latch
; carries !llvm.loop !13, whose definition lies outside this view; the check
; lines above expect it to be reported as pragma-enable and the loop to be
; runtime-unrolled by 8.
| define i32 @runtime_unroll_simple(ptr %A, i32 %n) { |
| entry: |
| %cmp.entry = icmp sgt i32 %n, 0 |
| br i1 %cmp.entry, label %for.body, label %exit |
| |
| for.body: |
| %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i |
| %load = load i32, ptr %arrayidx |
| %add = add i32 %sum, %load |
| %inc = add i32 %i, 1 |
| %cmp = icmp slt i32 %inc, %n |
| br i1 %cmp, label %for.body, label %exit, !llvm.loop !13 |
| |
| exit: |
| %result = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| ret i32 %result |
| } |
| |
| ; PARTIAL-ALLOW-LABEL:Loop Unroll: F[partial_unroll_cost_analysis] Loop %for.body (depth=1) |
| ; PARTIAL-ALLOW-NEXT:Loop Size = 6 |
| ; PARTIAL-ALLOW-NEXT: Computing unroll count: TripCount=200, MaxTripCount=0, TripMultiple=200 |
| ; PARTIAL-ALLOW-NEXT: Trying pragma unroll... |
| ; PARTIAL-ALLOW-NEXT: Trying full unroll... |
| ; PARTIAL-ALLOW-NEXT: Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit. |
| ; PARTIAL-ALLOW-NEXT: Not analyzing loop cost: trip count too large. |
| ; PARTIAL-ALLOW-NEXT: Trying upper-bound unroll... |
| ; PARTIAL-ALLOW-NEXT: Trying loop peeling... |
| ; PARTIAL-ALLOW-NEXT: Trying partial unroll... |
| ; PARTIAL-ALLOW-NEXT: Unrolled size exceeds threshold; reducing count from {{[0-9]+}} to {{[0-9]+}}. |
| ; PARTIAL-ALLOW-NEXT: Partially unrolling with count: {{[0-9]+}} |
| ; PARTIAL-ALLOW-NEXT: Exiting block %for.body: TripCount=200, TripMultiple=0, BreakoutTrip=0 |
| ; PARTIAL-ALLOW-NEXT:UNROLLING loop %for.body by {{[0-9]+}}! |
| |
; Sums A[0..199] (constant bound 200). No !llvm.loop metadata is attached.
; The expectations for this function use the partial-allow prefix, i.e. the
; run line that passes -unroll-allow-partial; they expect the unroll count to
; be reduced and the loop to be partially unrolled.
| define i32 @partial_unroll_cost_analysis(ptr %A) { |
| entry: |
| br label %for.body |
| |
| for.body: |
| %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i |
| %load = load i32, ptr %arrayidx |
| %add = add i32 %sum, %load |
| %inc = add i32 %i, 1 |
| %cmp = icmp ult i32 %inc, 200 |
| br i1 %cmp, label %for.body, label %exit |
| |
| exit: |
| ret i32 %add |
| } |
| |
| ; CHECK-LABEL:Loop Unroll: F[pragma_full_tc_too_large] Loop %for.body (depth=1) |
| ; CHECK-NEXT:Loop Size = 6 |
| ; CHECK-NEXT: Computing unroll count: TripCount=1000001, MaxTripCount=0, TripMultiple=1000001 |
| ; CHECK-NEXT: Explicit unroll requested: pragma-full |
| ; CHECK-NEXT: Trying pragma unroll... |
| ; CHECK-NEXT: Won't unroll; trip count is too large. |
| ; CHECK-NEXT: Trying full unroll... |
| ; CHECK-NEXT: Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit. |
| ; CHECK-NEXT: Not analyzing loop cost: trip count too large. |
| ; CHECK-NEXT: Trying upper-bound unroll... |
| ; CHECK-NEXT: Trying loop peeling... |
| ; CHECK-NEXT: Trying partial unroll... |
| ; CHECK-NEXT: Unrolled size exceeds threshold; reducing count from {{[0-9]+}} to {{[0-9]+}}. |
| ; CHECK-NEXT: Partially unrolling with count: {{[0-9]+}} |
| ; CHECK-NEXT: Exiting block %for.body: TripCount=1000001, TripMultiple=0, BreakoutTrip=0 |
| ; CHECK-NEXT:UNROLLING loop %for.body by {{[0-9]+}}! |
| |
; Sums A[0..1000000] (constant bound 1000001). The latch carries
; !llvm.loop !12, whose definition lies outside this view; the check lines
; above expect it to be reported as pragma-full, the full unroll to be refused
; because the trip count is too large, and a partial unroll to be chosen.
| define i32 @pragma_full_tc_too_large(ptr %A) { |
| entry: |
| br label %for.body |
| |
| for.body: |
| %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i |
| %load = load i32, ptr %arrayidx |
| %add = add i32 %sum, %load |
| %inc = add i32 %i, 1 |
| %cmp = icmp ult i32 %inc, 1000001 |
| br i1 %cmp, label %for.body, label %exit, !llvm.loop !12 |
| |
| exit: |
| ret i32 %add |
| } |
| |
| ; CHECK-LABEL:Loop Unroll: F[nested_loop_cost] Loop %inner (depth=2) |
| ; CHECK-NEXT:Loop Size = 6 |
| ; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=2147483647, TripMultiple=1 |
| ; CHECK-NEXT: Trying pragma unroll... |
| ; CHECK-NEXT: Trying full unroll... |
| ; CHECK-NEXT: Trying upper-bound unroll... |
| ; CHECK-NEXT: Trying loop peeling... |
| ; CHECK-NEXT: Trying partial unroll... |
| ; CHECK-NEXT: Trying runtime unroll... |
| ; CHECK-NEXT: Will not try to unroll loop with runtime trip count because -unroll-runtime not given |
| ; CHECK-NEXT: Not unrolling: no viable strategy found. |
| ; CHECK-LABEL:Loop Unroll: F[nested_loop_cost] Loop %outer (depth=1) |
| ; CHECK-NEXT:Loop Size = {{[0-9]+}} |
| ; CHECK-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10 |
| ; CHECK-NEXT: Trying pragma unroll... |
| ; CHECK-NEXT: Trying full unroll... |
| ; CHECK-NEXT: Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit. |
| ; CHECK-NEXT: Not analyzing loop cost: not an innermost loop. |
| ; CHECK-NEXT: Trying upper-bound unroll... |
| ; CHECK-NEXT: Trying loop peeling... |
| ; CHECK-NEXT: Trying partial unroll... |
| ; CHECK-NEXT: Will not try to unroll partially because -unroll-allow-partial not given |
| ; CHECK-NEXT: Not unrolling: no viable strategy found. |
| |
; Two-level loop nest. The outer loop runs 10 times (constant bound); each
; outer iteration loads A[i], feeds it and the running sum through a chain of
; arithmetic, then enters the inner loop. The inner loop adds A[j] for
; j = 0, 1, ... and repeats while j+1 < n (signed); it is entered
; unconditionally, so its body executes at least once per outer iteration.
; The check lines above expect the inner loop (depth 2) to be reported first,
; then the outer loop, whose cost analysis is skipped because it is not an
; innermost loop. No !llvm.loop metadata is attached to either latch.
| define i32 @nested_loop_cost(ptr %A, i32 %n) { |
| entry: |
| br label %outer |
| |
| outer: |
| %i = phi i32 [ 0, %entry ], [ %i.next, %inner.exit ] |
| %sum.outer = phi i32 [ 0, %entry ], [ %sum.inner.lcssa, %inner.exit ] |
| %idxA = getelementptr inbounds i32, ptr %A, i32 %i |
| %loadA = load i32, ptr %idxA |
| %mul1 = mul i32 %loadA, %sum.outer |
| %add1 = add i32 %mul1, %loadA |
| %mul2 = mul i32 %add1, %loadA |
| %sub1 = sub i32 %mul2, %loadA |
| %add2 = add i32 %sub1, %loadA |
| %mul3 = mul i32 %add2, %loadA |
| %sub2 = sub i32 %mul3, %loadA |
| %xor1 = xor i32 %sub2, %loadA |
| %or1 = or i32 %xor1, %loadA |
| %outer.sum = add i32 %sum.outer, %or1 |
| br label %inner |
| |
| inner: |
| %j = phi i32 [ 0, %outer ], [ %j.next, %inner ] |
; The inner accumulator starts from the value computed by the outer body.
| %sum.inner = phi i32 [ %outer.sum, %outer ], [ %inner.add, %inner ] |
| %idxB = getelementptr inbounds i32, ptr %A, i32 %j |
| %loadB = load i32, ptr %idxB |
| %inner.add = add i32 %sum.inner, %loadB |
| %j.next = add i32 %j, 1 |
| %inner.cmp = icmp slt i32 %j.next, %n |
| br i1 %inner.cmp, label %inner, label %inner.exit |
| |
| inner.exit: |
; Single-entry phi carrying the inner result out of the inner loop.
| %sum.inner.lcssa = phi i32 [ %inner.add, %inner ] |
| %i.next = add i32 %i, 1 |
| %outer.cmp = icmp ult i32 %i.next, 10 |
| br i1 %outer.cmp, label %outer, label %exit |
| |
| exit: |
| ret i32 %sum.inner.lcssa |
| } |
| |
| ; USER-COUNT-LABEL:Loop Unroll: F[user_count_unroll] Loop %for.body (depth=1) |
| ; USER-COUNT-NEXT:Loop Size = 6 |
| ; USER-COUNT-NEXT: Computing unroll count: TripCount=12, MaxTripCount=0, TripMultiple=12 |
| ; USER-COUNT-NEXT: Explicit unroll requested: user-count |
| ; USER-COUNT-NEXT: Trying pragma unroll... |
| ; USER-COUNT-NEXT: Unrolling with user-specified count: 4. |
| ; USER-COUNT-NEXT: Exiting block %for.body: TripCount=12, TripMultiple=0, BreakoutTrip=0 |
| ; USER-COUNT-NEXT:UNROLLING loop %for.body by 4! |
| ; |
| ; USER-COUNT-EXCEED-LABEL:Loop Unroll: F[user_count_unroll] Loop %for.body (depth=1) |
| ; USER-COUNT-EXCEED-NEXT:Loop Size = 6 |
| ; USER-COUNT-EXCEED-NEXT: Computing unroll count: TripCount=12, MaxTripCount=0, TripMultiple=12 |
| ; USER-COUNT-EXCEED-NEXT: Explicit unroll requested: user-count |
| ; USER-COUNT-EXCEED-NEXT: Trying pragma unroll... |
| ; USER-COUNT-EXCEED-NEXT: Not unrolling with user count 9999: exceeds threshold. |
| ; USER-COUNT-EXCEED-NEXT: Trying full unroll... |
| ; USER-COUNT-EXCEED-NEXT: Unrolling: size {{[0-9]+}} < threshold {{[0-9]+}}. |
| ; USER-COUNT-EXCEED-NEXT: Exiting block %for.body: TripCount=12, TripMultiple=0, BreakoutTrip=0 |
| ; USER-COUNT-EXCEED-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 12! |
| |
; Sums A[0..11] (constant bound 12). No !llvm.loop metadata is attached; the
; unroll count comes from the command line instead. Two sets of expectations
; above cover this function: the run with -unroll-count=4 (unroll by 4) and
; the run with -unroll-count=9999 (count rejected, then a complete unroll).
| define i32 @user_count_unroll(ptr %A) { |
| entry: |
| br label %for.body |
| |
| for.body: |
| %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i |
| %load = load i32, ptr %arrayidx |
| %add = add i32 %sum, %load |
| %inc = add i32 %i, 1 |
| %cmp = icmp ult i32 %inc, 12 |
| br i1 %cmp, label %for.body, label %exit |
| |
| exit: |
| ret i32 %add |
| } |
| |
| ; EXPLICIT-PEEL-LABEL:Loop Unroll: F[explicit_peel_count] Loop %for.body (depth=1) |
| ; EXPLICIT-PEEL-NEXT:Loop Size = 6 |
| ; EXPLICIT-PEEL-NEXT: Computing unroll count: TripCount=0, MaxTripCount=2147483647, TripMultiple=1 |
| ; EXPLICIT-PEEL-NEXT: Using explicit peel count: 2. |
| ; EXPLICIT-PEEL-NEXT:PEELING loop %for.body with iteration count 2! |
| |
; Sums A[0..n-1] with a run-time bound %n and a guarded entry. No !llvm.loop
; metadata is attached. The expectations above belong to the run that passes
; -unroll-peel-count=2 and expect two iterations to be peeled.
| define i32 @explicit_peel_count(ptr %A, i32 %n) { |
| entry: |
| %cmp.entry = icmp sgt i32 %n, 0 |
| br i1 %cmp.entry, label %for.body, label %exit |
| |
| for.body: |
| %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i |
| %load = load i32, ptr %arrayidx |
| %add = add i32 %sum, %load |
| %inc = add i32 %i, 1 |
| %cmp = icmp slt i32 %inc, %n |
| br i1 %cmp, label %for.body, label %exit |
| |
| exit: |
| %result = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| ret i32 %result |
| } |
| |
| ; ZERO-THRESH-LABEL:Loop Unroll: F[zero_thresh_unroll] Loop %for.body (depth=1) |
| ; ZERO-THRESH-NEXT: Not unrolling: all thresholds are zero. |
| |
; Sums A[0..7] (constant bound 8). No !llvm.loop metadata is attached. The
; expectations above belong to the run that passes -unroll-threshold=0 and
; expect the pass to bail out because all thresholds are zero.
| define i32 @zero_thresh_unroll(ptr %A) { |
| entry: |
| br label %for.body |
| |
| for.body: |
| %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i |
| %load = load i32, ptr %arrayidx |
| %add = add i32 %sum, %load |
| %inc = add i32 %i, 1 |
| %cmp = icmp ult i32 %inc, 8 |
| br i1 %cmp, label %for.body, label %exit |
| |
| exit: |
| ret i32 %add |
| } |
| |
| ; MAX-COUNT-LABEL:Loop Unroll: F[max_count_unroll] Loop %for.body (depth=1) |
| ; MAX-COUNT-NEXT:Loop Size = 6 |
| ; MAX-COUNT-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10 |
| ; MAX-COUNT-NEXT: Trying pragma unroll... |
| ; MAX-COUNT-NEXT: Trying full unroll... |
| ; MAX-COUNT-NEXT: Not unrolling: trip count 10 exceeds max count 2. |
| ; MAX-COUNT-NEXT: Trying upper-bound unroll... |
| ; MAX-COUNT-NEXT: Trying loop peeling... |
| ; MAX-COUNT-NEXT: Trying partial unroll... |
| ; MAX-COUNT-NEXT: Will not try to unroll partially because -unroll-allow-partial not given |
| ; MAX-COUNT-NEXT: Not unrolling: no viable strategy found. |
| |
| ; Adds up the i32 elements A[0] through A[9] and returns the total. |
| ; The loop bound is the literal 10; the back-edge has no !llvm.loop attachment. |
| define i32 @max_count_unroll(ptr %A) { |
| entry: |
| br label %for.body |
| |
| for.body: |
| %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ] |
| %acc = phi i32 [ 0, %entry ], [ %acc.next, %for.body ] |
| %elt.ptr = getelementptr inbounds i32, ptr %A, i32 %iv |
| %elt = load i32, ptr %elt.ptr |
| %acc.next = add i32 %acc, %elt |
| %iv.next = add i32 %iv, 1 |
| ; Unsigned compare: keep looping while the next index is below 10. |
| %again = icmp ult i32 %iv.next, 10 |
| br i1 %again, label %for.body, label %exit |
| |
| exit: |
| ret i32 %acc.next |
| } |
| |
| ; PARTIAL-NOPROFIT-LABEL:Loop Unroll: F[partial_no_profit] Loop %for.body (depth=1) |
| ; PARTIAL-NOPROFIT-NEXT:Loop Size = 6 |
| ; PARTIAL-NOPROFIT-NEXT: Computing unroll count: TripCount=200, MaxTripCount=0, TripMultiple=200 |
| ; PARTIAL-NOPROFIT-NEXT: Trying pragma unroll... |
| ; PARTIAL-NOPROFIT-NEXT: Trying full unroll... |
| ; PARTIAL-NOPROFIT-NEXT: Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit. |
| ; PARTIAL-NOPROFIT-NEXT: Not analyzing loop cost: trip count too large. |
| ; PARTIAL-NOPROFIT-NEXT: Trying upper-bound unroll... |
| ; PARTIAL-NOPROFIT-NEXT: Trying loop peeling... |
| ; PARTIAL-NOPROFIT-NEXT: Trying partial unroll... |
| ; PARTIAL-NOPROFIT-NEXT: Unrolled size exceeds threshold; reducing count from {{[0-9]+}} to {{[0-9]+}}. |
| ; PARTIAL-NOPROFIT-NEXT: Will not partially unroll: no profitable count. |
| ; PARTIAL-NOPROFIT-NEXT: Partially unrolling with count: 0 |
| ; PARTIAL-NOPROFIT-NEXT: Not unrolling: no viable strategy found. |
| |
| ; Adds up the i32 elements A[0] through A[199] and returns the total. |
| ; The loop bound is the literal 200; the back-edge has no !llvm.loop attachment. |
| define i32 @partial_no_profit(ptr %A) { |
| entry: |
| br label %for.body |
| |
| for.body: |
| %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ] |
| %acc = phi i32 [ 0, %entry ], [ %acc.next, %for.body ] |
| %elt.ptr = getelementptr inbounds i32, ptr %A, i32 %iv |
| %elt = load i32, ptr %elt.ptr |
| %acc.next = add i32 %acc, %elt |
| %iv.next = add i32 %iv, 1 |
| ; Unsigned compare: keep looping while the next index is below 200. |
| %again = icmp ult i32 %iv.next, 200 |
| br i1 %again, label %for.body, label %exit |
| |
| exit: |
| ret i32 %acc.next |
| } |
| |
| ; PRAGMA-NOREMAINDER-LABEL:Loop Unroll: F[pragma_count_no_remainder] Loop %for.body (depth=1) |
| ; PRAGMA-NOREMAINDER-NEXT:Loop Size = 6 |
| ; PRAGMA-NOREMAINDER-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10 |
| ; PRAGMA-NOREMAINDER-NEXT: Explicit unroll requested: pragma-count(3) |
| ; PRAGMA-NOREMAINDER-NEXT: Trying pragma unroll... |
| ; PRAGMA-NOREMAINDER-NEXT: Not unrolling with pragma count 3: remainder not allowed, count does not divide trip multiple 10. |
| ; PRAGMA-NOREMAINDER-NEXT: Trying full unroll... |
| ; PRAGMA-NOREMAINDER-NEXT: Unrolling: size {{[0-9]+}} < threshold {{[0-9]+}}. |
| ; PRAGMA-NOREMAINDER-NEXT: Exiting block %for.body: TripCount=10, TripMultiple=0, BreakoutTrip=0 |
| ; PRAGMA-NOREMAINDER-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 10! |
| |
| ; Adds up the i32 elements A[0] through A[9] and returns the total. |
| ; The loop bound is the literal 10. The back-edge is tagged with loop ID !14, |
| ; which attaches the llvm.loop.unroll.count hint with value 3. |
| define i32 @pragma_count_no_remainder(ptr %A) { |
| entry: |
| br label %for.body |
| |
| for.body: |
| %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ] |
| %acc = phi i32 [ 0, %entry ], [ %acc.next, %for.body ] |
| %elt.ptr = getelementptr inbounds i32, ptr %A, i32 %iv |
| %elt = load i32, ptr %elt.ptr |
| %acc.next = add i32 %acc, %elt |
| %iv.next = add i32 %iv, 1 |
| ; Unsigned compare: keep looping while the next index is below 10. |
| %again = icmp ult i32 %iv.next, 10 |
| br i1 %again, label %for.body, label %exit, !llvm.loop !14 |
| |
| exit: |
| ret i32 %acc.next |
| } |
| |
| ; Unroll hint nodes shared by the loop IDs below. |
| !3 = !{!"llvm.loop.unroll.full"} |
| !4 = !{!"llvm.loop.unroll.enable"} |
| !6 = !{!"llvm.loop.unroll.count", i32 3} |
| !8 = !{!"llvm.loop.unroll.disable"} |
| !10 = !{!"llvm.loop.unroll.runtime.disable"} |
| |
| ; Loop IDs. Each distinct node names itself first, then exactly one hint node. |
| ; Loops hinted with llvm.loop.unroll.full: |
| !0 = distinct !{!0, !3} |
| !12 = distinct !{!12, !3} |
| ; Loops hinted with llvm.loop.unroll.enable: |
| !1 = distinct !{!1, !4} |
| !2 = distinct !{!2, !4} |
| !9 = distinct !{!9, !4} |
| !13 = distinct !{!13, !4} |
| ; Loops hinted with llvm.loop.unroll.count = 3: |
| !5 = distinct !{!5, !6} |
| !14 = distinct !{!14, !6} |
| ; Loop hinted with llvm.loop.unroll.disable: |
| !7 = distinct !{!7, !8} |
| ; Loop hinted with llvm.loop.unroll.runtime.disable: |
| !11 = distinct !{!11, !10} |