| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt < %s -S -indvars -loop-unroll -verify-loop-info | FileCheck %s |
| ; |
| ; Unit tests for loop unrolling using ScalarEvolution to compute trip counts. |
| ; |
| ; Indvars is run first to generate an "old" SCEV result. Some unit |
| ; tests may check that SCEV is properly invalidated between passes. |
| |
| ; Completely unroll loops without a canonical IV. |
| define i32 @sansCanonical(i32* %base) nounwind { |
| ; CHECK-LABEL: @sansCanonical( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] |
| ; CHECK: while.body: |
| ; CHECK-NEXT: [[ADR:%.*]] = getelementptr inbounds i32, i32* [[BASE:%.*]], i64 9 |
| ; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[ADR]], align 8 |
| ; CHECK-NEXT: [[ADR_1:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 8 |
| ; CHECK-NEXT: [[TMP_1:%.*]] = load i32, i32* [[ADR_1]], align 8 |
| ; CHECK-NEXT: [[SUM_NEXT_1:%.*]] = add i32 [[TMP]], [[TMP_1]] |
| ; CHECK-NEXT: [[ADR_2:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 7 |
| ; CHECK-NEXT: [[TMP_2:%.*]] = load i32, i32* [[ADR_2]], align 8 |
| ; CHECK-NEXT: [[SUM_NEXT_2:%.*]] = add i32 [[SUM_NEXT_1]], [[TMP_2]] |
| ; CHECK-NEXT: [[ADR_3:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 6 |
| ; CHECK-NEXT: [[TMP_3:%.*]] = load i32, i32* [[ADR_3]], align 8 |
| ; CHECK-NEXT: [[SUM_NEXT_3:%.*]] = add i32 [[SUM_NEXT_2]], [[TMP_3]] |
| ; CHECK-NEXT: [[ADR_4:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 5 |
| ; CHECK-NEXT: [[TMP_4:%.*]] = load i32, i32* [[ADR_4]], align 8 |
| ; CHECK-NEXT: [[SUM_NEXT_4:%.*]] = add i32 [[SUM_NEXT_3]], [[TMP_4]] |
| ; CHECK-NEXT: [[ADR_5:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 4 |
| ; CHECK-NEXT: [[TMP_5:%.*]] = load i32, i32* [[ADR_5]], align 8 |
| ; CHECK-NEXT: [[SUM_NEXT_5:%.*]] = add i32 [[SUM_NEXT_4]], [[TMP_5]] |
| ; CHECK-NEXT: [[ADR_6:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 3 |
| ; CHECK-NEXT: [[TMP_6:%.*]] = load i32, i32* [[ADR_6]], align 8 |
| ; CHECK-NEXT: [[SUM_NEXT_6:%.*]] = add i32 [[SUM_NEXT_5]], [[TMP_6]] |
| ; CHECK-NEXT: [[ADR_7:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 2 |
| ; CHECK-NEXT: [[TMP_7:%.*]] = load i32, i32* [[ADR_7]], align 8 |
| ; CHECK-NEXT: [[SUM_NEXT_7:%.*]] = add i32 [[SUM_NEXT_6]], [[TMP_7]] |
| ; CHECK-NEXT: [[ADR_8:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 1 |
| ; CHECK-NEXT: [[TMP_8:%.*]] = load i32, i32* [[ADR_8]], align 8 |
| ; CHECK-NEXT: [[SUM_NEXT_8:%.*]] = add i32 [[SUM_NEXT_7]], [[TMP_8]] |
| ; CHECK-NEXT: ret i32 [[SUM_NEXT_8]] |
| ; |
| entry: |
| br label %while.body |
| |
| while.body: |
| %iv = phi i64 [ 10, %entry ], [ %iv.next, %while.body ] |
| %sum = phi i32 [ 0, %entry ], [ %sum.next, %while.body ] |
| %iv.next = add i64 %iv, -1 |
| %adr = getelementptr inbounds i32, i32* %base, i64 %iv.next |
| %tmp = load i32, i32* %adr, align 8 |
| %sum.next = add i32 %sum, %tmp |
| %iv.narrow = trunc i64 %iv.next to i32 |
| %cmp.i65 = icmp sgt i32 %iv.narrow, 0 |
| br i1 %cmp.i65, label %while.body, label %exit |
| |
| exit: |
| ret i32 %sum |
| } |
| |
| ; SCEV unrolling properly handles loops with multiple exits. In this |
| ; case, the computed trip count based on a canonical IV is *not* for a |
| ; latch block. |
| define i64 @earlyLoopTest(i64* %base) nounwind { |
| ; CHECK-LABEL: @earlyLoopTest( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br label [[LOOP:%.*]] |
| ; CHECK: loop: |
| ; CHECK-NEXT: [[VAL:%.*]] = load i64, i64* [[BASE:%.*]], align 4 |
| ; CHECK-NEXT: br label [[TAIL:%.*]] |
| ; CHECK: tail: |
| ; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i64 [[VAL]], 0 |
| ; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP_1:%.*]], label [[EXIT2:%.*]] |
| ; CHECK: loop.1: |
| ; CHECK-NEXT: [[ADR_1:%.*]] = getelementptr i64, i64* [[BASE]], i64 1 |
| ; CHECK-NEXT: [[VAL_1:%.*]] = load i64, i64* [[ADR_1]], align 4 |
| ; CHECK-NEXT: [[S_NEXT_1:%.*]] = add i64 [[VAL]], [[VAL_1]] |
| ; CHECK-NEXT: br label [[TAIL_1:%.*]] |
| ; CHECK: tail.1: |
| ; CHECK-NEXT: [[CMP2_1:%.*]] = icmp ne i64 [[VAL_1]], 0 |
| ; CHECK-NEXT: br i1 [[CMP2_1]], label [[LOOP_2:%.*]], label [[EXIT2]] |
| ; CHECK: loop.2: |
| ; CHECK-NEXT: [[ADR_2:%.*]] = getelementptr i64, i64* [[BASE]], i64 2 |
| ; CHECK-NEXT: [[VAL_2:%.*]] = load i64, i64* [[ADR_2]], align 4 |
| ; CHECK-NEXT: [[S_NEXT_2:%.*]] = add i64 [[S_NEXT_1]], [[VAL_2]] |
| ; CHECK-NEXT: br label [[TAIL_2:%.*]] |
| ; CHECK: tail.2: |
| ; CHECK-NEXT: [[CMP2_2:%.*]] = icmp ne i64 [[VAL_2]], 0 |
| ; CHECK-NEXT: br i1 [[CMP2_2]], label [[LOOP_3:%.*]], label [[EXIT2]] |
| ; CHECK: loop.3: |
| ; CHECK-NEXT: [[ADR_3:%.*]] = getelementptr i64, i64* [[BASE]], i64 3 |
| ; CHECK-NEXT: [[VAL_3:%.*]] = load i64, i64* [[ADR_3]], align 4 |
| ; CHECK-NEXT: [[S_NEXT_3:%.*]] = add i64 [[S_NEXT_2]], [[VAL_3]] |
| ; CHECK-NEXT: br i1 false, label [[TAIL_3:%.*]], label [[EXIT1:%.*]] |
| ; CHECK: tail.3: |
| ; CHECK-NEXT: br label [[EXIT2]] |
| ; CHECK: exit1: |
| ; CHECK-NEXT: [[S_LCSSA:%.*]] = phi i64 [ [[S_NEXT_2]], [[LOOP_3]] ] |
| ; CHECK-NEXT: ret i64 [[S_LCSSA]] |
| ; CHECK: exit2: |
| ; CHECK-NEXT: [[S_NEXT_LCSSA1:%.*]] = phi i64 [ [[VAL]], [[TAIL]] ], [ [[S_NEXT_1]], [[TAIL_1]] ], [ [[S_NEXT_2]], [[TAIL_2]] ], [ [[S_NEXT_3]], [[TAIL_3]] ] |
| ; CHECK-NEXT: ret i64 [[S_NEXT_LCSSA1]] |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %inc, %tail ] |
| %s = phi i64 [ 0, %entry ], [ %s.next, %tail ] |
| %adr = getelementptr i64, i64* %base, i64 %iv |
| %val = load i64, i64* %adr |
| %s.next = add i64 %s, %val |
| %inc = add i64 %iv, 1 |
| %cmp = icmp ne i64 %inc, 4 |
| br i1 %cmp, label %tail, label %exit1 |
| |
| tail: |
| %cmp2 = icmp ne i64 %val, 0 |
| br i1 %cmp2, label %loop, label %exit2 |
| |
| exit1: |
| ret i64 %s |
| |
| exit2: |
| ret i64 %s.next |
| } |
| |
| ; SCEV properly unrolls multi-exit loops. |
| define i32 @multiExit(i32* %base) nounwind { |
| ; CHECK-LABEL: @multiExit( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br label [[L1:%.*]] |
| ; CHECK: l1: |
| ; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[BASE:%.*]], align 4 |
| ; CHECK-NEXT: br i1 false, label [[L2:%.*]], label [[EXIT1:%.*]] |
| ; CHECK: l2: |
| ; CHECK-NEXT: ret i32 [[VAL]] |
| ; CHECK: exit1: |
| ; CHECK-NEXT: ret i32 1 |
| ; |
| entry: |
| br label %l1 |
| l1: |
| %iv1 = phi i32 [ 0, %entry ], [ %inc1, %l2 ] |
| %iv2 = phi i32 [ 0, %entry ], [ %inc2, %l2 ] |
| %inc1 = add i32 %iv1, 1 |
| %inc2 = add i32 %iv2, 1 |
| %adr = getelementptr i32, i32* %base, i32 %iv1 |
| %val = load i32, i32* %adr |
| %cmp1 = icmp slt i32 %iv1, 5 |
| br i1 %cmp1, label %l2, label %exit1 |
| l2: |
| %cmp2 = icmp slt i32 %iv2, 10 |
| br i1 %cmp2, label %l1, label %exit2 |
| exit1: |
| ret i32 1 |
| exit2: |
| ret i32 %val |
| } |
| |
| |
| ; SCEV can unroll a multi-exit loops even if the latch block has no |
| ; known trip count, but an early exit has a known trip count. In this |
| ; case we must be careful not to optimize the latch branch away. |
| define i32 @multiExitIncomplete(i32* %base) nounwind { |
| ; CHECK-LABEL: @multiExitIncomplete( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br label [[L1:%.*]] |
| ; CHECK: l1: |
| ; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[BASE:%.*]], align 4 |
| ; CHECK-NEXT: br label [[L2:%.*]] |
| ; CHECK: l2: |
| ; CHECK-NEXT: br label [[L3:%.*]] |
| ; CHECK: l3: |
| ; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i32 [[VAL]], 0 |
| ; CHECK-NEXT: br i1 [[CMP3]], label [[L1_1:%.*]], label [[EXIT3:%.*]] |
| ; CHECK: l1.1: |
| ; CHECK-NEXT: [[ADR_1:%.*]] = getelementptr i32, i32* [[BASE]], i32 1 |
| ; CHECK-NEXT: [[VAL_1:%.*]] = load i32, i32* [[ADR_1]], align 4 |
| ; CHECK-NEXT: br label [[L2_1:%.*]] |
| ; CHECK: l2.1: |
| ; CHECK-NEXT: br label [[L3_1:%.*]] |
| ; CHECK: l3.1: |
| ; CHECK-NEXT: [[CMP3_1:%.*]] = icmp ne i32 [[VAL_1]], 0 |
| ; CHECK-NEXT: br i1 [[CMP3_1]], label [[L1_2:%.*]], label [[EXIT3]] |
| ; CHECK: l1.2: |
| ; CHECK-NEXT: [[ADR_2:%.*]] = getelementptr i32, i32* [[BASE]], i32 2 |
| ; CHECK-NEXT: [[VAL_2:%.*]] = load i32, i32* [[ADR_2]], align 4 |
| ; CHECK-NEXT: br label [[L2_2:%.*]] |
| ; CHECK: l2.2: |
| ; CHECK-NEXT: br label [[L3_2:%.*]] |
| ; CHECK: l3.2: |
| ; CHECK-NEXT: [[CMP3_2:%.*]] = icmp ne i32 [[VAL_2]], 0 |
| ; CHECK-NEXT: br i1 [[CMP3_2]], label [[L1_3:%.*]], label [[EXIT3]] |
| ; CHECK: l1.3: |
| ; CHECK-NEXT: [[ADR_3:%.*]] = getelementptr i32, i32* [[BASE]], i32 3 |
| ; CHECK-NEXT: [[VAL_3:%.*]] = load i32, i32* [[ADR_3]], align 4 |
| ; CHECK-NEXT: br label [[L2_3:%.*]] |
| ; CHECK: l2.3: |
| ; CHECK-NEXT: br label [[L3_3:%.*]] |
| ; CHECK: l3.3: |
| ; CHECK-NEXT: [[CMP3_3:%.*]] = icmp ne i32 [[VAL_3]], 0 |
| ; CHECK-NEXT: br i1 [[CMP3_3]], label [[L1_4:%.*]], label [[EXIT3]] |
| ; CHECK: l1.4: |
| ; CHECK-NEXT: [[ADR_4:%.*]] = getelementptr i32, i32* [[BASE]], i32 4 |
| ; CHECK-NEXT: [[VAL_4:%.*]] = load i32, i32* [[ADR_4]], align 4 |
| ; CHECK-NEXT: br label [[L2_4:%.*]] |
| ; CHECK: l2.4: |
| ; CHECK-NEXT: br label [[L3_4:%.*]] |
| ; CHECK: l3.4: |
| ; CHECK-NEXT: [[CMP3_4:%.*]] = icmp ne i32 [[VAL_4]], 0 |
| ; CHECK-NEXT: br i1 [[CMP3_4]], label [[L1_5:%.*]], label [[EXIT3]] |
| ; CHECK: l1.5: |
| ; CHECK-NEXT: br i1 false, label [[L2_5:%.*]], label [[EXIT1:%.*]] |
| ; CHECK: l2.5: |
| ; CHECK-NEXT: br i1 true, label [[L3_5:%.*]], label [[EXIT2:%.*]] |
| ; CHECK: l3.5: |
| ; CHECK-NEXT: br label [[EXIT3]] |
| ; CHECK: exit1: |
| ; CHECK-NEXT: ret i32 1 |
| ; CHECK: exit2: |
| ; CHECK-NEXT: ret i32 2 |
| ; CHECK: exit3: |
| ; CHECK-NEXT: ret i32 3 |
| ; |
| entry: |
| br label %l1 |
| l1: |
| %iv1 = phi i32 [ 0, %entry ], [ %inc1, %l3 ] |
| %iv2 = phi i32 [ 0, %entry ], [ %inc2, %l3 ] |
| %inc1 = add i32 %iv1, 1 |
| %inc2 = add i32 %iv2, 1 |
| %adr = getelementptr i32, i32* %base, i32 %iv1 |
| %val = load i32, i32* %adr |
| %cmp1 = icmp slt i32 %iv1, 5 |
| br i1 %cmp1, label %l2, label %exit1 |
| l2: |
| %cmp2 = icmp slt i32 %iv2, 10 |
| br i1 %cmp2, label %l3, label %exit2 |
| l3: |
| %cmp3 = icmp ne i32 %val, 0 |
| br i1 %cmp3, label %l1, label %exit3 |
| |
| exit1: |
| ret i32 1 |
| exit2: |
| ret i32 2 |
| exit3: |
| ret i32 3 |
| } |
| |
| ; When loop unroll merges a loop exit with one of its parent loop's |
| ; exits, SCEV must forget its ExitNotTaken info. |
| define void @nestedUnroll() nounwind { |
| ; CHECK-LABEL: @nestedUnroll( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br label [[FOR_INC:%.*]] |
| ; CHECK: for.inc: |
| ; CHECK-NEXT: br label [[FOR_BODY38:%.*]] |
| ; CHECK: for.body38: |
| ; CHECK-NEXT: br label [[FOR_BODY43:%.*]] |
| ; CHECK: for.body43: |
| ; CHECK-NEXT: br label [[FOR_BODY87:%.*]] |
| ; CHECK: for.body87: |
| ; CHECK-NEXT: br label [[FOR_BODY87]] |
| ; |
| entry: |
| br label %for.inc |
| |
| for.inc: |
| br i1 false, label %for.inc, label %for.body38.preheader |
| |
| for.body38.preheader: |
| br label %for.body38 |
| |
| for.body38: |
| %i.113 = phi i32 [ %inc76, %for.inc74 ], [ 0, %for.body38.preheader ] |
| %mul48 = mul nsw i32 %i.113, 6 |
| br label %for.body43 |
| |
| for.body43: |
| %j.011 = phi i32 [ 0, %for.body38 ], [ %inc72, %for.body43 ] |
| %add49 = add nsw i32 %j.011, %mul48 |
| %sh_prom50 = zext i32 %add49 to i64 |
| %inc72 = add nsw i32 %j.011, 1 |
| br i1 false, label %for.body43, label %for.inc74 |
| |
| for.inc74: |
| %inc76 = add nsw i32 %i.113, 1 |
| br i1 false, label %for.body38, label %for.body87.preheader |
| |
| for.body87.preheader: |
| br label %for.body87 |
| |
| for.body87: |
| br label %for.body87 |
| } |
| |
| ; PR16130: clang produces incorrect code with loop/expression at -O2 |
| ; rdar:14036816 loop-unroll makes assumptions about undefined behavior |
| ; |
| ; The loop latch is assumed to exit after the first iteration because |
| ; of the induction variable's NSW flag. However, the loop latch's |
| ; equality test is skipped and the loop exits after the second |
| ; iteration via the early exit. So loop unrolling cannot assume that |
| ; the loop latch's exit count of zero is an upper bound on the number |
| ; of iterations. |
| define void @nsw_latch(i32* %a) nounwind { |
| ; CHECK-LABEL: @nsw_latch( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br label [[FOR_BODY:%.*]] |
| ; CHECK: for.body: |
| ; CHECK-NEXT: br label [[FOR_COND:%.*]] |
| ; CHECK: for.cond: |
| ; CHECK-NEXT: br i1 false, label [[RETURN:%.*]], label [[FOR_BODY_1:%.*]] |
| ; CHECK: for.body.1: |
| ; CHECK-NEXT: br i1 false, label [[FOR_COND_1:%.*]], label [[RETURN]] |
| ; CHECK: for.cond.1: |
| ; CHECK-NEXT: br label [[RETURN]] |
| ; CHECK: return: |
| ; CHECK-NEXT: [[B_03_LCSSA:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 8, [[FOR_BODY_1]] ], [ 0, [[FOR_COND_1]] ] |
| ; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY_1]] ], [ 0, [[FOR_COND_1]] ] |
| ; CHECK-NEXT: store i32 [[B_03_LCSSA]], i32* [[A:%.*]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| br label %for.body |
| |
| for.body: ; preds = %for.cond, %entry |
| %b.03 = phi i32 [ 0, %entry ], [ %add, %for.cond ] |
| %tobool = icmp eq i32 %b.03, 0 |
| %add = add nsw i32 %b.03, 8 |
| br i1 %tobool, label %for.cond, label %return |
| |
| for.cond: ; preds = %for.body |
| %cmp = icmp eq i32 %add, 13 |
| br i1 %cmp, label %return, label %for.body |
| |
| return: ; preds = %for.body, %for.cond |
| %b.03.lcssa = phi i32 [ %b.03, %for.body ], [ %b.03, %for.cond ] |
| %retval.0 = phi i32 [ 1, %for.body ], [ 0, %for.cond ] |
| store i32 %b.03.lcssa, i32* %a, align 4 |
| ret void |
| } |