| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S |
| ; RUN: FileCheck --input-file=%t %s |
| |
| @b = external dso_local global [5 x i32], align 16 |
| |
| ;; Not profitable to interchange, because the access to b[i] is invariant with respect to the inner j loop. |
| ;; |
| ;; for(int i=0;i<4;i++) { |
| ;; for(int j=1;j<4;j++) { |
| ;; b[i] = .... |
| ;; } |
| ;; } |
| |
| ; CHECK: --- !Missed |
| ; CHECK-NEXT: Pass: loop-interchange |
| ; CHECK-NEXT: Name: InterchangeNotProfitable |
| ; CHECK-NEXT: Function: test1 |
| ; CHECK-NEXT: Args: |
| ; CHECK-NEXT: - String: Interchanging loops is not considered to improve cache locality nor vectorization. |
| |
| ;; test1: a two-deep loop nest whose only memory access, b[%i], is indexed |
| ;; by the outer induction variable alone. The FileCheck directives above |
| ;; expect an InterchangeNotProfitable missed remark for this function. |
| define void @test1() { |
| entry: |
|   br label %outer.header |
| |
| outer.header: ; preds = %outer.latch, %entry |
|   ;; Outer induction variable; its incoming value from %entry is undef. |
|   %i = phi i32 [ %i.next, %outer.latch ], [ undef, %entry ] |
|   br label %inner.header |
| |
| inner.header: ; preds = %inner.latch, %outer.header |
|   ;; Inner induction variable, starting at 1. |
|   %j = phi i32 [ %j.next, %inner.latch ], [ 1, %outer.header ] |
|   br label %inner.latch |
| |
| inner.latch: ; preds = %inner.header |
|   ;; The address of b[%i] is computed from %i only; %j is not involved. |
|   %i.ext = sext i32 %i to i64 |
|   %b.i.addr = getelementptr inbounds [5 x i32], ptr @b, i64 0, i64 %i.ext |
|   ;; The loaded value has no users in this function. |
|   %b.i.val = load i32, ptr %b.i.addr, align 4 |
|   store i32 undef, ptr %b.i.addr, align 4 |
|   ;; The inner back-edge test uses the pre-increment value of %j. |
|   %j.lt.4 = icmp slt i32 %j, 4 |
|   %j.next = add nuw nsw i32 %j, 1 |
|   br i1 %j.lt.4, label %inner.header, label %inner.exit |
| |
| inner.exit: ; preds = %inner.latch |
|   br label %outer.latch |
| |
| outer.latch: ; preds = %inner.exit |
|   %i.next = add nsw i32 %i, 1 |
|   ;; The outer back-edge condition is the constant false. |
|   br i1 false, label %outer.header, label %exit |
| |
| exit: ; preds = %outer.latch |
|   ret void |
| } |
| |
| |
| ; CHECK: --- !Missed |
| ; CHECK-NEXT: Pass: loop-interchange |
| ; CHECK-NEXT: Name: InterchangeNotProfitable |
| ; CHECK-NEXT: Function: test2 |
| ; CHECK-NEXT: Args: |
| ; CHECK-NEXT: - String: Interchanging loops is not considered to improve cache locality nor vectorization. |
| |
| ;; test2: same loop-nest shape as @test1, except that the value stored to |
| ;; b[%i] is the zero-extended inner back-edge condition, so the compare |
| ;; feeds both the store and the inner latch branch. The FileCheck directives |
| ;; above expect an InterchangeNotProfitable missed remark for this function. |
| define void @test2() { |
| entry: |
|   br label %outer.header |
| |
| outer.header: ; preds = %outer.latch, %entry |
|   ;; Outer induction variable; its incoming value from %entry is undef. |
|   %i = phi i32 [ %i.next, %outer.latch ], [ undef, %entry ] |
|   br label %inner.header |
| |
| inner.header: ; preds = %inner.latch, %outer.header |
|   ;; Inner induction variable, starting at 1. |
|   %j = phi i32 [ %j.next, %inner.latch ], [ 1, %outer.header ] |
|   br label %inner.latch |
| |
| inner.latch: ; preds = %inner.header |
|   ;; The address of b[%i] is computed from %i only; %j is not involved. |
|   %i.ext = sext i32 %i to i64 |
|   %b.i.addr = getelementptr inbounds [5 x i32], ptr @b, i64 0, i64 %i.ext |
|   ;; The loaded value has no users in this function. |
|   %b.i.val = load i32, ptr %b.i.addr, align 4 |
|   ;; The compare result is both stored (after zext) and used by the branch. |
|   %j.lt.4 = icmp slt i32 %j, 4 |
|   %j.lt.4.ext = zext i1 %j.lt.4 to i32 |
|   store i32 %j.lt.4.ext, ptr %b.i.addr, align 4 |
|   %j.next = add nuw nsw i32 %j, 1 |
|   br i1 %j.lt.4, label %inner.header, label %inner.exit |
| |
| inner.exit: ; preds = %inner.latch |
|   br label %outer.latch |
| |
| outer.latch: ; preds = %inner.exit |
|   %i.next = add nsw i32 %i, 1 |
|   ;; The outer back-edge condition is the constant false. |
|   br i1 false, label %outer.header, label %exit |
| |
| exit: ; preds = %outer.latch |
|   ret void |
| } |