; blob: ec12ff2878743b918ed6dafa48ceba931f3f512a [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=loop-interchange -loop-interchange-profitabilities=ignore -S | FileCheck %s
; The header of the middle loop %for.j guards the inner loop %for.k: %for.k is
; entered only when %i (the outer loop's IV) is non-zero. %k.next starts at 1,
; so the exit test %k.next == %i cannot succeed for %i == 0 without %k.next
; wrapping around; %for.k is only meant to run under that guard.
; Interchanging %for.j and %for.k would execute %for.k even when %i == 0, where
; it would spin, so the pass must not interchange this nest.
;
; Pseudo code:
;   for (i = 0; i < 3; i++)
;     for (j = 0; j < 2; j++)
;       if (i != 0)                  // guard on the outer IV
;         for (k = 0; ; k++) {       // terminates only when i != 0
;           y[j][i][k] = x[i][k][j] + w[i][k][j];
;           if (k + 1 == i) break;
;         }
; Zero-initialized 3x3x3 i32 arrays used by @main: @x and @w are only loaded
; from, @y is only stored to.
@x = global [3 x [3 x [3 x i32]]] zeroinitializer
@w = global [3 x [3 x [3 x i32]]] zeroinitializer
@y = global [3 x [3 x [3 x i32]]] zeroinitializer
; Triply nested loop (%for.i > %for.j > %for.k) whose innermost loop is guarded
; by a test on the outermost induction variable. Always returns 0.
define i32 @main() {
; CHECK-LABEL: define i32 @main() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[FOR_I_PREHEADER:.*]]
; CHECK: [[FOR_I_PREHEADER]]:
; CHECK-NEXT: br label %[[FOR_I:.*]]
; CHECK: [[FOR_I]]:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], %[[FOR_I_INC:.*]] ], [ 0, %[[FOR_I_PREHEADER]] ]
; CHECK-NEXT: [[I_IS_ZERO:%.*]] = icmp eq i32 [[I]], 0
; CHECK-NEXT: [[XBASE:%.*]] = getelementptr [9 x i32], ptr @x, i32 [[I]]
; CHECK-NEXT: [[WBASE:%.*]] = getelementptr [9 x i32], ptr @w, i32 [[I]]
; CHECK-NEXT: [[YBASE:%.*]] = getelementptr [3 x i32], ptr @y, i32 [[I]]
; CHECK-NEXT: br label %[[FOR_J_PREHEADER:.*]]
; CHECK: [[FOR_J_PREHEADER]]:
; CHECK-NEXT: br label %[[FOR_J:.*]]
; CHECK: [[FOR_J]]:
; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[TMP0:%.*]], %[[FOR_J_INC_SPLIT:.*]] ], [ 0, %[[FOR_J_PREHEADER]] ]
; CHECK-NEXT: br i1 [[I_IS_ZERO]], label %[[FOR_J_INC:.*]], label %[[FOR_K_PH:.*]]
; CHECK: [[FOR_K_PH]]:
; CHECK-NEXT: [[XP:%.*]] = getelementptr i32, ptr [[XBASE]], i32 [[J]]
; CHECK-NEXT: [[WP:%.*]] = getelementptr i32, ptr [[WBASE]], i32 [[J]]
; CHECK-NEXT: [[YP:%.*]] = getelementptr [9 x i32], ptr [[YBASE]], i32 [[J]]
; CHECK-NEXT: br label %[[FOR_K:.*]]
; CHECK: [[FOR_K]]:
; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, %[[FOR_K_PH]] ], [ [[K_NEXT:%.*]], %[[FOR_K]] ]
; CHECK-NEXT: [[XK:%.*]] = getelementptr [3 x i32], ptr [[XP]], i32 [[K]]
; CHECK-NEXT: [[XV:%.*]] = load i32, ptr [[XK]], align 4
; CHECK-NEXT: [[WK:%.*]] = getelementptr [3 x i32], ptr [[WP]], i32 [[K]]
; CHECK-NEXT: [[WV:%.*]] = load i32, ptr [[WK]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[XV]], [[WV]]
; CHECK-NEXT: [[YK:%.*]] = getelementptr i32, ptr [[YP]], i32 [[K]]
; CHECK-NEXT: store i32 [[ADD]], ptr [[YK]], align 4
; CHECK-NEXT: [[K_NEXT]] = add i32 [[K]], 1
; CHECK-NEXT: [[K_DONE:%.*]] = icmp eq i32 [[K_NEXT]], [[I]]
; CHECK-NEXT: br i1 [[K_DONE]], label %[[FOR_J_INC_LOOPEXIT:.*]], label %[[FOR_K]]
; CHECK: [[FOR_J_INC_LOOPEXIT]]:
; CHECK-NEXT: br label %[[FOR_J_INC]]
; CHECK: [[FOR_J_INC]]:
; CHECK-NEXT: [[J_NEXT:%.*]] = add i32 [[J]], 1
; CHECK-NEXT: [[J_CMP:%.*]] = icmp eq i32 [[J]], 0
; CHECK-NEXT: br label %[[FOR_J_INC_SPLIT]]
; CHECK: [[FOR_J_INC_SPLIT]]:
; CHECK-NEXT: [[TMP0]] = add i32 [[J]], 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[J]], 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_J]], label %[[FOR_I_INC]]
; CHECK: [[FOR_I_INC]]:
; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
; CHECK-NEXT: [[I_DONE:%.*]] = icmp eq i32 [[I_NEXT]], 3
; CHECK-NEXT: br i1 [[I_DONE]], label %[[EXIT:.*]], label %[[FOR_I]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i32 0
;
; NOTE(review): the expected output above is not identical to the input below.
; It contains inserted preheader blocks and a split %for.j.inc latch whose
; increment and compare appear twice. Confirm that this is the intended result
; for a nest that is described as not interchangeable.
entry:
br label %for.i
; Outer loop: %i takes the values 0, 1, 2. Everything that depends only on %i
; is computed here, once per outer iteration.
for.i:
%i = phi i32 [ %i.next, %for.i.inc ], [ 0, %entry ]
; Guard condition for the inner loop; tested in %for.j.
%i.is.zero = icmp eq i32 %i, 0
; %xbase / %wbase advance by 9 i32 elements per %i, i.e. they select x[i] / w[i].
%xbase = getelementptr [9 x i32], ptr @x, i32 %i
%wbase = getelementptr [9 x i32], ptr @w, i32 %i
; %ybase advances by 3 i32 elements per %i: %i is the middle subscript of @y.
%ybase = getelementptr [3 x i32], ptr @y, i32 %i
br label %for.j
; Middle loop: %j takes the values 0 and 1. Its header skips the inner loop
; entirely when %i == 0.
for.j:
%j = phi i32 [ %j.next, %for.j.inc ], [ 0, %for.i ]
br i1 %i.is.zero, label %for.j.inc, label %for.k.ph
; Inner-loop preheader, reached only when %i != 0. Folds %j into the addresses.
for.k.ph:
; %j is the last subscript of @x and @w (stride of 1 element) ...
%xp = getelementptr i32, ptr %xbase, i32 %j
%wp = getelementptr i32, ptr %wbase, i32 %j
; ... and the first subscript of @y (stride of 9 elements).
%yp = getelementptr [9 x i32], ptr %ybase, i32 %j
br label %for.k
; Inner loop: single-block loop computing y[j][i][k] = x[i][k][j] + w[i][k][j].
; The exit test runs after the body and compares the incremented counter with
; %i, so the body executes for k = 0 .. i-1.
for.k:
%k = phi i32 [ 0, %for.k.ph ], [ %k.next, %for.k ]
; %k is the middle subscript of @x and @w (stride of 3 elements).
%xk = getelementptr [3 x i32], ptr %xp, i32 %k
%xv = load i32, ptr %xk, align 4
%wk = getelementptr [3 x i32], ptr %wp, i32 %k
%wv = load i32, ptr %wk, align 4
%add = add i32 %xv, %wv
; %k is the last subscript of @y (stride of 1 element).
%yk = getelementptr i32, ptr %yp, i32 %k
store i32 %add, ptr %yk, align 4
%k.next = add i32 %k, 1
; The loop bound is the outer IV %i, not a constant.
%k.done = icmp eq i32 %k.next, %i
br i1 %k.done, label %for.j.inc, label %for.k
; Middle-loop latch. The compare uses the pre-increment %j, so the back edge is
; taken only for %j == 0, giving two iterations in total.
for.j.inc:
%j.next = add i32 %j, 1
%j.cmp = icmp eq i32 %j, 0
br i1 %j.cmp, label %for.j, label %for.i.inc
; Outer-loop latch: leaves the nest once %i + 1 reaches 3.
for.i.inc:
%i.next = add i32 %i, 1
%i.done = icmp eq i32 %i.next, 3
br i1 %i.done, label %exit, label %for.i
exit:
ret i32 0
}