| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: opt < %s -passes=loop-interchange -loop-interchange-profitabilities=ignore -S | FileCheck %s |
| |
| ; The header of the middle loop %for.j guards the inner loop %for.k: %for.k is |
| ; entered only when %i != 0 (%i is the outer loop's IV), and its exit test |
| ; %k.next == %i can only be met without wrap-around under that guard. If |
| ; %for.j and %for.k were interchanged, %for.k would run unconditionally; for |
| ; %i == 0, %k.next starts at 1 and could equal %i only after i32 wrap-around, |
| ; indexing far outside the 3x3x3 arrays. The pass must not interchange this nest. |
| ; |
| ; Pseudo code: |
| ;   for (i = 0; i < 3; i++) |
| ;     for (j = 0; j < 2; j++) |
| ;       if (i != 0)                // guard on the outer IV |
| ;         for (k = 0; ; k++) {     // terminates only when i != 0 |
| ;           y[j][i][k] = x[i][k][j] + w[i][k][j]; |
| ;           if (k + 1 == i) break; |
| ;         } |
| |
| ; 3x3x3 i32 arrays, all zero-initialized: @main loads from @x and @w and stores to @y. |
| @x = global [3 x [3 x [3 x i32]]] zeroinitializer |
| @w = global [3 x [3 x [3 x i32]]] zeroinitializer |
| @y = global [3 x [3 x [3 x i32]]] zeroinitializer |
| |
| ; Three-deep loop nest (%for.i > %for.j > %for.k); %for.k is entered only when %i != 0. |
| define i32 @main() { |
| ; CHECK-LABEL: define i32 @main() { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br label %[[FOR_I_PREHEADER:.*]] |
| ; CHECK: [[FOR_I_PREHEADER]]: |
| ; CHECK-NEXT: br label %[[FOR_I:.*]] |
| ; CHECK: [[FOR_I]]: |
| ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], %[[FOR_I_INC:.*]] ], [ 0, %[[FOR_I_PREHEADER]] ] |
| ; CHECK-NEXT: [[I_IS_ZERO:%.*]] = icmp eq i32 [[I]], 0 |
| ; CHECK-NEXT: [[XBASE:%.*]] = getelementptr [9 x i32], ptr @x, i32 [[I]] |
| ; CHECK-NEXT: [[WBASE:%.*]] = getelementptr [9 x i32], ptr @w, i32 [[I]] |
| ; CHECK-NEXT: [[YBASE:%.*]] = getelementptr [3 x i32], ptr @y, i32 [[I]] |
| ; CHECK-NEXT: br label %[[FOR_J_PREHEADER:.*]] |
| ; CHECK: [[FOR_J_PREHEADER]]: |
| ; CHECK-NEXT: br label %[[FOR_J:.*]] |
| ; CHECK: [[FOR_J]]: |
| ; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[TMP0:%.*]], %[[FOR_J_INC_SPLIT:.*]] ], [ 0, %[[FOR_J_PREHEADER]] ] |
| ; CHECK-NEXT: br i1 [[I_IS_ZERO]], label %[[FOR_J_INC:.*]], label %[[FOR_K_PH:.*]] |
| ; CHECK: [[FOR_K_PH]]: |
| ; CHECK-NEXT: [[XP:%.*]] = getelementptr i32, ptr [[XBASE]], i32 [[J]] |
| ; CHECK-NEXT: [[WP:%.*]] = getelementptr i32, ptr [[WBASE]], i32 [[J]] |
| ; CHECK-NEXT: [[YP:%.*]] = getelementptr [9 x i32], ptr [[YBASE]], i32 [[J]] |
| ; CHECK-NEXT: br label %[[FOR_K:.*]] |
| ; CHECK: [[FOR_K]]: |
| ; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, %[[FOR_K_PH]] ], [ [[K_NEXT:%.*]], %[[FOR_K]] ] |
| ; CHECK-NEXT: [[XK:%.*]] = getelementptr [3 x i32], ptr [[XP]], i32 [[K]] |
| ; CHECK-NEXT: [[XV:%.*]] = load i32, ptr [[XK]], align 4 |
| ; CHECK-NEXT: [[WK:%.*]] = getelementptr [3 x i32], ptr [[WP]], i32 [[K]] |
| ; CHECK-NEXT: [[WV:%.*]] = load i32, ptr [[WK]], align 4 |
| ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[XV]], [[WV]] |
| ; CHECK-NEXT: [[YK:%.*]] = getelementptr i32, ptr [[YP]], i32 [[K]] |
| ; CHECK-NEXT: store i32 [[ADD]], ptr [[YK]], align 4 |
| ; CHECK-NEXT: [[K_NEXT]] = add i32 [[K]], 1 |
| ; CHECK-NEXT: [[K_DONE:%.*]] = icmp eq i32 [[K_NEXT]], [[I]] |
| ; CHECK-NEXT: br i1 [[K_DONE]], label %[[FOR_J_INC_LOOPEXIT:.*]], label %[[FOR_K]] |
| ; CHECK: [[FOR_J_INC_LOOPEXIT]]: |
| ; CHECK-NEXT: br label %[[FOR_J_INC]] |
| ; CHECK: [[FOR_J_INC]]: |
| ; CHECK-NEXT: [[J_NEXT:%.*]] = add i32 [[J]], 1 |
| ; CHECK-NEXT: [[J_CMP:%.*]] = icmp eq i32 [[J]], 0 |
| ; CHECK-NEXT: br label %[[FOR_J_INC_SPLIT]] |
| ; CHECK: [[FOR_J_INC_SPLIT]]: |
| ; CHECK-NEXT: [[TMP0]] = add i32 [[J]], 1 |
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[J]], 0 |
| ; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_J]], label %[[FOR_I_INC]] |
| ; CHECK: [[FOR_I_INC]]: |
| ; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1 |
| ; CHECK-NEXT: [[I_DONE:%.*]] = icmp eq i32 [[I_NEXT]], 3 |
| ; CHECK-NEXT: br i1 [[I_DONE]], label %[[EXIT:.*]], label %[[FOR_I]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| entry: |
| br label %for.i |
| |
| ; Outer loop header: %i takes the values 0, 1, 2 (latch %for.i.inc exits when %i.next == 3). |
| ; %i.is.zero is the guard tested in %for.j. The bases advance by 9 i32 per %i in |
| ; @x/@w (their first dimension) and by 3 i32 per %i in @y (its second dimension). |
| for.i: |
| %i = phi i32 [ %i.next, %for.i.inc ], [ 0, %entry ] |
| %i.is.zero = icmp eq i32 %i, 0 |
| %xbase = getelementptr [9 x i32], ptr @x, i32 %i |
| %wbase = getelementptr [9 x i32], ptr @w, i32 %i |
| %ybase = getelementptr [3 x i32], ptr @y, i32 %i |
| br label %for.j |
| |
| ; Middle loop header: when %i == 0, branch straight to the latch and skip %for.k. |
| for.j: |
| %j = phi i32 [ %j.next, %for.j.inc ], [ 0, %for.i ] |
| br i1 %i.is.zero, label %for.j.inc, label %for.k.ph |
| |
| ; Inner loop preheader: %j indexes the last dimension of @x/@w (stride 1 i32) |
| ; and the first dimension of @y (stride 9 i32). |
| for.k.ph: |
| %xp = getelementptr i32, ptr %xbase, i32 %j |
| %wp = getelementptr i32, ptr %wbase, i32 %j |
| %yp = getelementptr [9 x i32], ptr %ybase, i32 %j |
| br label %for.k |
| |
| ; Inner loop body: y[j][i][k] = x[i][k][j] + w[i][k][j]. %k strides 3 i32 in |
| ; @x/@w and 1 i32 in @y. The loop exits on the equality %k.next == %i; the |
| ; increment carries no nsw/nuw flags. |
| for.k: |
| %k = phi i32 [ 0, %for.k.ph ], [ %k.next, %for.k ] |
| %xk = getelementptr [3 x i32], ptr %xp, i32 %k |
| %xv = load i32, ptr %xk, align 4 |
| %wk = getelementptr [3 x i32], ptr %wp, i32 %k |
| %wv = load i32, ptr %wk, align 4 |
| %add = add i32 %xv, %wv |
| %yk = getelementptr i32, ptr %yp, i32 %k |
| store i32 %add, ptr %yk, align 4 |
| %k.next = add i32 %k, 1 |
| %k.done = icmp eq i32 %k.next, %i |
| br i1 %k.done, label %for.j.inc, label %for.k |
| |
| ; Middle loop latch: the test uses %j (not %j.next) and loops back while |
| ; %j == 0, so %j takes the values 0 and 1. |
| for.j.inc: |
| %j.next = add i32 %j, 1 |
| %j.cmp = icmp eq i32 %j, 0 |
| br i1 %j.cmp, label %for.j, label %for.i.inc |
| |
| ; Outer loop latch: leave the nest once %i.next reaches 3. |
| for.i.inc: |
| %i.next = add i32 %i, 1 |
| %i.done = icmp eq i32 %i.next, 3 |
| br i1 %i.done, label %exit, label %for.i |
| |
| exit: |
| ret i32 0 |
| } |