| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 |
| ; NOTE: Verifies that loop-interchange handles a loop nest whose inner loop carries a reduction (enabled by -loop-interchange-reduction-to-mem). |
| ; RUN: opt < %s -passes="loop-interchange" -loop-interchange-reduction-to-mem -loop-interchange-profitabilities=ignore -S | FileCheck %s |
| |
| ; for (int i = 0; i < n; i++) { |
| ; r = 0; |
| ; for (int j = 0; j < n; j++) |
| ; r = r + a[j][i] * b[j][i]; |
| ; s[i] = r; |
| ; } |
| |
| ; @func: a two-level loop nest in which both loops run from 0 until their |
| ; incremented index equals %n. For each outer index i, the inner loop sums |
| ; products of doubles loaded from %a and %b, and the sum is stored to s[i]. |
| ; The expected output below routes that sum through memory: the s[i] address |
| ; is loaded and stored on every iteration, and a first-iteration flag selects |
| ; 0.0 in place of the loaded value. |
| define void @func(ptr noalias readonly %a, ptr noalias readonly %b, ptr noalias writeonly %s, i64 %n) { |
| ; CHECK-LABEL: define void @func( |
| ; CHECK-SAME: ptr noalias readonly [[A:%.*]], ptr noalias readonly [[B:%.*]], ptr noalias writeonly [[S:%.*]], i64 [[N:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[N]], 0 |
| ; CHECK-NEXT: br i1 [[CMP]], label %[[INNERLOOP_PREHEADER:.*]], label %[[EXIT:.*]] |
| ; CHECK: [[OUTERLOOPHEADER_PREHEADER:.*]]: |
| ; CHECK-NEXT: br label %[[OUTERLOOP_HEADER:.*]] |
| ; CHECK: [[OUTERLOOP_HEADER]]: |
| ; CHECK-NEXT: [[INDEX_I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[OUTERLOOP_LATCH:.*]] ], [ 0, %[[OUTERLOOPHEADER_PREHEADER]] ] |
| ; CHECK-NEXT: [[ADDR_S:%.*]] = getelementptr inbounds nuw double, ptr [[S]], i64 [[INDEX_I]] |
| ; CHECK-NEXT: [[ADDR_A:%.*]] = getelementptr inbounds nuw double, ptr [[A]], i64 [[INDEX_I]] |
| ; CHECK-NEXT: [[ADDR_B:%.*]] = getelementptr inbounds nuw double, ptr [[B]], i64 [[INDEX_I]] |
| ; CHECK-NEXT: br label %[[INNERLOOP_SPLIT1:.*]] |
| ; CHECK: [[INNERLOOP_PREHEADER]]: |
| ; CHECK-NEXT: br label %[[INNERLOOP:.*]] |
| ; CHECK: [[INNERLOOP]]: |
| ; CHECK-NEXT: [[INDEX_J:%.*]] = phi i64 [ [[J_NEXT:%.*]], %[[INNERLOOP_SPLIT:.*]] ], [ 0, %[[INNERLOOP_PREHEADER]] ] |
| ; CHECK-NEXT: [[DEAD_REDUCTION:%.*]] = phi double [ [[ADD_LCSSA:%.*]], %[[INNERLOOP_SPLIT]] ], [ 0.000000e+00, %[[INNERLOOP_PREHEADER]] ] |
| ; CHECK-NEXT: [[FIRSTITER:%.*]] = phi i1 [ false, %[[INNERLOOP_SPLIT]] ], [ true, %[[INNERLOOP_PREHEADER]] ] |
| ; CHECK-NEXT: br label %[[OUTERLOOPHEADER_PREHEADER]] |
| ; CHECK: [[INNERLOOP_SPLIT1]]: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ADDR_S]], align 8 |
| ; CHECK-NEXT: [[NEW_VAR:%.*]] = select i1 [[FIRSTITER]], double 0.000000e+00, double [[TMP0]] |
| ; CHECK-NEXT: [[ADDR_A_J_I:%.*]] = getelementptr inbounds nuw double, ptr [[ADDR_A]], i64 [[INDEX_J]] |
| ; CHECK-NEXT: [[A_J_I:%.*]] = load double, ptr [[ADDR_A_J_I]], align 8 |
| ; CHECK-NEXT: [[ADDR_B_J_I:%.*]] = getelementptr inbounds nuw double, ptr [[ADDR_B]], i64 [[INDEX_J]] |
| ; CHECK-NEXT: [[B_J_I:%.*]] = load double, ptr [[ADDR_B_J_I]], align 8 |
| ; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[B_J_I]], [[A_J_I]] |
| ; CHECK-NEXT: [[ADD:%.*]] = fadd fast double [[MUL]], [[NEW_VAR]] |
| ; CHECK-NEXT: store double [[ADD]], ptr [[ADDR_S]], align 8 |
| ; CHECK-NEXT: [[DEAD_J_NEXT:%.*]] = add nuw nsw i64 [[INDEX_J]], 1 |
| ; CHECK-NEXT: [[DEAD_COND:%.*]] = icmp eq i64 [[DEAD_J_NEXT]], [[N]] |
| ; CHECK-NEXT: br label %[[OUTERLOOP_LATCH]] |
| ; CHECK: [[INNERLOOP_SPLIT]]: |
| ; CHECK-NEXT: [[ADD_LCSSA]] = phi double [ [[ADD]], %[[OUTERLOOP_LATCH]] ] |
| ; CHECK-NEXT: [[DEAD_LCSSA:%.*]] = phi double [ [[ADD]], %[[OUTERLOOP_LATCH]] ] |
| ; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i64 [[INDEX_J]], 1 |
| ; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i64 [[J_NEXT]], [[N]] |
| ; CHECK-NEXT: br i1 [[CMP1]], label %[[EXIT_LOOPEXIT:.*]], label %[[INNERLOOP]] |
| ; CHECK: [[OUTERLOOP_LATCH]]: |
| ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[INDEX_I]], 1 |
| ; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] |
| ; CHECK-NEXT: br i1 [[CMP2]], label %[[INNERLOOP_SPLIT]], label %[[OUTERLOOP_HEADER]] |
| ; CHECK: [[EXIT_LOOPEXIT]]: |
| ; CHECK-NEXT: br label %[[EXIT]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: ret void |
| ; |
| ; Guard: the loop nest is entered only when %n is greater than 0 (signed). |
| entry: |
| %cmp = icmp sgt i64 %n, 0 |
| br i1 %cmp, label %outerloop_header, label %exit |
| |
| ; Outer loop over i: computes the address of s[i] and the i-offset base |
| ; pointers into %a and %b that the inner loop indexes by j. |
| outerloop_header: |
| %index_i = phi i64 [ 0, %entry ], [ %index_i.next, %outerloop_latch ] |
| %addr_s = getelementptr inbounds nuw double, ptr %s, i64 %index_i |
| %invariant.gep.us = getelementptr inbounds nuw double, ptr %a, i64 %index_i |
| %invariant.gep32.us = getelementptr inbounds nuw double, ptr %b, i64 %index_i |
| br label %innerloop |
| |
| ; Inner loop over j: %reduction restarts at 0.0 each time the loop is entered |
| ; from the outer header and accumulates the fast-math product of the two |
| ; loaded elements. The loop exits once the incremented j equals %n. |
| innerloop: |
| %index_j = phi i64 [ 0, %outerloop_header ], [ %index_j.next, %innerloop ] |
| %reduction = phi double [ 0.000000e+00, %outerloop_header ], [ %add, %innerloop ] |
| %addr_a_j_i = getelementptr inbounds nuw double, ptr %invariant.gep.us, i64 %index_j |
| %0 = load double, ptr %addr_a_j_i, align 8 |
| %addr_b_j_i = getelementptr inbounds nuw double, ptr %invariant.gep32.us, i64 %index_j |
| %1 = load double, ptr %addr_b_j_i, align 8 |
| %mul = fmul fast double %1, %0 |
| %add = fadd fast double %mul, %reduction |
| %index_j.next = add nuw nsw i64 %index_j, 1 |
| %cond1 = icmp eq i64 %index_j.next, %n |
| br i1 %cond1, label %outerloop_latch, label %innerloop |
| |
| ; Outer latch: %lcssa carries the final sum out of the inner loop. It is stored |
| ; to s[i] before i is advanced; the nest exits once the incremented i equals %n. |
| outerloop_latch: |
| %lcssa = phi double [ %add, %innerloop ] |
| store double %lcssa, ptr %addr_s, align 8 |
| %index_i.next = add nuw nsw i64 %index_i, 1 |
| %cond2 = icmp eq i64 %index_i.next, %n |
| br i1 %cond2, label %exit, label %outerloop_header |
| |
| exit: |
| ret void |
| } |