| ; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -pass-remarks='loop-interchange' -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -disable-output -S |
| ; RUN: FileCheck --input-file=%t %s |
| |
| target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" |
| |
| ; This is a reduced test case for the example in "large-nested-6d.ll". For a |
| ; full description of the purpose this test and its complexities, see that file. |
| ; |
| ; This reproducer contains the perfectly nested sub part of that bigger loop |
| ; nest: |
| ; |
| ; for i=1 to NX |
| ; for j=1 to NY |
| ; for IL=1 to NX |
| ; load GlobC(i,IL,L) |
| ; load GlobG(i,IL,L) |
| ; load GlobE(i,IL,L) |
| ; load GlobI(i,IL,L) |
| ; for JL=1 to NY |
| ; load GlobD(j,JL,M) |
| ; load GlobH(j,JL,M) |
| ; load GlobF(j,JL,M) |
| ; load GlobJ(j,JL,M) |
| ; store GlobL(NY*i+j,NY*IL+JL) |
| ; End |
| ; End |
| ; End |
| ; End |
| ; |
| ; This reproducer is useful to focus on only on the 2nd challenge: the data |
| ; dependence analysis problem, and not worry about the rest of loop nest |
| ; structure. |
| ; |
| ; TODO: |
| ; |
| ; If loop-interchange is able to deal with imperfectly nested loops, this |
| ; test is redundant and we only need to keep "large-nested-6d.ll". |
| ; |
| ; CHECK: --- !Analysis |
| ; CHECK-NEXT: Pass: loop-interchange |
| ; CHECK-NEXT: Name: Dependence |
| ; CHECK-NEXT: Function: test |
| ; CHECK-NEXT: Args: |
| ; CHECK-NEXT: - String: Computed dependence info, invoking the transform. |
| ; CHECK-NEXT: ... |
| ; CHECK-NEXT: --- !Missed |
| ; CHECK-NEXT: Pass: loop-interchange |
| ; CHECK-NEXT: Name: Dependence |
| ; CHECK-NEXT: Function: test |
| ; CHECK-NEXT: Args: |
| ; CHECK-NEXT: - String: All loops have dependencies in all directions. |
| ; CHECK-NEXT: ... |
| |
| @GlobC = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer |
| @GlobD = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer |
| @GlobE = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer |
| @GlobF = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer |
| @GlobG = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer |
| @GlobH = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer |
| @GlobI = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer |
| @GlobJ = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer |
| @GlobL = local_unnamed_addr global [1000 x [1000 x double]] zeroinitializer |
| |
| define void @test(ptr noalias readonly captures(none) %0, ptr noalias readonly captures(none) %1, ptr noalias captures(none) %2, ptr noalias captures(none) %3, ptr noalias readonly captures(none) %4, ptr noalias readonly captures(none) %5, ptr noalias readonly captures(none) %6, ptr noalias readonly captures(none) %7, ptr noalias readonly captures(none) %8, ptr noalias readonly captures(none) %9) { |
| entry: |
| %17 = load i32, ptr %7, align 4 |
| %18 = sext i32 %17 to i64 |
| %20 = load i32, ptr %8, align 4 |
| %21 = sext i32 %20 to i64 |
| %cmp1 = icmp sgt i32 %17, 0 |
| %cmp2 = icmp sgt i32 %20, 0 |
| %cond = and i1 %cmp1, %cmp2 |
| br i1 %cond, label %preheader, label %exit |
| |
| preheader: |
| br label %i.header |
| |
| i.header: |
| %i = phi i64 [ %i.next, %i.latch ], [ 1, %preheader ] |
| %92 = add nsw i64 -55, %i |
| %93 = add nsw i64 %i, -1 |
| %94 = mul nsw i64 %93, %21 |
| %invariant.gep = getelementptr double, ptr @GlobL, i64 %94 |
| br label %j.header |
| |
| j.header: |
| %j = phi i64 [ %j.next, %j.latch ], [ 1, %i.header ] |
| %95 = add nsw i64 -55, %j |
| %gep358 = getelementptr double, ptr %invariant.gep, i64 %j |
| br label %IL.header |
| |
| IL.header: |
| %IL = phi i64 [ %IL.next, %IL.latch ], [ 1, %j.header ] |
| %96 = mul nuw nsw i64 %IL, 54 |
| %97 = add nsw i64 %92, %96 |
| %98 = getelementptr double, ptr @GlobC, i64 %97 |
| %99 = load double, ptr %98, align 8 |
| %100 = getelementptr double, ptr @GlobG, i64 %97 |
| %101 = load double, ptr %100, align 8 |
| %102 = getelementptr double, ptr @GlobE, i64 %97 |
| %103 = load double, ptr %102, align 8 |
| %104 = getelementptr double, ptr @GlobI, i64 %97 |
| %105 = load double, ptr %104, align 8 |
| %106 = add nsw i64 %IL, -1 |
| %107 = mul nsw i64 %106, %21 |
| br label %JL.body |
| |
| JL.body: |
| %JL = phi i64 [ %JL.next, %JL.body ], [ 1, %IL.header ] |
| %109 = mul nuw nsw i64 %JL, 54 |
| %110 = add nsw i64 %95, %109 |
| %111 = getelementptr double, ptr @GlobD, i64 %110 |
| %112 = load double, ptr %111, align 8 |
| %113 = fmul fast double %112, %99 |
| %114 = getelementptr double, ptr @GlobH, i64 %110 |
| %115 = load double, ptr %114, align 8 |
| %116 = fmul fast double %115, %101 |
| %117 = fadd fast double %116, %113 |
| %118 = getelementptr double, ptr @GlobF, i64 %110 |
| %119 = load double, ptr %118, align 8 |
| %120 = fmul fast double %119, %103 |
| %121 = fadd fast double %117, %120 |
| %122 = getelementptr double, ptr @GlobJ, i64 %110 |
| %123 = load double, ptr %122, align 8 |
| %124 = fmul fast double %123, %105 |
| %125 = fadd fast double %121, %124 |
| %126 = add nsw i64 %JL, %107 |
| %.idx247.us.us.us.us.us.us = mul nsw i64 %126, 8000 |
| %gep.us.us.us.us.us.us = getelementptr i8, ptr %gep358, i64 %.idx247.us.us.us.us.us.us |
| %127 = getelementptr i8, ptr %gep.us.us.us.us.us.us, i64 -8008 |
| store double %125, ptr %127, align 8 |
| %JL.next = add nuw nsw i64 %JL, 1 |
| %exitcond.not = icmp eq i64 %JL, %21 |
| br i1 %exitcond.not, label %IL.latch, label %JL.body |
| |
| IL.latch: |
| %IL.next = add nuw nsw i64 %IL, 1 |
| %exitcond320.not = icmp eq i64 %IL, %18 |
| br i1 %exitcond320.not, label %j.latch, label %IL.header |
| |
| j.latch: |
| %j.next = add nuw nsw i64 %j, 1 |
| %exitcond324.not = icmp eq i64 %j, %21 |
| br i1 %exitcond324.not, label %i.latch, label %j.header |
| |
| i.latch: |
| %i.next = add nuw nsw i64 %i, 1 |
| %exitcond328.not = icmp eq i64 %i, %18 |
| br i1 %exitcond328.not, label %exit, label %i.header |
| |
| exit: |
| ret void |
| } |
| |
| |