| ; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -pass-remarks='loop-interchange' -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -disable-output -S |
| ; RUN: FileCheck --input-file=%t %s |
| |
| target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" |
| |
| ; The IR test case below is a full and representative motivating example |
| ; for loop-interchange containing a more complex loop nest structure that |
| ; corresponds to this pseudo-code: |
| ; |
| ; for L=1 to NX |
| ; for M=1 to NY |
| ; for i=1 to NX |
| ; for j=1 to NY |
| ; for IL=1 to NX |
| ; load GlobC(i,IL,L) |
| ; load GlobG(i,IL,L) |
| ; load GlobE(i,IL,L) |
| ; load GlobI(i,IL,L) |
| ; for JL=1 to NY |
| ; load GlobD(j,JL,M) |
| ; load GlobH(j,JL,M) |
| ; load GlobF(j,JL,M) |
| ; load GlobJ(j,JL,M) |
| ; store GlobL(NY*i+j,NY*IL+JL) |
| ; End |
| ; End |
| ; End |
| ; End |
| ; // Stmt 2 |
| ; // Stmt 3 |
| ; // Stmt 4 |
| ; End |
| ; End |
| ; |
| ; It is important to note here that this comes from Fortran code, which uses a |
| ; column-major data layout, so loops 'j' and 'JL' should be interchanged. I.e. |
| ; in the IR below, basic block JL.body is part of the loop that we would like |
| ; to see interchanged as there are 4 loads and 1 store that are |
| ; unit-strided over 'j', so making 'j' loop the innermost is preferable here. |
| ; |
| ; TODO: |
| ; |
| ; There are a few issues that prevent loop-interchange from performing its |
| ; transformation on this test case: |
| ; |
| ; 1. LoopNest checks: the first check that is performed is whether loop 'L.header' |
| ; and 'M.header' are perfectly nested, which they are not. It needs to be |
| ; investigated why the whole loop nest rooted under L is rejected as a |
| ; candidate. |
| ; |
| ; 2. DependenceAnalysis: it finds this dependency: |
| ; |
| ; Found output dependency between Src and Dst |
| ; Src: store double %46, ptr %48, align 8 |
| ; Dst: store double %46, ptr %48, align 8 |
| ; |
| ; |
| ; CHECK: --- !Missed |
| ; CHECK-NEXT: Pass: loop-interchange |
| ; CHECK-NEXT: Name: UnsupportedLoopNestDepth |
| ; CHECK-NEXT: Function: test |
| ; CHECK-NEXT: Args: |
| ; CHECK-NEXT: - String: 'Unsupported depth of loop nest, the supported range is [' |
| ; CHECK-NEXT: - String: '2' |
| ; CHECK-NEXT: - String: ', ' |
| ; CHECK-NEXT: - String: '10' |
| ; CHECK-NEXT: - String: "].\n" |
| ; CHECK-NEXT: ... |
| ; CHECK-NEXT: --- !Analysis |
| ; CHECK-NEXT: Pass: loop-interchange |
| ; CHECK-NEXT: Name: Dependence |
| ; CHECK-NEXT: Function: test |
| ; CHECK-NEXT: Args: |
| ; CHECK-NEXT: - String: Computed dependence info, invoking the transform. |
| ; CHECK-NEXT: ... |
| ; CHECK-NEXT: --- !Missed |
| ; CHECK-NEXT: Pass: loop-interchange |
| ; CHECK-NEXT: Name: Dependence |
| ; CHECK-NEXT: Function: test |
| ; CHECK-NEXT: Args: |
| ; CHECK-NEXT: - String: Cannot interchange loops due to dependences. |
| ; CHECK-NEXT: ... |
| ; CHECK-NEXT: --- !Missed |
| ; CHECK-NEXT: Pass: loop-interchange |
| ; CHECK-NEXT: Name: UnsupportedLoopNestDepth |
| ; CHECK-NEXT: Function: test |
| ; CHECK-NEXT: Args: |
| ; CHECK-NEXT: - String: 'Unsupported depth of loop nest, the supported range is [' |
| ; CHECK-NEXT: - String: '2' |
| ; CHECK-NEXT: - String: ', ' |
| ; CHECK-NEXT: - String: '10' |
| ; CHECK-NEXT: - String: "].\n" |
| ; CHECK-NEXT: ... |
| ; CHECK-NEXT: --- !Analysis |
| ; CHECK-NEXT: Pass: loop-interchange |
| ; CHECK-NEXT: Name: Dependence |
| ; CHECK-NEXT: Function: test |
| ; CHECK-NEXT: Args: |
| ; CHECK-NEXT: - String: Computed dependence info, invoking the transform. |
| ; CHECK-NEXT: ... |
| ; CHECK-NEXT: --- !Missed |
| ; CHECK-NEXT: Pass: loop-interchange |
| ; CHECK-NEXT: Name: NotTightlyNested |
| ; CHECK-NEXT: Function: test |
| ; CHECK-NEXT: Args: |
| ; CHECK-NEXT: - String: Cannot interchange loops because they are not tightly nested. |
| ; CHECK-NEXT: ... |
| ; CHECK-NEXT: --- !Missed |
| ; CHECK-NEXT: Pass: loop-interchange |
| ; CHECK-NEXT: Name: Dependence |
| ; CHECK-NEXT: Function: test |
| ; CHECK-NEXT: Args: |
| ; CHECK-NEXT: - String: Cannot interchange loops due to dependences. |
| ; CHECK-NEXT: ... |
| ; CHECK-NEXT: --- !Analysis |
| ; CHECK-NEXT: Pass: loop-interchange |
| ; CHECK-NEXT: Name: Dependence |
| ; CHECK-NEXT: Function: test |
| ; CHECK-NEXT: Args: |
| ; CHECK-NEXT: - String: Computed dependence info, invoking the transform. |
| ; CHECK-NEXT: ... |
| ; CHECK-NEXT: --- !Missed |
| ; CHECK-NEXT: Pass: loop-interchange |
| ; CHECK-NEXT: Name: Dependence |
| ; CHECK-NEXT: Function: test |
| ; CHECK-NEXT: Args: |
| ; CHECK-NEXT: - String: All loops have dependencies in all directions. |
| ; CHECK-NEXT: ... |
| |
| ; Global arrays accessed by @test. In the L/M/i/j/IL/JL loop nest, @GlobC, |
| ; @GlobG, @GlobE and @GlobI are loaded in IL.header, @GlobD, @GlobH, @GlobF and |
| ; @GlobJ are loaded in JL.body, and @GlobL is stored to in JL.body. @GlobK and |
| ; @GlobM are only accessed by the loops outside that six-deep nest. |
| @GlobC = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer |
| @GlobD = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer |
| @GlobE = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer |
| @GlobF = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer |
| @GlobG = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer |
| @GlobH = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer |
| @GlobI = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer |
| @GlobJ = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer |
| @GlobK = local_unnamed_addr global [1000 x [1000 x double]] zeroinitializer |
| @GlobL = local_unnamed_addr global [1000 x [1000 x double]] zeroinitializer |
| @GlobM = local_unnamed_addr global [2500 x double] zeroinitializer |
| |
| ; @test: the entry block branches on the i32 loaded from %9. When it equals 1, |
| ; control goes to block %171; otherwise it goes to block %16, which guards the |
| ; six-deep loop nest L > M > i > j > IL > JL described in the pseudo-code at the |
| ; top of this file. Every path ends in ._crit_edge294, which frees the buffer |
| ; obtained from malloc and returns. |
| define void @test(ptr noalias readonly captures(none) %0, ptr noalias readonly captures(none) %1, ptr noalias captures(none) %2, ptr noalias captures(none) %3, ptr noalias readonly captures(none) %4, ptr noalias readonly captures(none) %5, ptr noalias readonly captures(none) %6, ptr noalias readonly captures(none) %7, ptr noalias readonly captures(none) %8, ptr noalias readonly captures(none) %9) { |
| ; Entry: reserve a 2500-double stack buffer (%11), clamp the i32 at %4 to >= 0 |
| ; (%14), and select between the two top-level paths based on the i32 at %9. |
| %11 = alloca [2500 x double], align 8 |
| %12 = load i32, ptr %4, align 4 |
| %13 = tail call i32 @llvm.smax.i32(i32 %12, i32 0) |
| %14 = zext nneg i32 %13 to i64 |
| %15 = load i32, ptr %9, align 4 |
| %.not = icmp eq i32 %15, 1 |
| br i1 %.not, label %171, label %16 |
| |
| ; %18 = sign-extended i32 loaded from %7; skip the loop nest unless it is > 0. |
| 16: |
| %17 = load i32, ptr %7, align 4 |
| %18 = sext i32 %17 to i64 |
| %19 = icmp sgt i32 %17, 0 |
| br i1 %19, label %.lr.ph286, label %._crit_edge287 |
| |
| ; %21 = sign-extended i32 loaded from %8; skip the loop nest unless it is > 0. |
| .lr.ph286: |
| %20 = load i32, ptr %8, align 4 |
| %21 = sext i32 %20 to i64 |
| %22 = icmp sgt i32 %20, 0 |
| br i1 %22, label %preheader.L, label %._crit_edge287 |
| |
| ; Values invariant across the L loop: %25 = max(i32 at %5, 0) zero-extended, |
| ; %28 = %1 offset by (i32 at %6) doubles, %29 = %25 * 8 (memset byte count), |
| ; and %32 = %25 + 2 - (1 if the i32 at %5 is > 0, else 0). |
| preheader.L: |
| %23 = load i32, ptr %5, align 4 |
| %24 = tail call i32 @llvm.smax.i32(i32 %23, i32 0) |
| %25 = zext nneg i32 %24 to i64 |
| %26 = load i32, ptr %6, align 4 |
| %27 = sext i32 %26 to i64 |
| %28 = getelementptr double, ptr %1, i64 %27 |
| %.not241270.us = icmp slt i32 %23, 1 |
| %29 = shl nuw nsw i64 %25, 3 |
| %30 = add nuw nsw i64 %25, 2 |
| %31 = icmp sgt i32 %23, 0 |
| %.neg = sext i1 %31 to i64 |
| %32 = add nsw i64 %30, %.neg |
| br label %L.header |
| |
| ; L loop (induction variable %L starts at 1; L.latch exits when %L == %18). |
| ; 2916 = 54 * 54, and 2971 = 2916 + 54 + 1, so %34 + i + 54 * IL equals |
| ; (L-1)*2916 + (IL-1)*54 + (i-1). |
| L.header: |
| %L = phi i64 [ %L.next, %L.latch ], [ 1, %preheader.L ] |
| %33 = mul nuw nsw i64 %L, 2916 |
| %34 = add nsw i64 %33, -2971 |
| %35 = add nsw i64 %L, -1 |
| %36 = mul nsw i64 %35, %21 |
| br label %M.header |
| |
| ; Reached from i.latch once the i loop finishes. If the i32 at %5 is < 1, take |
| ; the short path that stores 0.0; otherwise run the loops over @GlobM below. |
| exit.i: |
| br i1 %.not241270.us, label %._crit_edge275.us.thread, label %.preheader258.us.preheader |
| |
| ; Reduction loop: %46 accumulates %28[k] * @GlobM[k] for k = 0, 1, ...; %37 |
| ; counts down from %25 and the loop exits when it reaches 0. |
| .lr.ph274.us: |
| %37 = phi i64 [ %48, %.lr.ph274.us ], [ %25, %.preheader260.us ] |
| %38 = phi double [ %46, %.lr.ph274.us ], [ 0.000000e+00, %.preheader260.us ] |
| %39 = phi i64 [ %47, %.lr.ph274.us ], [ 1, %.preheader260.us ] |
| %40 = add nsw i64 %39, -1 |
| %41 = getelementptr double, ptr %28, i64 %40 |
| %42 = load double, ptr %41, align 8 |
| %43 = getelementptr double, ptr @GlobM, i64 %40 |
| %44 = load double, ptr %43, align 8 |
| %45 = fmul fast double %44, %42 |
| %46 = fadd fast double %45, %38 |
| %47 = add nuw nsw i64 %39, 1 |
| %48 = add nsw i64 %37, -1 |
| %.not242.us = icmp eq i64 %48, 0 |
| br i1 %.not242.us, label %.lr.ph278.us.preheader, label %.lr.ph274.us |
| |
| ; Store the reduction result at double index %M + %36 - 1 of the stack buffer |
| ; %11, and form %53 = @GlobK + (%M + %36) * 8 - 8 bytes for the loop below. |
| .lr.ph278.us.preheader: |
| %.lcssa = phi double [ %46, %.lr.ph274.us ] |
| %49 = add nsw i64 %M, %36 |
| %50 = getelementptr double, ptr %11, i64 %49 |
| %51 = getelementptr i8, ptr %50, i64 -8 |
| store double %.lcssa, ptr %51, align 8 |
| %52 = getelementptr double, ptr @GlobK, i64 %49 |
| %53 = getelementptr i8, ptr %52, i64 -8 |
| br label %.lr.ph278.us |
| |
| latch.M.loopexit: |
| br label %latch.M |
| |
| ; M loop latch: exit to L.latch when %M == %21. |
| latch.M: |
| %M.next = add nuw nsw i64 %M, 1 |
| %exitcond335.not = icmp eq i64 %M, %21 |
| br i1 %exitcond335.not, label %L.latch, label %M.header |
| |
| ; Outer loop over %54 = 1, 2, ... (exits in ._crit_edge279.us when |
| ; %54 + 1 == %32). %56 addresses @GlobL at byte offset (%54 - 1) * 8000, where |
| ; 8000 bytes is the size of one [1000 x double] row. |
| .lr.ph278.us: |
| %54 = phi i64 [ %133, %._crit_edge279.us ], [ 1, %.lr.ph278.us.preheader ] |
| %55 = add nsw i64 %54, -1 |
| %.idx244.us = mul nuw nsw i64 %55, 8000 |
| %56 = getelementptr i8, ptr @GlobL, i64 %.idx244.us |
| br label %57 |
| |
| ; Reduction loop: %67 accumulates %56[k] * %28[k]; %58 counts down from %25. |
| 57: |
| %58 = phi i64 [ %25, %.lr.ph278.us ], [ %69, %57 ] |
| %59 = phi double [ 0.000000e+00, %.lr.ph278.us ], [ %67, %57 ] |
| %60 = phi i64 [ 1, %.lr.ph278.us ], [ %68, %57 ] |
| %61 = add nsw i64 %60, -1 |
| %62 = getelementptr double, ptr %56, i64 %61 |
| %63 = load double, ptr %62, align 8 |
| %64 = getelementptr double, ptr %28, i64 %61 |
| %65 = load double, ptr %64, align 8 |
| %66 = fmul fast double %65, %63 |
| %67 = fadd fast double %66, %59 |
| %68 = add nuw nsw i64 %60, 1 |
| %69 = add nsw i64 %58, -1 |
| %.not243.us = icmp eq i64 %69, 0 |
| br i1 %.not243.us, label %._crit_edge279.us, label %57 |
| |
| ; Inner update loop: @GlobM[k] += %86 * %84[k]; %71 counts down from %25. |
| 70: |
| %71 = phi i64 [ %25, %.preheader258.us ], [ %81, %70 ] |
| %72 = phi i64 [ 1, %.preheader258.us ], [ %80, %70 ] |
| %73 = add nsw i64 %72, -1 |
| %74 = getelementptr double, ptr @GlobM, i64 %73 |
| %75 = load double, ptr %74, align 8 |
| %76 = getelementptr double, ptr %84, i64 %73 |
| %77 = load double, ptr %76, align 8 |
| %78 = fmul fast double %86, %77 |
| %79 = fadd fast double %78, %75 |
| store double %79, ptr %74, align 8 |
| %80 = add nuw nsw i64 %72, 1 |
| %81 = add nsw i64 %71, -1 |
| %.not245.us = icmp eq i64 %81, 0 |
| br i1 %.not245.us, label %._crit_edge.us, label %70 |
| |
| ; Outer loop over %82 = 1, 2, ...: %84 = @GlobL + (%82 - 1) * 8000 bytes and |
| ; %86 = %28[%82 - 1] feed the inner loop at %70. |
| .preheader258.us: |
| %82 = phi i64 [ %128, %._crit_edge.us ], [ 1, %.preheader258.us.preheader ] |
| %83 = add nsw i64 %82, -1 |
| %.idx246.us = mul nuw nsw i64 %83, 8000 |
| %84 = getelementptr i8, ptr @GlobL, i64 %.idx246.us |
| %85 = getelementptr double, ptr %28, i64 %83 |
| %86 = load double, ptr %85, align 8 |
| br label %70 |
| |
| .preheader260.us: |
| br label %.lr.ph274.us |
| |
| ; Short path: store 0.0 at double index %M + %36 - 1 of %11, then go to latch.M. |
| ._crit_edge275.us.thread: |
| %87 = getelementptr double, ptr %11, i64 %M |
| %88 = getelementptr double, ptr %87, i64 %36 |
| %89 = getelementptr i8, ptr %88, i64 -8 |
| store double 0.000000e+00, ptr %89, align 8 |
| br label %latch.M |
| |
| ; Zero the first %29 bytes of @GlobM before accumulating into it. |
| .preheader258.us.preheader: |
| call void @llvm.memset.p0.i64(ptr nonnull align 16 @GlobM, i8 0, i64 %29, i1 false) |
| br label %.preheader258.us |
| |
| ; M loop (%M starts at 1). As for %34 in L.header, %91 + j + 54 * JL equals |
| ; (M-1)*2916 + (JL-1)*54 + (j-1). |
| M.header: |
| %M = phi i64 [ 1, %L.header ], [ %M.next, %latch.M ] |
| %90 = mul nuw nsw i64 %M, 2916 |
| %91 = add nsw i64 %90, -2971 |
| br label %i.header |
| |
| ; i loop (%i starts at 1; i.latch exits when %i == %18). %92 is the i-dependent |
| ; part of the index used by the loads in IL.header; %invariant.gep is |
| ; @GlobL + (i-1) * %21 doubles. |
| i.header: |
| %i = phi i64 [ %i.next, %i.latch ], [ 1, %M.header ] |
| %92 = add nsw i64 %34, %i |
| %93 = add nsw i64 %i, -1 |
| %94 = mul nsw i64 %93, %21 |
| %invariant.gep = getelementptr double, ptr @GlobL, i64 %94 |
| br label %j.header |
| |
| ; j loop (%j starts at 1; j.latch exits when %j == %21). %95 is the j-dependent |
| ; part of the index used by the loads in JL.body; %gep358 adds %j doubles to |
| ; %invariant.gep. |
| j.header: |
| %j = phi i64 [ %j.next, %j.latch ], [ 1, %i.header ] |
| %95 = add nsw i64 %91, %j |
| %gep358 = getelementptr double, ptr %invariant.gep, i64 %j |
| br label %IL.header |
| |
| ; IL loop (%IL starts at 1; IL.latch exits when %IL == %18). The four loads |
| ; from @GlobC, @GlobG, @GlobE and @GlobI use index %97, which does not depend |
| ; on %j or %JL. |
| IL.header: |
| %IL = phi i64 [ %IL.next, %IL.latch ], [ 1, %j.header ] |
| %96 = mul nuw nsw i64 %IL, 54 |
| %97 = add nsw i64 %92, %96 |
| %98 = getelementptr double, ptr @GlobC, i64 %97 |
| %99 = load double, ptr %98, align 8 |
| %100 = getelementptr double, ptr @GlobG, i64 %97 |
| %101 = load double, ptr %100, align 8 |
| %102 = getelementptr double, ptr @GlobE, i64 %97 |
| %103 = load double, ptr %102, align 8 |
| %104 = getelementptr double, ptr @GlobI, i64 %97 |
| %105 = load double, ptr %104, align 8 |
| %106 = add nsw i64 %IL, -1 |
| %107 = mul nsw i64 %106, %21 |
| br label %JL.body |
| |
| ; JL loop (%JL starts at 1; exits when %JL == %21). The four loads from @GlobD, |
| ; @GlobH, @GlobF and @GlobJ use index %110 = %95 + 54 * JL, in which %j has |
| ; stride 1 and %JL has stride 54. The store address is %gep358 plus |
| ; (JL + (IL-1) * %21) * 8000 - 8008 bytes, where 8000 is the size of one |
| ; [1000 x double] row of @GlobL. |
| JL.body: |
| %JL = phi i64 [ %JL.next, %JL.body ], [ 1, %IL.header ] |
| %109 = mul nuw nsw i64 %JL, 54 |
| %110 = add nsw i64 %95, %109 |
| %111 = getelementptr double, ptr @GlobD, i64 %110 |
| %112 = load double, ptr %111, align 8 |
| %113 = fmul fast double %112, %99 |
| %114 = getelementptr double, ptr @GlobH, i64 %110 |
| %115 = load double, ptr %114, align 8 |
| %116 = fmul fast double %115, %101 |
| %117 = fadd fast double %116, %113 |
| %118 = getelementptr double, ptr @GlobF, i64 %110 |
| %119 = load double, ptr %118, align 8 |
| %120 = fmul fast double %119, %103 |
| %121 = fadd fast double %117, %120 |
| %122 = getelementptr double, ptr @GlobJ, i64 %110 |
| %123 = load double, ptr %122, align 8 |
| %124 = fmul fast double %123, %105 |
| %125 = fadd fast double %121, %124 |
| %126 = add nsw i64 %JL, %107 |
| %.idx247.us.us.us.us.us.us = mul nsw i64 %126, 8000 |
| %gep.us.us.us.us.us.us = getelementptr i8, ptr %gep358, i64 %.idx247.us.us.us.us.us.us |
| %127 = getelementptr i8, ptr %gep.us.us.us.us.us.us, i64 -8008 |
| store double %125, ptr %127, align 8 |
| %JL.next = add nuw nsw i64 %JL, 1 |
| %exitcond.not = icmp eq i64 %JL, %21 |
| br i1 %exitcond.not, label %IL.latch, label %JL.body |
| |
| IL.latch: |
| %IL.next = add nuw nsw i64 %IL, 1 |
| %exitcond320.not = icmp eq i64 %IL, %18 |
| br i1 %exitcond320.not, label %j.latch, label %IL.header |
| |
| j.latch: |
| %j.next = add nuw nsw i64 %j, 1 |
| %exitcond324.not = icmp eq i64 %j, %21 |
| br i1 %exitcond324.not, label %i.latch, label %j.header |
| |
| i.latch: |
| %i.next = add nuw nsw i64 %i, 1 |
| %exitcond328.not = icmp eq i64 %i, %18 |
| br i1 %exitcond328.not, label %exit.i, label %i.header |
| |
| ; Latch of the %82 loop: exit when %82 + 1 == %32. |
| ._crit_edge.us: |
| %128 = add nuw nsw i64 %82, 1 |
| %exitcond329.not = icmp eq i64 %128, %32 |
| br i1 %exitcond329.not, label %.preheader260.us, label %.preheader258.us |
| |
| ; Add @GlobM[%54 - 1] to the reduction result from block %57 and store it at |
| ; %53 + (%54 - 1) * 8000 bytes; exit the %54 loop when %54 + 1 == %32. |
| ._crit_edge279.us: |
| %.lcssa360 = phi double [ %67, %57 ] |
| %129 = getelementptr double, ptr @GlobM, i64 %55 |
| %130 = load double, ptr %129, align 8 |
| %131 = fadd fast double %130, %.lcssa360 |
| %132 = getelementptr i8, ptr %53, i64 %.idx244.us |
| store double %131, ptr %132, align 8 |
| %133 = add nuw nsw i64 %54, 1 |
| %exitcond331.not = icmp eq i64 %133, %32 |
| br i1 %exitcond331.not, label %latch.M.loopexit, label %.lr.ph278.us |
| |
| ; L loop latch: exit when %L == %18. |
| L.latch: |
| %L.next = add nuw nsw i64 %L, 1 |
| %exitcond339.not = icmp eq i64 %L, %18 |
| br i1 %exitcond339.not, label %exit.L, label %L.header |
| |
| exit.L: |
| br label %._crit_edge287 |
| |
| ; Reached after the loop nest or when either trip-count guard fails. Allocates |
| ; max(i32 at %5, 0) * 8 bytes (1 byte if that i32 is < 1) and, if the i32 is |
| ; < 1, goes straight to the free in ._crit_edge294. |
| ._crit_edge287: |
| %134 = load i32, ptr %6, align 4 |
| %135 = load i32, ptr %5, align 4 |
| %136 = tail call i32 @llvm.smax.i32(i32 %135, i32 0) |
| %137 = zext nneg i32 %136 to i64 |
| %138 = sext i32 %134 to i64 |
| %139 = getelementptr double, ptr %2, i64 %138 |
| %140 = shl nuw nsw i64 %137, 3 |
| %.not236 = icmp slt i32 %135, 1 |
| %141 = select i1 %.not236, i64 1, i64 %140 |
| %142 = tail call ptr @malloc(i64 %141) |
| br i1 %.not236, label %._crit_edge294, label %.preheader254.preheader |
| |
| ; Zero-fill the %140 bytes of the malloc'd buffer %142. |
| .preheader254.preheader: |
| call void @llvm.memset.p0.i64(ptr align 8 %142, i8 0, i64 %140, i1 false) |
| br label %.preheader254 |
| |
| ; Outer loop over %143 = 1, 2, ... (._crit_edge exits when %143 == %137): |
| ; %145 = %0 + (%143 - 1) * 8000 bytes, %147 = %11[%143 - 1]. |
| .preheader254: |
| %143 = phi i64 [ %160, %._crit_edge ], [ 1, %.preheader254.preheader ] |
| %144 = add nsw i64 %143, -1 |
| %.idx240 = mul nuw nsw i64 %144, 8000 |
| %145 = getelementptr i8, ptr %0, i64 %.idx240 |
| %146 = getelementptr double, ptr %11, i64 %144 |
| %147 = load double, ptr %146, align 8 |
| br label %148 |
| |
| .preheader253: |
| br label %.lr.ph293 |
| |
| ; Inner loop: %142[k] += %147 * %145[k]; %149 counts down from %137. |
| 148: |
| %149 = phi i64 [ %137, %.preheader254 ], [ %159, %148 ] |
| %150 = phi i64 [ 1, %.preheader254 ], [ %158, %148 ] |
| %151 = add nsw i64 %150, -1 |
| %152 = getelementptr double, ptr %142, i64 %151 |
| %153 = load double, ptr %152, align 8 |
| %154 = getelementptr double, ptr %145, i64 %151 |
| %155 = load double, ptr %154, align 8 |
| %156 = fmul fast double %147, %155 |
| %157 = fadd fast double %156, %153 |
| store double %157, ptr %152, align 8 |
| %158 = add nuw nsw i64 %150, 1 |
| %159 = add nsw i64 %149, -1 |
| %.not239 = icmp eq i64 %159, 0 |
| br i1 %.not239, label %._crit_edge, label %148 |
| |
| ._crit_edge: |
| %160 = add nuw nsw i64 %143, 1 |
| %exitcond341.not = icmp eq i64 %143, %137 |
| br i1 %exitcond341.not, label %.preheader253, label %.preheader254 |
| |
| ; Final loop on this path: %139[k] -= %142[k], where %139 = %2 offset by |
| ; (i32 at %6) doubles; %161 counts down from %137. |
| .lr.ph293: |
| %161 = phi i64 [ %170, %.lr.ph293 ], [ %137, %.preheader253 ] |
| %162 = phi i64 [ %169, %.lr.ph293 ], [ 1, %.preheader253 ] |
| %163 = add nsw i64 %162, -1 |
| %164 = getelementptr double, ptr %139, i64 %163 |
| %165 = getelementptr double, ptr %142, i64 %163 |
| %166 = load double, ptr %165, align 8 |
| %167 = load double, ptr %164, align 8 |
| %168 = fsub fast double %167, %166 |
| store double %168, ptr %164, align 8 |
| %169 = add nuw nsw i64 %162, 1 |
| %170 = add nsw i64 %161, -1 |
| %.not238 = icmp eq i64 %170, 0 |
| br i1 %.not238, label %._crit_edge294.loopexit359, label %.lr.ph293 |
| |
| ; Path taken when the i32 at %9 equals 1. With %175 = max(i32 at %5, 0), this |
| ; allocates the signed maximum of (%175 * 8 * %175) and 1 bytes; if the i32 at |
| ; %5 is < 1, it goes straight to the free in ._crit_edge294. |
| 171: |
| %172 = load i32, ptr %6, align 4 |
| %173 = load i32, ptr %5, align 4 |
| %174 = tail call i32 @llvm.smax.i32(i32 %173, i32 0) |
| %175 = zext nneg i32 %174 to i64 |
| %176 = shl nuw nsw i64 %175, 3 |
| %177 = mul i64 %176, %175 |
| %178 = tail call i64 @llvm.smax.i64(i64 %177, i64 1) |
| %179 = tail call ptr @malloc(i64 %178) |
| %.not311 = icmp slt i32 %173, 1 |
| br i1 %.not311, label %._crit_edge294, label %.preheader250.us.preheader |
| |
| ; Zero-fill %175 * %175 * 8 bytes of the malloc'd buffer %179. |
| .preheader250.us.preheader: |
| %180 = mul nuw nsw i64 %175, %175 |
| %181 = shl i64 %180, 3 |
| call void @llvm.memset.p0.i64(ptr align 8 %179, i8 0, i64 %181, i1 false) |
| br label %.preheader250.us |
| |
| ; Outer loop over %182 = 1, 2, ... (exits when %182 == %175): %184 = %0 + |
| ; (%182 - 1) * 8000 bytes, %invariant.gep.us = @GlobK + (%182 - 1) doubles. |
| .preheader250.us: |
| %182 = phi i64 [ %203, %._crit_edge301.split.us ], [ 1, %.preheader250.us.preheader ] |
| %183 = add nsw i64 %182, -1 |
| %.idx.us = mul nuw nsw i64 %183, 8000 |
| %184 = getelementptr i8, ptr %0, i64 %.idx.us |
| %invariant.gep.us = getelementptr double, ptr @GlobK, i64 %183 |
| br label %.preheader249.us |
| |
| ; Inner loop: %200[k] += %201 * %184[k]; %186 counts down from %175. |
| 185: |
| %186 = phi i64 [ %175, %.preheader249.us ], [ %196, %185 ] |
| %187 = phi i64 [ 1, %.preheader249.us ], [ %195, %185 ] |
| %188 = add nsw i64 %187, -1 |
| %189 = getelementptr double, ptr %200, i64 %188 |
| %190 = load double, ptr %189, align 8 |
| %191 = getelementptr double, ptr %184, i64 %188 |
| %192 = load double, ptr %191, align 8 |
| %193 = fmul fast double %201, %192 |
| %194 = fadd fast double %193, %190 |
| store double %194, ptr %189, align 8 |
| %195 = add nuw nsw i64 %187, 1 |
| %196 = add nsw i64 %186, -1 |
| %.not233.us = icmp eq i64 %196, 0 |
| br i1 %.not233.us, label %._crit_edge300.us, label %185 |
| |
| ; Middle loop over %197 = 1, 2, ... (exits when %197 == %175): %200 = %179 + |
| ; (%197 - 1) * %175 doubles; %201 is loaded from %invariant.gep.us + |
| ; (%197 - 1) * 8000 bytes. |
| .preheader249.us: |
| %197 = phi i64 [ 1, %.preheader250.us ], [ %202, %._crit_edge300.us ] |
| %198 = add nsw i64 %197, -1 |
| %199 = mul nuw nsw i64 %198, %175 |
| %200 = getelementptr double, ptr %179, i64 %199 |
| %.idx234.us = mul nuw nsw i64 %198, 8000 |
| %gep.us = getelementptr i8, ptr %invariant.gep.us, i64 %.idx234.us |
| %201 = load double, ptr %gep.us, align 8 |
| br label %185 |
| |
| ._crit_edge300.us: |
| %202 = add nuw nsw i64 %197, 1 |
| %exitcond344.not = icmp eq i64 %197, %175 |
| br i1 %exitcond344.not, label %._crit_edge301.split.us, label %.preheader249.us |
| |
| ._crit_edge301.split.us: |
| %203 = add nuw nsw i64 %182, 1 |
| %exitcond345.not = icmp eq i64 %182, %175 |
| br i1 %exitcond345.not, label %.preheader248, label %.preheader250.us |
| |
| .preheader248: |
| br label %.preheader.lr.ph |
| |
| ; %invariant.gep306 = %3 offset by (i32 at %6, loaded in block %171) doubles. |
| .preheader.lr.ph: |
| %204 = sext i32 %172 to i64 |
| %invariant.gep306 = getelementptr double, ptr %3, i64 %204 |
| br label %.preheader |
| |
| ; Outer loop over %205 = 1, 2, ... (exits when %205 == %175): %gep307 = |
| ; %invariant.gep306 + (%205 - 1 + %204) * %14 doubles, and %210 = %179 + |
| ; (%205 - 1) * %175 doubles. |
| .preheader: |
| %205 = phi i64 [ 1, %.preheader.lr.ph ], [ %221, %._crit_edge304 ] |
| %206 = add nsw i64 %205, -1 |
| %207 = add nsw i64 %206, %204 |
| %208 = mul nsw i64 %207, %14 |
| %gep307 = getelementptr double, ptr %invariant.gep306, i64 %208 |
| %209 = mul nuw nsw i64 %206, %175 |
| %210 = getelementptr double, ptr %179, i64 %209 |
| br label %211 |
| |
| ; Inner loop: %gep307[k] -= %210[k]; %212 counts down from %175. |
| 211: |
| %212 = phi i64 [ %175, %.preheader ], [ %220, %211 ] |
| %213 = phi i64 [ 1, %.preheader ], [ %219, %211 ] |
| %214 = add nsw i64 %213, -1 |
| %gep = getelementptr double, ptr %gep307, i64 %214 |
| %215 = getelementptr double, ptr %210, i64 %214 |
| %216 = load double, ptr %215, align 8 |
| %217 = load double, ptr %gep, align 8 |
| %218 = fsub fast double %217, %216 |
| store double %218, ptr %gep, align 8 |
| %219 = add nuw nsw i64 %213, 1 |
| %220 = add nsw i64 %212, -1 |
| %.not232 = icmp eq i64 %220, 0 |
| br i1 %.not232, label %._crit_edge304, label %211 |
| |
| ._crit_edge304: |
| %221 = add nuw nsw i64 %205, 1 |
| %exitcond347.not = icmp eq i64 %205, %175 |
| br i1 %exitcond347.not, label %._crit_edge294.loopexit, label %.preheader |
| |
| ._crit_edge294.loopexit: |
| br label %._crit_edge294 |
| |
| ._crit_edge294.loopexit359: |
| br label %._crit_edge294 |
| |
| ; Common exit: %.sink is whichever malloc result (%142 or %179) belongs to the |
| ; incoming path; free it and return. |
| ._crit_edge294: |
| %.sink = phi ptr [ %142, %._crit_edge287 ], [ %179, %171 ], [ %179, %._crit_edge294.loopexit ], [ %142, %._crit_edge294.loopexit359 ] |
| tail call void @free(ptr %.sink) |
| ret void |
| } |
| |
| ; External declarations for everything @test calls: the signed-max intrinsics |
| ; used to clamp sizes, the memset intrinsic used to zero buffers, and the |
| ; malloc/free pair used for the temporary buffer released in ._crit_edge294. |
| declare i64 @llvm.smax.i64(i64, i64) |
| declare i32 @llvm.smax.i32(i32, i32) |
| declare void @llvm.memset.p0.i64(ptr writeonly captures(none), i8, i64, i1 immarg) |
| declare void @free(ptr allocptr noundef captures(none)) local_unnamed_addr |
| declare noalias noundef ptr @malloc(i64 noundef) local_unnamed_addr |