; llvm/test/Transforms/LICM/lnicm.ll - llvm-project - Git at Google

 ; RUN: opt -aa-pipeline=basic-aa -passes='loop(loop-interchange)'       -S %s | FileCheck %s --check-prefixes INTC
 ; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(lnicm),loop(loop-interchange)' -S %s | FileCheck %s --check-prefixes LNICM,CHECK
 ; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(licm),loop(loop-interchange)'  -S %s | FileCheck %s --check-prefixes LICM,CHECK

 ; This test represents the following function:
 ; void test(int x[10][10], int y[10], int *z) {
 ;   for (int k = 0; k < 10; k++) {
 ;     int tmp = *z;
 ;     for (int i = 0; i < 10; i++)
 ;       x[i][k] += y[k] + tmp;
 ;   }
 ; }
 ; We only want to hoist the load of z out of the loop nest.
 ; LICM hoists the load of y[k] out of the i-loop, but LNICM does not, so that
 ; the loop nest stays perfect. This lets optimizations that require a
 ; perfect loop nest (e.g. loop-interchange) run.


 ; Loop nest under test: for.body is the header of the outer k-loop and
 ; for.body3 is the single-block inner i-loop. %x and %y are noalias; %z is not.
 define dso_local void @test([10 x i32]* noalias %x, i32* noalias readonly %y, i32* readonly %z) {
 ; Expected output. Both the lnicm and the licm pipelines must leave the load of
 ; %z in the entry block. The remaining checks describe the lnicm output, apart
 ; from the licm-only check for a load located after the for.body label.
 ; In the lnicm output, entry is expected to branch towards for.body3.preheader,
 ; i.e. the i-loop header is reached first once the loops are interchanged.
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   [[Z:%.*]] = load i32, i32* %z, align 4
 ; CHECK-NEXT:   br label [[FOR_BODY3_PREHEADER:%.*]]
 ; LNICM:      for.body.preheader:
 ; LICM-NOT:   for.body.preheader:
 ; INTC-NOT:   for.body.preheader:
 ; LNICM-NEXT:   br label [[FOR_BODY:%.*]]
 ; CHECK:      for.body:
 ; LNICM-NEXT:   [[K:%.*]] = phi i32 [ [[INC10:%.*]], [[FOR_END:%.*]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
 ; LNICM-NEXT:   br label [[FOR_BODY3_SPLIT1:%.*]]
 ; LICM:         [[TMP:%.*]] = load i32, i32* [[ARRAYIDX:%.*]], align 4
 ; LNICM:      for.body3.preheader:
 ; LICM-NOT:   for.body3.preheader:
 ; INTC-NOT:   for.body3.preheader:
 ; LNICM-NEXT:   br label [[FOR_BODY3:%.*]]
 ; CHECK:      for.body3:
 ; LNICM-NEXT:   [[I:%.*]] = phi i32 [ [[TMP3:%.*]], [[FOR_BODY3_SPLIT:%.*]] ], [ 0, [[FOR_BODY3_PREHEADER:%.*]] ]
 ; LNICM-NEXT:   br label [[FOR_BODY_PREHEADER:%.*]]
 ; LNICM:      for.body3.split1:
 ; LNICM-NEXT:   [[IDXPROM:%.*]] = sext i32 [[K:%.*]] to i64
 ; LNICM-NEXT:   [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* %y, i64 [[IDXPROM:%.*]]
 ; LNICM-NEXT:   [[TMP:%.*]] = load i32, i32* [[ARRAYIDX:%.*]], align 4
 ; LNICM-NEXT:   [[ADD:%.*]] = add nsw i32 [[TMP:%.*]], [[Z:%.*]]
 ; LNICM-NEXT:   [[IDXPROM4:%.*]] = sext i32 [[I:%.*]] to i64
 ; LNICM-NEXT:   [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %x, i64 [[IDXPROM4:%.*]]
 ; LNICM-NEXT:   [[IDXPROM6:%.*]] = sext i32 [[K:%.*]] to i64
 ; LNICM-NEXT:   [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX5:%.*]], i64 0, i64 [[IDXPROM6:%.*]]
 ; LNICM-NEXT:   [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX7:%.*]], align 4
 ; LNICM-NEXT:   [[ADD8:%.*]] = add nsw i32 [[TMP2:%.*]], [[ADD:%.*]]
 ; LNICM-NEXT:   store i32 [[ADD8:%.*]], i32* [[ARRAYIDX7:%.*]], align 4
 ; LNICM-NEXT:   [[INC:%.*]] = add nsw i32 [[I:%.*]], 1
 ; LNICM-NEXT:   [[CMP2:%.*]] = icmp slt i32 [[INC:%.*]], 10
 ; LNICM-NEXT:   br label [[FOR_END:%.*]]
 ; LNICM:      for.body3.split:
 ; LICM-NOT:   for.body3.split:
 ; INTC-NOT:   for.body3.split:
 ; LNICM-NEXT:   [[TMP3:%.*]] = add nsw i32 [[I:%.*]], 1
 ; LNICM-NEXT:   [[TMP4:%.*]] = icmp slt i32 [[TMP3:%.*]], 10
 ; LNICM-NEXT:   br i1 [[TMP4:%.*]], label [[FOR_BODY3:%.*]], label [[FOR_END11:%.*]], !llvm.loop !0
 ; LNICM:      for.end:
 ; LNICM-NEXT:   [[INC10:%.*]] = add nsw i32 [[K:%.*]], 1
 ; LNICM-NEXT:   [[CMP:%.*]] = icmp slt i32 [[INC10:%.*]], 10
 ; LNICM-NEXT:   br i1 [[CMP:%.*]], label [[FOR_BODY:%.*]], label [[FOR_BODY3_SPLIT:%.*]], !llvm.loop !2
 ; LNICM:      for.end11:
 ; LNICM-NEXT:   ret void

 entry:
   br label %for.body

 ; Outer loop header; %k.02 is k.
 for.body:
   %k.02 = phi i32 [ 0, %entry ], [ %inc10, %for.end ]
   ; tmp = *z, loaded once per outer iteration in the input.
   %0 = load i32, i32* %z, align 4
   br label %for.body3

 ; Inner loop: header, body and latch in one block; %i.01 is i.
 for.body3:
   %i.01 = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ]
   ; y[k]: the address is computed from %k.02 only, not from %i.01.
   %idxprom = sext i32 %k.02 to i64
   %arrayidx = getelementptr inbounds i32, i32* %y, i64 %idxprom
   %1 = load i32, i32* %arrayidx, align 4
   ; y[k] + tmp
   %add = add nsw i32 %1, %0
   ; &x[i][k]: select row i of %x, then element k within that row.
   %idxprom4 = sext i32 %i.01 to i64
   %arrayidx5 = getelementptr inbounds [10 x i32], [10 x i32]* %x, i64 %idxprom4
   %idxprom6 = sext i32 %k.02 to i64
   %arrayidx7 = getelementptr inbounds [10 x i32], [10 x i32]* %arrayidx5, i64 0, i64 %idxprom6
   ; x[i][k] += y[k] + tmp
   %2 = load i32, i32* %arrayidx7, align 4
   %add8 = add nsw i32 %2, %add
   store i32 %add8, i32* %arrayidx7, align 4
   ; i++, then take the back-edge while i < 10.
   %inc = add nsw i32 %i.01, 1
   %cmp2 = icmp slt i32 %inc, 10
   br i1 %cmp2, label %for.body3, label %for.end, !llvm.loop !0

 ; Outer loop latch: k++, then take the back-edge while k < 10.
 for.end:
   %inc10 = add nsw i32 %k.02, 1
   %cmp = icmp slt i32 %inc10, 10
   br i1 %cmp, label %for.body, label %for.end11, !llvm.loop !2

 for.end11:
   ret void
 }

 !0 = distinct !{!0, !1}
 !1 = !{!"llvm.loop.mustprogress"}
 !2 = distinct !{!2, !1}
	; RUN: opt -aa-pipeline=basic-aa -passes='loop(loop-interchange)' -S %s | FileCheck %s --check-prefixes INTC
	; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(lnicm),loop(loop-interchange)' -S %s | FileCheck %s --check-prefixes LNICM,CHECK
	; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(licm),loop(loop-interchange)' -S %s | FileCheck %s --check-prefixes LICM,CHECK

	; This test represents the following function:
	; void test(int x[10][10], int y[10], int *z) {
	;   for (int k = 0; k < 10; k++) {
	;     int tmp = *z;
	;     for (int i = 0; i < 10; i++)
	;       x[i][k] += y[k] + tmp;
	;   }
	; }
	; We only want to hoist the load of z out of the loop nest.
	; LICM hoists the load of y[k] out of the i-loop, but LNICM does not, so that
	; the loop nest stays perfect. This lets optimizations that require a
	; perfect loop nest (e.g. loop-interchange) run.


	; Loop nest under test: for.body is the header of the outer k-loop and
	; for.body3 is the single-block inner i-loop. %x and %y are noalias; %z is not.
	; NOTE(review): @test is also defined earlier in this file, and a module may
	; define it only once - confirm which copy is meant to stay.
	define dso_local void @test([10 x i32]* noalias %x, i32* noalias readonly %y, i32* readonly %z) {
	; Expected output. Both the lnicm and the licm pipelines must leave the load of
	; %z in the entry block. The remaining checks describe the lnicm output, apart
	; from the licm-only check for a load located after the for.body label.
	; Every pattern variable uses the regex %.* so it matches a whole value name,
	; and pointer operands are spelled with their '*' as in the IR below.
	; CHECK-LABEL: @test(
	; CHECK-NEXT: entry:
	; CHECK-NEXT:   [[Z:%.*]] = load i32, i32* %z, align 4
	; CHECK-NEXT:   br label [[FOR_BODY3_PREHEADER:%.*]]
	; LNICM:      for.body.preheader:
	; LICM-NOT:   for.body.preheader:
	; INTC-NOT:   for.body.preheader:
	; LNICM-NEXT:   br label [[FOR_BODY:%.*]]
	; CHECK:      for.body:
	; LNICM-NEXT:   [[K:%.*]] = phi i32 [ [[INC10:%.*]], [[FOR_END:%.*]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
	; LNICM-NEXT:   br label [[FOR_BODY3_SPLIT1:%.*]]
	; LICM:         [[TMP:%.*]] = load i32, i32* [[ARRAYIDX:%.*]], align 4
	; LNICM:      for.body3.preheader:
	; LICM-NOT:   for.body3.preheader:
	; INTC-NOT:   for.body3.preheader:
	; LNICM-NEXT:   br label [[FOR_BODY3:%.*]]
	; CHECK:      for.body3:
	; LNICM-NEXT:   [[I:%.*]] = phi i32 [ [[TMP3:%.*]], [[FOR_BODY3_SPLIT:%.*]] ], [ 0, [[FOR_BODY3_PREHEADER:%.*]] ]
	; LNICM-NEXT:   br label [[FOR_BODY_PREHEADER:%.*]]
	; LNICM:      for.body3.split1:
	; LNICM-NEXT:   [[IDXPROM:%.*]] = sext i32 [[K:%.*]] to i64
	; LNICM-NEXT:   [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* %y, i64 [[IDXPROM:%.*]]
	; LNICM-NEXT:   [[TMP:%.*]] = load i32, i32* [[ARRAYIDX:%.*]], align 4
	; LNICM-NEXT:   [[ADD:%.*]] = add nsw i32 [[TMP:%.*]], [[Z:%.*]]
	; LNICM-NEXT:   [[IDXPROM4:%.*]] = sext i32 [[I:%.*]] to i64
	; LNICM-NEXT:   [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %x, i64 [[IDXPROM4:%.*]]
	; LNICM-NEXT:   [[IDXPROM6:%.*]] = sext i32 [[K:%.*]] to i64
	; LNICM-NEXT:   [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX5:%.*]], i64 0, i64 [[IDXPROM6:%.*]]
	; LNICM-NEXT:   [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX7:%.*]], align 4
	; LNICM-NEXT:   [[ADD8:%.*]] = add nsw i32 [[TMP2:%.*]], [[ADD:%.*]]
	; LNICM-NEXT:   store i32 [[ADD8:%.*]], i32* [[ARRAYIDX7:%.*]], align 4
	; LNICM-NEXT:   [[INC:%.*]] = add nsw i32 [[I:%.*]], 1
	; LNICM-NEXT:   [[CMP2:%.*]] = icmp slt i32 [[INC:%.*]], 10
	; LNICM-NEXT:   br label [[FOR_END:%.*]]
	; LNICM:      for.body3.split:
	; LICM-NOT:   for.body3.split:
	; INTC-NOT:   for.body3.split:
	; LNICM-NEXT:   [[TMP3:%.*]] = add nsw i32 [[I:%.*]], 1
	; LNICM-NEXT:   [[TMP4:%.*]] = icmp slt i32 [[TMP3:%.*]], 10
	; LNICM-NEXT:   br i1 [[TMP4:%.*]], label [[FOR_BODY3:%.*]], label [[FOR_END11:%.*]], !llvm.loop !0
	; LNICM:      for.end:
	; LNICM-NEXT:   [[INC10:%.*]] = add nsw i32 [[K:%.*]], 1
	; LNICM-NEXT:   [[CMP:%.*]] = icmp slt i32 [[INC10:%.*]], 10
	; LNICM-NEXT:   br i1 [[CMP:%.*]], label [[FOR_BODY:%.*]], label [[FOR_BODY3_SPLIT:%.*]], !llvm.loop !2
	; LNICM:      for.end11:
	; LNICM-NEXT:   ret void

	entry:
	  br label %for.body

	; Outer loop header; %k.02 is k.
	for.body:
	  %k.02 = phi i32 [ 0, %entry ], [ %inc10, %for.end ]
	  ; tmp = *z, loaded once per outer iteration in the input.
	  %0 = load i32, i32* %z, align 4
	  br label %for.body3

	; Inner loop: header, body and latch in one block; %i.01 is i.
	for.body3:
	  %i.01 = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ]
	  ; y[k]: the address is computed from %k.02 only, not from %i.01.
	  %idxprom = sext i32 %k.02 to i64
	  %arrayidx = getelementptr inbounds i32, i32* %y, i64 %idxprom
	  %1 = load i32, i32* %arrayidx, align 4
	  ; y[k] + tmp
	  %add = add nsw i32 %1, %0
	  ; &x[i][k]: select row i of %x, then element k within that row.
	  %idxprom4 = sext i32 %i.01 to i64
	  %arrayidx5 = getelementptr inbounds [10 x i32], [10 x i32]* %x, i64 %idxprom4
	  %idxprom6 = sext i32 %k.02 to i64
	  %arrayidx7 = getelementptr inbounds [10 x i32], [10 x i32]* %arrayidx5, i64 0, i64 %idxprom6
	  ; x[i][k] += y[k] + tmp
	  %2 = load i32, i32* %arrayidx7, align 4
	  %add8 = add nsw i32 %2, %add
	  store i32 %add8, i32* %arrayidx7, align 4
	  ; i++, then take the back-edge while i < 10.
	  %inc = add nsw i32 %i.01, 1
	  %cmp2 = icmp slt i32 %inc, 10
	  br i1 %cmp2, label %for.body3, label %for.end, !llvm.loop !0

	; Outer loop latch: k++, then take the back-edge while k < 10.
	for.end:
	  %inc10 = add nsw i32 %k.02, 1
	  %cmp = icmp slt i32 %inc10, 10
	  br i1 %cmp, label %for.body, label %for.end11, !llvm.loop !2

	for.end11:
	  ret void
	}

	!0 = distinct !{!0, !1}
	!1 = !{!"llvm.loop.mustprogress"}
	!2 = distinct !{!2, !1}