blob: aa9b7f2fc593344db45fc30349a25acd83955ebe [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; Here we have multiple exits, but the different sources, different outputs are
; needed, this checks that they are handled by separate switch statements.
define void @outline_outputs1() #0 {
entry:
%output = alloca i32, align 4
%result = alloca i32, align 4
%output2 = alloca i32, align 4
%result2 = alloca i32, align 4
%a = alloca i32, align 4
%b = alloca i32, align 4
br label %block_2
block_1:
%a2 = alloca i32, align 4
%b2 = alloca i32, align 4
br label %block_2
block_2:
%a2val = load i32, i32* %a
%b2val = load i32, i32* %b
%add2 = add i32 2, %a2val
%mul2 = mul i32 2, %b2val
br label %block_5
block_3:
%aval = load i32, i32* %a
%bval = load i32, i32* %b
%add = add i32 2, %aval
%mul = mul i32 2, %bval
br label %block_4
block_4:
store i32 %add, i32* %output, align 4
store i32 %mul, i32* %result, align 4
br label %block_6
block_5:
store i32 %add2, i32* %output, align 4
store i32 %mul2, i32* %result, align 4
br label %block_7
block_6:
%div = udiv i32 %aval, %bval
ret void
block_7:
%sub = sub i32 %a2val, %b2val
ret void
}
define void @outline_outputs2() #0 {
entry:
%output = alloca i32, align 4
%result = alloca i32, align 4
%output2 = alloca i32, align 4
%result2 = alloca i32, align 4
%a = alloca i32, align 4
%b = alloca i32, align 4
br label %block_2
block_1:
%a2 = alloca i32, align 4
%b2 = alloca i32, align 4
br label %block_2
block_2:
%a2val = load i32, i32* %a
%b2val = load i32, i32* %b
%add2 = add i32 2, %a2val
%mul2 = mul i32 2, %b2val
br label %block_5
block_3:
%aval = load i32, i32* %a
%bval = load i32, i32* %b
%add = add i32 2, %aval
%mul = mul i32 2, %bval
br label %block_4
block_4:
store i32 %add, i32* %output, align 4
store i32 %mul, i32* %result, align 4
br label %block_7
block_5:
store i32 %add2, i32* %output, align 4
store i32 %mul2, i32* %result, align 4
br label %block_6
block_6:
%diff = sub i32 %a2val, %b2val
ret void
block_7:
%quot = udiv i32 %add, %mul
ret void
}
; CHECK-LABEL: @outline_outputs1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BVAL_LOC:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[AVAL_LOC:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B2VAL_LOC:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[A2VAL_LOC:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: br label [[BLOCK_2:%.*]]
; CHECK: block_1:
; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B2:%.*]] = alloca i32, align 4
; CHECK-NEXT: br label [[BLOCK_2]]
; CHECK: block_2:
; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[A2VAL_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[B2VAL_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
; CHECK-NEXT: [[LT_CAST2:%.*]] = bitcast i32* [[AVAL_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST2]])
; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[BVAL_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
; CHECK-NEXT: [[TMP0:%.*]] = call i1 @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[A2VAL_LOC]], i32* [[B2VAL_LOC]], i32* [[AVAL_LOC]], i32* [[BVAL_LOC]], i32 0)
; CHECK-NEXT: [[A2VAL_RELOAD:%.*]] = load i32, i32* [[A2VAL_LOC]], align 4
; CHECK-NEXT: [[B2VAL_RELOAD:%.*]] = load i32, i32* [[B2VAL_LOC]], align 4
; CHECK-NEXT: [[AVAL_RELOAD:%.*]] = load i32, i32* [[AVAL_LOC]], align 4
; CHECK-NEXT: [[BVAL_RELOAD:%.*]] = load i32, i32* [[BVAL_LOC]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST2]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST3]])
; CHECK-NEXT: br i1 [[TMP0]], label [[BLOCK_6:%.*]], label [[BLOCK_7:%.*]]
; CHECK: block_6:
; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[AVAL_RELOAD]], [[BVAL_RELOAD]]
; CHECK-NEXT: ret void
; CHECK: block_7:
; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[A2VAL_RELOAD]], [[B2VAL_RELOAD]]
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: @outline_outputs2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL_LOC:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B2VAL_LOC:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[A2VAL_LOC:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: br label [[BLOCK_2:%.*]]
; CHECK: block_1:
; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B2:%.*]] = alloca i32, align 4
; CHECK-NEXT: br label [[BLOCK_2]]
; CHECK: block_2:
; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[A2VAL_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[B2VAL_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
; CHECK-NEXT: [[LT_CAST2:%.*]] = bitcast i32* [[ADD_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST2]])
; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[MUL_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
; CHECK-NEXT: [[TMP0:%.*]] = call i1 @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[A2VAL_LOC]], i32* [[B2VAL_LOC]], i32* [[ADD_LOC]], i32* [[MUL_LOC]], i32 1)
; CHECK-NEXT: [[A2VAL_RELOAD:%.*]] = load i32, i32* [[A2VAL_LOC]], align 4
; CHECK-NEXT: [[B2VAL_RELOAD:%.*]] = load i32, i32* [[B2VAL_LOC]], align 4
; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
; CHECK-NEXT: [[MUL_RELOAD:%.*]] = load i32, i32* [[MUL_LOC]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST2]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST3]])
; CHECK-NEXT: br i1 [[TMP0]], label [[BLOCK_7:%.*]], label [[BLOCK_6:%.*]]
; CHECK: block_6:
; CHECK-NEXT: [[DIFF:%.*]] = sub i32 [[A2VAL_RELOAD]], [[B2VAL_RELOAD]]
; CHECK-NEXT: ret void
; CHECK: block_7:
; CHECK-NEXT: [[QUOT:%.*]] = udiv i32 [[ADD_RELOAD]], [[MUL_RELOAD]]
; CHECK-NEXT: ret void
;
;
; CHECK: define internal i1 @outlined_ir_func_0(
; CHECK-NEXT: newFuncRoot:
; CHECK-NEXT: br label [[BLOCK_2_TO_OUTLINE:%.*]]
; CHECK: block_2_to_outline:
; CHECK-NEXT: [[A2VAL:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
; CHECK-NEXT: [[B2VAL:%.*]] = load i32, i32* [[TMP1:%.*]], align 4
; CHECK-NEXT: [[ADD2:%.*]] = add i32 2, [[A2VAL]]
; CHECK-NEXT: [[MUL2:%.*]] = mul i32 2, [[B2VAL]]
; CHECK-NEXT: br label [[BLOCK_5:%.*]]
; CHECK: block_3:
; CHECK-NEXT: [[AVAL:%.*]] = load i32, i32* [[TMP0]], align 4
; CHECK-NEXT: [[BVAL:%.*]] = load i32, i32* [[TMP1]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add i32 2, [[AVAL]]
; CHECK-NEXT: [[MUL:%.*]] = mul i32 2, [[BVAL]]
; CHECK-NEXT: br label [[BLOCK_4:%.*]]
; CHECK: block_4:
; CHECK-NEXT: store i32 [[ADD]], i32* [[TMP2:%.*]], align 4
; CHECK-NEXT: store i32 [[MUL]], i32* [[TMP3:%.*]], align 4
; CHECK-NEXT: br label [[BLOCK_6_EXITSTUB:%.*]]
; CHECK: block_5:
; CHECK-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4
; CHECK-NEXT: store i32 [[MUL2]], i32* [[TMP3]], align 4
; CHECK-NEXT: br label [[BLOCK_7_EXITSTUB:%.*]]
; CHECK: block_6.exitStub:
; CHECK-NEXT: switch i32 [[TMP8:%.*]], label [[FINAL_BLOCK_1:%.*]] [
; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_0_1:%.*]]
; CHECK-NEXT: i32 1, label [[OUTPUT_BLOCK_1_1:%.*]]
; CHECK-NEXT: ]
; CHECK: block_7.exitStub:
; CHECK-NEXT: switch i32 [[TMP8]], label [[FINAL_BLOCK_0:%.*]] [
; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_0_0:%.*]]
; CHECK-NEXT: i32 1, label [[OUTPUT_BLOCK_1_0:%.*]]
; CHECK-NEXT: ]
; CHECK: output_block_0_0:
; CHECK-NEXT: store i32 [[A2VAL]], i32* [[TMP4:%.*]], align 4
; CHECK-NEXT: store i32 [[B2VAL]], i32* [[TMP5:%.*]], align 4
; CHECK-NEXT: br label [[FINAL_BLOCK_0]]
; CHECK: output_block_0_1:
; CHECK-NEXT: store i32 [[AVAL]], i32* [[TMP6:%.*]], align 4
; CHECK-NEXT: store i32 [[BVAL]], i32* [[TMP7:%.*]], align 4
; CHECK-NEXT: br label [[FINAL_BLOCK_1]]
; CHECK: output_block_1_0:
; CHECK-NEXT: store i32 [[A2VAL]], i32* [[TMP4]], align 4
; CHECK-NEXT: store i32 [[B2VAL]], i32* [[TMP5]], align 4
; CHECK-NEXT: br label [[FINAL_BLOCK_0]]
; CHECK: output_block_1_1:
; CHECK-NEXT: store i32 [[ADD]], i32* [[TMP6]], align 4
; CHECK-NEXT: store i32 [[MUL]], i32* [[TMP7]], align 4
; CHECK-NEXT: br label [[FINAL_BLOCK_1]]
; CHECK: final_block_0:
; CHECK-NEXT: ret i1 false
; CHECK: final_block_1:
; CHECK-NEXT: ret i1 true
;