blob: 56d7fc335911ec6a96709ffff892ade7f52cc72f [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -verify -S %s -o - | FileCheck -check-prefix=IR %s
; A test with a divergent unreachable block and a uniform return block. The
; compiler needs to create a region that includes them so that
; StructurizeCFG correctly transforms the CFG, and then SI Annotate Control
; Flow does not fail during annotation.
define void @my_func(i32 %0) {
; IR-LABEL: @my_func(
; IR-NEXT: entry:
; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) null, align 8
; IR-NEXT: br label [[NODEBLOCK:%.*]]
; IR: NodeBlock:
; IR-NEXT: [[PIVOT:%.*]] = icmp sge i32 [[TMP1]], 1
; IR-NEXT: br i1 [[PIVOT]], label [[LEAFBLOCK1:%.*]], label [[FLOW:%.*]]
; IR: LeafBlock1:
; IR-NEXT: [[SWITCHLEAF2:%.*]] = icmp ne i32 [[TMP1]], 1
; IR-NEXT: br label [[FLOW]]
; IR: Flow:
; IR-NEXT: [[TMP2:%.*]] = phi i1 [ [[SWITCHLEAF2]], [[LEAFBLOCK1]] ], [ false, [[NODEBLOCK]] ]
; IR-NEXT: [[TMP3:%.*]] = phi i1 [ false, [[LEAFBLOCK1]] ], [ true, [[NODEBLOCK]] ]
; IR-NEXT: br i1 [[TMP3]], label [[LEAFBLOCK:%.*]], label [[FLOW11:%.*]]
; IR: LeafBlock:
; IR-NEXT: [[SWITCHLEAF:%.*]] = icmp eq i32 [[TMP1]], 0
; IR-NEXT: br i1 [[SWITCHLEAF]], label [[SW_BB2:%.*]], label [[FLOW12:%.*]]
; IR: Flow11:
; IR-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP9:%.*]], [[FLOW12]] ], [ false, [[FLOW]] ]
; IR-NEXT: [[TMP5:%.*]] = phi i1 [ [[TMP10:%.*]], [[FLOW12]] ], [ [[TMP2]], [[FLOW]] ]
; IR-NEXT: [[TMP6:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP5]])
; IR-NEXT: [[TMP7:%.*]] = extractvalue { i1, i64 } [[TMP6]], 0
; IR-NEXT: [[TMP8:%.*]] = extractvalue { i1, i64 } [[TMP6]], 1
; IR-NEXT: br i1 [[TMP7]], label [[DO_BODY:%.*]], label [[FLOW17:%.*]]
; IR: sw.bb2:
; IR-NEXT: br label [[NODEBLOCK7:%.*]]
; IR: Flow12:
; IR-NEXT: [[TMP9]] = phi i1 [ [[TMP24:%.*]], [[FLOW15:%.*]] ], [ false, [[LEAFBLOCK]] ]
; IR-NEXT: [[TMP10]] = phi i1 [ [[TMP25:%.*]], [[FLOW15]] ], [ true, [[LEAFBLOCK]] ]
; IR-NEXT: br label [[FLOW11]]
; IR: NodeBlock7:
; IR-NEXT: [[PIVOT8:%.*]] = icmp sge i32 [[TMP0:%.*]], 2
; IR-NEXT: [[TMP11:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[PIVOT8]])
; IR-NEXT: [[TMP12:%.*]] = extractvalue { i1, i64 } [[TMP11]], 0
; IR-NEXT: [[TMP13:%.*]] = extractvalue { i1, i64 } [[TMP11]], 1
; IR-NEXT: br i1 [[TMP12]], label [[LEAFBLOCK5:%.*]], label [[FLOW13:%.*]]
; IR: LeafBlock5:
; IR-NEXT: [[SWITCHLEAF6:%.*]] = icmp eq i32 [[TMP0]], 2
; IR-NEXT: br label [[FLOW13]]
; IR: Flow13:
; IR-NEXT: [[TMP14:%.*]] = phi i1 [ true, [[LEAFBLOCK5]] ], [ false, [[NODEBLOCK7]] ]
; IR-NEXT: [[TMP15:%.*]] = phi i1 [ [[SWITCHLEAF6]], [[LEAFBLOCK5]] ], [ false, [[NODEBLOCK7]] ]
; IR-NEXT: [[TMP16:%.*]] = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 [[TMP13]])
; IR-NEXT: [[TMP17:%.*]] = extractvalue { i1, i64 } [[TMP16]], 0
; IR-NEXT: [[TMP18:%.*]] = extractvalue { i1, i64 } [[TMP16]], 1
; IR-NEXT: br i1 [[TMP17]], label [[LEAFBLOCK3:%.*]], label [[FLOW14:%.*]]
; IR: LeafBlock3:
; IR-NEXT: [[SWITCHLEAF4:%.*]] = icmp eq i32 [[TMP0]], 0
; IR-NEXT: [[SWITCHLEAF4_INV:%.*]] = xor i1 [[SWITCHLEAF4]], true
; IR-NEXT: br label [[FLOW14]]
; IR: Flow14:
; IR-NEXT: [[TMP19:%.*]] = phi i1 [ [[SWITCHLEAF4_INV]], [[LEAFBLOCK3]] ], [ [[TMP14]], [[FLOW13]] ]
; IR-NEXT: [[TMP20:%.*]] = phi i1 [ [[SWITCHLEAF4]], [[LEAFBLOCK3]] ], [ [[TMP15]], [[FLOW13]] ]
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP18]])
; IR-NEXT: [[TMP21:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP20]])
; IR-NEXT: [[TMP22:%.*]] = extractvalue { i1, i64 } [[TMP21]], 0
; IR-NEXT: [[TMP23:%.*]] = extractvalue { i1, i64 } [[TMP21]], 1
; IR-NEXT: br i1 [[TMP22]], label [[LAND_LHS_TRUE_I:%.*]], label [[FLOW15]]
; IR: land.lhs.true.i:
; IR-NEXT: br label [[LEAFBLOCK9:%.*]]
; IR: Flow15:
; IR-NEXT: [[TMP24]] = phi i1 [ [[TMP29:%.*]], [[FLOW16:%.*]] ], [ false, [[FLOW14]] ]
; IR-NEXT: [[TMP25]] = phi i1 [ [[TMP30:%.*]], [[FLOW16]] ], [ [[TMP19]], [[FLOW14]] ]
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP23]])
; IR-NEXT: br label [[FLOW12]]
; IR: LeafBlock9:
; IR-NEXT: [[SWITCHLEAF10:%.*]] = icmp sgt i32 [[TMP0]], 1
; IR-NEXT: [[TMP26:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[SWITCHLEAF10]])
; IR-NEXT: [[TMP27:%.*]] = extractvalue { i1, i64 } [[TMP26]], 0
; IR-NEXT: [[TMP28:%.*]] = extractvalue { i1, i64 } [[TMP26]], 1
; IR-NEXT: br i1 [[TMP27]], label [[DO_BODY_I_I_I_I:%.*]], label [[FLOW16]]
; IR: do.body.i.i.i.i:
; IR-NEXT: tail call fastcc void null()
; IR-NEXT: br label [[FLOW16]]
; IR: Flow16:
; IR-NEXT: [[TMP29]] = phi i1 [ true, [[DO_BODY_I_I_I_I]] ], [ false, [[LEAFBLOCK9]] ]
; IR-NEXT: [[TMP30]] = phi i1 [ false, [[DO_BODY_I_I_I_I]] ], [ true, [[LEAFBLOCK9]] ]
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP28]])
; IR-NEXT: br label [[FLOW15]]
; IR: do.body:
; IR-NEXT: tail call fastcc void null()
; IR-NEXT: br label [[FLOW17]]
; IR: Flow17:
; IR-NEXT: [[TMP31:%.*]] = phi i1 [ true, [[DO_BODY]] ], [ [[TMP4]], [[FLOW11]] ]
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP8]])
; IR-NEXT: [[TMP32:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP31]])
; IR-NEXT: [[TMP33:%.*]] = extractvalue { i1, i64 } [[TMP32]], 0
; IR-NEXT: [[TMP34:%.*]] = extractvalue { i1, i64 } [[TMP32]], 1
; IR-NEXT: br i1 [[TMP33]], label [[UNIFIEDUNREACHABLEBLOCK:%.*]], label [[UNIFIEDRETURNBLOCK:%.*]]
; IR: UnifiedUnreachableBlock:
; IR-NEXT: call void @llvm.amdgcn.unreachable()
; IR-NEXT: br label [[UNIFIEDRETURNBLOCK]]
; IR: UnifiedReturnBlock:
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP34]])
; IR-NEXT: ret void
;
; GCN-LABEL: my_func:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], 0
; GCN-NEXT: s_load_dword s10, s[4:5], 0x0
; GCN-NEXT: s_mov_b64 s[8:9], -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lt_i32 s10, 1
; GCN-NEXT: s_mov_b64 s[6:7], 0
; GCN-NEXT: s_cbranch_scc1 .LBB0_7
; GCN-NEXT: ; %bb.1: ; %LeafBlock1
; GCN-NEXT: s_cmp_lg_u32 s10, 1
; GCN-NEXT: s_cselect_b64 s[6:7], -1, 0
; GCN-NEXT: s_mov_b64 vcc, exec
; GCN-NEXT: s_cbranch_execz .LBB0_8
; GCN-NEXT: .LBB0_2: ; %Flow11
; GCN-NEXT: s_and_saveexec_b64 s[8:9], s[6:7]
; GCN-NEXT: .LBB0_3: ; %do.body
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT: .LBB0_4: ; %Flow17
; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; GCN-NEXT: ; %bb.5: ; %UnifiedUnreachableBlock
; GCN-NEXT: ; divergent unreachable
; GCN-NEXT: ; %bb.6: ; %UnifiedReturnBlock
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: .LBB0_7: ; %Flow
; GCN-NEXT: s_andn2_b64 vcc, exec, s[8:9]
; GCN-NEXT: s_cbranch_vccnz .LBB0_2
; GCN-NEXT: .LBB0_8: ; %LeafBlock
; GCN-NEXT: s_cmp_eq_u32 s10, 0
; GCN-NEXT: s_cbranch_scc1 .LBB0_10
; GCN-NEXT: ; %bb.9:
; GCN-NEXT: s_mov_b64 s[6:7], -1
; GCN-NEXT: s_and_saveexec_b64 s[8:9], s[6:7]
; GCN-NEXT: s_cbranch_execnz .LBB0_3
; GCN-NEXT: s_branch .LBB0_4
; GCN-NEXT: .LBB0_10: ; %NodeBlock7
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 1, v0
; GCN-NEXT: s_mov_b64 s[8:9], 0
; GCN-NEXT: s_mov_b64 s[6:7], 0
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GCN-NEXT: ; %bb.11: ; %LeafBlock5
; GCN-NEXT: s_mov_b64 s[6:7], exec
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: ; %bb.12: ; %Flow13
; GCN-NEXT: s_andn2_saveexec_b64 s[10:11], s[4:5]
; GCN-NEXT: ; %bb.13: ; %LeafBlock3
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0
; GCN-NEXT: s_andn2_b64 s[6:7], s[6:7], exec
; GCN-NEXT: s_andn2_b64 s[8:9], s[8:9], exec
; GCN-NEXT: s_and_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[12:13], vcc, exec
; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[4:5]
; GCN-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13]
; GCN-NEXT: ; %bb.14: ; %Flow14
; GCN-NEXT: s_or_b64 exec, exec, s[10:11]
; GCN-NEXT: s_mov_b64 s[4:5], 0
; GCN-NEXT: s_and_saveexec_b64 s[10:11], s[8:9]
; GCN-NEXT: s_cbranch_execz .LBB0_18
; GCN-NEXT: ; %bb.15: ; %LeafBlock9
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 1, v0
; GCN-NEXT: s_mov_b64 s[8:9], -1
; GCN-NEXT: s_and_saveexec_b64 s[12:13], vcc
; GCN-NEXT: ; %bb.16: ; %do.body.i.i.i.i
; GCN-NEXT: s_mov_b64 s[4:5], exec
; GCN-NEXT: s_xor_b64 s[8:9], exec, -1
; GCN-NEXT: ; %bb.17: ; %Flow16
; GCN-NEXT: s_or_b64 exec, exec, s[12:13]
; GCN-NEXT: s_and_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_andn2_b64 s[6:7], s[6:7], exec
; GCN-NEXT: s_and_b64 s[8:9], s[8:9], exec
; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
; GCN-NEXT: .LBB0_18: ; %Flow15
; GCN-NEXT: s_or_b64 exec, exec, s[10:11]
; GCN-NEXT: s_and_saveexec_b64 s[8:9], s[6:7]
; GCN-NEXT: s_cbranch_execnz .LBB0_3
; GCN-NEXT: s_branch .LBB0_4
; The outer switch is on the value loaded from a null addrspace(4) pointer:
; 1 returns, 0 enters the nested switches on the argument, and any other value
; goes straight to a block that ends in unreachable. In the llc checks above
; this value is loaded and compared with scalar instructions (s_load_dword,
; s_cmp_*), while the argument %0 is compared with vector compares on v0.
entry:
%1 = load i32, ptr addrspace(4) null, align 8
switch i32 %1, label %do.body [
i32 1, label %sw.bb
i32 0, label %sw.bb2
]
; The only block in the function that returns.
sw.bb:
ret void
; Reached when the loaded value is 0. Dispatches on the argument: 0 and 2
; continue to land.lhs.true.i, everything else goes to do.body.
sw.bb2:
switch i32 %0, label %do.body [
i32 0, label %land.lhs.true.i
i32 2, label %land.lhs.true.i
]
; sw.bb2 is the only predecessor, so %0 is 0 or 2 here: 0 goes to do.body and
; 2 takes the default edge to do.body.i.i.i.i.
land.lhs.true.i:
switch i32 %0, label %do.body.i.i.i.i [
i32 0, label %do.body
i32 1, label %do.body
]
; Both remaining blocks call through a null function pointer and end in
; unreachable. In the opt checks above each of them instead branches to a Flow
; block, and the unreachable is represented by UnifiedUnreachableBlock.
do.body.i.i.i.i:
tail call fastcc void null()
unreachable
do.body:
tail call fastcc void null()
unreachable
}