; blob: 19d741db1c6126e11f139bc0c0c758ffe8d24b69 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -stop-after=si-insert-waitcnts < %s | FileCheck %s
; Testcase reduced from Blender 4.1, where incorrect waitcnts were generated
; because of a bug in the WaitcntBrackets::merge implementation.
; Aggregate types used by the getelementptr in @widget: %struct.bar wraps a
; single %struct.bar.0, which is four floats.
%struct.bar = type { %struct.bar.0 }
%struct.bar.0 = type { float, float, float, float }
; Kernel with a two-level loop nest, compiled up to si-insert-waitcnts.
;   %arg  - addrspace(1) pointer; bb9 stores 0 through it.
;   %arg1 - i1 that bb11 uses to choose between returning to the outer loop
;           header (bb2) and the inner loop header (bb6).
; Both loop headers consume, through phis, values that were loaded in other
; blocks, so each header is reached with loads issued along different paths.
; The autogenerated assertions in the body pin the resulting S_WAITCNT
; placement: one in bb.0, two in bb.1 and one in bb.4.
; NOTE(review): 3952 (0xF70) and 49279 (0xC07F) presumably decode to vmcnt(0)
; and lgkmcnt(0) on gfx90a - confirm against the s_waitcnt encoding.
define amdgpu_kernel void @widget(ptr addrspace(1) %arg, i1 %arg1) {
; CHECK-LABEL: name: widget
; CHECK: bb.0.bb:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $sgpr8_sgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr17
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $sgpr22_sgpr23 = S_MOV_B64 $sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $sgpr20_sgpr21_sgpr22_sgpr23
; CHECK-NEXT: $sgpr20_sgpr21 = S_MOV_B64 killed $sgpr0_sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: renamable $sgpr2 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 8, 0 :: (dereferenceable invariant load (s32) from %ir.arg1.kernarg.offset.align.down, align 8, addrspace 4)
; CHECK-NEXT: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s64) from %ir.arg.kernarg.offset1, align 16, addrspace 4)
; CHECK-NEXT: $sgpr20 = S_ADD_U32 $sgpr20, killed $sgpr17, implicit-def $scc, implicit-def $sgpr20_sgpr21_sgpr22_sgpr23
; CHECK-NEXT: $sgpr21 = S_ADDC_U32 $sgpr21, 0, implicit-def dead $scc, implicit killed $scc, implicit-def $sgpr20_sgpr21_sgpr22_sgpr23
; CHECK-NEXT: renamable $vgpr1 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: S_WAITCNT 49279
; CHECK-NEXT: S_BITCMP1_B32 killed renamable $sgpr2, 0, implicit-def $scc
; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_MOV_B64 0
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec, implicit $exec
; CHECK-NEXT: renamable $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit killed $scc
; CHECK-NEXT: $vgpr2_vgpr3 = V_PK_MOV_B32 8, 0, 8, 0, 0, 0, 0, 0, 0, implicit $exec
; CHECK-NEXT: renamable $sgpr6_sgpr7 = IMPLICIT_DEF
; CHECK-NEXT: S_BRANCH %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.10.loop.exit.guard:
; CHECK-NEXT: successors: %bb.11(0x04000000), %bb.1(0x7c000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_MOV_B64 0
; CHECK-NEXT: renamable $sgpr6_sgpr7 = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.11, implicit killed $vcc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.bb2:
; CHECK-NEXT: successors: %bb.12(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_WAITCNT 3952
; CHECK-NEXT: renamable $vgpr0 = V_XOR_B32_e32 1, killed $vgpr0, implicit $exec
; CHECK-NEXT: renamable $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr0_vgpr1, implicit $exec
; CHECK-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr4_vgpr5, 0, 0, implicit $exec :: (load (s32) from %ir.getelementptr, align 16, addrspace 1)
; CHECK-NEXT: renamable $sgpr6_sgpr7 = S_OR_B64 killed renamable $sgpr6_sgpr7, $exec, implicit-def dead $scc
; CHECK-NEXT: S_WAITCNT 3952
; CHECK-NEXT: V_CMP_GT_I32_e32 1, killed $vgpr0, implicit-def $vcc, implicit $exec
; CHECK-NEXT: renamable $vgpr0 = IMPLICIT_DEF
; CHECK-NEXT: $sgpr8_sgpr9 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.12.bb13:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD renamable $vgpr2_vgpr3, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) null`, addrspace 1)
; CHECK-NEXT: renamable $sgpr6_sgpr7 = S_ANDN2_B64 killed renamable $sgpr6_sgpr7, $exec, implicit-def dead $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.Flow3:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr8_sgpr9, implicit-def $scc
; CHECK-NEXT: renamable $sgpr8_sgpr9 = S_AND_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_OR_B64 killed renamable $sgpr8_sgpr9, killed renamable $sgpr4_sgpr5, implicit-def $scc
; CHECK-NEXT: $exec = S_ANDN2_B64 $exec, renamable $sgpr4_sgpr5, implicit-def $scc
; CHECK-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.bb3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
; CHECK-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr20_sgpr21_sgpr22_sgpr23, 0, 0, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(5) null`, addrspace 5)
; CHECK-NEXT: renamable $vgpr4 = GLOBAL_LOAD_DWORD renamable $vgpr2_vgpr3, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) null`, addrspace 1)
; CHECK-NEXT: S_BRANCH %bb.4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.9.Flow2:
; CHECK-NEXT: successors: %bb.10(0x04000000), %bb.4(0x7c000000)
; CHECK-NEXT: liveins: $vgpr0, $vgpr4, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
; CHECK-NEXT: S_CBRANCH_VCCZ %bb.10, implicit killed $vcc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4.bb6:
; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $vgpr4, $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_WAITCNT 3952
; CHECK-NEXT: renamable $sgpr6_sgpr7 = V_CMP_EQ_U32_e64 0, killed $vgpr4, implicit $exec
; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr6_sgpr7, implicit-def dead $scc
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.5, implicit killed $vcc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6.bb9:
; CHECK-NEXT: successors: %bb.7(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR renamable $vgpr1, renamable $vgpr1, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s32) into %ir.arg.load, addrspace 1)
; CHECK-NEXT: renamable $vgpr4 = GLOBAL_LOAD_DWORD renamable $vgpr2_vgpr3, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) null`, addrspace 1)
; CHECK-NEXT: renamable $sgpr6_sgpr7 = S_MOV_B64 -1
; CHECK-NEXT: S_BRANCH %bb.7
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.7(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr6_sgpr7, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $vgpr4 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7.Flow:
; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.9(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $vgpr4, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr6_sgpr7, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_MOV_B64 -1
; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
; CHECK-NEXT: renamable $sgpr6_sgpr7 = S_MOV_B64 -1
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.9, implicit killed $vcc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.8.bb11:
; CHECK-NEXT: successors: %bb.9(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $vgpr4, $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_MOV_B64 0
; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $sgpr2_sgpr3
; CHECK-NEXT: S_BRANCH %bb.9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.11.DummyReturnBlock:
; CHECK-NEXT: liveins: $sgpr20_sgpr21_sgpr22_sgpr23
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_ENDPGM 0
; Entry block: falls straight into the outer loop header.
bb:
br label %bb2
; Outer loop header. %phi is 0 on entry, %load14 when arriving from bb13 and
; %load4 when arriving from bb11, so two of its three inputs are loaded values.
bb2: ; preds = %bb13, %bb11, %bb
%phi = phi i32 [ 0, %bb ], [ %load14, %bb13 ], [ %load4, %bb11 ]
; Index is %phi ^ 1, zero-extended, applied to a null base with %struct.bar
; as the element type.
%xor = xor i32 %phi, 1
%zext = zext i32 %xor to i64
%getelementptr = getelementptr %struct.bar, ptr addrspace(1) null, i64 %zext
%load = load i32, ptr addrspace(1) %getelementptr, align 16
; Loaded value > 0 enters the inner loop via bb3; otherwise take bb13.
%icmp = icmp sgt i32 %load, 0
br i1 %icmp, label %bb3, label %bb13
; Runs once before the inner loop: %load4 later feeds the outer-loop phi (via
; bb11) and %load5 seeds the inner-loop phi.
bb3: ; preds = %bb2
%load4 = load i32, ptr addrspace(5) null, align 4
%load5 = load i32, ptr addrspace(1) null, align 4
br label %bb6
; Inner loop header. %phi7 is %load5 on first entry and %phi12 afterwards.
bb6: ; preds = %bb11, %bb3
%phi7 = phi i32 [ %load5, %bb3 ], [ %phi12, %bb11 ]
; A zero value skips bb9 and goes directly to the latch.
%icmp8 = icmp eq i32 %phi7, 0
br i1 %icmp8, label %bb11, label %bb9
; Stores 0 through %arg, then loads the value carried into the next inner
; iteration.
bb9: ; preds = %bb6
store i32 0, ptr addrspace(1) %arg, align 4
%load10 = load i32, ptr addrspace(1) null, align 4
br label %bb11
; Inner loop latch. %phi12 is 0 when bb9 was skipped, %load10 otherwise.
; %arg1 true returns to the outer header bb2; false repeats the inner loop.
bb11: ; preds = %bb9, %bb6
%phi12 = phi i32 [ 0, %bb6 ], [ %load10, %bb9 ]
br i1 %arg1, label %bb2, label %bb6
; Taken from bb2 when %load <= 0: loads the next %phi value and returns to
; the outer header.
bb13: ; preds = %bb2
%load14 = load i32, ptr addrspace(1) null, align 4
br label %bb2
}