| ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -stop-after=si-insert-waitcnts < %s | FileCheck %s |
| |
| ; Testcase reduced from Blender 4.1 where we generated incorrect waitcnts due to a bad |
| ; WaitcntBrackets::merge implementation. |
| |
; %struct.bar wraps a single %struct.bar.0, which is four floats. It is only
; used as the element type of the getelementptr in @widget's bb2.
| %struct.bar = type { %struct.bar.0 } |
| %struct.bar.0 = type { float, float, float, float } |
| |
; @widget: reduced kernel with two nested loops whose header phis are fed by
; loads issued in different predecessor blocks:
;   * outer loop header bb2  - back edges from bb13 and bb11
;   * inner loop header bb6  - back edge from bb11
; The CHECK lines below pin where S_WAITCNT instructions end up after
; si-insert-waitcnts (see the RUN line) at the points where those
; predecessors join.
; NOTE(review): 3952 (0xF70) looks like vmcnt(0) and 49279 (0xC07F) like
; lgkmcnt(0) in the gfx9 S_WAITCNT encoding - confirm against the ISA docs.
; The CHECK lines are autogenerated; regenerate them with
; utils/update_mir_test_checks.py instead of editing them by hand.
| define amdgpu_kernel void @widget(ptr addrspace(1) %arg, i1 %arg1) { |
| ; CHECK-LABEL: name: widget |
| ; CHECK: bb.0.bb: |
| ; CHECK-NEXT: successors: %bb.1(0x80000000) |
| ; CHECK-NEXT: liveins: $sgpr8_sgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr17 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $sgpr22_sgpr23 = S_MOV_B64 $sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $sgpr20_sgpr21_sgpr22_sgpr23 |
| ; CHECK-NEXT: $sgpr20_sgpr21 = S_MOV_B64 killed $sgpr0_sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; CHECK-NEXT: renamable $sgpr2 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 8, 0 :: (dereferenceable invariant load (s32) from %ir.arg1.kernarg.offset.align.down, align 8, addrspace 4) |
| ; CHECK-NEXT: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s64) from %ir.arg.kernarg.offset1, align 16, addrspace 4) |
| ; CHECK-NEXT: $sgpr20 = S_ADD_U32 $sgpr20, killed $sgpr17, implicit-def $scc, implicit-def $sgpr20_sgpr21_sgpr22_sgpr23 |
| ; CHECK-NEXT: $sgpr21 = S_ADDC_U32 $sgpr21, 0, implicit-def dead $scc, implicit killed $scc, implicit-def $sgpr20_sgpr21_sgpr22_sgpr23 |
| ; CHECK-NEXT: renamable $vgpr1 = V_MOV_B32_e32 0, implicit $exec |
| ; CHECK-NEXT: S_WAITCNT 49279 |
| ; CHECK-NEXT: S_BITCMP1_B32 killed renamable $sgpr2, 0, implicit-def $scc |
| ; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_MOV_B64 0 |
| ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec, implicit $exec |
| ; CHECK-NEXT: renamable $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit killed $scc |
| ; CHECK-NEXT: $vgpr2_vgpr3 = V_PK_MOV_B32 8, 0, 8, 0, 0, 0, 0, 0, 0, implicit $exec |
| ; CHECK-NEXT: renamable $sgpr6_sgpr7 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_BRANCH %bb.1 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.10.loop.exit.guard: |
| ; CHECK-NEXT: successors: %bb.11(0x04000000), %bb.1(0x7c000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc |
| ; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_MOV_B64 0 |
| ; CHECK-NEXT: renamable $sgpr6_sgpr7 = IMPLICIT_DEF |
| ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.11, implicit killed $vcc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1.bb2: |
| ; CHECK-NEXT: successors: %bb.12(0x40000000), %bb.2(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: S_WAITCNT 3952 |
| ; CHECK-NEXT: renamable $vgpr0 = V_XOR_B32_e32 1, killed $vgpr0, implicit $exec |
| ; CHECK-NEXT: renamable $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr0_vgpr1, implicit $exec |
| ; CHECK-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr4_vgpr5, 0, 0, implicit $exec :: (load (s32) from %ir.getelementptr, align 16, addrspace 1) |
| ; CHECK-NEXT: renamable $sgpr6_sgpr7 = S_OR_B64 killed renamable $sgpr6_sgpr7, $exec, implicit-def dead $scc |
| ; CHECK-NEXT: S_WAITCNT 3952 |
| ; CHECK-NEXT: V_CMP_GT_I32_e32 1, killed $vgpr0, implicit-def $vcc, implicit $exec |
| ; CHECK-NEXT: renamable $vgpr0 = IMPLICIT_DEF |
| ; CHECK-NEXT: $sgpr8_sgpr9 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec |
| ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.12.bb13: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD renamable $vgpr2_vgpr3, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) null`, addrspace 1) |
| ; CHECK-NEXT: renamable $sgpr6_sgpr7 = S_ANDN2_B64 killed renamable $sgpr6_sgpr7, $exec, implicit-def dead $scc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2.Flow3: |
| ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr8_sgpr9, implicit-def $scc |
| ; CHECK-NEXT: renamable $sgpr8_sgpr9 = S_AND_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc |
| ; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_OR_B64 killed renamable $sgpr8_sgpr9, killed renamable $sgpr4_sgpr5, implicit-def $scc |
| ; CHECK-NEXT: $exec = S_ANDN2_B64 $exec, renamable $sgpr4_sgpr5, implicit-def $scc |
| ; CHECK-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.3.bb3: |
| ; CHECK-NEXT: successors: %bb.4(0x80000000) |
| ; CHECK-NEXT: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc |
| ; CHECK-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr20_sgpr21_sgpr22_sgpr23, 0, 0, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(5) null`, addrspace 5) |
| ; CHECK-NEXT: renamable $vgpr4 = GLOBAL_LOAD_DWORD renamable $vgpr2_vgpr3, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) null`, addrspace 1) |
| ; CHECK-NEXT: S_BRANCH %bb.4 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.9.Flow2: |
| ; CHECK-NEXT: successors: %bb.10(0x04000000), %bb.4(0x7c000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $vgpr4, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc |
| ; CHECK-NEXT: S_CBRANCH_VCCZ %bb.10, implicit killed $vcc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4.bb6: |
| ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $vgpr4, $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: S_WAITCNT 3952 |
| ; CHECK-NEXT: renamable $sgpr6_sgpr7 = V_CMP_EQ_U32_e64 0, killed $vgpr4, implicit $exec |
| ; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr6_sgpr7, implicit-def dead $scc |
| ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.5, implicit killed $vcc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.6.bb9: |
| ; CHECK-NEXT: successors: %bb.7(0x80000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR renamable $vgpr1, renamable $vgpr1, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s32) into %ir.arg.load, addrspace 1) |
| ; CHECK-NEXT: renamable $vgpr4 = GLOBAL_LOAD_DWORD renamable $vgpr2_vgpr3, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) null`, addrspace 1) |
| ; CHECK-NEXT: renamable $sgpr6_sgpr7 = S_MOV_B64 -1 |
| ; CHECK-NEXT: S_BRANCH %bb.7 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5: |
| ; CHECK-NEXT: successors: %bb.7(0x80000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr6_sgpr7, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: renamable $vgpr4 = V_MOV_B32_e32 0, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.7.Flow: |
| ; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.9(0x40000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $vgpr4, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr6_sgpr7, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_MOV_B64 -1 |
| ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc |
| ; CHECK-NEXT: renamable $sgpr6_sgpr7 = S_MOV_B64 -1 |
| ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.9, implicit killed $vcc |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.8.bb11: |
| ; CHECK-NEXT: successors: %bb.9(0x80000000) |
| ; CHECK-NEXT: liveins: $vgpr0, $vgpr4, $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_MOV_B64 0 |
| ; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $sgpr2_sgpr3 |
| ; CHECK-NEXT: S_BRANCH %bb.9 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.11.DummyReturnBlock: |
| ; CHECK-NEXT: liveins: $sgpr20_sgpr21_sgpr22_sgpr23 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: S_ENDPGM 0 |
; Entry block: falls straight into the outer loop header.
| bb: |
| br label %bb2 |
| |
; Outer loop header. %phi is 0 on entry, %load14 when arriving from bb13 and
; %load4 (defined in bb3) when arriving from bb11. The xor'ed value indexes an
; array of %struct.bar based at null in addrspace(1).
| bb2: ; preds = %bb13, %bb11, %bb |
| %phi = phi i32 [ 0, %bb ], [ %load14, %bb13 ], [ %load4, %bb11 ] |
| %xor = xor i32 %phi, 1 |
| %zext = zext i32 %xor to i64 |
| %getelementptr = getelementptr %struct.bar, ptr addrspace(1) null, i64 %zext |
| %load = load i32, ptr addrspace(1) %getelementptr, align 16 |
| %icmp = icmp sgt i32 %load, 0 |
| br i1 %icmp, label %bb3, label %bb13 |
| |
; Inner-loop preheader: issues one addrspace(5) load and one addrspace(1)
; load. %load4 is consumed only by %phi in bb2 (via the bb11 back edge), so it
; stays live across the whole inner loop; %load5 seeds %phi7.
| bb3: ; preds = %bb2 |
| %load4 = load i32, ptr addrspace(5) null, align 4 |
| %load5 = load i32, ptr addrspace(1) null, align 4 |
| br label %bb6 |
| |
; Inner loop header: tests the value loaded in bb3 or carried round from bb11.
| bb6: ; preds = %bb11, %bb3 |
| %phi7 = phi i32 [ %load5, %bb3 ], [ %phi12, %bb11 ] |
| %icmp8 = icmp eq i32 %phi7, 0 |
| br i1 %icmp8, label %bb11, label %bb9 |
| |
; Taken when %phi7 is non-zero: stores 0 through %arg, then issues another
; addrspace(1) load whose result reaches bb11.
| bb9: ; preds = %bb6 |
| store i32 0, ptr addrspace(1) %arg, align 4 |
| %load10 = load i32, ptr addrspace(1) null, align 4 |
| br label %bb11 |
| |
; Shared latch: %arg1 picks between the outer loop header (bb2) and the inner
; loop header (bb6), so both back edges originate here.
| bb11: ; preds = %bb9, %bb6 |
| %phi12 = phi i32 [ 0, %bb6 ], [ %load10, %bb9 ] |
| br i1 %arg1, label %bb2, label %bb6 |
| |
; Taken from bb2 when %load <= 0: issues a load that feeds %phi on the next
; outer iteration.
| bb13: ; preds = %bb2 |
| %load14 = load i32, ptr addrspace(1) null, align 4 |
| br label %bb2 |
| } |