| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 |
| # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -start-before machine-scheduler -stop-after amdgpu-wait-sgpr-hazards -o - %s | FileCheck -check-prefix=GFX11 %s |
| |
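| # The following loop should ideally only require a single s_waitcnt_depctr. |
| # NOTE: the autogenerated checks below currently record two S_WAITCNT_DEPCTR |
| # in bb.1, one after each V_CMP_GT_U32_e64. |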
| --- |
| name: gemm_loop1 |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| ; GFX11-LABEL: name: gemm_loop1 |
| ; GFX11: bb.0: |
| ; GFX11-NEXT: successors: %bb.1(0x80000000) |
| ; GFX11-NEXT: {{ $}} |
| ; GFX11-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec |
| ; GFX11-NEXT: renamable $vgpr1 = V_MOV_B32_e32 0, implicit $exec |
| ; GFX11-NEXT: renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec |
| ; GFX11-NEXT: renamable $vgpr3 = V_MOV_B32_e32 0, implicit $exec |
| ; GFX11-NEXT: renamable $vgpr4 = V_MOV_B32_e32 0, implicit $exec |
| ; GFX11-NEXT: renamable $sgpr8 = S_MOV_B32 0 |
| ; GFX11-NEXT: renamable $sgpr9 = S_MOV_B32 0 |
| ; GFX11-NEXT: renamable $sgpr10 = S_MOV_B32 0 |
| ; GFX11-NEXT: renamable $sgpr11 = S_MOV_B32 0 |
| ; GFX11-NEXT: renamable $sgpr12 = S_MOV_B32 0 |
| ; GFX11-NEXT: renamable $sgpr13 = S_MOV_B32 0 |
| ; GFX11-NEXT: renamable $sgpr14 = S_MOV_B32 0 |
| ; GFX11-NEXT: renamable $sgpr15 = S_MOV_B32 0 |
| ; GFX11-NEXT: renamable $sgpr16 = S_MOV_B32 0 |
| ; GFX11-NEXT: renamable $sgpr17 = S_MOV_B32 0 |
| ; GFX11-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF |
| ; GFX11-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = IMPLICIT_DEF |
| ; GFX11-NEXT: {{ $}} |
| ; GFX11-NEXT: bb.1: |
| ; GFX11-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| ; GFX11-NEXT: liveins: $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 |
| ; GFX11-NEXT: {{ $}} |
| ; GFX11-NEXT: renamable $vgpr5 = V_ADD_U32_e32 $sgpr17, $vgpr0, implicit $exec |
| ; GFX11-NEXT: renamable $sgpr17 = S_ADDK_I32 killed renamable $sgpr17, 128, implicit-def dead $scc |
| ; GFX11-NEXT: S_CMP_LT_U32 renamable $sgpr17, renamable $sgpr15, implicit-def $scc |
| ; GFX11-NEXT: renamable $sgpr17 = S_CSELECT_B32 killed renamable $sgpr17, 0, implicit killed $scc |
| ; GFX11-NEXT: renamable $vgpr7 = V_ADD_U32_e32 64, $vgpr5, implicit $exec |
| ; GFX11-NEXT: renamable $sgpr18_sgpr19 = V_CMP_GT_U32_e64 $sgpr15, $vgpr5, implicit $exec |
| ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0 |
| ; GFX11-NEXT: renamable $vgpr6 = V_ADD_U32_e32 $sgpr8, $vgpr5, implicit $exec |
| ; GFX11-NEXT: renamable $vgpr8 = V_ADD_U32_e32 $sgpr9, killed $vgpr5, implicit $exec |
| ; GFX11-NEXT: renamable $sgpr16 = nsw S_ADD_I32 killed renamable $sgpr16, -1, implicit-def dead $scc |
| ; GFX11-NEXT: renamable $sgpr20_sgpr21 = V_CMP_GT_U32_e64 $sgpr15, $vgpr7, implicit $exec |
| ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0 |
| ; GFX11-NEXT: S_CMP_LG_U32 renamable $sgpr16, 0, implicit-def $scc |
| ; GFX11-NEXT: renamable $vgpr5 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr6, $sgpr18_sgpr19, implicit $exec |
| ; GFX11-NEXT: renamable $vgpr6 = V_ADD_U32_e32 $sgpr8, $vgpr7, implicit $exec |
| ; GFX11-NEXT: renamable $vgpr7 = V_ADD_U32_e32 $sgpr9, killed $vgpr7, implicit $exec |
| ; GFX11-NEXT: renamable $vgpr8 = V_CNDMASK_B32_e64 0, $sgpr10, 0, killed $vgpr8, killed $sgpr18_sgpr19, implicit $exec |
| ; GFX11-NEXT: renamable $vgpr6 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr6, $sgpr20_sgpr21, implicit $exec |
| ; GFX11-NEXT: renamable $vgpr7 = V_CNDMASK_B32_e64 0, $sgpr10, 0, killed $vgpr7, killed $sgpr20_sgpr21, implicit $exec |
| ; GFX11-NEXT: renamable $vgpr9 = V_ADD_U32_e32 $sgpr11, $vgpr8, implicit $exec |
| ; GFX11-NEXT: renamable $vgpr10 = V_ADD_U32_e32 $sgpr12, $vgpr8, implicit $exec |
| ; GFX11-NEXT: renamable $vgpr11 = V_ADD_U32_e32 $sgpr13, $vgpr8, implicit $exec |
| ; GFX11-NEXT: renamable $vgpr8 = V_ADD_U32_e32 $sgpr14, killed $vgpr8, implicit $exec |
| ; GFX11-NEXT: renamable $vgpr12 = V_ADD_U32_e32 $sgpr11, $vgpr7, implicit $exec |
| ; GFX11-NEXT: renamable $vgpr13 = V_ADD_U32_e32 $sgpr12, $vgpr7, implicit $exec |
| ; GFX11-NEXT: renamable $vgpr14 = V_ADD_U32_e32 $sgpr13, $vgpr7, implicit $exec |
| ; GFX11-NEXT: renamable $vgpr7 = V_ADD_U32_e32 $sgpr14, killed $vgpr7, implicit $exec |
| ; GFX11-NEXT: BUNDLE implicit-def $vgpr5, implicit-def $vgpr6, implicit killed $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec, implicit killed $vgpr6 :: (dereferenceable invariant load (s16), align 1, addrspace 8) { |
| ; GFX11-NEXT: S_CLAUSE 1 |
| ; GFX11-NEXT: renamable $vgpr5 = BUFFER_LOAD_USHORT_IDXEN killed renamable $vgpr5, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s16), align 1, addrspace 8) |
| ; GFX11-NEXT: renamable $vgpr6 = BUFFER_LOAD_USHORT_IDXEN killed renamable $vgpr6, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s16), align 1, addrspace 8) |
| ; GFX11-NEXT: } |
| ; GFX11-NEXT: BUNDLE implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr8, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr7, implicit killed $vgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec, implicit killed $vgpr10, implicit killed $vgpr11, implicit killed $vgpr8, implicit killed $vgpr12, implicit killed $vgpr13, implicit killed $vgpr14, implicit killed $vgpr7 :: (dereferenceable invariant load (s16), align 1, addrspace 8) { |
| ; GFX11-NEXT: S_CLAUSE 7 |
| ; GFX11-NEXT: renamable $vgpr9 = BUFFER_LOAD_USHORT_IDXEN killed renamable $vgpr9, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s16), align 1, addrspace 8) |
| ; GFX11-NEXT: renamable $vgpr10 = BUFFER_LOAD_USHORT_IDXEN killed renamable $vgpr10, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s16), align 1, addrspace 8) |
| ; GFX11-NEXT: renamable $vgpr11 = BUFFER_LOAD_USHORT_IDXEN killed renamable $vgpr11, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s16), align 1, addrspace 8) |
| ; GFX11-NEXT: renamable $vgpr8 = BUFFER_LOAD_USHORT_IDXEN killed renamable $vgpr8, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s16), align 1, addrspace 8) |
| ; GFX11-NEXT: renamable $vgpr12 = BUFFER_LOAD_USHORT_IDXEN killed renamable $vgpr12, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s16), align 1, addrspace 8) |
| ; GFX11-NEXT: renamable $vgpr13 = BUFFER_LOAD_USHORT_IDXEN killed renamable $vgpr13, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s16), align 1, addrspace 8) |
| ; GFX11-NEXT: renamable $vgpr14 = BUFFER_LOAD_USHORT_IDXEN killed renamable $vgpr14, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s16), align 1, addrspace 8) |
| ; GFX11-NEXT: renamable $vgpr7 = BUFFER_LOAD_USHORT_IDXEN killed renamable $vgpr7, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s16), align 1, addrspace 8) |
| ; GFX11-NEXT: } |
| ; GFX11-NEXT: S_WAITCNT 9207 |
| ; GFX11-NEXT: renamable $vgpr5 = V_PERM_B32_e64 killed $vgpr6, killed $vgpr5, 84148480, implicit $exec |
| ; GFX11-NEXT: S_WAITCNT 4087 |
| ; GFX11-NEXT: renamable $vgpr6 = V_PERM_B32_e64 killed $vgpr12, killed $vgpr9, 84148480, implicit $exec |
| ; GFX11-NEXT: S_WAITCNT 3063 |
| ; GFX11-NEXT: renamable $vgpr9 = V_PERM_B32_e64 killed $vgpr13, killed $vgpr10, 84148480, implicit $exec |
| ; GFX11-NEXT: S_WAITCNT 2039 |
| ; GFX11-NEXT: renamable $vgpr10 = V_PERM_B32_e64 killed $vgpr14, killed $vgpr11, 84148480, implicit $exec |
| ; GFX11-NEXT: S_WAITCNT 1015 |
| ; GFX11-NEXT: renamable $vgpr7 = V_PERM_B32_e64 killed $vgpr7, killed $vgpr8, 84148480, implicit $exec |
| ; GFX11-NEXT: renamable $vgpr1 = nofpexcept V_DOT2_F32_F16 8, $vgpr5, 8, killed $vgpr6, 8, killed $vgpr1, -1, 0, 0, 0, 0, implicit $mode, implicit $exec |
| ; GFX11-NEXT: renamable $vgpr2 = nofpexcept V_DOT2_F32_F16 8, $vgpr5, 8, killed $vgpr9, 8, killed $vgpr2, -1, 0, 0, 0, 0, implicit $mode, implicit $exec |
| ; GFX11-NEXT: renamable $vgpr3 = nofpexcept V_DOT2_F32_F16 8, $vgpr5, 8, killed $vgpr10, 8, killed $vgpr3, -1, 0, 0, 0, 0, implicit $mode, implicit $exec |
| ; GFX11-NEXT: renamable $vgpr4 = nofpexcept V_DOT2_F32_F16 8, killed $vgpr5, 8, killed $vgpr7, 8, killed $vgpr4, -1, 0, 0, 0, 0, implicit $mode, implicit $exec |
| ; GFX11-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc |
| ; GFX11-NEXT: {{ $}} |
| ; GFX11-NEXT: bb.2: |
| ; GFX11-NEXT: S_ENDPGM 0 |
| bb.0: |
| ; Entry block: sets every loop input to a constant 0 (or IMPLICIT_DEF) and |
| ; falls into the loop in bb.1. |
| ; %0 is added to %16 at the top of each iteration. |
| %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| ; %1, %2: scalar addends; %3: scalar source operand of the second V_CNDMASK in each half. |
| %1:sreg_32 = S_MOV_B32 0 |
| %2:sreg_32 = S_MOV_B32 0 |
| %3:sreg_32 = S_MOV_B32 0 |
| ; %4 and %6 are the 128-bit SGPR operands of the BUFFER_LOADs in bb.1 |
| ; (presumably the buffer resource descriptors -- left undefined for this test). |
| %4:sgpr_128 = IMPLICIT_DEF |
| %5:sreg_32 = S_MOV_B32 0 |
| %6:sgpr_128 = IMPLICIT_DEF |
| %7:sreg_32 = S_MOV_B32 0 |
| %8:sreg_32 = S_MOV_B32 0 |
| %9:sreg_32 = S_MOV_B32 0 |
| ; %10 is the scalar operand of both V_CMP_GT_U32 and of the S_CMP_LT_U32 in bb.1. |
| %10:sgpr_32 = S_MOV_B32 0 |
| ; %11-%14 are read and rewritten by the four V_DOT2_F32_F16 in bb.1. |
| %11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| %12:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| %13:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| %14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| ; %15 is decremented once per iteration and controls the back-edge; |
| ; %16 is stepped by 128 per iteration. |
| %15:sreg_32 = S_MOV_B32 0 |
| %16:sreg_32 = S_MOV_B32 0 |
| S_BRANCH %bb.1 |
| |
| bb.1: |
| ; Loop body: branches back to itself while the final S_CMP_LG_U32 sets SCC, |
| ; otherwise continues to bb.2. |
| successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| |
| ; First half: %17 = %16 + %0. The V_CMP_GT_U32 result %19 (an SGPR pair written |
| ; by a VALU instruction) is the select mask of both V_CNDMASKs that follow. |
| ; NOTE(review): presumably these VALU-written SGPR masks are what the pass |
| ; under test has to guard with s_waitcnt_depctr -- confirm. |
| %17:vgpr_32 = V_ADD_U32_e64 %16, %0, 0, implicit $exec |
| %18:vgpr_32 = V_ADD_U32_e64 %1, %17, 0, implicit $exec |
| %19:sreg_64_xexec = V_CMP_GT_U32_e64 %10, %17, implicit $exec |
| %20:vgpr_32 = V_CNDMASK_B32_e64 0, -1, 0, %18, %19, implicit $exec |
| %21:vgpr_32 = V_ADD_U32_e64 %2, %17, 0, implicit $exec |
| %22:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %21, %19, implicit $exec |
| ; One 16-bit load through %4 indexed by %20, then four loads through %6 |
| ; indexed by %22 plus %5, %7, %8 and %9 respectively. |
| %23:vgpr_32 = BUFFER_LOAD_USHORT_IDXEN %20, %4, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s16), align 1, addrspace 8) |
| %24:vgpr_32 = V_ADD_U32_e64 %5, %22, 0, implicit $exec |
| %25:vgpr_32 = BUFFER_LOAD_USHORT_IDXEN %24, %6, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s16), align 1, addrspace 8) |
| %26:vgpr_32 = V_ADD_U32_e64 %7, %22, 0, implicit $exec |
| %27:vgpr_32 = BUFFER_LOAD_USHORT_IDXEN %26, %6, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s16), align 1, addrspace 8) |
| %28:vgpr_32 = V_ADD_U32_e64 %8, %22, 0, implicit $exec |
| %29:vgpr_32 = BUFFER_LOAD_USHORT_IDXEN %28, %6, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s16), align 1, addrspace 8) |
| %30:vgpr_32 = V_ADD_U32_e64 %9, %22, 0, implicit $exec |
| %31:vgpr_32 = BUFFER_LOAD_USHORT_IDXEN %30, %6, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s16), align 1, addrspace 8) |
| ; Second half: the same sequence with %32 = %17 + 64 and select mask %34. |
| %32:vgpr_32 = V_ADD_U32_e64 64, %17, 0, implicit $exec |
| %33:vgpr_32 = V_ADD_U32_e64 %1, %32, 0, implicit $exec |
| %34:sreg_64_xexec = V_CMP_GT_U32_e64 %10, %32, implicit $exec |
| %35:vgpr_32 = V_CNDMASK_B32_e64 0, -1, 0, %33, %34, implicit $exec |
| %36:vgpr_32 = V_ADD_U32_e64 %2, %32, 0, implicit $exec |
| %37:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %36, %34, implicit $exec |
| %38:vgpr_32 = BUFFER_LOAD_USHORT_IDXEN %35, %4, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s16), align 1, addrspace 8) |
| %39:vgpr_32 = V_ADD_U32_e64 %5, %37, 0, implicit $exec |
| %40:vgpr_32 = BUFFER_LOAD_USHORT_IDXEN %39, %6, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s16), align 1, addrspace 8) |
| %41:vgpr_32 = V_ADD_U32_e64 %7, %37, 0, implicit $exec |
| %42:vgpr_32 = BUFFER_LOAD_USHORT_IDXEN %41, %6, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s16), align 1, addrspace 8) |
| %43:vgpr_32 = V_ADD_U32_e64 %8, %37, 0, implicit $exec |
| %44:vgpr_32 = BUFFER_LOAD_USHORT_IDXEN %43, %6, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s16), align 1, addrspace 8) |
| %45:vgpr_32 = V_ADD_U32_e64 %9, %37, 0, implicit $exec |
| %46:vgpr_32 = BUFFER_LOAD_USHORT_IDXEN %45, %6, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s16), align 1, addrspace 8) |
| ; Each V_PERM_B32 combines the matching loads of the second and first half. |
| ; %47 is shared by all four V_DOT2_F32_F16, which read and rewrite %11-%14. |
| %47:vgpr_32 = V_PERM_B32_e64 %38, %23, 84148480, implicit $exec |
| %48:vgpr_32 = V_PERM_B32_e64 %40, %25, 84148480, implicit $exec |
| %11:vgpr_32 = nofpexcept V_DOT2_F32_F16 8, %47, 8, %48, 8, %11, -1, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %49:vgpr_32 = V_PERM_B32_e64 %42, %27, 84148480, implicit $exec |
| %12:vgpr_32 = nofpexcept V_DOT2_F32_F16 8, %47, 8, %49, 8, %12, -1, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %50:vgpr_32 = V_PERM_B32_e64 %44, %29, 84148480, implicit $exec |
| %13:vgpr_32 = nofpexcept V_DOT2_F32_F16 8, %47, 8, %50, 8, %13, -1, 0, 0, 0, 0, implicit $mode, implicit $exec |
| %51:vgpr_32 = V_PERM_B32_e64 %46, %31, 84148480, implicit $exec |
| %14:vgpr_32 = nofpexcept V_DOT2_F32_F16 8, %47, 8, %51, 8, %14, -1, 0, 0, 0, 0, implicit $mode, implicit $exec |
| ; Step %16 by 128; S_CSELECT then picks %52 or 0 depending on the SCC result |
| ; of comparing %52 with %10. |
| %52:sreg_32 = S_ADD_I32 %16, 128, implicit-def dead $scc |
| S_CMP_LT_U32 %52, %10, implicit-def $scc |
| %16:sreg_32 = S_CSELECT_B32 %52, 0, implicit killed $scc |
| ; Decrement %15 and take the back-edge while it compares unequal to 0. |
| %15:sreg_32 = nsw S_ADD_I32 %15, -1, implicit-def dead $scc |
| S_CMP_LG_U32 %15, 0, implicit-def $scc |
| S_CBRANCH_SCC1 %bb.1, implicit killed $scc |
| S_BRANCH %bb.2 |
| |
| bb.2: |
| ; Loop exit: nothing is live out of the loop; just end the program. |
| S_ENDPGM 0 |
| ... |