| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 |
| # RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GCN %s |
| |
| |
| # Expected vmcnt(0) since the direct load is the only load. |
| --- |
| # Case: one buffer load that writes to LDS (note the addrspace(3) store memory |
| # operand), immediately followed by the S_WAITCNT_lds_direct pseudo. |
| name: dma_then_fence |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: dma_then_fence |
| ; GCN: S_WAITCNT 0 |
| ; GCN-NEXT: $m0 = S_MOV_B32 0 |
| ; GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4, addrspace 1), (store (s32) into `ptr addrspace(3) poison` + 4, addrspace 3) |
| ; GCN-NEXT: S_WAITCNT 3952 |
| ; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec |
| ; GCN-NEXT: S_ENDPGM 0 |
| ; Input to the pass starts here. |
| ; NOTE(review): the leading S_WAITCNT 0 in the expected output has no counterpart |
| ; in this input; presumably the pass inserts it at function entry - confirm. |
| $m0 = S_MOV_B32 0 |
| BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4) |
| ; The pseudo below is replaced in the expected output by S_WAITCNT 3952, |
| ; i.e. vmcnt(0) with the other counters left at their maximum (no wait). |
| S_WAITCNT_lds_direct |
| $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec |
| S_ENDPGM 0 |
| |
| ... |
| |
| # Expected vmcnt(1): only the direct load to LDS has to complete, so the later global load may remain outstanding. |
| |
| --- |
| # Case: a buffer load to LDS followed by an unrelated global load, then the |
| # S_WAITCNT_lds_direct pseudo. |
| name: dma_then_global_load |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: dma_then_global_load |
| ; GCN: S_WAITCNT 0 |
| ; GCN-NEXT: $m0 = S_MOV_B32 0 |
| ; GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4, addrspace 1), (store (s32) into `ptr addrspace(3) poison` + 4, addrspace 3) |
| ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec |
| ; GCN-NEXT: S_WAITCNT 3953 |
| ; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec |
| ; GCN-NEXT: S_ENDPGM 0 |
| ; Input to the pass starts here. |
| $m0 = S_MOV_B32 0 |
| BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4) |
| ; This global load is issued after the LDS load and does not write LDS. |
| $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec |
| ; The pseudo below is replaced in the expected output by S_WAITCNT 3953, |
| ; i.e. vmcnt(1): one newer operation (the global load) may still be in flight. |
| S_WAITCNT_lds_direct |
| $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec |
| S_ENDPGM 0 |
| |
| ... |
| |
| # Expected no vmcnt wait since there is no direct load to LDS; the global load on its own does not require a wait at S_WAITCNT_lds_direct. |
| |
| --- |
| # Case: only a global load precedes the S_WAITCNT_lds_direct pseudo; there is |
| # no load to LDS in this function. |
| name: no_dma_just_fence |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: no_dma_just_fence |
| ; GCN: S_WAITCNT 0 |
| ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec |
| ; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec |
| ; GCN-NEXT: S_ENDPGM 0 |
| ; Input to the pass starts here. |
| $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec |
| ; The pseudo below disappears from the expected output and no S_WAITCNT is |
| ; emitted between the global load and the V_ADD. |
| S_WAITCNT_lds_direct |
| $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec |
| S_ENDPGM 0 |
| |
| ... |
| |
| # Expected vmcnt(1): only the direct load to LDS has to complete, so the later global load may remain outstanding. |
| |
| --- |
| # Case: a buffer load to LDS followed by a global load, then the |
| # S_WAITCNT_lds_direct pseudo. |
| # NOTE(review): unlike the other LDS-load cases in this file, this input does |
| # not set $m0 before the buffer load, and apart from that the body matches |
| # dma_then_global_load. Nothing visible here marks the fence as "system" scope - |
| # confirm whether an additional marker was intended. |
| name: dma_then_system_fence |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: dma_then_system_fence |
| ; GCN: S_WAITCNT 0 |
| ; GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4, addrspace 1), (store (s32) into `ptr addrspace(3) poison` + 4, addrspace 3) |
| ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec |
| ; GCN-NEXT: S_WAITCNT 3953 |
| ; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec |
| ; GCN-NEXT: S_ENDPGM 0 |
| ; Input to the pass starts here. |
| BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4) |
| $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec |
| ; The pseudo below is replaced in the expected output by S_WAITCNT 3953, |
| ; i.e. vmcnt(1): one newer operation (the global load) may still be in flight. |
| S_WAITCNT_lds_direct |
| $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec |
| S_ENDPGM 0 |
| |
| ... |
| |
| # The computed vmcnt(1) gets merged with the existing vmcnt(0) from the preceding S_WAITCNT, so a single vmcnt(0) wait remains. |
| |
| --- |
| # Case: the input already contains an explicit S_WAITCNT 3952 directly before |
| # the S_WAITCNT_lds_direct pseudo. |
| name: merge_with_prev_wait |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: merge_with_prev_wait |
| ; GCN: S_WAITCNT 0 |
| ; GCN-NEXT: $m0 = S_MOV_B32 0 |
| ; GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4, addrspace 1), (store (s32) into `ptr addrspace(3) poison` + 4, addrspace 3) |
| ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec |
| ; GCN-NEXT: S_WAITCNT 3952 |
| ; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec |
| ; GCN-NEXT: S_ENDPGM 0 |
| ; Input to the pass starts here. |
| $m0 = S_MOV_B32 0 |
| BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4) |
| $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec |
| ; Pre-existing wait (3952 is vmcnt(0)) placed before the pseudo. The expected |
| ; output keeps exactly one S_WAITCNT 3952 for this pair of instructions. |
| S_WAITCNT 3952 |
| S_WAITCNT_lds_direct |
| $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec |
| S_ENDPGM 0 |
| |
| ... |
| |
| # The computed vmcnt(1) gets merged with the existing vmcnt(0) from the following S_WAITCNT, so a single vmcnt(0) wait remains. |
| |
| --- |
| # Case: the input already contains an explicit S_WAITCNT 3952 directly after |
| # the S_WAITCNT_lds_direct pseudo. |
| name: merge_with_next_wait |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: merge_with_next_wait |
| ; GCN: S_WAITCNT 0 |
| ; GCN-NEXT: $m0 = S_MOV_B32 0 |
| ; GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4, addrspace 1), (store (s32) into `ptr addrspace(3) poison` + 4, addrspace 3) |
| ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec |
| ; GCN-NEXT: S_WAITCNT 3952 |
| ; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec |
| ; GCN-NEXT: S_ENDPGM 0 |
| ; Input to the pass starts here. |
| $m0 = S_MOV_B32 0 |
| BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4) |
| $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec |
| ; Pseudo first, then a pre-existing wait (3952 is vmcnt(0)). The expected |
| ; output keeps exactly one S_WAITCNT 3952 for this pair of instructions. |
| S_WAITCNT_lds_direct |
| S_WAITCNT 3952 |
| $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec |
| S_ENDPGM 0 |
| |
| ... |