| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10-WGP %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s |
| |
| |
| ; Workgroup barrier idiom on addrspace(3) memory: store %val through %x, |
| ; workgroup release fence, s.barrier, workgroup acquire fence, then load |
| ; through %y. Both fences carry !mmra !0 ("amdgpu-synchronize-as" = "local"). |
| ; The expected code waits only on lgkmcnt (GFX10/GFX11) or dscnt |
| ; (GFX12/GFX1250) between the DS store and the barrier, and none of the four |
| ; check blocks contains a cache-invalidate instruction after the barrier. |
| define float @test_barrier_workgroup_local_mmra(ptr addrspace(3) noundef %x, ptr addrspace(3) noundef %y, float %val) { |
| ; GFX10-WGP-LABEL: test_barrier_workgroup_local_mmra: |
| ; GFX10-WGP: ; %bb.0: |
| ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-WGP-NEXT: ds_write_b32 v0, v2 |
| ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX10-WGP-NEXT: s_barrier |
| ; GFX10-WGP-NEXT: ds_read_b32 v0, v1 |
| ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX10-WGP-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-WGP-LABEL: test_barrier_workgroup_local_mmra: |
| ; GFX11-WGP: ; %bb.0: |
| ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-WGP-NEXT: ds_store_b32 v0, v2 |
| ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-WGP-NEXT: s_barrier |
| ; GFX11-WGP-NEXT: ds_load_b32 v0, v1 |
| ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-WGP-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-WGP-LABEL: test_barrier_workgroup_local_mmra: |
| ; GFX12-WGP: ; %bb.0: |
| ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-WGP-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-WGP-NEXT: ds_store_b32 v0, v2 |
| ; GFX12-WGP-NEXT: s_wait_dscnt 0x0 |
| ; GFX12-WGP-NEXT: s_barrier_signal -1 |
| ; GFX12-WGP-NEXT: s_barrier_wait -1 |
| ; GFX12-WGP-NEXT: ds_load_b32 v0, v1 |
| ; GFX12-WGP-NEXT: s_wait_dscnt 0x0 |
| ; GFX12-WGP-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-LABEL: test_barrier_workgroup_local_mmra: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: ds_store_b32 v0, v2 |
| ; GFX1250-NEXT: s_wait_dscnt 0x0 |
| ; GFX1250-NEXT: s_barrier_signal -1 |
| ; GFX1250-NEXT: s_barrier_wait -1 |
| ; GFX1250-NEXT: ds_load_b32 v0, v1 |
| ; GFX1250-NEXT: s_wait_dscnt 0x0 |
| ; GFX1250-NEXT: s_set_pc_i64 s[30:31] |
| ; IR under test. The check lines above are autogenerated (see the NOTE at the |
| ; top of the file); regenerate them with the script instead of editing by hand. |
| store float %val, ptr addrspace(3) %x |
| fence syncscope("workgroup") release, !mmra !0 |
| tail call void @llvm.amdgcn.s.barrier() |
| fence syncscope("workgroup") acquire, !mmra !0 |
| %ret = load float, ptr addrspace(3) %y |
| ret float %ret |
| } |
| |
| ; Workgroup barrier idiom on addrspace(1) memory: store %val through %x, |
| ; workgroup release fence, s.barrier, workgroup acquire fence, then load |
| ; through %y. Both fences carry !mmra !1 ("amdgpu-synchronize-as" = "global"). |
| ; The expected code waits on the store counter between the global store and |
| ; the barrier (s_waitcnt_vscnt on GFX10/GFX11, s_wait_storecnt on |
| ; GFX12/GFX1250). After the barrier, GFX10/GFX11 expect buffer_gl0_inv and |
| ; GFX12 expects global_inv scope:SCOPE_SE before the load; the GFX1250 block |
| ; has no invalidate instruction. |
| define float @test_barrier_workgroup_global_mmra(ptr addrspace(1) noundef %x, ptr addrspace(1) noundef %y, float %val) { |
| ; GFX10-WGP-LABEL: test_barrier_workgroup_global_mmra: |
| ; GFX10-WGP: ; %bb.0: |
| ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-WGP-NEXT: global_store_dword v[0:1], v4, off |
| ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-WGP-NEXT: s_barrier |
| ; GFX10-WGP-NEXT: buffer_gl0_inv |
| ; GFX10-WGP-NEXT: global_load_dword v0, v[2:3], off |
| ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-WGP-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-WGP-LABEL: test_barrier_workgroup_global_mmra: |
| ; GFX11-WGP: ; %bb.0: |
| ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-WGP-NEXT: global_store_b32 v[0:1], v4, off |
| ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-WGP-NEXT: s_barrier |
| ; GFX11-WGP-NEXT: buffer_gl0_inv |
| ; GFX11-WGP-NEXT: global_load_b32 v0, v[2:3], off |
| ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-WGP-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-WGP-LABEL: test_barrier_workgroup_global_mmra: |
| ; GFX12-WGP: ; %bb.0: |
| ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-WGP-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-WGP-NEXT: global_store_b32 v[0:1], v4, off |
| ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-WGP-NEXT: s_barrier_signal -1 |
| ; GFX12-WGP-NEXT: s_barrier_wait -1 |
| ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-WGP-NEXT: global_load_b32 v0, v[2:3], off |
| ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-WGP-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-LABEL: test_barrier_workgroup_global_mmra: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: global_store_b32 v[0:1], v4, off |
| ; GFX1250-NEXT: s_wait_storecnt 0x0 |
| ; GFX1250-NEXT: s_barrier_signal -1 |
| ; GFX1250-NEXT: s_barrier_wait -1 |
| ; GFX1250-NEXT: global_load_b32 v0, v[2:3], off |
| ; GFX1250-NEXT: s_wait_loadcnt 0x0 |
| ; GFX1250-NEXT: s_set_pc_i64 s[30:31] |
| ; IR under test. The check lines above are autogenerated (see the NOTE at the |
| ; top of the file); regenerate them with the script instead of editing by hand. |
| store float %val, ptr addrspace(1) %x |
| fence syncscope("workgroup") release, !mmra !1 |
| tail call void @llvm.amdgcn.s.barrier() |
| fence syncscope("workgroup") acquire, !mmra !1 |
| %ret = load float, ptr addrspace(1) %y |
| ret float %ret |
| } |
| |
| ; MMRA tags attached to the fences via !mmra: !0 is used by the addrspace(3) |
| ; test and !1 by the addrspace(1) test. |
| ; NOTE(review): presumably each tag limits which address space the fence has |
| ; to synchronize; confirm against the AMDGPU memory-model documentation. |
| !0 = !{!"amdgpu-synchronize-as", !"local"} |
| !1 = !{!"amdgpu-synchronize-as", !"global"} |