; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
; Stores %val through %x, synchronizes with a workgroup-scope release fence,
; an s_barrier and a workgroup-scope acquire fence, then loads from %y.
; Both pointers are addrspace(3) and both fences carry !mmra !0
; ("amdgpu-synchronize-as" = "local").
;
; In every expected sequence below, the only wait emitted between the store
; and the barrier is on lgkmcnt / dscnt, and no cache-invalidate instruction
; follows the barrier.
define float @test_barrier_workgroup_local_mmra(ptr addrspace(3) noundef %x, ptr addrspace(3) noundef %y, float %val) {
; GFX10-WGP-LABEL: test_barrier_workgroup_local_mmra:
; GFX10-WGP: ; %bb.0:
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: ds_write_b32 v0, v2
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_barrier
; GFX10-WGP-NEXT: ds_read_b32 v0, v1
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-WGP-LABEL: test_barrier_workgroup_local_mmra:
; GFX11-WGP: ; %bb.0:
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: ds_store_b32 v0, v2
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: s_barrier
; GFX11-WGP-NEXT: ds_load_b32 v0, v1
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-WGP-LABEL: test_barrier_workgroup_local_mmra:
; GFX12-WGP: ; %bb.0:
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT: s_wait_expcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: ds_store_b32 v0, v2
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: s_barrier_signal -1
; GFX12-WGP-NEXT: s_barrier_wait -1
; GFX12-WGP-NEXT: ds_load_b32 v0, v1
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: test_barrier_workgroup_local_mmra:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: ds_store_b32 v0, v2
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_barrier_signal -1
; GFX1250-NEXT: s_barrier_wait -1
; GFX1250-NEXT: ds_load_b32 v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
; Publish the store: release fence at workgroup scope, tagged with !mmra !0.
store float %val, ptr addrspace(3) %x
fence syncscope("workgroup") release, !mmra !0
; The barrier sits between the release and acquire halves of the pattern.
tail call void @llvm.amdgcn.s.barrier()
; Acquire fence at workgroup scope (same tag) ahead of the load from %y.
fence syncscope("workgroup") acquire, !mmra !0
%ret = load float, ptr addrspace(3) %y
ret float %ret
}
; Stores %val through %x, synchronizes with a workgroup-scope release fence,
; an s_barrier and a workgroup-scope acquire fence, then loads from %y.
; Both pointers are addrspace(1) and both fences carry !mmra !1
; ("amdgpu-synchronize-as" = "global").
;
; In the expected sequences below, the wait emitted between the store and the
; barrier is s_waitcnt_vscnt (gfx1010/gfx1100) or s_wait_storecnt
; (gfx1200/gfx1250). After the barrier, gfx1010/gfx1100 expect buffer_gl0_inv
; and gfx1200 expects global_inv scope:SCOPE_SE; the gfx1250 sequence has no
; invalidate instruction.
define float @test_barrier_workgroup_global_mmra(ptr addrspace(1) noundef %x, ptr addrspace(1) noundef %y, float %val) {
; GFX10-WGP-LABEL: test_barrier_workgroup_global_mmra:
; GFX10-WGP: ; %bb.0:
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: global_store_dword v[0:1], v4, off
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_barrier
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: global_load_dword v0, v[2:3], off
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-WGP-LABEL: test_barrier_workgroup_global_mmra:
; GFX11-WGP: ; %bb.0:
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v[0:1], v4, off
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: s_barrier
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_load_b32 v0, v[2:3], off
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-WGP-LABEL: test_barrier_workgroup_global_mmra:
; GFX12-WGP: ; %bb.0:
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT: s_wait_expcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: global_store_b32 v[0:1], v4, off
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
; GFX12-WGP-NEXT: s_barrier_signal -1
; GFX12-WGP-NEXT: s_barrier_wait -1
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
; GFX12-WGP-NEXT: global_load_b32 v0, v[2:3], off
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
; GFX12-WGP-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: test_barrier_workgroup_global_mmra:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_store_b32 v[0:1], v4, off
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_barrier_signal -1
; GFX1250-NEXT: s_barrier_wait -1
; GFX1250-NEXT: global_load_b32 v0, v[2:3], off
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
; Publish the store: release fence at workgroup scope, tagged with !mmra !1.
store float %val, ptr addrspace(1) %x
fence syncscope("workgroup") release, !mmra !1
; The barrier sits between the release and acquire halves of the pattern.
tail call void @llvm.amdgcn.s.barrier()
; Acquire fence at workgroup scope (same tag) ahead of the load from %y.
fence syncscope("workgroup") acquire, !mmra !1
%ret = load float, ptr addrspace(1) %y
ret float %ret
}
; MMRA tags referenced through !mmra on the fences in this file:
;   !0 is used by @test_barrier_workgroup_local_mmra,
;   !1 is used by @test_barrier_workgroup_global_mmra.
; NOTE(review): "amdgpu-synchronize-as" presumably names the address space the
; tagged fence has to order -- confirm against the AMDGPU backend documentation.
!0 = !{!"amdgpu-synchronize-as", !"local"}
!1 = !{!"amdgpu-synchronize-as", !"global"}