| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s |
| ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10 %s |
| ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX11 %s |
| ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12 %s |
| |
| ; Test using saddr addressing mode of global_* flat atomic instructions. |
| |
| ; No-return xchg at %sbase + zext(%voffset); the atomic's result is unused. |
| ; All targets are expected to select the saddr form of global_atomic_swap |
| ; (offset in v0, data in v1, base in the SGPR pair s[2:3]) with no immediate |
| ; offset and no returned value. |
| define amdgpu_ps void @global_xchg_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_xchg_saddr_i32_nortn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_swap v0, v1, s[2:3] |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_xchg_saddr_i32_nortn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_swap v0, v1, s[2:3] |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_xchg_saddr_i32_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_swap_b32 v0, v1, s[2:3] |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_xchg_saddr_i32_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_swap_b32 v0, v1, s[2:3] scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| ; Address = inreg base pointer + zero-extended 32-bit offset. |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = atomicrmw xchg ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; Maximum positive offset on gfx10 |
| ; The extra +2047 byte GEP is expected to be folded into the instruction's |
| ; immediate offset field (offset:2047) on every target checked here, while the |
| ; saddr form (VGPR offset + SGPR-pair base) is kept. |
| define amdgpu_ps void @global_xchg_saddr_i32_nortn_offset_2047(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_xchg_saddr_i32_nortn_offset_2047: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_swap v0, v1, s[2:3] offset:2047 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_xchg_saddr_i32_nortn_offset_2047: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_swap v0, v1, s[2:3] offset:2047 |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_xchg_saddr_i32_nortn_offset_2047: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_swap_b32 v0, v1, s[2:3] offset:2047 |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_xchg_saddr_i32_nortn_offset_2047: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_swap_b32 v0, v1, s[2:3] offset:2047 scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2047 |
| %unused = atomicrmw xchg ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; Maximum negative offset on gfx10 |
| ; The extra -2048 byte GEP is expected to be folded into the instruction's |
| ; immediate offset field (offset:-2048) on every target checked here, while |
| ; the saddr form (VGPR offset + SGPR-pair base) is kept. |
| define amdgpu_ps void @global_xchg_saddr_i32_nortn_offset_neg2048(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_xchg_saddr_i32_nortn_offset_neg2048: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_swap v0, v1, s[2:3] offset:-2048 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_xchg_saddr_i32_nortn_offset_neg2048: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_swap v0, v1, s[2:3] offset:-2048 |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_xchg_saddr_i32_nortn_offset_neg2048: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_swap_b32 v0, v1, s[2:3] offset:-2048 |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_xchg_saddr_i32_nortn_offset_neg2048: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_swap_b32 v0, v1, s[2:3] offset:-2048 scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2048 |
| %unused = atomicrmw xchg ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; Returning xchg at %sbase + zext(%voffset); the previous memory value is |
| ; returned, bitcast to float. The checks expect the returning saddr form, |
| ; i.e. a v0 destination plus `glc` on gfx9/gfx10/gfx11 or |
| ; `th:TH_ATOMIC_RETURN` on gfx12. |
| define amdgpu_ps float @global_xchg_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_xchg_saddr_i32_rtn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_swap v0, v0, v1, s[2:3] glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_xchg_saddr_i32_rtn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_swap v0, v0, v1, s[2:3] glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_xchg_saddr_i32_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_swap_b32 v0, v0, v1, s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_xchg_saddr_i32_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_swap_b32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %rtn = atomicrmw xchg ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; Returning xchg with an extra +2048 byte GEP. gfx9, gfx11 and gfx12 are |
| ; expected to fold it as offset:2048 and keep the saddr form. gfx10 is |
| ; expected NOT to fold it; instead the checks show the full 64-bit address |
| ; being built in v[2:3] with v_add_co_u32 / v_add_co_ci_u32 pairs (adding |
| ; 0x800) and the atomic using the VGPR-address form with `off`. |
| define amdgpu_ps float @global_xchg_saddr_i32_rtn_2048(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_xchg_saddr_i32_rtn_2048: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_swap v0, v0, v1, s[2:3] offset:2048 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_xchg_saddr_i32_rtn_2048: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 |
| ; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s[0:1], s3, 0, s[0:1] |
| ; GFX10-NEXT: v_add_co_u32 v2, vcc, 0x800, v0 |
| ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc, 0, v3, vcc |
| ; GFX10-NEXT: global_atomic_swap v0, v[2:3], v1, off glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_xchg_saddr_i32_rtn_2048: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_swap_b32 v0, v0, v1, s[2:3] offset:2048 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_xchg_saddr_i32_rtn_2048: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_swap_b32 v0, v0, v1, s[2:3] offset:2048 th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2048 |
| %rtn = atomicrmw xchg ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; Returning xchg with an extra -2048 byte GEP. Every target checked here is |
| ; expected to fold it as offset:-2048 and keep the returning saddr form. |
| define amdgpu_ps float @global_xchg_saddr_i32_rtn_neg2048(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_xchg_saddr_i32_rtn_neg2048: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_swap v0, v0, v1, s[2:3] offset:-2048 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_xchg_saddr_i32_rtn_neg2048: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_swap v0, v0, v1, s[2:3] offset:-2048 glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_xchg_saddr_i32_rtn_neg2048: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_swap_b32 v0, v0, v1, s[2:3] offset:-2048 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_xchg_saddr_i32_rtn_neg2048: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_swap_b32 v0, v0, v1, s[2:3] offset:-2048 th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2048 |
| %rtn = atomicrmw xchg ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; -------------------------------------------------------------------------------- |
| ; Uniformity edge cases |
| ; -------------------------------------------------------------------------------- |
| |
| ; Address-space-3 global that holds an address-space-1 pointer (poison |
| ; initializer). The uniformity tests load their base pointer from here |
| ; instead of taking it as an inreg argument. |
| @ptr.in.lds = internal addrspace(3) global ptr addrspace(1) poison |
| |
| ; Base pointer is uniform, but also in VGPRs |
| ; The base is loaded from @ptr.in.lds (ds_read_b64 / ds_load_b64 into v[2:3]). |
| ; The checks expect it to be copied to the SGPR pair s[0:1] with two |
| ; v_readfirstlane_b32 so the returning saddr form can still be selected. |
| ; gfx9 additionally expects an `s_nop 4` before the atomic. |
| define amdgpu_ps float @global_xchg_saddr_uniform_ptr_in_vgprs_rtn(i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_rtn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX9-NEXT: ds_read_b64 v[2:3], v2 |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX9-NEXT: v_readfirstlane_b32 s1, v3 |
| ; GFX9-NEXT: s_nop 4 |
| ; GFX9-NEXT: global_atomic_swap v0, v0, v1, s[0:1] glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_rtn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX10-NEXT: ds_read_b64 v[2:3], v2 |
| ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX10-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX10-NEXT: v_readfirstlane_b32 s1, v3 |
| ; GFX10-NEXT: global_atomic_swap v0, v0, v1, s[0:1] glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX11-NEXT: ds_load_b64 v[2:3], v2 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX11-NEXT: v_readfirstlane_b32 s1, v3 |
| ; GFX11-NEXT: global_atomic_swap_b32 v0, v0, v1, s[0:1] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX12-NEXT: ds_load_b64 v[2:3], v2 |
| ; GFX12-NEXT: s_wait_dscnt 0x0 |
| ; GFX12-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX12-NEXT: v_readfirstlane_b32 s1, v3 |
| ; GFX12-NEXT: global_atomic_swap_b32 v0, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %rtn = atomicrmw xchg ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; Base pointer is uniform, but also in VGPRs, with imm offset |
| ; The base is loaded from @ptr.in.lds and an extra +42 byte GEP is applied. |
| ; The checks expect the base to be moved to s[0:1] via v_readfirstlane_b32 |
| ; and the constant to be folded as offset:42 on the returning saddr form. |
| define amdgpu_ps float @global_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset(i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX9-NEXT: ds_read_b64 v[2:3], v2 |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX9-NEXT: v_readfirstlane_b32 s1, v3 |
| ; GFX9-NEXT: s_nop 4 |
| ; GFX9-NEXT: global_atomic_swap v0, v0, v1, s[0:1] offset:42 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX10-NEXT: ds_read_b64 v[2:3], v2 |
| ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX10-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX10-NEXT: v_readfirstlane_b32 s1, v3 |
| ; GFX10-NEXT: global_atomic_swap v0, v0, v1, s[0:1] offset:42 glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX11-NEXT: ds_load_b64 v[2:3], v2 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX11-NEXT: v_readfirstlane_b32 s1, v3 |
| ; GFX11-NEXT: global_atomic_swap_b32 v0, v0, v1, s[0:1] offset:42 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX12-NEXT: ds_load_b64 v[2:3], v2 |
| ; GFX12-NEXT: s_wait_dscnt 0x0 |
| ; GFX12-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX12-NEXT: v_readfirstlane_b32 s1, v3 |
| ; GFX12-NEXT: global_atomic_swap_b32 v0, v0, v1, s[0:1] offset:42 th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 42 |
| %rtn = atomicrmw xchg ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; Base pointer is uniform, but also in VGPRs |
| ; No-return variant. The base is loaded from @ptr.in.lds into v[2:3]; the |
| ; checks expect it to be moved to s[0:1] via v_readfirstlane_b32 and the |
| ; no-return saddr form of the swap to be selected (no destination register). |
| define amdgpu_ps void @global_xchg_saddr_uniform_ptr_in_vgprs_nortn(i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_nortn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX9-NEXT: ds_read_b64 v[2:3], v2 |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX9-NEXT: v_readfirstlane_b32 s1, v3 |
| ; GFX9-NEXT: s_nop 4 |
| ; GFX9-NEXT: global_atomic_swap v0, v1, s[0:1] |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_nortn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX10-NEXT: ds_read_b64 v[2:3], v2 |
| ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX10-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX10-NEXT: v_readfirstlane_b32 s1, v3 |
| ; GFX10-NEXT: global_atomic_swap v0, v1, s[0:1] |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX11-NEXT: ds_load_b64 v[2:3], v2 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX11-NEXT: v_readfirstlane_b32 s1, v3 |
| ; GFX11-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX12-NEXT: ds_load_b64 v[2:3], v2 |
| ; GFX12-NEXT: s_wait_dscnt 0x0 |
| ; GFX12-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX12-NEXT: v_readfirstlane_b32 s1, v3 |
| ; GFX12-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = atomicrmw xchg ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; Base pointer is uniform, but also in VGPRs, with imm offset |
| ; No-return variant with an extra +42 byte GEP. The checks expect the base |
| ; loaded from @ptr.in.lds to be moved to s[0:1] via v_readfirstlane_b32 and |
| ; the constant to be folded as offset:42 on the no-return saddr form. |
| define amdgpu_ps void @global_xchg_saddr_uniform_ptr_in_vgprs_nortn_immoffset(i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_nortn_immoffset: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX9-NEXT: ds_read_b64 v[2:3], v2 |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX9-NEXT: v_readfirstlane_b32 s1, v3 |
| ; GFX9-NEXT: s_nop 4 |
| ; GFX9-NEXT: global_atomic_swap v0, v1, s[0:1] offset:42 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_nortn_immoffset: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX10-NEXT: ds_read_b64 v[2:3], v2 |
| ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX10-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX10-NEXT: v_readfirstlane_b32 s1, v3 |
| ; GFX10-NEXT: global_atomic_swap v0, v1, s[0:1] offset:42 |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_nortn_immoffset: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX11-NEXT: ds_load_b64 v[2:3], v2 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX11-NEXT: v_readfirstlane_b32 s1, v3 |
| ; GFX11-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] offset:42 |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_nortn_immoffset: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX12-NEXT: ds_load_b64 v[2:3], v2 |
| ; GFX12-NEXT: s_wait_dscnt 0x0 |
| ; GFX12-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX12-NEXT: v_readfirstlane_b32 s1, v3 |
| ; GFX12-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] offset:42 scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 42 |
| %unused = atomicrmw xchg ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------------------- |
| ; All atomicrmw ops |
| ; -------------------------------------------------------------------------------- |
| |
| ; -------------------------------------------------------------------------------- |
| ; atomicrmw xchg |
| ; -------------------------------------------------------------------------------- |
| |
| ; Returning 64-bit xchg at %sbase + zext(%voffset); the old value is returned |
| ; as <2 x float>. The checks expect the 64-bit saddr swap (`_x2` on |
| ; gfx9/gfx10, `_b64` on gfx11/gfx12) with data in v[1:2] and result in v[0:1]. |
| define amdgpu_ps <2 x float> @global_xchg_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_xchg_saddr_i64_rtn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_swap_x2 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_xchg_saddr_i64_rtn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_swap_x2 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_xchg_saddr_i64_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_swap_b64 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_xchg_saddr_i64_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_swap_b64 v[0:1], v0, v[1:2], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %rtn = atomicrmw xchg ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i64 %rtn to <2 x float> |
| ret <2 x float> %cast.rtn |
| } |
| |
| ; Returning 64-bit xchg with an extra -128 byte GEP. Every target checked |
| ; here is expected to fold it as offset:-128 on the returning saddr form. |
| define amdgpu_ps <2 x float> @global_xchg_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_xchg_saddr_i64_rtn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_swap_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_xchg_saddr_i64_rtn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_swap_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_xchg_saddr_i64_rtn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_swap_b64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_xchg_saddr_i64_rtn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_swap_b64 v[0:1], v0, v[1:2], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %rtn = atomicrmw xchg ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i64 %rtn to <2 x float> |
| ret <2 x float> %cast.rtn |
| } |
| |
| ; No-return 64-bit xchg at %sbase + zext(%voffset); the result is unused. |
| ; The checks expect the no-return 64-bit saddr swap (offset in v0, data in |
| ; v[1:2], base in s[2:3]) with no destination register. |
| define amdgpu_ps void @global_xchg_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_xchg_saddr_i64_nortn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_swap_x2 v0, v[1:2], s[2:3] |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_xchg_saddr_i64_nortn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_swap_x2 v0, v[1:2], s[2:3] |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_xchg_saddr_i64_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_swap_b64 v0, v[1:2], s[2:3] |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_xchg_saddr_i64_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_swap_b64 v0, v[1:2], s[2:3] scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = atomicrmw xchg ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; No-return 64-bit xchg with an extra -128 byte GEP. Every target checked |
| ; here is expected to fold it as offset:-128 on the no-return saddr form. |
| define amdgpu_ps void @global_xchg_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_xchg_saddr_i64_nortn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_swap_x2 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_xchg_saddr_i64_nortn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_swap_x2 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_xchg_saddr_i64_nortn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_swap_b64 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_xchg_saddr_i64_nortn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_swap_b64 v0, v[1:2], s[2:3] offset:-128 scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %unused = atomicrmw xchg ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------------------- |
| ; atomicrmw add |
| ; -------------------------------------------------------------------------------- |
| |
| ; Returning 32-bit atomic add at %sbase + zext(%voffset); the old value is |
| ; returned, bitcast to float. The checks expect the returning saddr form of |
| ; global_atomic_add (spelled global_atomic_add_u32 on gfx11/gfx12). |
| define amdgpu_ps float @global_add_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_add_saddr_i32_rtn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_add v0, v0, v1, s[2:3] glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_add_saddr_i32_rtn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_add v0, v0, v1, s[2:3] glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_add_saddr_i32_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_add_u32 v0, v0, v1, s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_add_saddr_i32_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_add_u32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %rtn = atomicrmw add ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; Returning 32-bit atomic add with an extra -128 byte GEP. Every target |
| ; checked here is expected to fold it as offset:-128 on the returning saddr |
| ; form. |
| define amdgpu_ps float @global_add_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_add_saddr_i32_rtn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_add v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_add_saddr_i32_rtn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_add v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_add_saddr_i32_rtn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_add_u32 v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_add_saddr_i32_rtn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_add_u32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %rtn = atomicrmw add ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; No-return 32-bit atomic add at %sbase + zext(%voffset); the result is |
| ; unused. The checks expect the no-return saddr form (offset in v0, data in |
| ; v1, base in s[2:3]) with no destination register. |
| define amdgpu_ps void @global_add_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_add_saddr_i32_nortn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_add v0, v1, s[2:3] |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_add_saddr_i32_nortn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_add v0, v1, s[2:3] |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_add_saddr_i32_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_add_u32 v0, v1, s[2:3] |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_add_saddr_i32_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_add_u32 v0, v1, s[2:3] scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = atomicrmw add ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; Non-returning i32 atomic add with a constant -128 byte GEP applied on top |
| ; of %sbase + zext(%voffset). The checks expect the constant to show up as |
| ; offset:-128 on the form without a return operand. |
| define amdgpu_ps void @global_add_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_add_saddr_i32_nortn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_add v0, v1, s[2:3] offset:-128 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_add_saddr_i32_nortn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_add v0, v1, s[2:3] offset:-128 |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_add_saddr_i32_nortn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_add_u32 v0, v1, s[2:3] offset:-128 |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_add_saddr_i32_nortn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_add_u32 v0, v1, s[2:3] offset:-128 scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %unused = atomicrmw add ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; Returning i64 atomic add at %sbase + zext(%voffset), agent scope, seq_cst. |
| ; The old value is bitcast to <2 x float> and returned, so the checks |
| ; expect the returning form of the instruction (glc / TH_ATOMIC_RETURN). |
| define amdgpu_ps <2 x float> @global_add_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_add_saddr_i64_rtn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_add_x2 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_add_saddr_i64_rtn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_add_x2 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_add_saddr_i64_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_add_u64 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_add_saddr_i64_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_add_u64 v[0:1], v0, v[1:2], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %rtn = atomicrmw add ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i64 %rtn to <2 x float> |
| ret <2 x float> %cast.rtn |
| } |
| |
| ; Returning i64 atomic add with a constant -128 byte GEP applied on top of |
| ; %sbase + zext(%voffset). The checks expect the constant to show up as |
| ; offset:-128 on the returning form (glc / TH_ATOMIC_RETURN). |
| define amdgpu_ps <2 x float> @global_add_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_add_saddr_i64_rtn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_add_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_add_saddr_i64_rtn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_add_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_add_saddr_i64_rtn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_add_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_add_saddr_i64_rtn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_add_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %rtn = atomicrmw add ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i64 %rtn to <2 x float> |
| ret <2 x float> %cast.rtn |
| } |
| |
| ; Non-returning i64 atomic add at %sbase + zext(%voffset), agent scope, |
| ; seq_cst. The result is unused, so the checks expect the form without a |
| ; return operand and without glc / TH_ATOMIC_RETURN. |
| define amdgpu_ps void @global_add_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_add_saddr_i64_nortn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_add_x2 v0, v[1:2], s[2:3] |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_add_saddr_i64_nortn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_add_x2 v0, v[1:2], s[2:3] |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_add_saddr_i64_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_add_u64 v0, v[1:2], s[2:3] |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_add_saddr_i64_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_add_u64 v0, v[1:2], s[2:3] scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = atomicrmw add ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; Non-returning i64 atomic add with a constant -128 byte GEP applied on top |
| ; of %sbase + zext(%voffset). The checks expect the constant to show up as |
| ; offset:-128 on the form without a return operand. |
| define amdgpu_ps void @global_add_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_add_saddr_i64_nortn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_add_x2 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_add_saddr_i64_nortn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_add_x2 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_add_saddr_i64_nortn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_add_u64 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_add_saddr_i64_nortn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_add_u64 v0, v[1:2], s[2:3] offset:-128 scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %unused = atomicrmw add ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------------------- |
| ; atomicrmw sub |
| ; -------------------------------------------------------------------------------- |
| |
| ; Returning i32 atomic sub at %sbase + zext(%voffset), agent scope, seq_cst. |
| ; The old value is bitcast to float and returned, so the checks expect the |
| ; returning form of the instruction (glc / TH_ATOMIC_RETURN). |
| define amdgpu_ps float @global_sub_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_sub_saddr_i32_rtn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_sub v0, v0, v1, s[2:3] glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_sub_saddr_i32_rtn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_sub v0, v0, v1, s[2:3] glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_sub_saddr_i32_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_sub_u32 v0, v0, v1, s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_sub_saddr_i32_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_sub_u32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %rtn = atomicrmw sub ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; Returning i32 atomic sub with a constant -128 byte GEP applied on top of |
| ; %sbase + zext(%voffset). The checks expect the constant to show up as |
| ; offset:-128 on the returning form (glc / TH_ATOMIC_RETURN). |
| define amdgpu_ps float @global_sub_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_sub_saddr_i32_rtn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_sub v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_sub_saddr_i32_rtn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_sub v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_sub_saddr_i32_rtn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_sub_u32 v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_sub_saddr_i32_rtn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_sub_u32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %rtn = atomicrmw sub ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; Non-returning i32 atomic sub at %sbase + zext(%voffset), agent scope, |
| ; seq_cst. The result is unused, so the checks expect the form without a |
| ; return operand and without glc / TH_ATOMIC_RETURN. |
| define amdgpu_ps void @global_sub_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_sub_saddr_i32_nortn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_sub v0, v1, s[2:3] |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_sub_saddr_i32_nortn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_sub v0, v1, s[2:3] |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_sub_saddr_i32_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_sub_u32 v0, v1, s[2:3] |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_sub_saddr_i32_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_sub_u32 v0, v1, s[2:3] scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = atomicrmw sub ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; Non-returning i32 atomic sub with a constant -128 byte GEP applied on top |
| ; of %sbase + zext(%voffset). The checks expect the constant to show up as |
| ; offset:-128 on the form without a return operand. |
| define amdgpu_ps void @global_sub_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_sub_saddr_i32_nortn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_sub v0, v1, s[2:3] offset:-128 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_sub_saddr_i32_nortn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_sub v0, v1, s[2:3] offset:-128 |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_sub_saddr_i32_nortn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_sub_u32 v0, v1, s[2:3] offset:-128 |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_sub_saddr_i32_nortn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_sub_u32 v0, v1, s[2:3] offset:-128 scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %unused = atomicrmw sub ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; Returning i64 atomic sub at %sbase + zext(%voffset), agent scope, seq_cst. |
| ; The old value is bitcast to <2 x float> and returned, so the checks |
| ; expect the returning form of the instruction (glc / TH_ATOMIC_RETURN). |
| define amdgpu_ps <2 x float> @global_sub_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_sub_saddr_i64_rtn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_sub_x2 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_sub_saddr_i64_rtn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_sub_x2 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_sub_saddr_i64_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_sub_u64 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_sub_saddr_i64_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_sub_u64 v[0:1], v0, v[1:2], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %rtn = atomicrmw sub ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i64 %rtn to <2 x float> |
| ret <2 x float> %cast.rtn |
| } |
| |
| ; Returning i64 atomic sub with a constant -128 byte GEP applied on top of |
| ; %sbase + zext(%voffset). The checks expect the constant to show up as |
| ; offset:-128 on the returning form (glc / TH_ATOMIC_RETURN). |
| define amdgpu_ps <2 x float> @global_sub_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_sub_saddr_i64_rtn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_sub_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_sub_saddr_i64_rtn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_sub_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_sub_saddr_i64_rtn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_sub_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_sub_saddr_i64_rtn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_sub_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %rtn = atomicrmw sub ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i64 %rtn to <2 x float> |
| ret <2 x float> %cast.rtn |
| } |
| |
| ; Non-returning i64 atomic sub at %sbase + zext(%voffset), agent scope, |
| ; seq_cst. The result is unused, so the checks expect the form without a |
| ; return operand and without glc / TH_ATOMIC_RETURN. |
| define amdgpu_ps void @global_sub_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_sub_saddr_i64_nortn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_sub_x2 v0, v[1:2], s[2:3] |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_sub_saddr_i64_nortn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_sub_x2 v0, v[1:2], s[2:3] |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_sub_saddr_i64_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_sub_u64 v0, v[1:2], s[2:3] |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_sub_saddr_i64_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_sub_u64 v0, v[1:2], s[2:3] scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = atomicrmw sub ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; Non-returning i64 atomic sub with a constant -128 byte GEP applied on top |
| ; of %sbase + zext(%voffset). The checks expect the constant to show up as |
| ; offset:-128 on the form without a return operand. |
| define amdgpu_ps void @global_sub_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_sub_saddr_i64_nortn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_sub_x2 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_sub_saddr_i64_nortn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_sub_x2 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_sub_saddr_i64_nortn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_sub_u64 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_sub_saddr_i64_nortn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_sub_u64 v0, v[1:2], s[2:3] offset:-128 scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %unused = atomicrmw sub ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------------------- |
| ; atomicrmw and |
| ; -------------------------------------------------------------------------------- |
| |
| ; Returning i32 atomic and at %sbase + zext(%voffset), agent scope, seq_cst. |
| ; The old value is bitcast to float and returned, so the checks expect the |
| ; returning form of the instruction (glc / TH_ATOMIC_RETURN). |
| define amdgpu_ps float @global_and_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_and_saddr_i32_rtn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_and v0, v0, v1, s[2:3] glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_and_saddr_i32_rtn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_and v0, v0, v1, s[2:3] glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_and_saddr_i32_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_and_b32 v0, v0, v1, s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_and_saddr_i32_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_and_b32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %rtn = atomicrmw and ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; Returning i32 atomic and with a constant -128 byte GEP applied on top of |
| ; %sbase + zext(%voffset). The checks expect the constant to show up as |
| ; offset:-128 on the returning form (glc / TH_ATOMIC_RETURN). |
| define amdgpu_ps float @global_and_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_and_saddr_i32_rtn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_and v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_and_saddr_i32_rtn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_and v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_and_saddr_i32_rtn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_and_b32 v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_and_saddr_i32_rtn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_and_b32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %rtn = atomicrmw and ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; Non-returning i32 atomic and at %sbase + zext(%voffset), agent scope, |
| ; seq_cst. The result is unused, so the checks expect the form without a |
| ; return operand and without glc / TH_ATOMIC_RETURN. |
| define amdgpu_ps void @global_and_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_and_saddr_i32_nortn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_and v0, v1, s[2:3] |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_and_saddr_i32_nortn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_and v0, v1, s[2:3] |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_and_saddr_i32_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_and_b32 v0, v1, s[2:3] |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_and_saddr_i32_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_and_b32 v0, v1, s[2:3] scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = atomicrmw and ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; Non-returning i32 atomic and with a constant -128 byte GEP applied on top |
| ; of %sbase + zext(%voffset). The checks expect the constant to show up as |
| ; offset:-128 on the form without a return operand. |
| define amdgpu_ps void @global_and_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_and_saddr_i32_nortn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_and v0, v1, s[2:3] offset:-128 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_and_saddr_i32_nortn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_and v0, v1, s[2:3] offset:-128 |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_and_saddr_i32_nortn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_and_b32 v0, v1, s[2:3] offset:-128 |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_and_saddr_i32_nortn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_and_b32 v0, v1, s[2:3] offset:-128 scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %unused = atomicrmw and ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; Returning i64 atomic and at %sbase + zext(%voffset), agent scope, seq_cst. |
| ; The old value is bitcast to <2 x float> and returned, so the checks |
| ; expect the returning form of the instruction (glc / TH_ATOMIC_RETURN). |
| define amdgpu_ps <2 x float> @global_and_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_and_saddr_i64_rtn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_and_x2 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_and_saddr_i64_rtn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_and_x2 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_and_saddr_i64_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_and_b64 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_and_saddr_i64_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_and_b64 v[0:1], v0, v[1:2], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %rtn = atomicrmw and ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i64 %rtn to <2 x float> |
| ret <2 x float> %cast.rtn |
| } |
| |
| ; Returning i64 atomic and with a constant -128 byte GEP applied on top of |
| ; %sbase + zext(%voffset). The checks expect the constant to show up as |
| ; offset:-128 on the returning form (glc / TH_ATOMIC_RETURN). |
| define amdgpu_ps <2 x float> @global_and_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_and_saddr_i64_rtn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_and_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_and_saddr_i64_rtn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_and_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_and_saddr_i64_rtn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_and_b64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_and_saddr_i64_rtn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_and_b64 v[0:1], v0, v[1:2], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %rtn = atomicrmw and ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i64 %rtn to <2 x float> |
| ret <2 x float> %cast.rtn |
| } |
| |
| ; Non-returning i64 atomic and at %sbase + zext(%voffset), agent scope, |
| ; seq_cst. The result is unused, so the checks expect the form without a |
| ; return operand and without glc / TH_ATOMIC_RETURN. |
| define amdgpu_ps void @global_and_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_and_saddr_i64_nortn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_and_x2 v0, v[1:2], s[2:3] |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_and_saddr_i64_nortn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_and_x2 v0, v[1:2], s[2:3] |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_and_saddr_i64_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_and_b64 v0, v[1:2], s[2:3] |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_and_saddr_i64_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_and_b64 v0, v[1:2], s[2:3] scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = atomicrmw and ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; Non-returning i64 atomic and with a constant -128 byte GEP applied on top |
| ; of %sbase + zext(%voffset). The checks expect the constant to show up as |
| ; offset:-128 on the form without a return operand. |
| define amdgpu_ps void @global_and_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_and_saddr_i64_nortn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_and_x2 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_and_saddr_i64_nortn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_and_x2 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_and_saddr_i64_nortn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_and_b64 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_and_saddr_i64_nortn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_and_b64 v0, v[1:2], s[2:3] offset:-128 scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %unused = atomicrmw and ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------------------- |
| ; atomicrmw or |
| ; -------------------------------------------------------------------------------- |
| |
| ; 32-bit atomic or whose old value is returned (bitcast to float), agent scope, |
| ; seq_cst. The address is the inreg %sbase plus the zero-extended %voffset. |
| define amdgpu_ps float @global_or_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_or_saddr_i32_rtn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_or v0, v0, v1, s[2:3] glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_or_saddr_i32_rtn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_or v0, v0, v1, s[2:3] glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_or_saddr_i32_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_or_b32 v0, v0, v1, s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_or_saddr_i32_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_or_b32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %rtn = atomicrmw or ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; 32-bit atomic or whose old value is returned (bitcast to float), agent scope, |
| ; seq_cst. The address is the inreg %sbase plus the zero-extended %voffset plus |
| ; a constant -128 bytes; the expected output carries that constant as |
| ; offset:-128 on the saddr form. |
| define amdgpu_ps float @global_or_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_or_saddr_i32_rtn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_or v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_or_saddr_i32_rtn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_or v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_or_saddr_i32_rtn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_or_b32 v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_or_saddr_i32_rtn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_or_b32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %rtn = atomicrmw or ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; 32-bit atomic or, result unused, agent scope, seq_cst. The address is the |
| ; inreg %sbase plus the zero-extended %voffset. |
| define amdgpu_ps void @global_or_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_or_saddr_i32_nortn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_or v0, v1, s[2:3] |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_or_saddr_i32_nortn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_or v0, v1, s[2:3] |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_or_saddr_i32_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_or_b32 v0, v1, s[2:3] |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_or_saddr_i32_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_or_b32 v0, v1, s[2:3] scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = atomicrmw or ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; 32-bit atomic or, result unused, agent scope, seq_cst. The address is the |
| ; inreg %sbase plus the zero-extended %voffset plus a constant -128 bytes; the |
| ; expected output carries that constant as offset:-128 on the saddr form. |
| define amdgpu_ps void @global_or_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_or_saddr_i32_nortn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_or v0, v1, s[2:3] offset:-128 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_or_saddr_i32_nortn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_or v0, v1, s[2:3] offset:-128 |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_or_saddr_i32_nortn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_or_b32 v0, v1, s[2:3] offset:-128 |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_or_saddr_i32_nortn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_or_b32 v0, v1, s[2:3] offset:-128 scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %unused = atomicrmw or ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; 64-bit atomic or whose old value is returned (bitcast to <2 x float>), agent |
| ; scope, seq_cst. The address is the inreg %sbase plus the zero-extended |
| ; %voffset. |
| define amdgpu_ps <2 x float> @global_or_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_or_saddr_i64_rtn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_or_x2 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_or_saddr_i64_rtn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_or_x2 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_or_saddr_i64_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_or_b64 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_or_saddr_i64_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_or_b64 v[0:1], v0, v[1:2], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %rtn = atomicrmw or ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i64 %rtn to <2 x float> |
| ret <2 x float> %cast.rtn |
| } |
| |
| ; 64-bit atomic or whose old value is returned (bitcast to <2 x float>), agent |
| ; scope, seq_cst. The address is the inreg %sbase plus the zero-extended |
| ; %voffset plus a constant -128 bytes; the expected output carries that |
| ; constant as offset:-128 on the saddr form. |
| define amdgpu_ps <2 x float> @global_or_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_or_saddr_i64_rtn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_or_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_or_saddr_i64_rtn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_or_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_or_saddr_i64_rtn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_or_b64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_or_saddr_i64_rtn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_or_b64 v[0:1], v0, v[1:2], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %rtn = atomicrmw or ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i64 %rtn to <2 x float> |
| ret <2 x float> %cast.rtn |
| } |
| |
| ; 64-bit atomic or, result unused, agent scope, seq_cst. The address is the |
| ; inreg %sbase plus the zero-extended %voffset. |
| define amdgpu_ps void @global_or_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_or_saddr_i64_nortn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_or_x2 v0, v[1:2], s[2:3] |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_or_saddr_i64_nortn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_or_x2 v0, v[1:2], s[2:3] |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_or_saddr_i64_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_or_b64 v0, v[1:2], s[2:3] |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_or_saddr_i64_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_or_b64 v0, v[1:2], s[2:3] scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = atomicrmw or ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; 64-bit atomic or, result unused, agent scope, seq_cst. The address is the |
| ; inreg %sbase plus the zero-extended %voffset plus a constant -128 bytes; the |
| ; expected output carries that constant as offset:-128 on the saddr form. |
| define amdgpu_ps void @global_or_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_or_saddr_i64_nortn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_or_x2 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_or_saddr_i64_nortn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_or_x2 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_or_saddr_i64_nortn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_or_b64 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_or_saddr_i64_nortn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_or_b64 v0, v[1:2], s[2:3] offset:-128 scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %unused = atomicrmw or ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------------------- |
| ; atomicrmw xor |
| ; -------------------------------------------------------------------------------- |
| |
| ; 32-bit atomic xor whose old value is returned (bitcast to float), agent |
| ; scope, seq_cst. The address is the inreg %sbase plus the zero-extended |
| ; %voffset. |
| define amdgpu_ps float @global_xor_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_xor_saddr_i32_rtn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_xor v0, v0, v1, s[2:3] glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_xor_saddr_i32_rtn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_xor v0, v0, v1, s[2:3] glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_xor_saddr_i32_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_xor_b32 v0, v0, v1, s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_xor_saddr_i32_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_xor_b32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %rtn = atomicrmw xor ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; 32-bit atomic xor whose old value is returned (bitcast to float), agent |
| ; scope, seq_cst. The address is the inreg %sbase plus the zero-extended |
| ; %voffset plus a constant -128 bytes; the expected output carries that |
| ; constant as offset:-128 on the saddr form. |
| define amdgpu_ps float @global_xor_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_xor_saddr_i32_rtn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_xor v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_xor_saddr_i32_rtn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_xor v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_xor_saddr_i32_rtn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_xor_b32 v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_xor_saddr_i32_rtn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_xor_b32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %rtn = atomicrmw xor ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; 32-bit atomic xor, result unused, agent scope, seq_cst. The address is the |
| ; inreg %sbase plus the zero-extended %voffset. |
| define amdgpu_ps void @global_xor_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_xor_saddr_i32_nortn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_xor v0, v1, s[2:3] |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_xor_saddr_i32_nortn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_xor v0, v1, s[2:3] |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_xor_saddr_i32_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_xor_b32 v0, v1, s[2:3] |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_xor_saddr_i32_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_xor_b32 v0, v1, s[2:3] scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = atomicrmw xor ptr addrspace(1) %gep0, i32 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; 32-bit atomic xor, result unused, agent scope, seq_cst. The address is the |
| ; inreg %sbase plus the zero-extended %voffset plus a constant -128 bytes; the |
| ; expected output carries that constant as offset:-128 on the saddr form. |
| define amdgpu_ps void @global_xor_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_xor_saddr_i32_nortn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_xor v0, v1, s[2:3] offset:-128 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_xor_saddr_i32_nortn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_xor v0, v1, s[2:3] offset:-128 |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_xor_saddr_i32_nortn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_xor_b32 v0, v1, s[2:3] offset:-128 |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_xor_saddr_i32_nortn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_xor_b32 v0, v1, s[2:3] offset:-128 scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %unused = atomicrmw xor ptr addrspace(1) %gep1, i32 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; 64-bit atomic xor whose old value is returned (bitcast to <2 x float>), agent |
| ; scope, seq_cst. The address is the inreg %sbase plus the zero-extended |
| ; %voffset. |
| define amdgpu_ps <2 x float> @global_xor_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_xor_saddr_i64_rtn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_xor_x2 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_xor_saddr_i64_rtn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_xor_x2 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_xor_saddr_i64_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_xor_b64 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_xor_saddr_i64_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_xor_b64 v[0:1], v0, v[1:2], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %rtn = atomicrmw xor ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i64 %rtn to <2 x float> |
| ret <2 x float> %cast.rtn |
| } |
| |
| ; 64-bit atomic xor whose old value is returned (bitcast to <2 x float>), agent |
| ; scope, seq_cst. The address is the inreg %sbase plus the zero-extended |
| ; %voffset plus a constant -128 bytes; the expected output carries that |
| ; constant as offset:-128 on the saddr form. |
| define amdgpu_ps <2 x float> @global_xor_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_xor_saddr_i64_rtn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_xor_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_xor_saddr_i64_rtn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_xor_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_xor_saddr_i64_rtn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_xor_b64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_xor_saddr_i64_rtn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_xor_b64 v[0:1], v0, v[1:2], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %rtn = atomicrmw xor ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst |
| %cast.rtn = bitcast i64 %rtn to <2 x float> |
| ret <2 x float> %cast.rtn |
| } |
| |
| ; 64-bit atomic xor, result unused, agent scope, seq_cst. The address is the |
| ; inreg %sbase plus the zero-extended %voffset. |
| define amdgpu_ps void @global_xor_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_xor_saddr_i64_nortn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_xor_x2 v0, v[1:2], s[2:3] |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_xor_saddr_i64_nortn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_xor_x2 v0, v[1:2], s[2:3] |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_xor_saddr_i64_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_xor_b64 v0, v[1:2], s[2:3] |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_xor_saddr_i64_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_xor_b64 v0, v[1:2], s[2:3] scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = atomicrmw xor ptr addrspace(1) %gep0, i64 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; 64-bit atomic xor, result unused, agent scope, seq_cst. The address is the |
| ; inreg %sbase plus the zero-extended %voffset plus a constant -128 bytes; the |
| ; expected output carries that constant as offset:-128 on the saddr form. |
| define amdgpu_ps void @global_xor_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_xor_saddr_i64_nortn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_xor_x2 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_xor_saddr_i64_nortn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_xor_x2 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_xor_saddr_i64_nortn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_xor_b64 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_xor_saddr_i64_nortn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_xor_b64 v0, v[1:2], s[2:3] offset:-128 scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %unused = atomicrmw xor ptr addrspace(1) %gep1, i64 %data syncscope("agent") seq_cst |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------------------- |
| ; atomicrmw max |
| ; -------------------------------------------------------------------------------- |
| |
| ; 32-bit atomic signed max whose old value is returned (bitcast to float), |
| ; seq_cst. Unlike the and/or/xor tests this uses workgroup scope. The address |
| ; is the inreg %sbase plus the zero-extended %voffset. |
| define amdgpu_ps float @global_max_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_max_saddr_i32_rtn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_smax v0, v0, v1, s[2:3] glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_max_saddr_i32_rtn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_smax v0, v0, v1, s[2:3] glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_max_saddr_i32_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_max_i32 v0, v0, v1, s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_max_saddr_i32_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_max_i32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %rtn = atomicrmw max ptr addrspace(1) %gep0, i32 %data syncscope("workgroup") seq_cst |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; 32-bit atomic signed max whose old value is returned (bitcast to float), |
| ; workgroup scope, seq_cst. The address is the inreg %sbase plus the |
| ; zero-extended %voffset plus a constant -128 bytes; the expected output |
| ; carries that constant as offset:-128 on the saddr form. |
| define amdgpu_ps float @global_max_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_max_saddr_i32_rtn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_smax v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_max_saddr_i32_rtn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_smax v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_max_saddr_i32_rtn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_max_i32 v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_max_saddr_i32_rtn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_max_i32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %rtn = atomicrmw max ptr addrspace(1) %gep1, i32 %data syncscope("workgroup") seq_cst |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; 32-bit atomic signed max, result unused, workgroup scope, seq_cst. The |
| ; address is the inreg %sbase plus the zero-extended %voffset. The gfx900 |
| ; expectations below have no wait or cache invalidate after the atomic. |
| define amdgpu_ps void @global_max_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_max_saddr_i32_nortn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_smax v0, v1, s[2:3] |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_max_saddr_i32_nortn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_smax v0, v1, s[2:3] |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_max_saddr_i32_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_max_i32 v0, v1, s[2:3] |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_max_saddr_i32_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_max_i32 v0, v1, s[2:3] scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = atomicrmw max ptr addrspace(1) %gep0, i32 %data syncscope("workgroup") seq_cst |
| ret void |
| } |
| |
| ; 32-bit atomic signed max, result unused, workgroup scope, seq_cst. The |
| ; address is the inreg %sbase plus the zero-extended %voffset plus a constant |
| ; -128 bytes; the expected output carries that constant as offset:-128 on the |
| ; saddr form. The gfx900 expectations have no wait or cache invalidate. |
| define amdgpu_ps void @global_max_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_max_saddr_i32_nortn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_smax v0, v1, s[2:3] offset:-128 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_max_saddr_i32_nortn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_smax v0, v1, s[2:3] offset:-128 |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_max_saddr_i32_nortn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_max_i32 v0, v1, s[2:3] offset:-128 |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_max_saddr_i32_nortn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_max_i32 v0, v1, s[2:3] offset:-128 scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %unused = atomicrmw max ptr addrspace(1) %gep1, i32 %data syncscope("workgroup") seq_cst |
| ret void |
| } |
| |
| ; 64-bit atomic signed max whose old value is returned (bitcast to |
| ; <2 x float>), workgroup scope, seq_cst. The address is the inreg %sbase plus |
| ; the zero-extended %voffset. |
| define amdgpu_ps <2 x float> @global_max_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_max_saddr_i64_rtn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_smax_x2 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_max_saddr_i64_rtn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_smax_x2 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_max_saddr_i64_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_max_i64 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_max_saddr_i64_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_max_i64 v[0:1], v0, v[1:2], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %rtn = atomicrmw max ptr addrspace(1) %gep0, i64 %data syncscope("workgroup") seq_cst |
| %cast.rtn = bitcast i64 %rtn to <2 x float> |
| ret <2 x float> %cast.rtn |
| } |
| |
| ; Returning 64-bit signed max at workgroup scope with an extra constant |
| ; -128 byte GEP applied on top of %sbase + zext(%voffset). The checks expect |
| ; that constant to show up as the instruction's offset:-128 immediate. |
| define amdgpu_ps <2 x float> @global_max_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_max_saddr_i64_rtn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_smax_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_max_saddr_i64_rtn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_smax_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_max_saddr_i64_rtn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_max_i64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_max_saddr_i64_rtn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_max_i64 v[0:1], v0, v[1:2], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %rtn = atomicrmw max ptr addrspace(1) %gep1, i64 %data syncscope("workgroup") seq_cst |
| %cast.rtn = bitcast i64 %rtn to <2 x float> |
| ret <2 x float> %cast.rtn |
| } |
| |
| ; 64-bit signed max at workgroup scope whose result is never used (%unused), |
| ; addressed as inreg %sbase + zext(%voffset). The checks expect the saddr |
| ; encoding without a destination register or return modifier. |
| define amdgpu_ps void @global_max_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_max_saddr_i64_nortn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_smax_x2 v0, v[1:2], s[2:3] |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_max_saddr_i64_nortn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_smax_x2 v0, v[1:2], s[2:3] |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_max_saddr_i64_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_max_i64 v0, v[1:2], s[2:3] |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_max_saddr_i64_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_max_i64 v0, v[1:2], s[2:3] scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = atomicrmw max ptr addrspace(1) %gep0, i64 %data syncscope("workgroup") seq_cst |
| ret void |
| } |
| |
| ; 64-bit signed max at workgroup scope with an unused result and an extra |
| ; constant -128 byte GEP on top of %sbase + zext(%voffset). The checks expect |
| ; the no-return saddr encoding carrying offset:-128. |
| define amdgpu_ps void @global_max_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_max_saddr_i64_nortn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_smax_x2 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_max_saddr_i64_nortn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_smax_x2 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_max_saddr_i64_nortn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_max_i64 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_max_saddr_i64_nortn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_max_i64 v0, v[1:2], s[2:3] offset:-128 scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %unused = atomicrmw max ptr addrspace(1) %gep1, i64 %data syncscope("workgroup") seq_cst |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------------------- |
| ; atomicrmw min |
| ; -------------------------------------------------------------------------------- |
| |
| ; Returning 32-bit signed min at workgroup scope. The address is the inreg |
| ; %sbase plus zext(%voffset); the checks expect the saddr encoding with |
| ; s[2:3] as base, v0 as offset, and the result delivered in v0. |
| define amdgpu_ps float @global_min_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_min_saddr_i32_rtn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_smin v0, v0, v1, s[2:3] glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_min_saddr_i32_rtn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_smin v0, v0, v1, s[2:3] glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_min_saddr_i32_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_min_i32 v0, v0, v1, s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_min_saddr_i32_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_min_i32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %rtn = atomicrmw min ptr addrspace(1) %gep0, i32 %data syncscope("workgroup") seq_cst |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; Returning 32-bit signed min at workgroup scope with an extra constant |
| ; -128 byte GEP applied on top of %sbase + zext(%voffset). The checks expect |
| ; that constant to show up as the instruction's offset:-128 immediate. |
| define amdgpu_ps float @global_min_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_min_saddr_i32_rtn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_smin v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_min_saddr_i32_rtn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_smin v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_min_saddr_i32_rtn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_min_i32 v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_min_saddr_i32_rtn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_min_i32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %rtn = atomicrmw min ptr addrspace(1) %gep1, i32 %data syncscope("workgroup") seq_cst |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; 32-bit signed min at workgroup scope whose result is never used (%unused), |
| ; addressed as inreg %sbase + zext(%voffset). The checks expect the saddr |
| ; encoding without a destination register or return modifier. |
| define amdgpu_ps void @global_min_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_min_saddr_i32_nortn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_smin v0, v1, s[2:3] |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_min_saddr_i32_nortn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_smin v0, v1, s[2:3] |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_min_saddr_i32_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_min_i32 v0, v1, s[2:3] |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_min_saddr_i32_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_min_i32 v0, v1, s[2:3] scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = atomicrmw min ptr addrspace(1) %gep0, i32 %data syncscope("workgroup") seq_cst |
| ret void |
| } |
| |
| ; 32-bit signed min at workgroup scope with an unused result and an extra |
| ; constant -128 byte GEP on top of %sbase + zext(%voffset). The checks expect |
| ; the no-return saddr encoding carrying offset:-128. |
| define amdgpu_ps void @global_min_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_min_saddr_i32_nortn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_smin v0, v1, s[2:3] offset:-128 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_min_saddr_i32_nortn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_smin v0, v1, s[2:3] offset:-128 |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_min_saddr_i32_nortn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_min_i32 v0, v1, s[2:3] offset:-128 |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_min_saddr_i32_nortn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_min_i32 v0, v1, s[2:3] offset:-128 scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %unused = atomicrmw min ptr addrspace(1) %gep1, i32 %data syncscope("workgroup") seq_cst |
| ret void |
| } |
| |
| ; Returning 64-bit signed min at workgroup scope. The address is the inreg |
| ; %sbase plus zext(%voffset); the checks expect the saddr encoding with |
| ; s[2:3] as base, v0 as offset, and the result delivered in v[0:1]. |
| define amdgpu_ps <2 x float> @global_min_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_min_saddr_i64_rtn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_smin_x2 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_min_saddr_i64_rtn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_smin_x2 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_min_saddr_i64_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_min_i64 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_min_saddr_i64_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_min_i64 v[0:1], v0, v[1:2], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %rtn = atomicrmw min ptr addrspace(1) %gep0, i64 %data syncscope("workgroup") seq_cst |
| %cast.rtn = bitcast i64 %rtn to <2 x float> |
| ret <2 x float> %cast.rtn |
| } |
| |
| ; Returning 64-bit signed min at workgroup scope with an extra constant |
| ; -128 byte GEP applied on top of %sbase + zext(%voffset). The checks expect |
| ; that constant to show up as the instruction's offset:-128 immediate. |
| define amdgpu_ps <2 x float> @global_min_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_min_saddr_i64_rtn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_smin_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_min_saddr_i64_rtn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_smin_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_min_saddr_i64_rtn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_min_i64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_min_saddr_i64_rtn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_min_i64 v[0:1], v0, v[1:2], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %rtn = atomicrmw min ptr addrspace(1) %gep1, i64 %data syncscope("workgroup") seq_cst |
| %cast.rtn = bitcast i64 %rtn to <2 x float> |
| ret <2 x float> %cast.rtn |
| } |
| |
| ; 64-bit signed min at workgroup scope whose result is never used (%unused), |
| ; addressed as inreg %sbase + zext(%voffset). The checks expect the saddr |
| ; encoding without a destination register or return modifier. |
| define amdgpu_ps void @global_min_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_min_saddr_i64_nortn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_smin_x2 v0, v[1:2], s[2:3] |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_min_saddr_i64_nortn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_smin_x2 v0, v[1:2], s[2:3] |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_min_saddr_i64_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_min_i64 v0, v[1:2], s[2:3] |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_min_saddr_i64_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_min_i64 v0, v[1:2], s[2:3] scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = atomicrmw min ptr addrspace(1) %gep0, i64 %data syncscope("workgroup") seq_cst |
| ret void |
| } |
| |
| ; 64-bit signed min at workgroup scope with an unused result and an extra |
| ; constant -128 byte GEP on top of %sbase + zext(%voffset). The checks expect |
| ; the no-return saddr encoding carrying offset:-128. |
| define amdgpu_ps void @global_min_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_min_saddr_i64_nortn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_smin_x2 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_min_saddr_i64_nortn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_smin_x2 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_min_saddr_i64_nortn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_min_i64 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_min_saddr_i64_nortn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_min_i64 v0, v[1:2], s[2:3] offset:-128 scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %unused = atomicrmw min ptr addrspace(1) %gep1, i64 %data syncscope("workgroup") seq_cst |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------------------- |
| ; atomicrmw umax |
| ; -------------------------------------------------------------------------------- |
| |
| ; Returning 32-bit unsigned max at workgroup scope. The address is the inreg |
| ; %sbase plus zext(%voffset); the checks expect the saddr encoding with |
| ; s[2:3] as base, v0 as offset, and the result delivered in v0. |
| define amdgpu_ps float @global_umax_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_umax_saddr_i32_rtn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_umax v0, v0, v1, s[2:3] glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_umax_saddr_i32_rtn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_umax v0, v0, v1, s[2:3] glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_umax_saddr_i32_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_max_u32 v0, v0, v1, s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_umax_saddr_i32_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_max_u32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %rtn = atomicrmw umax ptr addrspace(1) %gep0, i32 %data syncscope("workgroup") seq_cst |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; Returning 32-bit unsigned max at workgroup scope with an extra constant |
| ; -128 byte GEP applied on top of %sbase + zext(%voffset). The checks expect |
| ; that constant to show up as the instruction's offset:-128 immediate. |
| define amdgpu_ps float @global_umax_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_umax_saddr_i32_rtn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_umax v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_umax_saddr_i32_rtn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_umax v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_umax_saddr_i32_rtn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_max_u32 v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_umax_saddr_i32_rtn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_max_u32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %rtn = atomicrmw umax ptr addrspace(1) %gep1, i32 %data syncscope("workgroup") seq_cst |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; 32-bit unsigned max at workgroup scope whose result is never used |
| ; (%unused), addressed as inreg %sbase + zext(%voffset). The checks expect |
| ; the saddr encoding without a destination register or return modifier. |
| define amdgpu_ps void @global_umax_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_umax_saddr_i32_nortn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_umax v0, v1, s[2:3] |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_umax_saddr_i32_nortn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_umax v0, v1, s[2:3] |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_umax_saddr_i32_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_max_u32 v0, v1, s[2:3] |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_umax_saddr_i32_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_max_u32 v0, v1, s[2:3] scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = atomicrmw umax ptr addrspace(1) %gep0, i32 %data syncscope("workgroup") seq_cst |
| ret void |
| } |
| |
| ; 32-bit unsigned max at workgroup scope with an unused result and an extra |
| ; constant -128 byte GEP on top of %sbase + zext(%voffset). The checks expect |
| ; the no-return saddr encoding carrying offset:-128. |
| define amdgpu_ps void @global_umax_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_umax_saddr_i32_nortn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_umax v0, v1, s[2:3] offset:-128 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_umax_saddr_i32_nortn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_umax v0, v1, s[2:3] offset:-128 |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_umax_saddr_i32_nortn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_max_u32 v0, v1, s[2:3] offset:-128 |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_umax_saddr_i32_nortn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_max_u32 v0, v1, s[2:3] offset:-128 scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %unused = atomicrmw umax ptr addrspace(1) %gep1, i32 %data syncscope("workgroup") seq_cst |
| ret void |
| } |
| |
| ; Returning 64-bit unsigned max at workgroup scope. The address is the inreg |
| ; %sbase plus zext(%voffset); the checks expect the saddr encoding with |
| ; s[2:3] as base, v0 as offset, and the result delivered in v[0:1]. |
| define amdgpu_ps <2 x float> @global_umax_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_umax_saddr_i64_rtn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_umax_x2 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_umax_saddr_i64_rtn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_umax_x2 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_umax_saddr_i64_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_max_u64 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_umax_saddr_i64_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_max_u64 v[0:1], v0, v[1:2], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %rtn = atomicrmw umax ptr addrspace(1) %gep0, i64 %data syncscope("workgroup") seq_cst |
| %cast.rtn = bitcast i64 %rtn to <2 x float> |
| ret <2 x float> %cast.rtn |
| } |
| |
| ; Returning 64-bit unsigned max at workgroup scope with an extra constant |
| ; -128 byte GEP applied on top of %sbase + zext(%voffset). The checks expect |
| ; that constant to show up as the instruction's offset:-128 immediate. |
| define amdgpu_ps <2 x float> @global_umax_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_umax_saddr_i64_rtn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_umax_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_umax_saddr_i64_rtn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_umax_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_umax_saddr_i64_rtn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_max_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_umax_saddr_i64_rtn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_max_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %rtn = atomicrmw umax ptr addrspace(1) %gep1, i64 %data syncscope("workgroup") seq_cst |
| %cast.rtn = bitcast i64 %rtn to <2 x float> |
| ret <2 x float> %cast.rtn |
| } |
| |
| ; 64-bit unsigned max at workgroup scope whose result is never used |
| ; (%unused), addressed as inreg %sbase + zext(%voffset). The checks expect |
| ; the saddr encoding without a destination register or return modifier. |
| define amdgpu_ps void @global_umax_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_umax_saddr_i64_nortn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_umax_x2 v0, v[1:2], s[2:3] |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_umax_saddr_i64_nortn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_umax_x2 v0, v[1:2], s[2:3] |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_umax_saddr_i64_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_max_u64 v0, v[1:2], s[2:3] |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_umax_saddr_i64_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_max_u64 v0, v[1:2], s[2:3] scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = atomicrmw umax ptr addrspace(1) %gep0, i64 %data syncscope("workgroup") seq_cst |
| ret void |
| } |
| |
| ; 64-bit unsigned max at workgroup scope with an unused result and an extra |
| ; constant -128 byte GEP on top of %sbase + zext(%voffset). The checks expect |
| ; the no-return saddr encoding carrying offset:-128. |
| define amdgpu_ps void @global_umax_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_umax_saddr_i64_nortn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_umax_x2 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_umax_saddr_i64_nortn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_umax_x2 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_umax_saddr_i64_nortn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_max_u64 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_umax_saddr_i64_nortn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_max_u64 v0, v[1:2], s[2:3] offset:-128 scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %unused = atomicrmw umax ptr addrspace(1) %gep1, i64 %data syncscope("workgroup") seq_cst |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------------------- |
| ; atomicrmw umin |
| ; -------------------------------------------------------------------------------- |
| |
| ; Returning 32-bit unsigned min at workgroup scope. The address is the inreg |
| ; %sbase plus zext(%voffset); the checks expect the saddr encoding with |
| ; s[2:3] as base, v0 as offset, and the result delivered in v0. |
| define amdgpu_ps float @global_umin_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_umin_saddr_i32_rtn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_umin v0, v0, v1, s[2:3] glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_umin_saddr_i32_rtn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_umin v0, v0, v1, s[2:3] glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_umin_saddr_i32_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_min_u32 v0, v0, v1, s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_umin_saddr_i32_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_min_u32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %rtn = atomicrmw umin ptr addrspace(1) %gep0, i32 %data syncscope("workgroup") seq_cst |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; Returning 32-bit unsigned min at workgroup scope with an extra constant |
| ; -128 byte GEP applied on top of %sbase + zext(%voffset). The checks expect |
| ; that constant to show up as the instruction's offset:-128 immediate. |
| define amdgpu_ps float @global_umin_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_umin_saddr_i32_rtn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_umin v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_umin_saddr_i32_rtn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_umin v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_umin_saddr_i32_rtn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_min_u32 v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_umin_saddr_i32_rtn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_min_u32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %rtn = atomicrmw umin ptr addrspace(1) %gep1, i32 %data syncscope("workgroup") seq_cst |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; 32-bit unsigned min at workgroup scope whose result is never used |
| ; (%unused), addressed as inreg %sbase + zext(%voffset). The checks expect |
| ; the saddr encoding without a destination register or return modifier. |
| define amdgpu_ps void @global_umin_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_umin_saddr_i32_nortn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_umin v0, v1, s[2:3] |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_umin_saddr_i32_nortn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_umin v0, v1, s[2:3] |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_umin_saddr_i32_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_min_u32 v0, v1, s[2:3] |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_umin_saddr_i32_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_min_u32 v0, v1, s[2:3] scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = atomicrmw umin ptr addrspace(1) %gep0, i32 %data syncscope("workgroup") seq_cst |
| ret void |
| } |
| |
| ; Unsigned-min atomicrmw on i32 (workgroup scope, seq_cst) with the result unused, |
| ; addressed as %sbase + zext(%voffset) - 128 bytes. The checks expect the constant |
| ; part to appear as the instruction's offset:-128 immediate. |
| define amdgpu_ps void @global_umin_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GFX9-LABEL: global_umin_saddr_i32_nortn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_umin v0, v1, s[2:3] offset:-128 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_umin_saddr_i32_nortn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_umin v0, v1, s[2:3] offset:-128 |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_umin_saddr_i32_nortn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_min_u32 v0, v1, s[2:3] offset:-128 |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_umin_saddr_i32_nortn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_min_u32 v0, v1, s[2:3] offset:-128 scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %unused = atomicrmw umin ptr addrspace(1) %gep1, i32 %data syncscope("workgroup") seq_cst |
| ret void |
| } |
| |
| ; Unsigned-min atomicrmw on i64 (workgroup scope, seq_cst) whose old value is |
| ; returned, bitcast to <2 x float>. The pointer is %sbase + zext(%voffset); the |
| ; checks expect the returning form of the atomic writing v[0:1], followed by a |
| ; wait before the shader-part epilog. |
| define amdgpu_ps <2 x float> @global_umin_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_umin_saddr_i64_rtn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_umin_x2 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_umin_saddr_i64_rtn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_umin_x2 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_umin_saddr_i64_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_min_u64 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_umin_saddr_i64_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_min_u64 v[0:1], v0, v[1:2], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %rtn = atomicrmw umin ptr addrspace(1) %gep0, i64 %data syncscope("workgroup") seq_cst |
| %cast.rtn = bitcast i64 %rtn to <2 x float> |
| ret <2 x float> %cast.rtn |
| } |
| |
| ; Unsigned-min atomicrmw on i64 (workgroup scope, seq_cst) whose old value is |
| ; returned, bitcast to <2 x float>, addressed as %sbase + zext(%voffset) - 128 |
| ; bytes. The checks expect the returning form with an offset:-128 immediate. |
| define amdgpu_ps <2 x float> @global_umin_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_umin_saddr_i64_rtn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_umin_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_umin_saddr_i64_rtn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_umin_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_umin_saddr_i64_rtn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_min_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_umin_saddr_i64_rtn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_min_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %rtn = atomicrmw umin ptr addrspace(1) %gep1, i64 %data syncscope("workgroup") seq_cst |
| %cast.rtn = bitcast i64 %rtn to <2 x float> |
| ret <2 x float> %cast.rtn |
| } |
| |
| ; Unsigned-min atomicrmw on i64 (workgroup scope, seq_cst) with the result unused. |
| ; The pointer is %sbase + zext(%voffset); the checks expect the 64-bit atomic |
| ; without a destination register, taking its data from v[1:2]. |
| define amdgpu_ps void @global_umin_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_umin_saddr_i64_nortn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_umin_x2 v0, v[1:2], s[2:3] |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_umin_saddr_i64_nortn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_umin_x2 v0, v[1:2], s[2:3] |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_umin_saddr_i64_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_min_u64 v0, v[1:2], s[2:3] |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_umin_saddr_i64_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_min_u64 v0, v[1:2], s[2:3] scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = atomicrmw umin ptr addrspace(1) %gep0, i64 %data syncscope("workgroup") seq_cst |
| ret void |
| } |
| |
| ; Unsigned-min atomicrmw on i64 (workgroup scope, seq_cst) with the result unused, |
| ; addressed as %sbase + zext(%voffset) - 128 bytes. The checks expect the constant |
| ; part to appear as the instruction's offset:-128 immediate. |
| define amdgpu_ps void @global_umin_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GFX9-LABEL: global_umin_saddr_i64_nortn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: global_atomic_umin_x2 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_umin_saddr_i64_nortn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: global_atomic_umin_x2 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_umin_saddr_i64_nortn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_min_u64 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_umin_saddr_i64_nortn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_min_u64 v0, v[1:2], s[2:3] offset:-128 scope:SCOPE_SE |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SE |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %unused = atomicrmw umin ptr addrspace(1) %gep1, i64 %data syncscope("workgroup") seq_cst |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------------------- |
| ; cmpxchg |
| ; -------------------------------------------------------------------------------- |
| |
| ; i32 cmpxchg (no syncscope given, seq_cst/seq_cst) at %sbase + zext(%voffset); |
| ; the loaded value (field 0 of the result pair) is returned bitcast to float. |
| ; The checks expect a v_mov of v1 into v3 so the atomic's data operand is the |
| ; pair v[2:3], the returning form of the cmpswap, and on GFX12 a global_wb |
| ; ahead of the atomic. |
| define amdgpu_ps float @global_cmpxchg_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %cmp, i32 %data) { |
| ; GFX9-LABEL: global_cmpxchg_saddr_i32_rtn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: v_mov_b32_e32 v3, v1 |
| ; GFX9-NEXT: global_atomic_cmpswap v0, v0, v[2:3], s[2:3] glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_cmpxchg_saddr_i32_rtn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: v_mov_b32_e32 v3, v1 |
| ; GFX10-NEXT: global_atomic_cmpswap v0, v0, v[2:3], s[2:3] glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_cmpxchg_saddr_i32_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: v_mov_b32_e32 v3, v1 |
| ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v0, v[2:3], s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_cmpxchg_saddr_i32_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_mov_b32_e32 v3, v1 |
| ; GFX12-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX12-NEXT: global_atomic_cmpswap_b32 v0, v0, v[2:3], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %cmpxchg = cmpxchg ptr addrspace(1) %gep0, i32 %cmp, i32 %data seq_cst seq_cst |
| %rtn = extractvalue { i32, i1 } %cmpxchg, 0 |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; i32 cmpxchg (no syncscope given, seq_cst/seq_cst) at %sbase + zext(%voffset) |
| ; - 128 bytes; the loaded value (field 0 of the result pair) is returned bitcast |
| ; to float. The checks expect the returning cmpswap with data in v[2:3] and the |
| ; constant part as an offset:-128 immediate. |
| define amdgpu_ps float @global_cmpxchg_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %cmp, i32 %data) { |
| ; GFX9-LABEL: global_cmpxchg_saddr_i32_rtn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: v_mov_b32_e32 v3, v1 |
| ; GFX9-NEXT: global_atomic_cmpswap v0, v0, v[2:3], s[2:3] offset:-128 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_cmpxchg_saddr_i32_rtn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: v_mov_b32_e32 v3, v1 |
| ; GFX10-NEXT: global_atomic_cmpswap v0, v0, v[2:3], s[2:3] offset:-128 glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_cmpxchg_saddr_i32_rtn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: v_mov_b32_e32 v3, v1 |
| ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v0, v[2:3], s[2:3] offset:-128 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_cmpxchg_saddr_i32_rtn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_mov_b32_e32 v3, v1 |
| ; GFX12-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX12-NEXT: global_atomic_cmpswap_b32 v0, v0, v[2:3], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_SYS |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %cmpxchg = cmpxchg ptr addrspace(1) %gep1, i32 %cmp, i32 %data seq_cst seq_cst |
| %rtn = extractvalue { i32, i1 } %cmpxchg, 0 |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; i32 cmpxchg (no syncscope given, seq_cst/seq_cst) at %sbase + zext(%voffset) |
| ; with the result unused. The checks expect a v_mov of v1 into v3 so the atomic's |
| ; data operand is the pair v[2:3], and the cmpswap without a destination register. |
| define amdgpu_ps void @global_cmpxchg_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %cmp, i32 %data) { |
| ; GFX9-LABEL: global_cmpxchg_saddr_i32_nortn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: v_mov_b32_e32 v3, v1 |
| ; GFX9-NEXT: global_atomic_cmpswap v0, v[2:3], s[2:3] |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_cmpxchg_saddr_i32_nortn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: v_mov_b32_e32 v3, v1 |
| ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], s[2:3] |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_cmpxchg_saddr_i32_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: v_mov_b32_e32 v3, v1 |
| ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[2:3] |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_cmpxchg_saddr_i32_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_mov_b32_e32 v3, v1 |
| ; GFX12-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX12-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[2:3] scope:SCOPE_SYS |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = cmpxchg ptr addrspace(1) %gep0, i32 %cmp, i32 %data seq_cst seq_cst |
| ret void |
| } |
| |
| ; i32 cmpxchg (no syncscope given, seq_cst/seq_cst) at %sbase + zext(%voffset) |
| ; - 128 bytes with the result unused. The checks expect the cmpswap without a |
| ; destination register, data in v[2:3], and an offset:-128 immediate. |
| define amdgpu_ps void @global_cmpxchg_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %cmp, i32 %data) { |
| ; GFX9-LABEL: global_cmpxchg_saddr_i32_nortn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: v_mov_b32_e32 v3, v1 |
| ; GFX9-NEXT: global_atomic_cmpswap v0, v[2:3], s[2:3] offset:-128 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_cmpxchg_saddr_i32_nortn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: v_mov_b32_e32 v3, v1 |
| ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], s[2:3] offset:-128 |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_cmpxchg_saddr_i32_nortn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: v_mov_b32_e32 v3, v1 |
| ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[2:3] offset:-128 |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_cmpxchg_saddr_i32_nortn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_mov_b32_e32 v3, v1 |
| ; GFX12-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX12-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[2:3] offset:-128 scope:SCOPE_SYS |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %unused = cmpxchg ptr addrspace(1) %gep1, i32 %cmp, i32 %data seq_cst seq_cst |
| ret void |
| } |
| |
| ; i64 cmpxchg (no syncscope given, seq_cst/seq_cst) at %sbase + zext(%voffset); |
| ; the loaded value (field 0 of the result pair) is returned bitcast to |
| ; <2 x float>. The checks expect v2 and v1 to be copied into v6 and v5 so the |
| ; atomic's data operand is the quad v[3:6], and the returning form writing v[0:1]. |
| define amdgpu_ps <2 x float> @global_cmpxchg_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %cmp, i64 %data) { |
| ; GFX9-LABEL: global_cmpxchg_saddr_i64_rtn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: v_mov_b32_e32 v6, v2 |
| ; GFX9-NEXT: v_mov_b32_e32 v5, v1 |
| ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v0, v[3:6], s[2:3] glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_cmpxchg_saddr_i64_rtn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: v_mov_b32_e32 v6, v2 |
| ; GFX10-NEXT: v_mov_b32_e32 v5, v1 |
| ; GFX10-NEXT: global_atomic_cmpswap_x2 v[0:1], v0, v[3:6], s[2:3] glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_cmpxchg_saddr_i64_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: v_mov_b32_e32 v6, v2 |
| ; GFX11-NEXT: v_mov_b32_e32 v5, v1 |
| ; GFX11-NEXT: global_atomic_cmpswap_b64 v[0:1], v0, v[3:6], s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_cmpxchg_saddr_i64_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_mov_b32_e32 v6, v2 |
| ; GFX12-NEXT: v_mov_b32_e32 v5, v1 |
| ; GFX12-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX12-NEXT: global_atomic_cmpswap_b64 v[0:1], v0, v[3:6], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %cmpxchg = cmpxchg ptr addrspace(1) %gep0, i64 %cmp, i64 %data seq_cst seq_cst |
| %rtn = extractvalue { i64, i1 } %cmpxchg, 0 |
| %cast.rtn = bitcast i64 %rtn to <2 x float> |
| ret <2 x float> %cast.rtn |
| } |
| |
| ; i64 cmpxchg (no syncscope given, seq_cst/seq_cst) at %sbase + zext(%voffset) |
| ; - 128 bytes; the loaded value (field 0 of the result pair) is returned bitcast |
| ; to <2 x float>. The checks expect the returning cmpswap with data in v[3:6] and |
| ; the constant part as an offset:-128 immediate. |
| define amdgpu_ps <2 x float> @global_cmpxchg_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %cmp, i64 %data) { |
| ; GFX9-LABEL: global_cmpxchg_saddr_i64_rtn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: v_mov_b32_e32 v6, v2 |
| ; GFX9-NEXT: v_mov_b32_e32 v5, v1 |
| ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v0, v[3:6], s[2:3] offset:-128 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX10-LABEL: global_cmpxchg_saddr_i64_rtn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: v_mov_b32_e32 v6, v2 |
| ; GFX10-NEXT: v_mov_b32_e32 v5, v1 |
| ; GFX10-NEXT: global_atomic_cmpswap_x2 v[0:1], v0, v[3:6], s[2:3] offset:-128 glc |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_cmpxchg_saddr_i64_rtn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: v_mov_b32_e32 v6, v2 |
| ; GFX11-NEXT: v_mov_b32_e32 v5, v1 |
| ; GFX11-NEXT: global_atomic_cmpswap_b64 v[0:1], v0, v[3:6], s[2:3] offset:-128 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_cmpxchg_saddr_i64_rtn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_mov_b32_e32 v6, v2 |
| ; GFX12-NEXT: v_mov_b32_e32 v5, v1 |
| ; GFX12-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX12-NEXT: global_atomic_cmpswap_b64 v[0:1], v0, v[3:6], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_SYS |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %cmpxchg = cmpxchg ptr addrspace(1) %gep1, i64 %cmp, i64 %data seq_cst seq_cst |
| %rtn = extractvalue { i64, i1 } %cmpxchg, 0 |
| %cast.rtn = bitcast i64 %rtn to <2 x float> |
| ret <2 x float> %cast.rtn |
| } |
| |
| ; i64 cmpxchg (no syncscope given, seq_cst/seq_cst) at %sbase + zext(%voffset) |
| ; with the result unused. The checks expect v2 and v1 to be copied into v6 and v5 |
| ; so the atomic's data operand is the quad v[3:6], and the cmpswap without a |
| ; destination register. |
| define amdgpu_ps void @global_cmpxchg_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %cmp, i64 %data) { |
| ; GFX9-LABEL: global_cmpxchg_saddr_i64_nortn: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: v_mov_b32_e32 v6, v2 |
| ; GFX9-NEXT: v_mov_b32_e32 v5, v1 |
| ; GFX9-NEXT: global_atomic_cmpswap_x2 v0, v[3:6], s[2:3] |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_cmpxchg_saddr_i64_nortn: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: v_mov_b32_e32 v6, v2 |
| ; GFX10-NEXT: v_mov_b32_e32 v5, v1 |
| ; GFX10-NEXT: global_atomic_cmpswap_x2 v0, v[3:6], s[2:3] |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_cmpxchg_saddr_i64_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: v_mov_b32_e32 v6, v2 |
| ; GFX11-NEXT: v_mov_b32_e32 v5, v1 |
| ; GFX11-NEXT: global_atomic_cmpswap_b64 v0, v[3:6], s[2:3] |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_cmpxchg_saddr_i64_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_mov_b32_e32 v6, v2 |
| ; GFX12-NEXT: v_mov_b32_e32 v5, v1 |
| ; GFX12-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX12-NEXT: global_atomic_cmpswap_b64 v0, v[3:6], s[2:3] scope:SCOPE_SYS |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = cmpxchg ptr addrspace(1) %gep0, i64 %cmp, i64 %data seq_cst seq_cst |
| ret void |
| } |
| |
| ; i64 cmpxchg (no syncscope given, seq_cst/seq_cst) at %sbase + zext(%voffset) |
| ; - 128 bytes with the result unused. The checks expect the cmpswap without a |
| ; destination register, data in v[3:6], and an offset:-128 immediate. |
| define amdgpu_ps void @global_cmpxchg_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %cmp, i64 %data) { |
| ; GFX9-LABEL: global_cmpxchg_saddr_i64_nortn_neg128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: v_mov_b32_e32 v6, v2 |
| ; GFX9-NEXT: v_mov_b32_e32 v5, v1 |
| ; GFX9-NEXT: global_atomic_cmpswap_x2 v0, v[3:6], s[2:3] offset:-128 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_wbinvl1 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_cmpxchg_saddr_i64_nortn_neg128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: v_mov_b32_e32 v6, v2 |
| ; GFX10-NEXT: v_mov_b32_e32 v5, v1 |
| ; GFX10-NEXT: global_atomic_cmpswap_x2 v0, v[3:6], s[2:3] offset:-128 |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: buffer_gl1_inv |
| ; GFX10-NEXT: buffer_gl0_inv |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_cmpxchg_saddr_i64_nortn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: v_mov_b32_e32 v6, v2 |
| ; GFX11-NEXT: v_mov_b32_e32 v5, v1 |
| ; GFX11-NEXT: global_atomic_cmpswap_b64 v0, v[3:6], s[2:3] offset:-128 |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_gl1_inv |
| ; GFX11-NEXT: buffer_gl0_inv |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_cmpxchg_saddr_i64_nortn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: v_mov_b32_e32 v6, v2 |
| ; GFX12-NEXT: v_mov_b32_e32 v5, v1 |
| ; GFX12-NEXT: global_wb scope:SCOPE_SYS |
| ; GFX12-NEXT: global_atomic_cmpswap_b64 v0, v[3:6], s[2:3] offset:-128 scope:SCOPE_SYS |
| ; GFX12-NEXT: s_wait_storecnt 0x0 |
| ; GFX12-NEXT: global_inv scope:SCOPE_SYS |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %unused = cmpxchg ptr addrspace(1) %gep1, i64 %cmp, i64 %data seq_cst seq_cst |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------------------- |
| ; amdgcn atomic inc (expressed in IR as atomicrmw uinc_wrap) |
| ; -------------------------------------------------------------------------------- |
| |
| ; atomicrmw uinc_wrap on i32 (agent scope, monotonic) at %sbase + zext(%voffset); |
| ; the old value is returned bitcast to float. The checks expect the returning |
| ; atomic-inc form followed only by a wait. The gfx900 and gfx1010 runs share the |
| ; common GCN prefix here. |
| define amdgpu_ps float @global_inc_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GCN-LABEL: global_inc_saddr_i32_rtn: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: global_atomic_inc v0, v0, v1, s[2:3] glc |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_inc_saddr_i32_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_inc_u32 v0, v0, v1, s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_inc_saddr_i32_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_inc_u32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %rtn = atomicrmw uinc_wrap ptr addrspace(1) %gep0, i32 %data syncscope("agent") monotonic |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; atomicrmw uinc_wrap on i32 (agent scope, monotonic) at %sbase + zext(%voffset) |
| ; - 128 bytes; the old value is returned bitcast to float. The checks expect the |
| ; returning atomic-inc form with an offset:-128 immediate. |
| define amdgpu_ps float @global_inc_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GCN-LABEL: global_inc_saddr_i32_rtn_neg128: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: global_atomic_inc v0, v0, v1, s[2:3] offset:-128 glc |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_inc_saddr_i32_rtn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_inc_u32 v0, v0, v1, s[2:3] offset:-128 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_inc_saddr_i32_rtn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_inc_u32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %rtn = atomicrmw uinc_wrap ptr addrspace(1) %gep1, i32 %data syncscope("agent") monotonic |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; atomicrmw uinc_wrap on i32 (agent scope, monotonic) at %sbase + zext(%voffset) |
| ; with the result unused. The checks expect the atomic-inc without a destination |
| ; register, immediately followed by s_endpgm. |
| define amdgpu_ps void @global_inc_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GCN-LABEL: global_inc_saddr_i32_nortn: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: global_atomic_inc v0, v1, s[2:3] |
| ; GCN-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_inc_saddr_i32_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_inc_u32 v0, v1, s[2:3] |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_inc_saddr_i32_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_inc_u32 v0, v1, s[2:3] scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = atomicrmw uinc_wrap ptr addrspace(1) %gep0, i32 %data syncscope("agent") monotonic |
| ret void |
| } |
| |
| ; atomicrmw uinc_wrap on i32 (agent scope, monotonic) at %sbase + zext(%voffset) |
| ; - 128 bytes with the result unused. The checks expect the atomic-inc without a |
| ; destination register and with an offset:-128 immediate. |
| define amdgpu_ps void @global_inc_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GCN-LABEL: global_inc_saddr_i32_nortn_neg128: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: global_atomic_inc v0, v1, s[2:3] offset:-128 |
| ; GCN-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_inc_saddr_i32_nortn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_inc_u32 v0, v1, s[2:3] offset:-128 |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_inc_saddr_i32_nortn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_inc_u32 v0, v1, s[2:3] offset:-128 scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %unused = atomicrmw uinc_wrap ptr addrspace(1) %gep1, i32 %data syncscope("agent") monotonic |
| ret void |
| } |
| |
| ; atomicrmw uinc_wrap on i64 (agent scope, monotonic) at %sbase + zext(%voffset); |
| ; the old value is returned bitcast to <2 x float>. The checks expect the |
| ; returning 64-bit atomic-inc writing v[0:1], followed only by a wait. |
| define amdgpu_ps <2 x float> @global_inc_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GCN-LABEL: global_inc_saddr_i64_rtn: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: global_atomic_inc_x2 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_inc_saddr_i64_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_inc_u64 v[0:1], v0, v[1:2], s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_inc_saddr_i64_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_inc_u64 v[0:1], v0, v[1:2], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %rtn = atomicrmw uinc_wrap ptr addrspace(1) %gep0, i64 %data syncscope("agent") monotonic |
| %cast.rtn = bitcast i64 %rtn to <2 x float> |
| ret <2 x float> %cast.rtn |
| } |
| |
| ; atomicrmw uinc_wrap on i64 (agent scope, monotonic) at %sbase + zext(%voffset) |
| ; - 128 bytes; the old value is returned bitcast to <2 x float>. The checks |
| ; expect the returning 64-bit atomic-inc with an offset:-128 immediate. |
| define amdgpu_ps <2 x float> @global_inc_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GCN-LABEL: global_inc_saddr_i64_rtn_neg128: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: global_atomic_inc_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_inc_saddr_i64_rtn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_inc_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_inc_saddr_i64_rtn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_inc_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %rtn = atomicrmw uinc_wrap ptr addrspace(1) %gep1, i64 %data syncscope("agent") monotonic |
| %cast.rtn = bitcast i64 %rtn to <2 x float> |
| ret <2 x float> %cast.rtn |
| } |
| |
| ; atomicrmw uinc_wrap on i64 (agent scope, monotonic) at %sbase + zext(%voffset) |
| ; with the result unused. The checks expect the 64-bit atomic-inc without a |
| ; destination register, immediately followed by s_endpgm. |
| define amdgpu_ps void @global_inc_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GCN-LABEL: global_inc_saddr_i64_nortn: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: global_atomic_inc_x2 v0, v[1:2], s[2:3] |
| ; GCN-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_inc_saddr_i64_nortn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_inc_u64 v0, v[1:2], s[2:3] |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_inc_saddr_i64_nortn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_inc_u64 v0, v[1:2], s[2:3] scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %unused = atomicrmw uinc_wrap ptr addrspace(1) %gep0, i64 %data syncscope("agent") monotonic |
| ret void |
| } |
| |
| ; atomicrmw uinc_wrap on i64 (agent scope, monotonic) at %sbase + zext(%voffset) |
| ; - 128 bytes with the result unused. The checks expect the 64-bit atomic-inc |
| ; without a destination register and with an offset:-128 immediate. |
| define amdgpu_ps void @global_inc_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { |
| ; GCN-LABEL: global_inc_saddr_i64_nortn_neg128: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: global_atomic_inc_x2 v0, v[1:2], s[2:3] offset:-128 |
| ; GCN-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: global_inc_saddr_i64_nortn_neg128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_inc_u64 v0, v[1:2], s[2:3] offset:-128 |
| ; GFX11-NEXT: s_endpgm |
| ; |
| ; GFX12-LABEL: global_inc_saddr_i64_nortn_neg128: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_inc_u64 v0, v[1:2], s[2:3] offset:-128 scope:SCOPE_DEV |
| ; GFX12-NEXT: s_endpgm |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 |
| %unused = atomicrmw uinc_wrap ptr addrspace(1) %gep1, i64 %data syncscope("agent") monotonic |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------------------- |
| ; amdgcn atomic dec (expressed in IR as atomicrmw udec_wrap) |
| ; -------------------------------------------------------------------------------- |
| |
| |
| ; atomicrmw udec_wrap on i32 (agent scope, monotonic) at %sbase + zext(%voffset); |
| ; the old value is returned bitcast to float. The checks expect the returning |
| ; atomic-dec form followed only by a wait. The gfx900 and gfx1010 runs share the |
| ; common GCN prefix here. |
| define amdgpu_ps float @global_dec_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { |
| ; GCN-LABEL: global_dec_saddr_i32_rtn: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: global_atomic_dec v0, v0, v1, s[2:3] glc |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: global_dec_saddr_i32_rtn: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: global_atomic_dec_u32 v0, v0, v1, s[2:3] glc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: ; return to shader part epilog |
| ; |
| ; GFX12-LABEL: global_dec_saddr_i32_rtn: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: global_atomic_dec_u32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: ; return to shader part epilog |
| %zext.offset = zext i32 %voffset to i64 |
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset |
| %rtn = atomicrmw udec_wrap ptr addrspace(1) %gep0, i32 %data syncscope("agent") monotonic |
| %cast.rtn = bitcast i32 %rtn to float |
| ret float %cast.rtn |
| } |
| |
| ; Same as the returning i32 udec_wrap case, but the address has an extra
| ; constant byte offset of -128 applied by a second getelementptr. The checks
| ; expect that constant to appear as `offset:-128` on the single atomic
| ; instruction rather than as separate address arithmetic.
| define amdgpu_ps float @global_dec_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
| ; GCN-LABEL: global_dec_saddr_i32_rtn_neg128:
| ; GCN: ; %bb.0:
| ; GCN-NEXT: global_atomic_dec v0, v0, v1, s[2:3] offset:-128 glc
| ; GCN-NEXT: s_waitcnt vmcnt(0)
| ; GCN-NEXT: ; return to shader part epilog
| ;
| ; GFX11-LABEL: global_dec_saddr_i32_rtn_neg128:
| ; GFX11: ; %bb.0:
| ; GFX11-NEXT: global_atomic_dec_u32 v0, v0, v1, s[2:3] offset:-128 glc
| ; GFX11-NEXT: s_waitcnt vmcnt(0)
| ; GFX11-NEXT: ; return to shader part epilog
| ;
| ; GFX12-LABEL: global_dec_saddr_i32_rtn_neg128:
| ; GFX12: ; %bb.0:
| ; GFX12-NEXT: global_atomic_dec_u32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
| ; GFX12-NEXT: s_wait_loadcnt 0x0
| ; GFX12-NEXT: ; return to shader part epilog
| %zext.offset = zext i32 %voffset to i64
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
| %rtn = atomicrmw udec_wrap ptr addrspace(1) %gep1, i32 %data syncscope("agent") monotonic
| %cast.rtn = bitcast i32 %rtn to float
| ret float %cast.rtn
| }
| |
| ; i32 `atomicrmw udec_wrap` (agent scope, monotonic) on
| ; %sbase + zext(%voffset) whose result is unused. The checks expect the
| ; no-return form of the atomic (no destination register, no glc or
| ; TH_ATOMIC_RETURN modifier) immediately followed by s_endpgm.
| define amdgpu_ps void @global_dec_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
| ; GCN-LABEL: global_dec_saddr_i32_nortn:
| ; GCN: ; %bb.0:
| ; GCN-NEXT: global_atomic_dec v0, v1, s[2:3]
| ; GCN-NEXT: s_endpgm
| ;
| ; GFX11-LABEL: global_dec_saddr_i32_nortn:
| ; GFX11: ; %bb.0:
| ; GFX11-NEXT: global_atomic_dec_u32 v0, v1, s[2:3]
| ; GFX11-NEXT: s_endpgm
| ;
| ; GFX12-LABEL: global_dec_saddr_i32_nortn:
| ; GFX12: ; %bb.0:
| ; GFX12-NEXT: global_atomic_dec_u32 v0, v1, s[2:3] scope:SCOPE_DEV
| ; GFX12-NEXT: s_endpgm
| %zext.offset = zext i32 %voffset to i64
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
| %unused = atomicrmw udec_wrap ptr addrspace(1) %gep0, i32 %data syncscope("agent") monotonic
| ret void
| }
| |
| ; No-return i32 udec_wrap variant with an extra constant byte offset of -128
| ; added by a second getelementptr. The checks expect the no-return atomic
| ; with that constant carried as `offset:-128`.
| define amdgpu_ps void @global_dec_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
| ; GCN-LABEL: global_dec_saddr_i32_nortn_neg128:
| ; GCN: ; %bb.0:
| ; GCN-NEXT: global_atomic_dec v0, v1, s[2:3] offset:-128
| ; GCN-NEXT: s_endpgm
| ;
| ; GFX11-LABEL: global_dec_saddr_i32_nortn_neg128:
| ; GFX11: ; %bb.0:
| ; GFX11-NEXT: global_atomic_dec_u32 v0, v1, s[2:3] offset:-128
| ; GFX11-NEXT: s_endpgm
| ;
| ; GFX12-LABEL: global_dec_saddr_i32_nortn_neg128:
| ; GFX12: ; %bb.0:
| ; GFX12-NEXT: global_atomic_dec_u32 v0, v1, s[2:3] offset:-128 scope:SCOPE_DEV
| ; GFX12-NEXT: s_endpgm
| %zext.offset = zext i32 %voffset to i64
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
| %unused = atomicrmw udec_wrap ptr addrspace(1) %gep1, i32 %data syncscope("agent") monotonic
| ret void
| }
| |
| ; Returning i64 `atomicrmw udec_wrap` (agent scope, monotonic) on
| ; %sbase + zext(%voffset). The checks expect the 64-bit atomic
| ; (global_atomic_dec_x2, or global_atomic_dec_u64 under the GFX11 and GFX12
| ; prefixes) with the data operand in v[1:2] and the result in v[0:1]; the
| ; i64 result is returned bitcast to <2 x float>.
| define amdgpu_ps <2 x float> @global_dec_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
| ; GCN-LABEL: global_dec_saddr_i64_rtn:
| ; GCN: ; %bb.0:
| ; GCN-NEXT: global_atomic_dec_x2 v[0:1], v0, v[1:2], s[2:3] glc
| ; GCN-NEXT: s_waitcnt vmcnt(0)
| ; GCN-NEXT: ; return to shader part epilog
| ;
| ; GFX11-LABEL: global_dec_saddr_i64_rtn:
| ; GFX11: ; %bb.0:
| ; GFX11-NEXT: global_atomic_dec_u64 v[0:1], v0, v[1:2], s[2:3] glc
| ; GFX11-NEXT: s_waitcnt vmcnt(0)
| ; GFX11-NEXT: ; return to shader part epilog
| ;
| ; GFX12-LABEL: global_dec_saddr_i64_rtn:
| ; GFX12: ; %bb.0:
| ; GFX12-NEXT: global_atomic_dec_u64 v[0:1], v0, v[1:2], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
| ; GFX12-NEXT: s_wait_loadcnt 0x0
| ; GFX12-NEXT: ; return to shader part epilog
| %zext.offset = zext i32 %voffset to i64
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
| %rtn = atomicrmw udec_wrap ptr addrspace(1) %gep0, i64 %data syncscope("agent") monotonic
| %cast.rtn = bitcast i64 %rtn to <2 x float>
| ret <2 x float> %cast.rtn
| }
| |
| ; Returning i64 udec_wrap variant with an extra constant byte offset of -128
| ; added by a second getelementptr. The checks expect the 64-bit returning
| ; atomic with that constant carried as `offset:-128`.
| define amdgpu_ps <2 x float> @global_dec_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
| ; GCN-LABEL: global_dec_saddr_i64_rtn_neg128:
| ; GCN: ; %bb.0:
| ; GCN-NEXT: global_atomic_dec_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
| ; GCN-NEXT: s_waitcnt vmcnt(0)
| ; GCN-NEXT: ; return to shader part epilog
| ;
| ; GFX11-LABEL: global_dec_saddr_i64_rtn_neg128:
| ; GFX11: ; %bb.0:
| ; GFX11-NEXT: global_atomic_dec_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc
| ; GFX11-NEXT: s_waitcnt vmcnt(0)
| ; GFX11-NEXT: ; return to shader part epilog
| ;
| ; GFX12-LABEL: global_dec_saddr_i64_rtn_neg128:
| ; GFX12: ; %bb.0:
| ; GFX12-NEXT: global_atomic_dec_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
| ; GFX12-NEXT: s_wait_loadcnt 0x0
| ; GFX12-NEXT: ; return to shader part epilog
| %zext.offset = zext i32 %voffset to i64
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
| %rtn = atomicrmw udec_wrap ptr addrspace(1) %gep1, i64 %data syncscope("agent") monotonic
| %cast.rtn = bitcast i64 %rtn to <2 x float>
| ret <2 x float> %cast.rtn
| }
| |
| ; i64 `atomicrmw udec_wrap` (agent scope, monotonic) on
| ; %sbase + zext(%voffset) whose result is unused. The checks expect the
| ; no-return 64-bit atomic with the data operand in v[1:2], immediately
| ; followed by s_endpgm.
| define amdgpu_ps void @global_dec_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
| ; GCN-LABEL: global_dec_saddr_i64_nortn:
| ; GCN: ; %bb.0:
| ; GCN-NEXT: global_atomic_dec_x2 v0, v[1:2], s[2:3]
| ; GCN-NEXT: s_endpgm
| ;
| ; GFX11-LABEL: global_dec_saddr_i64_nortn:
| ; GFX11: ; %bb.0:
| ; GFX11-NEXT: global_atomic_dec_u64 v0, v[1:2], s[2:3]
| ; GFX11-NEXT: s_endpgm
| ;
| ; GFX12-LABEL: global_dec_saddr_i64_nortn:
| ; GFX12: ; %bb.0:
| ; GFX12-NEXT: global_atomic_dec_u64 v0, v[1:2], s[2:3] scope:SCOPE_DEV
| ; GFX12-NEXT: s_endpgm
| %zext.offset = zext i32 %voffset to i64
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
| %unused = atomicrmw udec_wrap ptr addrspace(1) %gep0, i64 %data syncscope("agent") monotonic
| ret void
| }
| |
| ; No-return i64 udec_wrap variant with an extra constant byte offset of -128
| ; added by a second getelementptr. The checks expect the no-return 64-bit
| ; atomic with that constant carried as `offset:-128`.
| define amdgpu_ps void @global_dec_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
| ; GCN-LABEL: global_dec_saddr_i64_nortn_neg128:
| ; GCN: ; %bb.0:
| ; GCN-NEXT: global_atomic_dec_x2 v0, v[1:2], s[2:3] offset:-128
| ; GCN-NEXT: s_endpgm
| ;
| ; GFX11-LABEL: global_dec_saddr_i64_nortn_neg128:
| ; GFX11: ; %bb.0:
| ; GFX11-NEXT: global_atomic_dec_u64 v0, v[1:2], s[2:3] offset:-128
| ; GFX11-NEXT: s_endpgm
| ;
| ; GFX12-LABEL: global_dec_saddr_i64_nortn_neg128:
| ; GFX12: ; %bb.0:
| ; GFX12-NEXT: global_atomic_dec_u64 v0, v[1:2], s[2:3] offset:-128 scope:SCOPE_DEV
| ; GFX12-NEXT: s_endpgm
| %zext.offset = zext i32 %voffset to i64
| %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
| %unused = atomicrmw udec_wrap ptr addrspace(1) %gep1, i64 %data syncscope("agent") monotonic
| ret void
| }
| |
| ; Attribute group #0.
| ; NOTE(review): none of the definitions visible near the end of this file
| ; reference #0 — confirm it is still used elsewhere before keeping it.
| ; NOTE(review): `argmemonly` looks like the older spelling that predates the
| ; `memory(...)` attribute — presumably still accepted by the parser; verify.
| attributes #0 = { argmemonly nounwind willreturn }