; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel=0 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=0 < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10-SDAG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10-GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -global-isel=0 < %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-SDAG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-GISEL %s
; Atomic store of <2 x float> through an addrspace(1) pointer with
; syncscope("agent"), monotonic ordering and align 8. Every target is expected
; to emit just one global_store_dwordx2 / global_store_b64 followed by s_endpgm;
; only the gfx1200 store carries scope:SCOPE_DEV.
define amdgpu_cs void @atomic_store_f32x2_monotonic_agent(<2 x float> %in, ptr addrspace(1) %out) {
; GFX9-LABEL: atomic_store_f32x2_monotonic_agent:
; GFX9: ; %bb.0:
; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_f32x2_monotonic_agent:
; GFX10: ; %bb.0:
; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_f32x2_monotonic_agent:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_f32x2_monotonic_agent:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v[2:3], v[0:1], off scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
store atomic <2 x float> %in, ptr addrspace(1) %out syncscope("agent") monotonic, align 8
ret void
}
; Atomic store of <2 x float> through an addrspace(1) pointer with
; syncscope("agent"), seq_cst ordering and align 8. The checks expect the store
; to be the first instruction of the block and to be followed directly by
; s_endpgm; only the gfx1200 store carries scope:SCOPE_DEV.
define amdgpu_cs void @atomic_store_f32x2_seq_cst_agent(<2 x float> %in, ptr addrspace(1) %out) {
; GFX9-LABEL: atomic_store_f32x2_seq_cst_agent:
; GFX9: ; %bb.0:
; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_f32x2_seq_cst_agent:
; GFX10: ; %bb.0:
; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_f32x2_seq_cst_agent:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_f32x2_seq_cst_agent:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v[2:3], v[0:1], off scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
store atomic <2 x float> %in, ptr addrspace(1) %out syncscope("agent") seq_cst, align 8
ret void
}
; Atomic store of <2 x float> through an addrspace(1) pointer with
; syncscope("wavefront"), seq_cst ordering and align 8. Every target is expected
; to emit a single 2-dword global store; at this scope the gfx1200 store has no
; scope: modifier.
define amdgpu_cs void @atomic_store_f32x2_seq_cst_wavefront(<2 x float> %in, ptr addrspace(1) %out) {
; GFX9-LABEL: atomic_store_f32x2_seq_cst_wavefront:
; GFX9: ; %bb.0:
; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_f32x2_seq_cst_wavefront:
; GFX10: ; %bb.0:
; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_f32x2_seq_cst_wavefront:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_f32x2_seq_cst_wavefront:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v[2:3], v[0:1], off
; GFX12-NEXT: s_endpgm
store atomic <2 x float> %in, ptr addrspace(1) %out syncscope("wavefront") seq_cst, align 8
ret void
}
; Atomic store of <2 x half> through an addrspace(1) pointer with
; syncscope("agent"), monotonic ordering and align 4. The value arrives in v0
; and is expected to be written with one global_store_dword / global_store_b32;
; only the gfx1200 store carries scope:SCOPE_DEV.
define amdgpu_cs void @atomic_store_f16x2_monotonic_agent(<2 x half> %in, ptr addrspace(1) %out) {
; GFX9-LABEL: atomic_store_f16x2_monotonic_agent:
; GFX9: ; %bb.0:
; GFX9-NEXT: global_store_dword v[1:2], v0, off
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_f16x2_monotonic_agent:
; GFX10: ; %bb.0:
; GFX10-NEXT: global_store_dword v[1:2], v0, off
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_f16x2_monotonic_agent:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b32 v[1:2], v0, off
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_f16x2_monotonic_agent:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b32 v[1:2], v0, off scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
store atomic <2 x half> %in, ptr addrspace(1) %out syncscope("agent") monotonic, align 4
ret void
}
; Atomic store of <2 x half> through an addrspace(1) pointer with
; syncscope("agent"), seq_cst ordering and align 4. The checks expect a single
; one-dword global store immediately followed by s_endpgm; only the gfx1200
; store carries scope:SCOPE_DEV.
define amdgpu_cs void @atomic_store_f16x2_seq_cst_agent(<2 x half> %in, ptr addrspace(1) %out) {
; GFX9-LABEL: atomic_store_f16x2_seq_cst_agent:
; GFX9: ; %bb.0:
; GFX9-NEXT: global_store_dword v[1:2], v0, off
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_f16x2_seq_cst_agent:
; GFX10: ; %bb.0:
; GFX10-NEXT: global_store_dword v[1:2], v0, off
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_f16x2_seq_cst_agent:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b32 v[1:2], v0, off
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_f16x2_seq_cst_agent:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b32 v[1:2], v0, off scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
store atomic <2 x half> %in, ptr addrspace(1) %out syncscope("agent") seq_cst, align 4
ret void
}
; Atomic store of <2 x half> through an addrspace(1) pointer with
; syncscope("wavefront"), monotonic ordering and align 4. Every target is
; expected to emit a single one-dword global store; at this scope the gfx1200
; store has no scope: modifier.
define amdgpu_cs void @atomic_store_f16x2_monotonic_wavefront(<2 x half> %in, ptr addrspace(1) %out) {
; GFX9-LABEL: atomic_store_f16x2_monotonic_wavefront:
; GFX9: ; %bb.0:
; GFX9-NEXT: global_store_dword v[1:2], v0, off
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_f16x2_monotonic_wavefront:
; GFX10: ; %bb.0:
; GFX10-NEXT: global_store_dword v[1:2], v0, off
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_f16x2_monotonic_wavefront:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b32 v[1:2], v0, off
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_f16x2_monotonic_wavefront:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b32 v[1:2], v0, off
; GFX12-NEXT: s_endpgm
store atomic <2 x half> %in, ptr addrspace(1) %out syncscope("wavefront") monotonic, align 4
ret void
}
; Atomic store of <2 x i16> through an addrspace(1) pointer with
; syncscope("agent"), monotonic ordering and align 4. The value arrives in v0
; and is expected to be written with one global_store_dword / global_store_b32;
; only the gfx1200 store carries scope:SCOPE_DEV.
define amdgpu_cs void @atomic_store_i16x2_monotonic_agent(<2 x i16> %in, ptr addrspace(1) %out) {
; GFX9-LABEL: atomic_store_i16x2_monotonic_agent:
; GFX9: ; %bb.0:
; GFX9-NEXT: global_store_dword v[1:2], v0, off
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_i16x2_monotonic_agent:
; GFX10: ; %bb.0:
; GFX10-NEXT: global_store_dword v[1:2], v0, off
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_i16x2_monotonic_agent:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b32 v[1:2], v0, off
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_i16x2_monotonic_agent:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b32 v[1:2], v0, off scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
store atomic <2 x i16> %in, ptr addrspace(1) %out syncscope("agent") monotonic, align 4
ret void
}
; Atomic store of <2 x i16> through an addrspace(1) pointer with
; syncscope("agent"), seq_cst ordering and align 4. The checks expect a single
; one-dword global store immediately followed by s_endpgm; only the gfx1200
; store carries scope:SCOPE_DEV.
define amdgpu_cs void @atomic_store_i16x2_seq_cst_agent(<2 x i16> %in, ptr addrspace(1) %out) {
; GFX9-LABEL: atomic_store_i16x2_seq_cst_agent:
; GFX9: ; %bb.0:
; GFX9-NEXT: global_store_dword v[1:2], v0, off
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_i16x2_seq_cst_agent:
; GFX10: ; %bb.0:
; GFX10-NEXT: global_store_dword v[1:2], v0, off
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_i16x2_seq_cst_agent:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b32 v[1:2], v0, off
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_i16x2_seq_cst_agent:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b32 v[1:2], v0, off scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
store atomic <2 x i16> %in, ptr addrspace(1) %out syncscope("agent") seq_cst, align 4
ret void
}
; Atomic store of <2 x i16> through an addrspace(1) pointer with
; syncscope("wavefront"), monotonic ordering and align 4. Every target is
; expected to emit a single one-dword global store; at this scope the gfx1200
; store has no scope: modifier.
define amdgpu_cs void @atomic_store_i16x2_monotonic_wavefront(<2 x i16> %in, ptr addrspace(1) %out) {
; GFX9-LABEL: atomic_store_i16x2_monotonic_wavefront:
; GFX9: ; %bb.0:
; GFX9-NEXT: global_store_dword v[1:2], v0, off
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_i16x2_monotonic_wavefront:
; GFX10: ; %bb.0:
; GFX10-NEXT: global_store_dword v[1:2], v0, off
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_i16x2_monotonic_wavefront:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b32 v[1:2], v0, off
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_i16x2_monotonic_wavefront:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b32 v[1:2], v0, off
; GFX12-NEXT: s_endpgm
store atomic <2 x i16> %in, ptr addrspace(1) %out syncscope("wavefront") monotonic, align 4
ret void
}
; Atomic store of <4 x half> through an addrspace(1) pointer with
; syncscope("agent"), monotonic ordering and align 8. The value arrives in
; v[0:1] and is expected to be written with one global_store_dwordx2 /
; global_store_b64; only the gfx1200 store carries scope:SCOPE_DEV.
define amdgpu_cs void @atomic_store_f16x4_monotonic_agent(<4 x half> %in, ptr addrspace(1) %out) {
; GFX9-LABEL: atomic_store_f16x4_monotonic_agent:
; GFX9: ; %bb.0:
; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_f16x4_monotonic_agent:
; GFX10: ; %bb.0:
; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_f16x4_monotonic_agent:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_f16x4_monotonic_agent:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v[2:3], v[0:1], off scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
store atomic <4 x half> %in, ptr addrspace(1) %out syncscope("agent") monotonic, align 8
ret void
}
; Atomic store of <4 x half> through an addrspace(1) pointer with
; syncscope("agent"), seq_cst ordering and align 8. The checks expect a single
; 2-dword global store immediately followed by s_endpgm; only the gfx1200 store
; carries scope:SCOPE_DEV.
define amdgpu_cs void @atomic_store_f16x4_seq_cst_agent(<4 x half> %in, ptr addrspace(1) %out) {
; GFX9-LABEL: atomic_store_f16x4_seq_cst_agent:
; GFX9: ; %bb.0:
; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_f16x4_seq_cst_agent:
; GFX10: ; %bb.0:
; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_f16x4_seq_cst_agent:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_f16x4_seq_cst_agent:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v[2:3], v[0:1], off scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
store atomic <4 x half> %in, ptr addrspace(1) %out syncscope("agent") seq_cst, align 8
ret void
}
; Atomic store of <4 x half> through an addrspace(1) pointer with
; syncscope("wavefront"), monotonic ordering and align 8. Every target is
; expected to emit a single 2-dword global store; at this scope the gfx1200
; store has no scope: modifier.
define amdgpu_cs void @atomic_store_f16x4_monotonic_wavefront(<4 x half> %in, ptr addrspace(1) %out) {
; GFX9-LABEL: atomic_store_f16x4_monotonic_wavefront:
; GFX9: ; %bb.0:
; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_f16x4_monotonic_wavefront:
; GFX10: ; %bb.0:
; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_f16x4_monotonic_wavefront:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_f16x4_monotonic_wavefront:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v[2:3], v[0:1], off
; GFX12-NEXT: s_endpgm
store atomic <4 x half> %in, ptr addrspace(1) %out syncscope("wavefront") monotonic, align 8
ret void
}
; Atomic store of <4 x i16> through an addrspace(1) pointer with
; syncscope("agent"), monotonic ordering and align 8. The value arrives in
; v[0:1] and is expected to be written with one global_store_dwordx2 /
; global_store_b64; only the gfx1200 store carries scope:SCOPE_DEV.
define amdgpu_cs void @atomic_store_i16x4_monotonic_agent(<4 x i16> %in, ptr addrspace(1) %out) {
; GFX9-LABEL: atomic_store_i16x4_monotonic_agent:
; GFX9: ; %bb.0:
; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_i16x4_monotonic_agent:
; GFX10: ; %bb.0:
; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_i16x4_monotonic_agent:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_i16x4_monotonic_agent:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v[2:3], v[0:1], off scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
store atomic <4 x i16> %in, ptr addrspace(1) %out syncscope("agent") monotonic, align 8
ret void
}
; Atomic store of <4 x i16> through an addrspace(1) pointer with
; syncscope("agent"), seq_cst ordering and align 8. The checks expect a single
; 2-dword global store immediately followed by s_endpgm; only the gfx1200 store
; carries scope:SCOPE_DEV.
define amdgpu_cs void @atomic_store_i16x4_seq_cst_agent(<4 x i16> %in, ptr addrspace(1) %out) {
; GFX9-LABEL: atomic_store_i16x4_seq_cst_agent:
; GFX9: ; %bb.0:
; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_i16x4_seq_cst_agent:
; GFX10: ; %bb.0:
; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_i16x4_seq_cst_agent:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_i16x4_seq_cst_agent:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v[2:3], v[0:1], off scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
store atomic <4 x i16> %in, ptr addrspace(1) %out syncscope("agent") seq_cst, align 8
ret void
}
; Atomic store of <4 x i16> through an addrspace(1) pointer with
; syncscope("wavefront"), monotonic ordering and align 8. Every target is
; expected to emit a single 2-dword global store; at this scope the gfx1200
; store has no scope: modifier.
define amdgpu_cs void @atomic_store_i16x4_monotonic_wavefront(<4 x i16> %in, ptr addrspace(1) %out) {
; GFX9-LABEL: atomic_store_i16x4_monotonic_wavefront:
; GFX9: ; %bb.0:
; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_i16x4_monotonic_wavefront:
; GFX10: ; %bb.0:
; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_i16x4_monotonic_wavefront:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_i16x4_monotonic_wavefront:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v[2:3], v[0:1], off
; GFX12-NEXT: s_endpgm
store atomic <4 x i16> %in, ptr addrspace(1) %out syncscope("wavefront") monotonic, align 8
ret void
}
; Agent-scope monotonic atomic store of <4 x i16> to %out displaced by a byte
; offset of -4096. On gfx900, gfx1100 and gfx1200 the checks expect the
; displacement to be folded into the store as offset:-4096. On gfx1010 the
; checks instead expect a 64-bit add (v_add_co_u32 / v_add_co_ci_u32) on the
; address pair, followed by a store with no immediate offset.
define amdgpu_cs void @atomic_store_i16x4_monotonic_agent_offset_min(<4 x i16> %in, ptr addrspace(1) %out) {
; GFX9-LABEL: atomic_store_i16x4_monotonic_agent_offset_min:
; GFX9: ; %bb.0:
; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off offset:-4096
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_i16x4_monotonic_agent_offset_min:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, 0xfffff000, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, -1, v3, vcc_lo
; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_i16x4_monotonic_agent_offset_min:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off offset:-4096
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_i16x4_monotonic_agent_offset_min:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v[2:3], v[0:1], off offset:-4096 scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
%gep = getelementptr inbounds i8, ptr addrspace(1) %out, i64 -4096
store atomic <4 x i16> %in, ptr addrspace(1) %gep syncscope("agent") monotonic, align 8
ret void
}
; Agent-scope monotonic atomic store of <4 x i16> to %out displaced by a byte
; offset of 4088. On gfx900, gfx1100 and gfx1200 the checks expect the
; displacement to be folded into the store as offset:4088. On gfx1010 the two
; instruction selectors differ: with -global-isel=0 the address is advanced by
; 0x800 and the store keeps offset:2040, while with -global-isel=1 the full
; 0xff8 is added and the store has no immediate offset.
define amdgpu_cs void @atomic_store_i16x4_monotonic_agent_offset_max(<4 x i16> %in, ptr addrspace(1) %out) {
; GFX9-LABEL: atomic_store_i16x4_monotonic_agent_offset_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off offset:4088
; GFX9-NEXT: s_endpgm
;
; GFX10-SDAG-LABEL: atomic_store_i16x4_monotonic_agent_offset_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: v_add_co_u32 v2, vcc_lo, 0x800, v2
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[0:1], off offset:2040
; GFX10-SDAG-NEXT: s_endpgm
;
; GFX10-GISEL-LABEL: atomic_store_i16x4_monotonic_agent_offset_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: v_add_co_u32 v2, vcc_lo, 0xff8, v2
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; GFX10-GISEL-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_i16x4_monotonic_agent_offset_max:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off offset:4088
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_i16x4_monotonic_agent_offset_max:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v[2:3], v[0:1], off offset:4088 scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
%gep = getelementptr inbounds i8, ptr addrspace(1) %out, i64 4088
store atomic <4 x i16> %in, ptr addrspace(1) %gep syncscope("agent") monotonic, align 8
ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
; GFX11-GISEL: {{.*}}
; GFX11-SDAG: {{.*}}
; GFX12-GISEL: {{.*}}
; GFX12-SDAG: {{.*}}
; GFX9-GISEL: {{.*}}
; GFX9-SDAG: {{.*}}