; blob: 0d2f48d886f2db6421e4b0bed592737caaffcad5 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel=0 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=0 < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10-SDAG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=1 < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10-GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -global-isel=0 < %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-SDAG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -global-isel=1 < %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-GISEL %s
; Atomic store of a <2 x float> value through a ptr addrspace(3) pointer,
; monotonic ordering, syncscope("agent"), align 8.
; Every target's assertions expect exactly one 64-bit DS store of v[0:1] to the
; address in v2 (ds_write_b64 on gfx900/gfx1010, ds_store_b64 on
; gfx1100/gfx1200), immediately followed by s_endpgm.
define amdgpu_cs void @atomic_store_f32x2_monotonic_agent(<2 x float> %in, ptr addrspace(3) %out) {
; GFX9-LABEL: atomic_store_f32x2_monotonic_agent:
; GFX9: ; %bb.0:
; GFX9-NEXT: ds_write_b64 v2, v[0:1]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_f32x2_monotonic_agent:
; GFX10: ; %bb.0:
; GFX10-NEXT: ds_write_b64 v2, v[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_f32x2_monotonic_agent:
; GFX11: ; %bb.0:
; GFX11-NEXT: ds_store_b64 v2, v[0:1]
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_f32x2_monotonic_agent:
; GFX12: ; %bb.0:
; GFX12-NEXT: ds_store_b64 v2, v[0:1]
; GFX12-NEXT: s_endpgm
store atomic <2 x float> %in, ptr addrspace(3) %out syncscope("agent") monotonic, align 8
ret void
}
; Atomic store of a <2 x float> value through a ptr addrspace(3) pointer,
; seq_cst ordering, syncscope("agent"), align 8.
; The assertions expect a single 64-bit DS store of v[0:1] to the address in v2
; (ds_write_b64 on gfx900/gfx1010, ds_store_b64 on gfx1100/gfx1200) with no
; other instruction between the block label and s_endpgm.
define amdgpu_cs void @atomic_store_f32x2_seq_cst_agent(<2 x float> %in, ptr addrspace(3) %out) {
; GFX9-LABEL: atomic_store_f32x2_seq_cst_agent:
; GFX9: ; %bb.0:
; GFX9-NEXT: ds_write_b64 v2, v[0:1]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_f32x2_seq_cst_agent:
; GFX10: ; %bb.0:
; GFX10-NEXT: ds_write_b64 v2, v[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_f32x2_seq_cst_agent:
; GFX11: ; %bb.0:
; GFX11-NEXT: ds_store_b64 v2, v[0:1]
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_f32x2_seq_cst_agent:
; GFX12: ; %bb.0:
; GFX12-NEXT: ds_store_b64 v2, v[0:1]
; GFX12-NEXT: s_endpgm
store atomic <2 x float> %in, ptr addrspace(3) %out syncscope("agent") seq_cst, align 8
ret void
}
; Atomic store of a <2 x float> value through a ptr addrspace(3) pointer,
; seq_cst ordering, syncscope("wavefront"), align 8.
; The assertions expect a single 64-bit DS store of v[0:1] to the address in v2
; (ds_write_b64 on gfx900/gfx1010, ds_store_b64 on gfx1100/gfx1200) with no
; other instruction between the block label and s_endpgm.
define amdgpu_cs void @atomic_store_f32x2_seq_cst_wavefront(<2 x float> %in, ptr addrspace(3) %out) {
; GFX9-LABEL: atomic_store_f32x2_seq_cst_wavefront:
; GFX9: ; %bb.0:
; GFX9-NEXT: ds_write_b64 v2, v[0:1]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_f32x2_seq_cst_wavefront:
; GFX10: ; %bb.0:
; GFX10-NEXT: ds_write_b64 v2, v[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_f32x2_seq_cst_wavefront:
; GFX11: ; %bb.0:
; GFX11-NEXT: ds_store_b64 v2, v[0:1]
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_f32x2_seq_cst_wavefront:
; GFX12: ; %bb.0:
; GFX12-NEXT: ds_store_b64 v2, v[0:1]
; GFX12-NEXT: s_endpgm
store atomic <2 x float> %in, ptr addrspace(3) %out syncscope("wavefront") seq_cst, align 8
ret void
}
; Atomic store of a <2 x half> value through a ptr addrspace(3) pointer,
; monotonic ordering, syncscope("agent"), align 4.
; The assertions expect exactly one 32-bit DS store of v0 to the address in v1
; (ds_write_b32 on gfx900/gfx1010, ds_store_b32 on gfx1100/gfx1200),
; immediately followed by s_endpgm.
define amdgpu_cs void @atomic_store_f16x2_monotonic_agent(<2 x half> %in, ptr addrspace(3) %out) {
; GFX9-LABEL: atomic_store_f16x2_monotonic_agent:
; GFX9: ; %bb.0:
; GFX9-NEXT: ds_write_b32 v1, v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_f16x2_monotonic_agent:
; GFX10: ; %bb.0:
; GFX10-NEXT: ds_write_b32 v1, v0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_f16x2_monotonic_agent:
; GFX11: ; %bb.0:
; GFX11-NEXT: ds_store_b32 v1, v0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_f16x2_monotonic_agent:
; GFX12: ; %bb.0:
; GFX12-NEXT: ds_store_b32 v1, v0
; GFX12-NEXT: s_endpgm
store atomic <2 x half> %in, ptr addrspace(3) %out syncscope("agent") monotonic, align 4
ret void
}
; Atomic store of a <2 x half> value through a ptr addrspace(3) pointer,
; seq_cst ordering, syncscope("agent"), align 4.
; The assertions expect a single 32-bit DS store of v0 to the address in v1
; (ds_write_b32 on gfx900/gfx1010, ds_store_b32 on gfx1100/gfx1200) with no
; other instruction between the block label and s_endpgm.
define amdgpu_cs void @atomic_store_f16x2_seq_cst_agent(<2 x half> %in, ptr addrspace(3) %out) {
; GFX9-LABEL: atomic_store_f16x2_seq_cst_agent:
; GFX9: ; %bb.0:
; GFX9-NEXT: ds_write_b32 v1, v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_f16x2_seq_cst_agent:
; GFX10: ; %bb.0:
; GFX10-NEXT: ds_write_b32 v1, v0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_f16x2_seq_cst_agent:
; GFX11: ; %bb.0:
; GFX11-NEXT: ds_store_b32 v1, v0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_f16x2_seq_cst_agent:
; GFX12: ; %bb.0:
; GFX12-NEXT: ds_store_b32 v1, v0
; GFX12-NEXT: s_endpgm
store atomic <2 x half> %in, ptr addrspace(3) %out syncscope("agent") seq_cst, align 4
ret void
}
; Atomic store of a <2 x half> value through a ptr addrspace(3) pointer,
; monotonic ordering, syncscope("wavefront"), align 4.
; The assertions expect exactly one 32-bit DS store of v0 to the address in v1
; (ds_write_b32 on gfx900/gfx1010, ds_store_b32 on gfx1100/gfx1200),
; immediately followed by s_endpgm.
define amdgpu_cs void @atomic_store_f16x2_monotonic_wavefront(<2 x half> %in, ptr addrspace(3) %out) {
; GFX9-LABEL: atomic_store_f16x2_monotonic_wavefront:
; GFX9: ; %bb.0:
; GFX9-NEXT: ds_write_b32 v1, v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_f16x2_monotonic_wavefront:
; GFX10: ; %bb.0:
; GFX10-NEXT: ds_write_b32 v1, v0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_f16x2_monotonic_wavefront:
; GFX11: ; %bb.0:
; GFX11-NEXT: ds_store_b32 v1, v0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_f16x2_monotonic_wavefront:
; GFX12: ; %bb.0:
; GFX12-NEXT: ds_store_b32 v1, v0
; GFX12-NEXT: s_endpgm
store atomic <2 x half> %in, ptr addrspace(3) %out syncscope("wavefront") monotonic, align 4
ret void
}
; Atomic store of a <2 x i16> value through a ptr addrspace(3) pointer,
; monotonic ordering, syncscope("agent"), align 4.
; The assertions expect exactly one 32-bit DS store of v0 to the address in v1
; (ds_write_b32 on gfx900/gfx1010, ds_store_b32 on gfx1100/gfx1200),
; immediately followed by s_endpgm.
define amdgpu_cs void @atomic_store_i16x2_monotonic_agent(<2 x i16> %in, ptr addrspace(3) %out) {
; GFX9-LABEL: atomic_store_i16x2_monotonic_agent:
; GFX9: ; %bb.0:
; GFX9-NEXT: ds_write_b32 v1, v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_i16x2_monotonic_agent:
; GFX10: ; %bb.0:
; GFX10-NEXT: ds_write_b32 v1, v0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_i16x2_monotonic_agent:
; GFX11: ; %bb.0:
; GFX11-NEXT: ds_store_b32 v1, v0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_i16x2_monotonic_agent:
; GFX12: ; %bb.0:
; GFX12-NEXT: ds_store_b32 v1, v0
; GFX12-NEXT: s_endpgm
store atomic <2 x i16> %in, ptr addrspace(3) %out syncscope("agent") monotonic, align 4
ret void
}
; Atomic store of a <2 x i16> value through a ptr addrspace(3) pointer,
; seq_cst ordering, syncscope("agent"), align 4.
; The assertions expect a single 32-bit DS store of v0 to the address in v1
; (ds_write_b32 on gfx900/gfx1010, ds_store_b32 on gfx1100/gfx1200) with no
; other instruction between the block label and s_endpgm.
define amdgpu_cs void @atomic_store_i16x2_seq_cst_agent(<2 x i16> %in, ptr addrspace(3) %out) {
; GFX9-LABEL: atomic_store_i16x2_seq_cst_agent:
; GFX9: ; %bb.0:
; GFX9-NEXT: ds_write_b32 v1, v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_i16x2_seq_cst_agent:
; GFX10: ; %bb.0:
; GFX10-NEXT: ds_write_b32 v1, v0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_i16x2_seq_cst_agent:
; GFX11: ; %bb.0:
; GFX11-NEXT: ds_store_b32 v1, v0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_i16x2_seq_cst_agent:
; GFX12: ; %bb.0:
; GFX12-NEXT: ds_store_b32 v1, v0
; GFX12-NEXT: s_endpgm
store atomic <2 x i16> %in, ptr addrspace(3) %out syncscope("agent") seq_cst, align 4
ret void
}
; Atomic store of a <2 x i16> value through a ptr addrspace(3) pointer,
; monotonic ordering, syncscope("wavefront"), align 4.
; The assertions expect exactly one 32-bit DS store of v0 to the address in v1
; (ds_write_b32 on gfx900/gfx1010, ds_store_b32 on gfx1100/gfx1200),
; immediately followed by s_endpgm.
define amdgpu_cs void @atomic_store_i16x2_monotonic_wavefront(<2 x i16> %in, ptr addrspace(3) %out) {
; GFX9-LABEL: atomic_store_i16x2_monotonic_wavefront:
; GFX9: ; %bb.0:
; GFX9-NEXT: ds_write_b32 v1, v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_i16x2_monotonic_wavefront:
; GFX10: ; %bb.0:
; GFX10-NEXT: ds_write_b32 v1, v0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_i16x2_monotonic_wavefront:
; GFX11: ; %bb.0:
; GFX11-NEXT: ds_store_b32 v1, v0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_i16x2_monotonic_wavefront:
; GFX12: ; %bb.0:
; GFX12-NEXT: ds_store_b32 v1, v0
; GFX12-NEXT: s_endpgm
store atomic <2 x i16> %in, ptr addrspace(3) %out syncscope("wavefront") monotonic, align 4
ret void
}
; Atomic store of a <4 x half> value through a ptr addrspace(3) pointer,
; monotonic ordering, syncscope("agent"), align 8.
; The assertions expect exactly one 64-bit DS store of v[0:1] to the address in
; v2 (ds_write_b64 on gfx900/gfx1010, ds_store_b64 on gfx1100/gfx1200),
; immediately followed by s_endpgm.
define amdgpu_cs void @atomic_store_f16x4_monotonic_agent(<4 x half> %in, ptr addrspace(3) %out) {
; GFX9-LABEL: atomic_store_f16x4_monotonic_agent:
; GFX9: ; %bb.0:
; GFX9-NEXT: ds_write_b64 v2, v[0:1]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_f16x4_monotonic_agent:
; GFX10: ; %bb.0:
; GFX10-NEXT: ds_write_b64 v2, v[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_f16x4_monotonic_agent:
; GFX11: ; %bb.0:
; GFX11-NEXT: ds_store_b64 v2, v[0:1]
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_f16x4_monotonic_agent:
; GFX12: ; %bb.0:
; GFX12-NEXT: ds_store_b64 v2, v[0:1]
; GFX12-NEXT: s_endpgm
store atomic <4 x half> %in, ptr addrspace(3) %out syncscope("agent") monotonic, align 8
ret void
}
; Atomic store of a <4 x half> value through a ptr addrspace(3) pointer,
; seq_cst ordering, syncscope("agent"), align 8.
; The assertions expect a single 64-bit DS store of v[0:1] to the address in v2
; (ds_write_b64 on gfx900/gfx1010, ds_store_b64 on gfx1100/gfx1200) with no
; other instruction between the block label and s_endpgm.
define amdgpu_cs void @atomic_store_f16x4_seq_cst_agent(<4 x half> %in, ptr addrspace(3) %out) {
; GFX9-LABEL: atomic_store_f16x4_seq_cst_agent:
; GFX9: ; %bb.0:
; GFX9-NEXT: ds_write_b64 v2, v[0:1]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_f16x4_seq_cst_agent:
; GFX10: ; %bb.0:
; GFX10-NEXT: ds_write_b64 v2, v[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_f16x4_seq_cst_agent:
; GFX11: ; %bb.0:
; GFX11-NEXT: ds_store_b64 v2, v[0:1]
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_f16x4_seq_cst_agent:
; GFX12: ; %bb.0:
; GFX12-NEXT: ds_store_b64 v2, v[0:1]
; GFX12-NEXT: s_endpgm
store atomic <4 x half> %in, ptr addrspace(3) %out syncscope("agent") seq_cst, align 8
ret void
}
; Atomic store of a <4 x half> value through a ptr addrspace(3) pointer,
; monotonic ordering, syncscope("wavefront"), align 8.
; The assertions expect exactly one 64-bit DS store of v[0:1] to the address in
; v2 (ds_write_b64 on gfx900/gfx1010, ds_store_b64 on gfx1100/gfx1200),
; immediately followed by s_endpgm.
define amdgpu_cs void @atomic_store_f16x4_monotonic_wavefront(<4 x half> %in, ptr addrspace(3) %out) {
; GFX9-LABEL: atomic_store_f16x4_monotonic_wavefront:
; GFX9: ; %bb.0:
; GFX9-NEXT: ds_write_b64 v2, v[0:1]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_f16x4_monotonic_wavefront:
; GFX10: ; %bb.0:
; GFX10-NEXT: ds_write_b64 v2, v[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_f16x4_monotonic_wavefront:
; GFX11: ; %bb.0:
; GFX11-NEXT: ds_store_b64 v2, v[0:1]
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_f16x4_monotonic_wavefront:
; GFX12: ; %bb.0:
; GFX12-NEXT: ds_store_b64 v2, v[0:1]
; GFX12-NEXT: s_endpgm
store atomic <4 x half> %in, ptr addrspace(3) %out syncscope("wavefront") monotonic, align 8
ret void
}
; Atomic store of a <4 x i16> value through a ptr addrspace(3) pointer,
; monotonic ordering, syncscope("agent"), align 8.
; The assertions expect exactly one 64-bit DS store of v[0:1] to the address in
; v2 (ds_write_b64 on gfx900/gfx1010, ds_store_b64 on gfx1100/gfx1200),
; immediately followed by s_endpgm.
define amdgpu_cs void @atomic_store_i16x4_monotonic_agent(<4 x i16> %in, ptr addrspace(3) %out) {
; GFX9-LABEL: atomic_store_i16x4_monotonic_agent:
; GFX9: ; %bb.0:
; GFX9-NEXT: ds_write_b64 v2, v[0:1]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_i16x4_monotonic_agent:
; GFX10: ; %bb.0:
; GFX10-NEXT: ds_write_b64 v2, v[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_i16x4_monotonic_agent:
; GFX11: ; %bb.0:
; GFX11-NEXT: ds_store_b64 v2, v[0:1]
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_i16x4_monotonic_agent:
; GFX12: ; %bb.0:
; GFX12-NEXT: ds_store_b64 v2, v[0:1]
; GFX12-NEXT: s_endpgm
store atomic <4 x i16> %in, ptr addrspace(3) %out syncscope("agent") monotonic, align 8
ret void
}
; Atomic store of a <4 x i16> value through a ptr addrspace(3) pointer,
; seq_cst ordering, syncscope("agent"), align 8.
; The assertions expect a single 64-bit DS store of v[0:1] to the address in v2
; (ds_write_b64 on gfx900/gfx1010, ds_store_b64 on gfx1100/gfx1200) with no
; other instruction between the block label and s_endpgm.
define amdgpu_cs void @atomic_store_i16x4_seq_cst_agent(<4 x i16> %in, ptr addrspace(3) %out) {
; GFX9-LABEL: atomic_store_i16x4_seq_cst_agent:
; GFX9: ; %bb.0:
; GFX9-NEXT: ds_write_b64 v2, v[0:1]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_i16x4_seq_cst_agent:
; GFX10: ; %bb.0:
; GFX10-NEXT: ds_write_b64 v2, v[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_i16x4_seq_cst_agent:
; GFX11: ; %bb.0:
; GFX11-NEXT: ds_store_b64 v2, v[0:1]
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_i16x4_seq_cst_agent:
; GFX12: ; %bb.0:
; GFX12-NEXT: ds_store_b64 v2, v[0:1]
; GFX12-NEXT: s_endpgm
store atomic <4 x i16> %in, ptr addrspace(3) %out syncscope("agent") seq_cst, align 8
ret void
}
; Atomic store of a <4 x i16> value through a ptr addrspace(3) pointer,
; monotonic ordering, syncscope("wavefront"), align 8.
; The assertions expect exactly one 64-bit DS store of v[0:1] to the address in
; v2 (ds_write_b64 on gfx900/gfx1010, ds_store_b64 on gfx1100/gfx1200),
; immediately followed by s_endpgm.
define amdgpu_cs void @atomic_store_i16x4_monotonic_wavefront(<4 x i16> %in, ptr addrspace(3) %out) {
; GFX9-LABEL: atomic_store_i16x4_monotonic_wavefront:
; GFX9: ; %bb.0:
; GFX9-NEXT: ds_write_b64 v2, v[0:1]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_i16x4_monotonic_wavefront:
; GFX10: ; %bb.0:
; GFX10-NEXT: ds_write_b64 v2, v[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_i16x4_monotonic_wavefront:
; GFX11: ; %bb.0:
; GFX11-NEXT: ds_store_b64 v2, v[0:1]
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_i16x4_monotonic_wavefront:
; GFX12: ; %bb.0:
; GFX12-NEXT: ds_store_b64 v2, v[0:1]
; GFX12-NEXT: s_endpgm
store atomic <4 x i16> %in, ptr addrspace(3) %out syncscope("wavefront") monotonic, align 8
ret void
}
; Atomic <4 x i16> store (monotonic, syncscope("agent"), align 8) to the
; address %out + 1 byte, formed with an inbounds i8 getelementptr.
; The assertions expect the constant to be folded into the store's immediate
; field ("offset:1") with no separate address computation: the single
; ds_write_b64 / ds_store_b64 is the only instruction before s_endpgm.
; NOTE(review): the store asserts align 8 on %out + 1, which holds only if %out
; itself is not 8-byte aligned - presumably deliberate to exercise an odd
; immediate offset; confirm.
define amdgpu_cs void @atomic_store_i16x4_monotonic_agent_offset_1(<4 x i16> %in, ptr addrspace(3) %out) {
; GFX9-LABEL: atomic_store_i16x4_monotonic_agent_offset_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: ds_write_b64 v2, v[0:1] offset:1
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_i16x4_monotonic_agent_offset_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: ds_write_b64 v2, v[0:1] offset:1
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_i16x4_monotonic_agent_offset_1:
; GFX11: ; %bb.0:
; GFX11-NEXT: ds_store_b64 v2, v[0:1] offset:1
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_i16x4_monotonic_agent_offset_1:
; GFX12: ; %bb.0:
; GFX12-NEXT: ds_store_b64 v2, v[0:1] offset:1
; GFX12-NEXT: s_endpgm
%gep = getelementptr inbounds i8, ptr addrspace(3) %out, i64 1
store atomic <4 x i16> %in, ptr addrspace(3) %gep syncscope("agent") monotonic, align 8
ret void
}
; Atomic <4 x i16> store (monotonic, syncscope("agent"), align 8) to the
; address %out + 4088 bytes, formed with an inbounds i8 getelementptr.
; The assertions expect the constant to be folded into the store's immediate
; field ("offset:4088") with no separate address computation: the single
; ds_write_b64 / ds_store_b64 is the only instruction before s_endpgm.
; NOTE(review): the name says "max", but nothing in this file shows why 4088 is
; the largest foldable offset for these DS stores - confirm against the
; instruction's offset encoding.
define amdgpu_cs void @atomic_store_i16x4_monotonic_agent_offset_max(<4 x i16> %in, ptr addrspace(3) %out) {
; GFX9-LABEL: atomic_store_i16x4_monotonic_agent_offset_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: ds_write_b64 v2, v[0:1] offset:4088
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: atomic_store_i16x4_monotonic_agent_offset_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: ds_write_b64 v2, v[0:1] offset:4088
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: atomic_store_i16x4_monotonic_agent_offset_max:
; GFX11: ; %bb.0:
; GFX11-NEXT: ds_store_b64 v2, v[0:1] offset:4088
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_store_i16x4_monotonic_agent_offset_max:
; GFX12: ; %bb.0:
; GFX12-NEXT: ds_store_b64 v2, v[0:1] offset:4088
; GFX12-NEXT: s_endpgm
%gep = getelementptr inbounds i8, ptr addrspace(3) %out, i64 4088
store atomic <4 x i16> %in, ptr addrspace(3) %gep syncscope("agent") monotonic, align 8
ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
; GFX10-GISEL: {{.*}}
; GFX10-SDAG: {{.*}}
; GFX11-GISEL: {{.*}}
; GFX11-SDAG: {{.*}}
; GFX12-GISEL: {{.*}}
; GFX12-SDAG: {{.*}}
; GFX9-GISEL: {{.*}}
; GFX9-SDAG: {{.*}}