blob: ab12e3c19992de45127444bcc097729b68e0a09c [file]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU,GFX12-CU-DAGISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU,GFX12-CU-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-DAGISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-GISEL %s
; Baseline flat case: a non-atomic i32 load through an addrspace(0) pointer
; followed by a store of the loaded value through a second addrspace(0) pointer.
; The load has no metadata attached, and the expected flat_load_b32 carries no
; trailing modifier for either -mcpu value exercised by this file.
define void @flat_i32_nonatomic(ptr addrspace(0) %in, ptr addrspace(0) %out) {
; GFX12-CU-LABEL: flat_i32_nonatomic:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: s_wait_expcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: flat_load_b32 v0, v[0:1]
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[2:3], v0
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: flat_i32_nonatomic:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v0, v[0:1]
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v[2:3], v0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%val = load i32, ptr addrspace(0) %in
store i32 %val, ptr addrspace(0) %out
ret void
}
; Flat (addrspace(0)) i32 load tagged with !invariant.load; the loaded value is
; returned and %out is never used. With -mcpu=gfx1250 the expected flat_load_b32
; gains a trailing `nv` modifier, whereas the gfx1200 +cumode output has none.
define i32 @md_invariant__flat_i32_nonatomic(ptr addrspace(0) %in, ptr addrspace(0) %out) {
; GFX12-CU-LABEL: md_invariant__flat_i32_nonatomic:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: s_wait_expcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: flat_load_b32 v0, v[0:1]
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: md_invariant__flat_i32_nonatomic:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v0, v[0:1] nv
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%val = load i32, ptr addrspace(0) %in, !invariant.load !0
ret i32 %val
}
; Baseline global case: a non-atomic i32 load through an addrspace(1) pointer
; followed by a store of the loaded value through a second addrspace(1) pointer.
; The load has no metadata attached, and the expected global_load_b32 carries no
; trailing modifier for either -mcpu value exercised by this file.
define void @global_i32_nonatomic(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX12-CU-LABEL: global_i32_nonatomic:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: s_wait_expcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: global_load_b32 v0, v[0:1], off
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v[2:3], v0, off
; GFX12-CU-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: global_i32_nonatomic:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v0, v[0:1], off
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v[2:3], v0, off
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%val = load i32, ptr addrspace(1) %in
store i32 %val, ptr addrspace(1) %out
ret void
}
; Global (addrspace(1)) i32 load tagged with !invariant.load; the loaded value
; is returned and %out is never used. With -mcpu=gfx1250 the expected
; global_load_b32 gains a trailing `nv` modifier, whereas the gfx1200 +cumode
; output has none.
define i32 @md_invariant__global_i32_nonatomic(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX12-CU-LABEL: md_invariant__global_i32_nonatomic:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: s_wait_expcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: global_load_b32 v0, v[0:1], off
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: md_invariant__global_i32_nonatomic:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v0, v[0:1], off nv
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%val = load i32, ptr addrspace(1) %in, !invariant.load !0
ret i32 %val
}
; Non-atomic i32 load through an inreg addrspace(4) pointer; the loaded value is
; returned. The expected output uses s_load_b32 and then copies the result into
; v0. With -mcpu=gfx1250 the s_load_b32 already carries `nv` even though this IR
; load has no !invariant.load attachment.
; (Presumably addrspace(4) is treated as constant memory by the backend; TODO confirm.)
define i32 @scalar_i32_nonatomic(ptr addrspace(4) inreg %in) {
; GFX12-CU-LABEL: scalar_i32_nonatomic:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: s_wait_expcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: s_load_b32 s0, s[0:1], 0x0
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: scalar_i32_nonatomic:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b32 s0, s[0:1], 0x0 nv
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%val = load i32, ptr addrspace(4) %in
ret i32 %val
}
; addrspace(4) i32 load through an inreg pointer, tagged with !invariant.load;
; the loaded value is returned. The expected instruction sequence is the same as
; for @scalar_i32_nonatomic on both targets: s_load_b32 without a modifier for
; gfx1200 +cumode, and s_load_b32 with `nv` for gfx1250.
define i32 @md_invariant__scalar_i32_nonatomic(ptr addrspace(4) inreg %in) {
; GFX12-CU-LABEL: md_invariant__scalar_i32_nonatomic:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: s_wait_expcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: s_load_b32 s0, s[0:1], 0x0
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: md_invariant__scalar_i32_nonatomic:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b32 s0, s[0:1], 0x0 nv
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%val = load i32, ptr addrspace(4) %in, !invariant.load !0
ret i32 %val
}
; Baseline scratch case: a non-atomic i32 load through an addrspace(5) pointer
; followed by a store of the loaded value through a second addrspace(5) pointer.
; The load has no metadata attached, and the expected scratch_load_b32 carries
; no trailing modifier for either -mcpu value exercised by this file.
define void @scratch_i32_nonatomic(ptr addrspace(5) %in, ptr addrspace(5) %out) {
; GFX12-CU-LABEL: scratch_i32_nonatomic:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: s_wait_expcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: scratch_load_b32 v0, v0, off
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: scratch_store_b32 v1, v0, off
; GFX12-CU-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: scratch_i32_nonatomic:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: scratch_load_b32 v0, v0, off
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: scratch_store_b32 v1, v0, off
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%val = load i32, ptr addrspace(5) %in
store i32 %val, ptr addrspace(5) %out
ret void
}
; Scratch (addrspace(5)) i32 load tagged with !invariant.load; the loaded value
; is returned and %out is never used. With -mcpu=gfx1250 the expected
; scratch_load_b32 gains a trailing `nv` modifier, whereas the gfx1200 +cumode
; output has none.
define i32 @md_invariant__scratch_i32_nonatomic(ptr addrspace(5) %in, ptr addrspace(5) %out) {
; GFX12-CU-LABEL: md_invariant__scratch_i32_nonatomic:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: s_wait_expcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: scratch_load_b32 v0, v0, off
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: md_invariant__scratch_i32_nonatomic:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: scratch_load_b32 v0, v0, off nv
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%val = load i32, ptr addrspace(5) %in, !invariant.load !0
ret i32 %val
}
; Non-atomic i32 load through an inreg addrspace(6) pointer; the loaded value is
; returned. The expected output first writes 0 to s1 and then issues s_load_b32
; from the s[0:1] pair. With -mcpu=gfx1250 the s_load_b32 already carries `nv`
; even though this IR load has no !invariant.load attachment.
; (Presumably the s1 write zero-extends a 32-bit pointer held in s0; TODO confirm.)
define i32 @scalar32_i32_nonatomic(ptr addrspace(6) inreg %in) {
; GFX12-CU-LABEL: scalar32_i32_nonatomic:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: s_wait_expcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: s_mov_b32 s1, 0
; GFX12-CU-NEXT: s_load_b32 s0, s[0:1], 0x0
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: scalar32_i32_nonatomic:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_mov_b32 s1, 0
; GFX1250-NEXT: s_load_b32 s0, s[0:1], 0x0 nv
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%val = load i32, ptr addrspace(6) %in
ret i32 %val
}
; addrspace(6) i32 load through an inreg pointer, tagged with !invariant.load;
; the loaded value is returned. The expected instruction sequence is the same as
; for @scalar32_i32_nonatomic on both targets: s1 is set to 0, then s_load_b32
; reads from s[0:1], with the `nv` modifier present only for gfx1250.
define i32 @md_invariant__scalar32_i32_nonatomic(ptr addrspace(6) inreg %in) {
; GFX12-CU-LABEL: md_invariant__scalar32_i32_nonatomic:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: s_wait_expcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: s_mov_b32 s1, 0
; GFX12-CU-NEXT: s_load_b32 s0, s[0:1], 0x0
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: md_invariant__scalar32_i32_nonatomic:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_mov_b32 s1, 0
; GFX1250-NEXT: s_load_b32 s0, s[0:1], 0x0 nv
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%val = load i32, ptr addrspace(6) %in, !invariant.load !0
ret i32 %val
}
; Baseline buffer case: a non-atomic i32 load through an inreg addrspace(7)
; pointer followed by a store of the loaded value through a second inreg
; addrspace(7) pointer. The load has no metadata attached, and the expected
; buffer_load_b32 carries no `nv` modifier on either target.
; The assertions are split per instruction selector because the s_mov_b32
; copies that populate s[4:7] for the store are emitted in a different order by
; the -global-isel=0 and -global-isel=1 runs.
define void @buffer_i32_nonatomic(ptr addrspace(7) inreg %in, ptr addrspace(7) inreg %out) {
; GFX12-CU-DAGISEL-LABEL: buffer_i32_nonatomic:
; GFX12-CU-DAGISEL: ; %bb.0: ; %entry
; GFX12-CU-DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-DAGISEL-NEXT: s_wait_expcnt 0x0
; GFX12-CU-DAGISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-DAGISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-DAGISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-DAGISEL-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s21
; GFX12-CU-DAGISEL-NEXT: s_mov_b32 s7, s20
; GFX12-CU-DAGISEL-NEXT: s_mov_b32 s6, s19
; GFX12-CU-DAGISEL-NEXT: s_mov_b32 s5, s18
; GFX12-CU-DAGISEL-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen
; GFX12-CU-DAGISEL-NEXT: s_mov_b32 s4, s17
; GFX12-CU-DAGISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-DAGISEL-NEXT: buffer_store_b32 v0, v1, s[4:7], null offen
; GFX12-CU-DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-CU-GISEL-LABEL: buffer_i32_nonatomic:
; GFX12-CU-GISEL: ; %bb.0: ; %entry
; GFX12-CU-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-GISEL-NEXT: s_wait_expcnt 0x0
; GFX12-CU-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-GISEL-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s21
; GFX12-CU-GISEL-NEXT: s_mov_b32 s4, s17
; GFX12-CU-GISEL-NEXT: s_mov_b32 s5, s18
; GFX12-CU-GISEL-NEXT: s_mov_b32 s6, s19
; GFX12-CU-GISEL-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen
; GFX12-CU-GISEL-NEXT: s_mov_b32 s7, s20
; GFX12-CU-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-GISEL-NEXT: buffer_store_b32 v0, v1, s[4:7], null offen
; GFX12-CU-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-DAGISEL-LABEL: buffer_i32_nonatomic:
; GFX1250-DAGISEL: ; %bb.0: ; %entry
; GFX1250-DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-DAGISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-DAGISEL-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s21
; GFX1250-DAGISEL-NEXT: s_mov_b32 s7, s20
; GFX1250-DAGISEL-NEXT: s_mov_b32 s6, s19
; GFX1250-DAGISEL-NEXT: s_mov_b32 s5, s18
; GFX1250-DAGISEL-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen
; GFX1250-DAGISEL-NEXT: s_mov_b32 s4, s17
; GFX1250-DAGISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-DAGISEL-NEXT: buffer_store_b32 v0, v1, s[4:7], null offen
; GFX1250-DAGISEL-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-GISEL-LABEL: buffer_i32_nonatomic:
; GFX1250-GISEL: ; %bb.0: ; %entry
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s21
; GFX1250-GISEL-NEXT: s_mov_b32 s4, s17
; GFX1250-GISEL-NEXT: s_mov_b32 s5, s18
; GFX1250-GISEL-NEXT: s_mov_b32 s6, s19
; GFX1250-GISEL-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen
; GFX1250-GISEL-NEXT: s_mov_b32 s7, s20
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: buffer_store_b32 v0, v1, s[4:7], null offen
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
entry:
%val = load i32, ptr addrspace(7) %in
store i32 %val, ptr addrspace(7) %out
ret void
}
; Buffer (addrspace(7)) i32 load through an inreg pointer, tagged with
; !invariant.load; the loaded value is returned and %out is never used. With
; -mcpu=gfx1250 the expected buffer_load_b32 gains a trailing `nv` modifier,
; whereas the gfx1200 +cumode output has none. Both instruction selectors share
; one set of assertions per target here.
define i32 @md_invariant__buffer_i32_nonatomic(ptr addrspace(7) inreg %in, ptr addrspace(7) inreg %out) {
; GFX12-CU-LABEL: md_invariant__buffer_i32_nonatomic:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: s_wait_expcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s16
; GFX12-CU-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: md_invariant__buffer_i32_nonatomic:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s16
; GFX1250-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen nv
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
%val = load i32, ptr addrspace(7) %in, !invariant.load !0
ret i32 %val
}
; Empty metadata node used as the operand of the !invariant.load attachments on
; the md_invariant__* loads.
!0 = !{}