blob: de82dcdecda48c02eca5aa4f2c4122e1119ddc20 [file] [log] [blame] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 %s -o - | FileCheck -check-prefix=GFX1250-SDAG %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -global-isel %s -o - | FileCheck -check-prefix=GFX1250-GISEL %s
; Non-kernel function (the define carries no calling convention) that stores
; the result of llvm.amdgcn.workgroup.id.x to %out. No "amdgpu-cluster-dims"
; attribute is present.
; Both instruction selectors are expected to compute
;   (bfe(ttmp6, 0x4000c) + 1) * ttmp9 + (ttmp6 & 15)
; and then select plain ttmp9 instead when hwreg(HW_REG_IB_STS2, 6, 4) reads 0.
; NOTE(review): the hwreg read presumably tells at run time whether clusters
; are in use - confirm against the AMDGPU backend documentation.
define void @test_workgroup_id_x_non_kernel(ptr addrspace(1) %out) {
; GFX1250-SDAG-LABEL: test_workgroup_id_x_non_kernel:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c
; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15
; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
; GFX1250-SDAG-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4)
; GFX1250-SDAG-NEXT: s_mul_i32 s0, ttmp9, s0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: s_add_co_i32 s1, s1, s0
; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s2, 0
; GFX1250-SDAG-NEXT: s_cselect_b32 s0, ttmp9, s1
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-GISEL-LABEL: test_workgroup_id_x_non_kernel:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c
; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp6, 15
; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
; GFX1250-GISEL-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4)
; GFX1250-GISEL-NEXT: s_mul_i32 s0, ttmp9, s0
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s1, s0
; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s2, 0
; GFX1250-GISEL-NEXT: s_cselect_b32 s0, ttmp9, s1
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
%id = call i32 @llvm.amdgcn.workgroup.id.x()
store i32 %id, ptr addrspace(1) %out
ret void
}
; Same IR body as the plain X test, but the function carries
; "amdgpu-cluster-dims"="1024,1024,1024".
; The expected output contains no s_getreg_b32 / s_cselect_b32: both selectors
; unconditionally store (bfe(ttmp6, 0x4000c) + 1) * ttmp9 + (ttmp6 & 15).
; NOTE(review): "1024,1024,1024" presumably marks clusters as used with
; dimensions not known at compile time - confirm against the backend docs.
define void @test_workgroup_id_x_non_kernel_optimized_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="1024,1024,1024" {
; GFX1250-SDAG-LABEL: test_workgroup_id_x_non_kernel_optimized_used:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c
; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15
; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: s_mul_i32 s0, ttmp9, s0
; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s1, s0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-GISEL-LABEL: test_workgroup_id_x_non_kernel_optimized_used:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c
; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp6, 15
; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1250-GISEL-NEXT: s_mul_i32 s0, ttmp9, s0
; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s1, s0
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
%id = call i32 @llvm.amdgcn.workgroup.id.x()
store i32 %id, ptr addrspace(1) %out
ret void
}
; Same IR body as the plain X test, but the function carries
; "amdgpu-cluster-dims"="0,0,0".
; The expected output is the minimal form: ttmp9 is moved straight into a VGPR
; and stored, with no ttmp6 arithmetic and no hwreg read.
; NOTE(review): "0,0,0" presumably means clusters are not used - confirm
; against the backend docs.
define void @test_workgroup_id_x_non_kernel_optimized_not_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="0,0,0" {
; GFX1250-SDAG-LABEL: test_workgroup_id_x_non_kernel_optimized_not_used:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, ttmp9
; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-GISEL-LABEL: test_workgroup_id_x_non_kernel_optimized_not_used:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, ttmp9
; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
%id = call i32 @llvm.amdgcn.workgroup.id.x()
store i32 %id, ptr addrspace(1) %out
ret void
}
; Same IR body as the plain X test, but the function carries
; "amdgpu-cluster-dims"="2,1,2".
; The expected output needs no s_bfe_u32 / s_mul_i32 and no hwreg read: the
; multiplier is folded into one s_lshl1_add_u32 that combines ttmp9 with
; (ttmp6 & 15), i.e. (ttmp9 << 1) + (ttmp6 & 15).
; NOTE(review): the shift-by-one presumably comes from the X dimension being
; the constant 2 - confirm against the backend docs.
define void @test_workgroup_id_x_non_kernel_optimized_fixed(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,1,2" {
; GFX1250-SDAG-LABEL: test_workgroup_id_x_non_kernel_optimized_fixed:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_and_b32 s0, ttmp6, 15
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: s_lshl1_add_u32 s0, ttmp9, s0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-GISEL-LABEL: test_workgroup_id_x_non_kernel_optimized_fixed:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: s_and_b32 s0, ttmp6, 15
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1250-GISEL-NEXT: s_lshl1_add_u32 s0, ttmp9, s0
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
%id = call i32 @llvm.amdgcn.workgroup.id.x()
store i32 %id, ptr addrspace(1) %out
ret void
}
; Non-kernel function that stores the result of llvm.amdgcn.workgroup.id.y to
; %out. No "amdgpu-cluster-dims" attribute is present.
; Both instruction selectors are expected to compute
;   (bfe(ttmp6, 0x40010) + 1) * (ttmp7 & 0xffff) + bfe(ttmp6, 0x40004)
; and then select plain (ttmp7 & 0xffff) instead when
; hwreg(HW_REG_IB_STS2, 6, 4) reads 0.
define void @test_workgroup_id_y_non_kernel(ptr addrspace(1) %out) {
; GFX1250-SDAG-LABEL: test_workgroup_id_y_non_kernel:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40010
; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff
; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
; GFX1250-SDAG-NEXT: s_bfe_u32 s2, ttmp6, 0x40004
; GFX1250-SDAG-NEXT: s_mul_i32 s0, s1, s0
; GFX1250-SDAG-NEXT: s_getreg_b32 s3, hwreg(HW_REG_IB_STS2, 6, 4)
; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, s0
; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s3, 0
; GFX1250-SDAG-NEXT: s_cselect_b32 s0, s1, s2
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-GISEL-LABEL: test_workgroup_id_y_non_kernel:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40010
; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x40004
; GFX1250-GISEL-NEXT: s_mul_i32 s0, s1, s0
; GFX1250-GISEL-NEXT: s_getreg_b32 s3, hwreg(HW_REG_IB_STS2, 6, 4)
; GFX1250-GISEL-NEXT: s_add_co_i32 s2, s2, s0
; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s3, 0
; GFX1250-GISEL-NEXT: s_cselect_b32 s0, s1, s2
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
%id = call i32 @llvm.amdgcn.workgroup.id.y()
store i32 %id, ptr addrspace(1) %out
ret void
}
; Same IR body as the plain Y test, but the function carries
; "amdgpu-cluster-dims"="1024,1024,1024".
; The expected output has no hwreg read or select: it unconditionally stores
;   (bfe(ttmp6, 0x40010) + 1) * (ttmp7 & 0xffff) + bfe(ttmp6, 0x40004).
; The two selectors differ only in register use and instruction order
; (SelectionDAG reuses s0 for the second s_bfe_u32, GlobalISel uses s2).
define void @test_workgroup_id_y_non_kernel_optimized_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="1024,1024,1024" {
; GFX1250-SDAG-LABEL: test_workgroup_id_y_non_kernel_optimized_used:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40010
; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff
; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: s_mul_i32 s1, s1, s0
; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40004
; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, s1
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-GISEL-LABEL: test_workgroup_id_y_non_kernel_optimized_used:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40010
; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x40004
; GFX1250-GISEL-NEXT: s_mul_i32 s1, s1, s0
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s2, s1
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
%id = call i32 @llvm.amdgcn.workgroup.id.y()
store i32 %id, ptr addrspace(1) %out
ret void
}
; Same IR body as the plain Y test, but the function carries
; "amdgpu-cluster-dims"="0,0,0".
; The expected output is the minimal form: only (ttmp7 & 0xffff) is computed
; and stored, with no ttmp6 arithmetic and no hwreg read.
define void @test_workgroup_id_y_non_kernel_optimized_not_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="0,0,0" {
; GFX1250-SDAG-LABEL: test_workgroup_id_y_non_kernel_optimized_not_used:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-GISEL-LABEL: test_workgroup_id_y_non_kernel_optimized_not_used:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: s_and_b32 s0, ttmp7, 0xffff
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
%id = call i32 @llvm.amdgcn.workgroup.id.y()
store i32 %id, ptr addrspace(1) %out
ret void
}
; Same IR body as the plain Y test, but the function carries
; "amdgpu-cluster-dims"="2,1,2".
; The expected output is identical to the "0,0,0" Y case: only
; (ttmp7 & 0xffff) is computed and stored.
; NOTE(review): presumably the Y computation collapses because the Y
; dimension in the attribute is 1 - confirm against the backend docs.
define void @test_workgroup_id_y_non_kernel_optimized_fixed(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,1,2" {
; GFX1250-SDAG-LABEL: test_workgroup_id_y_non_kernel_optimized_fixed:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-GISEL-LABEL: test_workgroup_id_y_non_kernel_optimized_fixed:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: s_and_b32 s0, ttmp7, 0xffff
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
%id = call i32 @llvm.amdgcn.workgroup.id.y()
store i32 %id, ptr addrspace(1) %out
ret void
}
; Non-kernel function that stores the result of llvm.amdgcn.workgroup.id.z to
; %out. No "amdgpu-cluster-dims" attribute is present.
; Both instruction selectors are expected to compute
;   (bfe(ttmp6, 0x40014) + 1) * (ttmp7 >> 16) + bfe(ttmp6, 0x40008)
; and then select plain (ttmp7 >> 16) instead when
; hwreg(HW_REG_IB_STS2, 6, 4) reads 0.
define void @test_workgroup_id_z_non_kernel(ptr addrspace(1) %out) {
; GFX1250-SDAG-LABEL: test_workgroup_id_z_non_kernel:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40014
; GFX1250-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16
; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
; GFX1250-SDAG-NEXT: s_bfe_u32 s2, ttmp6, 0x40008
; GFX1250-SDAG-NEXT: s_mul_i32 s0, s1, s0
; GFX1250-SDAG-NEXT: s_getreg_b32 s3, hwreg(HW_REG_IB_STS2, 6, 4)
; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, s0
; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s3, 0
; GFX1250-SDAG-NEXT: s_cselect_b32 s0, s1, s2
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-GISEL-LABEL: test_workgroup_id_z_non_kernel:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40014
; GFX1250-GISEL-NEXT: s_lshr_b32 s1, ttmp7, 16
; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x40008
; GFX1250-GISEL-NEXT: s_mul_i32 s0, s1, s0
; GFX1250-GISEL-NEXT: s_getreg_b32 s3, hwreg(HW_REG_IB_STS2, 6, 4)
; GFX1250-GISEL-NEXT: s_add_co_i32 s2, s2, s0
; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s3, 0
; GFX1250-GISEL-NEXT: s_cselect_b32 s0, s1, s2
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
%id = call i32 @llvm.amdgcn.workgroup.id.z()
store i32 %id, ptr addrspace(1) %out
ret void
}
; Same IR body as the plain Z test, but the function carries
; "amdgpu-cluster-dims"="1024,1024,1024".
; The expected output has no hwreg read or select: it unconditionally stores
;   (bfe(ttmp6, 0x40014) + 1) * (ttmp7 >> 16) + bfe(ttmp6, 0x40008).
; The two selectors differ only in register use and instruction order
; (SelectionDAG reuses s0 for the second s_bfe_u32, GlobalISel uses s2).
define void @test_workgroup_id_z_non_kernel_optimized_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="1024,1024,1024" {
; GFX1250-SDAG-LABEL: test_workgroup_id_z_non_kernel_optimized_used:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40014
; GFX1250-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16
; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: s_mul_i32 s1, s1, s0
; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40008
; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, s1
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-GISEL-LABEL: test_workgroup_id_z_non_kernel_optimized_used:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40014
; GFX1250-GISEL-NEXT: s_lshr_b32 s1, ttmp7, 16
; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1
; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x40008
; GFX1250-GISEL-NEXT: s_mul_i32 s1, s1, s0
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s2, s1
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
%id = call i32 @llvm.amdgcn.workgroup.id.z()
store i32 %id, ptr addrspace(1) %out
ret void
}
; Same IR body as the plain Z test, but the function carries
; "amdgpu-cluster-dims"="0,0,0".
; The expected output is the minimal form: only (ttmp7 >> 16) is computed and
; stored, with no ttmp6 arithmetic and no hwreg read.
define void @test_workgroup_id_z_non_kernel_optimized_not_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="0,0,0" {
; GFX1250-SDAG-LABEL: test_workgroup_id_z_non_kernel_optimized_not_used:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-GISEL-LABEL: test_workgroup_id_z_non_kernel_optimized_not_used:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: s_lshr_b32 s0, ttmp7, 16
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
%id = call i32 @llvm.amdgcn.workgroup.id.z()
store i32 %id, ptr addrspace(1) %out
ret void
}
; Same IR body as the plain Z test, but the function carries
; "amdgpu-cluster-dims"="2,1,2".
; No hwreg read, s_mul_i32 or select is expected. The two selectors emit
; different sequences for the same value:
;   SelectionDAG: ((ttmp7 >> 15) & 0x1fffe) + bfe(ttmp6, 0x40008)
;   GlobalISel:   s_lshl1_add_u32 of (ttmp7 >> 16) and bfe(ttmp6, 0x40008),
;                 i.e. ((ttmp7 >> 16) << 1) + bfe(ttmp6, 0x40008)
; NOTE(review): the doubling presumably comes from the Z dimension being the
; constant 2 - confirm against the backend docs.
define void @test_workgroup_id_z_non_kernel_optimized_fixed(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,1,2" {
; GFX1250-SDAG-LABEL: test_workgroup_id_z_non_kernel_optimized_fixed:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 15
; GFX1250-SDAG-NEXT: s_bfe_u32 s1, ttmp6, 0x40008
; GFX1250-SDAG-NEXT: s_and_b32 s0, s0, 0x1fffe
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s1, s0
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-GISEL-LABEL: test_workgroup_id_z_non_kernel_optimized_fixed:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: s_lshr_b32 s0, ttmp7, 16
; GFX1250-GISEL-NEXT: s_bfe_u32 s1, ttmp6, 0x40008
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1250-GISEL-NEXT: s_lshl1_add_u32 s0, s0, s1
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
%id = call i32 @llvm.amdgcn.workgroup.id.z()
store i32 %id, ptr addrspace(1) %out
ret void
}
; Intrinsics exercised by the tests above; each takes no arguments and
; returns an i32.
declare i32 @llvm.amdgcn.workgroup.id.x()
declare i32 @llvm.amdgcn.workgroup.id.y()
declare i32 @llvm.amdgcn.workgroup.id.z()