| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "load" --filter-out "store" --version 6 |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX6,GFX6-SDAG %s |
| ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX6,GFX6-GISEL %s |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s |
| ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10,GFX10-SDAG %s |
| ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10,GFX10-GISEL %s |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11,GFX11-SDAG %s |
| ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11,GFX11-GISEL %s |
| |
| ; Minimum offset: constant 0 gives m0 = 0 and no offset: modifier; the GFX6 (tahiti) checks wrap the op in a loop that polls HW_REG_TRAPSTS. |
| define amdgpu_kernel void @gws_init_offset0(i32 %val) #0 { |
| ; GFX6-LABEL: gws_init_offset0: |
| ; GFX6: ; %bb.0: |
| ; GFX6: s_mov_b32 m0, 0 |
| ; GFX6: s_waitcnt lgkmcnt(0) |
| ; GFX6: v_mov_b32_e32 v0, s0 |
| ; GFX6: .LBB0_1: ; =>This Inner Loop Header: Depth=1 |
| ; GFX6: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0 |
| ; GFX6: ds_gws_init v0 gds |
| ; GFX6: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6: s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 8, 1) |
| ; GFX6: s_cmp_lg_u32 s0, 0 |
| ; GFX6: s_cbranch_scc1 .LBB0_1 |
| ; GFX6: ; %bb.2: |
| ; GFX6: s_endpgm |
| ; |
| ; GCN-LABEL: gws_init_offset0: |
| ; GCN: ; %bb.0: |
| ; GCN: s_mov_b32 m0, 0 |
| ; GCN: s_waitcnt lgkmcnt(0) |
| ; GCN: v_mov_b32_e32 v0, s0 |
| ; GCN: ds_gws_init v0 gds |
| ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN: s_endpgm |
| call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 0) |
| ret void |
| } |
| |
| ; Maximum offset: constant 63 is emitted as offset:63 on ds_gws_init while m0 is still set to 0. |
| define amdgpu_kernel void @gws_init_offset63(i32 %val) #0 { |
| ; GFX6-LABEL: gws_init_offset63: |
| ; GFX6: ; %bb.0: |
| ; GFX6: s_mov_b32 m0, 0 |
| ; GFX6: s_waitcnt lgkmcnt(0) |
| ; GFX6: v_mov_b32_e32 v0, s0 |
| ; GFX6: .LBB1_1: ; =>This Inner Loop Header: Depth=1 |
| ; GFX6: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0 |
| ; GFX6: ds_gws_init v0 offset:63 gds |
| ; GFX6: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6: s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 8, 1) |
| ; GFX6: s_cmp_lg_u32 s0, 0 |
| ; GFX6: s_cbranch_scc1 .LBB1_1 |
| ; GFX6: ; %bb.2: |
| ; GFX6: s_endpgm |
| ; |
| ; GCN-LABEL: gws_init_offset63: |
| ; GCN: ; %bb.0: |
| ; GCN: s_mov_b32 m0, 0 |
| ; GCN: s_waitcnt lgkmcnt(0) |
| ; GCN: v_mov_b32_e32 v0, s0 |
| ; GCN: ds_gws_init v0 offset:63 gds |
| ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN: s_endpgm |
| call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 63) |
| ret void |
| } |
| |
| ; FIXME: Should be able to shift directly into m0 (NOTE(review): every prefix below already expects s_lshl_b32 m0, s1, 16 -- possibly stale) |
| define amdgpu_kernel void @gws_init_sgpr_offset(i32 %val, i32 %offset) #0 { |
| ; GFX6-LABEL: gws_init_sgpr_offset: |
| ; GFX6: ; %bb.0: |
| ; GFX6: s_waitcnt lgkmcnt(0) |
| ; GFX6: v_mov_b32_e32 v0, s0 |
| ; GFX6: s_lshl_b32 m0, s1, 16 |
| ; GFX6: .LBB2_1: ; =>This Inner Loop Header: Depth=1 |
| ; GFX6: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0 |
| ; GFX6: ds_gws_init v0 gds |
| ; GFX6: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6: s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 8, 1) |
| ; GFX6: s_cmp_lg_u32 s0, 0 |
| ; GFX6: s_cbranch_scc1 .LBB2_1 |
| ; GFX6: ; %bb.2: |
| ; GFX6: s_endpgm |
| ; |
| ; GFX9-SDAG-LABEL: gws_init_sgpr_offset: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG: s_waitcnt lgkmcnt(0) |
| ; GFX9-SDAG: s_lshl_b32 m0, s1, 16 |
| ; GFX9-SDAG: v_mov_b32_e32 v0, s0 |
| ; GFX9-SDAG: ds_gws_init v0 gds |
| ; GFX9-SDAG: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG: s_endpgm |
| ; |
| ; GFX9-GISEL-LABEL: gws_init_sgpr_offset: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL: s_waitcnt lgkmcnt(0) |
| ; GFX9-GISEL: v_mov_b32_e32 v0, s0 |
| ; GFX9-GISEL: s_lshl_b32 m0, s1, 16 |
| ; GFX9-GISEL: s_nop 0 |
| ; GFX9-GISEL: ds_gws_init v0 gds |
| ; GFX9-GISEL: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL: s_endpgm |
| ; |
| ; GFX1011-LABEL: gws_init_sgpr_offset: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011: s_waitcnt lgkmcnt(0) |
| ; GFX1011: v_mov_b32_e32 v0, s0 |
| ; GFX1011: s_lshl_b32 m0, s1, 16 |
| ; GFX1011: ds_gws_init v0 gds |
| ; GFX1011: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011: s_endpgm |
| call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %offset) |
| ret void |
| } |
| |
| ; Variable offset in SGPR with constant add: the +1 is expected as offset:1 while the variable part is shifted left by 16 into m0. |
| define amdgpu_kernel void @gws_init_sgpr_offset_add1(i32 %val, i32 %offset.base) #0 { |
| ; GFX6-LABEL: gws_init_sgpr_offset_add1: |
| ; GFX6: ; %bb.0: |
| ; GFX6: s_waitcnt lgkmcnt(0) |
| ; GFX6: v_mov_b32_e32 v0, s0 |
| ; GFX6: s_lshl_b32 m0, s1, 16 |
| ; GFX6: .LBB3_1: ; =>This Inner Loop Header: Depth=1 |
| ; GFX6: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0 |
| ; GFX6: ds_gws_init v0 offset:1 gds |
| ; GFX6: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6: s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 8, 1) |
| ; GFX6: s_cmp_lg_u32 s0, 0 |
| ; GFX6: s_cbranch_scc1 .LBB3_1 |
| ; GFX6: ; %bb.2: |
| ; GFX6: s_endpgm |
| ; |
| ; GFX9-SDAG-LABEL: gws_init_sgpr_offset_add1: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG: s_waitcnt lgkmcnt(0) |
| ; GFX9-SDAG: s_lshl_b32 m0, s1, 16 |
| ; GFX9-SDAG: v_mov_b32_e32 v0, s0 |
| ; GFX9-SDAG: ds_gws_init v0 offset:1 gds |
| ; GFX9-SDAG: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG: s_endpgm |
| ; |
| ; GFX9-GISEL-LABEL: gws_init_sgpr_offset_add1: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL: s_waitcnt lgkmcnt(0) |
| ; GFX9-GISEL: v_mov_b32_e32 v0, s0 |
| ; GFX9-GISEL: s_lshl_b32 m0, s1, 16 |
| ; GFX9-GISEL: s_nop 0 |
| ; GFX9-GISEL: ds_gws_init v0 offset:1 gds |
| ; GFX9-GISEL: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL: s_endpgm |
| ; |
| ; GFX1011-LABEL: gws_init_sgpr_offset_add1: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011: s_waitcnt lgkmcnt(0) |
| ; GFX1011: v_mov_b32_e32 v0, s0 |
| ; GFX1011: s_lshl_b32 m0, s1, 16 |
| ; GFX1011: ds_gws_init v0 offset:1 gds |
| ; GFX1011: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011: s_endpgm |
| %offset = add i32 %offset.base, 1 |
| call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %offset) |
| ret void |
| } |
| |
| ; Variable offset in VGPR: the workitem id is copied to an SGPR with v_readfirstlane_b32 and then shifted left by 16 into m0. |
| define amdgpu_kernel void @gws_init_vgpr_offset(i32 %val) #0 { |
| ; GFX6-LABEL: gws_init_vgpr_offset: |
| ; GFX6: ; %bb.0: |
| ; GFX6: v_readfirstlane_b32 s1, v0 |
| ; GFX6: s_lshl_b32 m0, s1, 16 |
| ; GFX6: s_waitcnt lgkmcnt(0) |
| ; GFX6: v_mov_b32_e32 v0, s0 |
| ; GFX6: .LBB4_1: ; =>This Inner Loop Header: Depth=1 |
| ; GFX6: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0 |
| ; GFX6: ds_gws_init v0 gds |
| ; GFX6: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6: s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 8, 1) |
| ; GFX6: s_cmp_lg_u32 s0, 0 |
| ; GFX6: s_cbranch_scc1 .LBB4_1 |
| ; GFX6: ; %bb.2: |
| ; GFX6: s_endpgm |
| ; |
| ; GFX9-LABEL: gws_init_vgpr_offset: |
| ; GFX9: ; %bb.0: |
| ; GFX9: v_readfirstlane_b32 s1, v0 |
| ; GFX9: s_lshl_b32 m0, s1, 16 |
| ; GFX9: s_waitcnt lgkmcnt(0) |
| ; GFX9: v_mov_b32_e32 v0, s0 |
| ; GFX9: ds_gws_init v0 gds |
| ; GFX9: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9: s_endpgm |
| ; |
| ; GFX10-LABEL: gws_init_vgpr_offset: |
| ; GFX10: ; %bb.0: |
| ; GFX10: v_readfirstlane_b32 s1, v0 |
| ; GFX10: s_lshl_b32 m0, s1, 16 |
| ; GFX10: s_waitcnt lgkmcnt(0) |
| ; GFX10: v_mov_b32_e32 v0, s0 |
| ; GFX10: ds_gws_init v0 gds |
| ; GFX10: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10: s_endpgm |
| ; |
| ; GFX11-LABEL: gws_init_vgpr_offset: |
| ; GFX11: ; %bb.0: |
| ; GFX11: v_and_b32_e32 v0, 0x3ff, v0 |
| ; GFX11: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11: v_readfirstlane_b32 s1, v0 |
| ; GFX11: s_lshl_b32 m0, s1, 16 |
| ; GFX11: s_waitcnt lgkmcnt(0) |
| ; GFX11: v_mov_b32_e32 v0, s0 |
| ; GFX11: ds_gws_init v0 gds |
| ; GFX11: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11: s_endpgm |
| %vgpr.offset = call i32 @llvm.amdgcn.workitem.id.x() |
| call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %vgpr.offset) |
| ret void |
| } |
| |
| ; Variable offset in VGPR with constant add: SDAG checks expect the +3 as offset:3; GlobalISel checks expect a VALU add before the readfirstlane instead. |
| define amdgpu_kernel void @gws_init_vgpr_offset_add(i32 %val) #0 { |
| ; GFX6-SDAG-LABEL: gws_init_vgpr_offset_add: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG: v_readfirstlane_b32 s1, v0 |
| ; GFX6-SDAG: s_lshl_b32 m0, s1, 16 |
| ; GFX6-SDAG: s_waitcnt lgkmcnt(0) |
| ; GFX6-SDAG: v_mov_b32_e32 v0, s0 |
| ; GFX6-SDAG: .LBB5_1: ; =>This Inner Loop Header: Depth=1 |
| ; GFX6-SDAG: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0 |
| ; GFX6-SDAG: ds_gws_init v0 offset:3 gds |
| ; GFX6-SDAG: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG: s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 8, 1) |
| ; GFX6-SDAG: s_cmp_lg_u32 s0, 0 |
| ; GFX6-SDAG: s_cbranch_scc1 .LBB5_1 |
| ; GFX6-SDAG: ; %bb.2: |
| ; GFX6-SDAG: s_endpgm |
| ; |
| ; GFX6-GISEL-LABEL: gws_init_vgpr_offset_add: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL: v_add_i32_e32 v0, vcc, 3, v0 |
| ; GFX6-GISEL: v_readfirstlane_b32 s1, v0 |
| ; GFX6-GISEL: s_lshl_b32 m0, s1, 16 |
| ; GFX6-GISEL: s_waitcnt lgkmcnt(0) |
| ; GFX6-GISEL: v_mov_b32_e32 v0, s0 |
| ; GFX6-GISEL: .LBB5_1: ; =>This Inner Loop Header: Depth=1 |
| ; GFX6-GISEL: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0 |
| ; GFX6-GISEL: ds_gws_init v0 gds |
| ; GFX6-GISEL: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL: s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 8, 1) |
| ; GFX6-GISEL: s_cmp_lg_u32 s0, 0 |
| ; GFX6-GISEL: s_cbranch_scc1 .LBB5_1 |
| ; GFX6-GISEL: ; %bb.2: |
| ; GFX6-GISEL: s_endpgm |
| ; |
| ; GFX9-SDAG-LABEL: gws_init_vgpr_offset_add: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG: v_readfirstlane_b32 s1, v0 |
| ; GFX9-SDAG: s_lshl_b32 m0, s1, 16 |
| ; GFX9-SDAG: s_waitcnt lgkmcnt(0) |
| ; GFX9-SDAG: v_mov_b32_e32 v0, s0 |
| ; GFX9-SDAG: ds_gws_init v0 offset:3 gds |
| ; GFX9-SDAG: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG: s_endpgm |
| ; |
| ; GFX9-GISEL-LABEL: gws_init_vgpr_offset_add: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL: v_add_u32_e32 v0, 3, v0 |
| ; GFX9-GISEL: v_readfirstlane_b32 s1, v0 |
| ; GFX9-GISEL: s_lshl_b32 m0, s1, 16 |
| ; GFX9-GISEL: s_waitcnt lgkmcnt(0) |
| ; GFX9-GISEL: v_mov_b32_e32 v0, s0 |
| ; GFX9-GISEL: ds_gws_init v0 gds |
| ; GFX9-GISEL: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL: s_endpgm |
| ; |
| ; GFX10-SDAG-LABEL: gws_init_vgpr_offset_add: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG: v_readfirstlane_b32 s1, v0 |
| ; GFX10-SDAG: s_lshl_b32 m0, s1, 16 |
| ; GFX10-SDAG: s_waitcnt lgkmcnt(0) |
| ; GFX10-SDAG: v_mov_b32_e32 v0, s0 |
| ; GFX10-SDAG: ds_gws_init v0 offset:3 gds |
| ; GFX10-SDAG: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG: s_endpgm |
| ; |
| ; GFX10-GISEL-LABEL: gws_init_vgpr_offset_add: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL: v_add_nc_u32_e32 v0, 3, v0 |
| ; GFX10-GISEL: v_readfirstlane_b32 s1, v0 |
| ; GFX10-GISEL: s_lshl_b32 m0, s1, 16 |
| ; GFX10-GISEL: s_waitcnt lgkmcnt(0) |
| ; GFX10-GISEL: v_mov_b32_e32 v0, s0 |
| ; GFX10-GISEL: ds_gws_init v0 gds |
| ; GFX10-GISEL: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL: s_endpgm |
| ; |
| ; GFX11-SDAG-LABEL: gws_init_vgpr_offset_add: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG: v_and_b32_e32 v0, 0x3ff, v0 |
| ; GFX11-SDAG: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-SDAG: v_readfirstlane_b32 s1, v0 |
| ; GFX11-SDAG: s_lshl_b32 m0, s1, 16 |
| ; GFX11-SDAG: s_waitcnt lgkmcnt(0) |
| ; GFX11-SDAG: v_mov_b32_e32 v0, s0 |
| ; GFX11-SDAG: ds_gws_init v0 offset:3 gds |
| ; GFX11-SDAG: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG: s_endpgm |
| ; |
| ; GFX11-GISEL-LABEL: gws_init_vgpr_offset_add: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL: v_and_b32_e32 v0, 0x3ff, v0 |
| ; GFX11-GISEL: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX11-GISEL: v_add_nc_u32_e32 v0, 3, v0 |
| ; GFX11-GISEL: v_readfirstlane_b32 s1, v0 |
| ; GFX11-GISEL: s_lshl_b32 m0, s1, 16 |
| ; GFX11-GISEL: s_waitcnt lgkmcnt(0) |
| ; GFX11-GISEL: v_mov_b32_e32 v0, s0 |
| ; GFX11-GISEL: ds_gws_init v0 gds |
| ; GFX11-GISEL: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL: s_endpgm |
| %vgpr.offset.base = call i32 @llvm.amdgcn.workitem.id.x() |
| %vgpr.offset = add i32 %vgpr.offset.base, 3 |
| call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %vgpr.offset) |
| ret void |
| } |
| |
| @lds = internal unnamed_addr addrspace(3) global i32 poison |
| |
| ; Check if m0 initialization is shared. LDS stores surround the GWS op; GFX6 checks show m0 set to -1, then 0, then -1 again, GFX9+ checks only the 0. |
| define amdgpu_kernel void @gws_init_save_m0_init_constant_offset(i32 %val) #0 { |
| ; GFX6-SDAG-LABEL: gws_init_save_m0_init_constant_offset: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG: v_mov_b32_e32 v1, 1 |
| ; GFX6-SDAG: v_mov_b32_e32 v0, 0 |
| ; GFX6-SDAG: s_mov_b32 m0, -1 |
| ; GFX6-SDAG: ds_write_b32 v0, v1 |
| ; GFX6-SDAG: s_waitcnt lgkmcnt(0) |
| ; GFX6-SDAG: v_mov_b32_e32 v1, s0 |
| ; GFX6-SDAG: s_mov_b32 m0, 0 |
| ; GFX6-SDAG: .LBB6_1: ; =>This Inner Loop Header: Depth=1 |
| ; GFX6-SDAG: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0 |
| ; GFX6-SDAG: ds_gws_init v1 offset:10 gds |
| ; GFX6-SDAG: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG: s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 8, 1) |
| ; GFX6-SDAG: s_cmp_lg_u32 s0, 0 |
| ; GFX6-SDAG: s_cbranch_scc1 .LBB6_1 |
| ; GFX6-SDAG: ; %bb.2: |
| ; GFX6-SDAG: v_mov_b32_e32 v1, 2 |
| ; GFX6-SDAG: s_mov_b32 m0, -1 |
| ; GFX6-SDAG: ds_write_b32 v0, v1 |
| ; GFX6-SDAG: s_endpgm |
| ; |
| ; GFX6-GISEL-LABEL: gws_init_save_m0_init_constant_offset: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL: v_mov_b32_e32 v0, 1 |
| ; GFX6-GISEL: v_mov_b32_e32 v1, 0 |
| ; GFX6-GISEL: s_mov_b32 m0, -1 |
| ; GFX6-GISEL: ds_write_b32 v1, v0 |
| ; GFX6-GISEL: s_waitcnt lgkmcnt(0) |
| ; GFX6-GISEL: v_mov_b32_e32 v0, s0 |
| ; GFX6-GISEL: s_mov_b32 m0, 0 |
| ; GFX6-GISEL: .LBB6_1: ; =>This Inner Loop Header: Depth=1 |
| ; GFX6-GISEL: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0 |
| ; GFX6-GISEL: ds_gws_init v0 offset:10 gds |
| ; GFX6-GISEL: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL: s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 8, 1) |
| ; GFX6-GISEL: s_cmp_lg_u32 s0, 0 |
| ; GFX6-GISEL: s_cbranch_scc1 .LBB6_1 |
| ; GFX6-GISEL: ; %bb.2: |
| ; GFX6-GISEL: v_mov_b32_e32 v0, 2 |
| ; GFX6-GISEL: v_mov_b32_e32 v1, 0 |
| ; GFX6-GISEL: s_mov_b32 m0, -1 |
| ; GFX6-GISEL: ds_write_b32 v1, v0 |
| ; GFX6-GISEL: s_endpgm |
| ; |
| ; GFX9-LABEL: gws_init_save_m0_init_constant_offset: |
| ; GFX9: ; %bb.0: |
| ; GFX9: v_mov_b32_e32 v0, 1 |
| ; GFX9: v_mov_b32_e32 v1, 0 |
| ; GFX9: ds_write_b32 v1, v0 |
| ; GFX9: s_mov_b32 m0, 0 |
| ; GFX9: s_waitcnt lgkmcnt(0) |
| ; GFX9: v_mov_b32_e32 v0, s0 |
| ; GFX9: ds_gws_init v0 offset:10 gds |
| ; GFX9: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9: v_mov_b32_e32 v0, 2 |
| ; GFX9: ds_write_b32 v1, v0 |
| ; GFX9: s_endpgm |
| ; |
| ; GFX10-LABEL: gws_init_save_m0_init_constant_offset: |
| ; GFX10: ; %bb.0: |
| ; GFX10: v_mov_b32_e32 v0, 1 |
| ; GFX10: v_mov_b32_e32 v1, 0 |
| ; GFX10: v_mov_b32_e32 v3, 2 |
| ; GFX10: s_mov_b32 m0, 0 |
| ; GFX10: ds_write_b32 v1, v0 |
| ; GFX10: s_waitcnt lgkmcnt(0) |
| ; GFX10: v_mov_b32_e32 v2, s0 |
| ; GFX10: ds_gws_init v2 offset:10 gds |
| ; GFX10: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10: ds_write_b32 v1, v3 |
| ; GFX10: s_endpgm |
| ; |
| ; GFX11-LABEL: gws_init_save_m0_init_constant_offset: |
| ; GFX11: ; %bb.0: |
| ; GFX11: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 0 |
| ; GFX11: s_mov_b32 m0, 0 |
| ; GFX11: v_mov_b32_e32 v3, 2 |
| ; GFX11: s_waitcnt lgkmcnt(0) |
| ; GFX11: v_mov_b32_e32 v2, s0 |
| ; GFX11: ds_gws_init v2 offset:10 gds |
| ; GFX11: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11: s_endpgm |
| store volatile i32 1, ptr addrspace(3) @lds |
| call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 10) |
| store i32 2, ptr addrspace(3) @lds |
| ret void |
| } |
| |
| ; Non-kernel function: ds_gws_init reads its operand straight from v0 and all counters are waited on to zero before the s_setpc_b64 return. |
| define void @gws_init_lgkmcnt(i32 %val) { |
| ; GFX6-LABEL: gws_init_lgkmcnt: |
| ; GFX6: ; %bb.0: |
| ; GFX6: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6: s_mov_b32 m0, 0 |
| ; GFX6: .LBB7_1: ; =>This Inner Loop Header: Depth=1 |
| ; GFX6: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0 |
| ; GFX6: ds_gws_init v0 gds |
| ; GFX6: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6: s_getreg_b32 s4, hwreg(HW_REG_TRAPSTS, 8, 1) |
| ; GFX6: s_cmp_lg_u32 s4, 0 |
| ; GFX6: s_cbranch_scc1 .LBB7_1 |
| ; GFX6: ; %bb.2: |
| ; GFX6: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: gws_init_lgkmcnt: |
| ; GFX9: ; %bb.0: |
| ; GFX9: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9: s_mov_b32 m0, 0 |
| ; GFX9: s_nop 0 |
| ; GFX9: ds_gws_init v0 gds |
| ; GFX9: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: gws_init_lgkmcnt: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011: s_mov_b32 m0, 0 |
| ; GFX1011: ds_gws_init v0 gds |
| ; GFX1011: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011: s_setpc_b64 s[30:31] |
| call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 0) |
| ret void |
| } |
| |
| ; Does not imply memory fence on its own: after the global store, only lgkmcnt (plus expcnt on GFX6) is waited on before ds_gws_init, never vmcnt. |
| define amdgpu_kernel void @gws_init_wait_before(i32 %val, ptr addrspace(1) %ptr) #0 { |
| ; GFX6-SDAG-LABEL: gws_init_wait_before: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG: s_mov_b32 s3, 0x100f000 |
| ; GFX6-SDAG: s_mov_b32 s2, -1 |
| ; GFX6-SDAG: v_mov_b32_e32 v0, 0 |
| ; GFX6-SDAG: s_waitcnt lgkmcnt(0) |
| ; GFX6-SDAG: s_waitcnt expcnt(0) |
| ; GFX6-SDAG: v_mov_b32_e32 v0, s4 |
| ; GFX6-SDAG: s_mov_b32 m0, 0 |
| ; GFX6-SDAG: .LBB8_1: ; =>This Inner Loop Header: Depth=1 |
| ; GFX6-SDAG: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0 |
| ; GFX6-SDAG: ds_gws_init v0 offset:7 gds |
| ; GFX6-SDAG: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG: s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 8, 1) |
| ; GFX6-SDAG: s_cmp_lg_u32 s0, 0 |
| ; GFX6-SDAG: s_cbranch_scc1 .LBB8_1 |
| ; GFX6-SDAG: ; %bb.2: |
| ; GFX6-SDAG: s_endpgm |
| ; |
| ; GFX6-GISEL-LABEL: gws_init_wait_before: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL: v_mov_b32_e32 v0, 0 |
| ; GFX6-GISEL: s_mov_b32 s2, -1 |
| ; GFX6-GISEL: s_mov_b32 s3, 0x100f000 |
| ; GFX6-GISEL: s_waitcnt lgkmcnt(0) |
| ; GFX6-GISEL: s_waitcnt expcnt(0) |
| ; GFX6-GISEL: v_mov_b32_e32 v0, s4 |
| ; GFX6-GISEL: s_mov_b32 m0, 0 |
| ; GFX6-GISEL: .LBB8_1: ; =>This Inner Loop Header: Depth=1 |
| ; GFX6-GISEL: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0 |
| ; GFX6-GISEL: ds_gws_init v0 offset:7 gds |
| ; GFX6-GISEL: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL: s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 8, 1) |
| ; GFX6-GISEL: s_cmp_lg_u32 s0, 0 |
| ; GFX6-GISEL: s_cbranch_scc1 .LBB8_1 |
| ; GFX6-GISEL: ; %bb.2: |
| ; GFX6-GISEL: s_endpgm |
| ; |
| ; GFX9-LABEL: gws_init_wait_before: |
| ; GFX9: ; %bb.0: |
| ; GFX9: v_mov_b32_e32 v0, 0 |
| ; GFX9: s_mov_b32 m0, 0 |
| ; GFX9: s_waitcnt lgkmcnt(0) |
| ; GFX9: v_mov_b32_e32 v0, s2 |
| ; GFX9: ds_gws_init v0 offset:7 gds |
| ; GFX9: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9: s_endpgm |
| ; |
| ; GFX1011-LABEL: gws_init_wait_before: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011: s_clause 0x1 |
| ; GFX1011: v_mov_b32_e32 v0, 0 |
| ; GFX1011: s_mov_b32 m0, 0 |
| ; GFX1011: s_waitcnt lgkmcnt(0) |
| ; GFX1011: v_mov_b32_e32 v1, s2 |
| ; GFX1011: ds_gws_init v1 offset:7 gds |
| ; GFX1011: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011: s_endpgm |
| store i32 0, ptr addrspace(1) %ptr |
| call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 7) |
| ret void |
| } |
| |
| declare void @llvm.amdgcn.ds.gws.init(i32, i32) #1 |
| declare i32 @llvm.amdgcn.workitem.id.x() #2 |
| |
| attributes #0 = { nounwind } |
| attributes #1 = { convergent inaccessiblememonly nounwind writeonly } |
| attributes #2 = { nounwind readnone speculatable } |