| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck %s |
| |
| ; Baseline kernel with no occupancy attributes. It computes |
| ; (workitem.id.x + 255) / 256 and writes the result with a raw buffer store. |
| ; The expected output contains one s_delay_alu between the v_and and the |
| ; v_add that consumes its result. |
| define amdgpu_kernel void @delay_alu() { |
| ; CHECK-LABEL: delay_alu: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; CHECK-NEXT: v_add_nc_u32_e32 v0, 0xff, v0 |
| ; CHECK-NEXT: v_lshrrev_b32_e32 v0, 8, v0 |
| ; CHECK-NEXT: buffer_store_b32 v0, v0, s[0:3], null offen |
| ; CHECK-NEXT: s_endpgm |
| entry: |
|   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() |
|   %biased = add i32 %tid, 255 |
|   %quot = sdiv i32 %biased, 256 |
|   ; The buffer resource and the offset operand are poison. |
|   tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32( |
|       i32 %quot, ptr addrspace(8) poison, i32 poison, i32 0, i32 0) |
|   ret void |
| } |
| |
| ; Same body as @delay_alu, but the kernel carries "amdgpu-waves-per-eu"="1,8". |
| ; The expected output still contains the s_delay_alu between the v_and and |
| ; the dependent v_add. |
| define amdgpu_kernel void @delay_alu_waves_1_8() "amdgpu-waves-per-eu"="1,8" { |
| ; CHECK-LABEL: delay_alu_waves_1_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; CHECK-NEXT: v_add_nc_u32_e32 v0, 0xff, v0 |
| ; CHECK-NEXT: v_lshrrev_b32_e32 v0, 8, v0 |
| ; CHECK-NEXT: buffer_store_b32 v0, v0, s[0:3], null offen |
| ; CHECK-NEXT: s_endpgm |
| entry: |
|   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() |
|   %biased = add i32 %tid, 255 |
|   %quot = sdiv i32 %biased, 256 |
|   ; The buffer resource and the offset operand are poison. |
|   tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32( |
|       i32 %quot, ptr addrspace(8) poison, i32 poison, i32 0, i32 0) |
|   ret void |
| } |
| |
| ; Same body as @delay_alu, with "amdgpu-waves-per-eu"="1,1" and no explicit |
| ; flat work-group size. The expected output still contains the s_delay_alu. |
| ; NOTE(review): presumably the default work-group size keeps the effective |
| ; occupancy above one wave per EU, so the hint is retained - confirm against |
| ; the backend's waves-per-eu handling. |
| define amdgpu_kernel void @delay_alu_waves_1_1() "amdgpu-waves-per-eu"="1,1" { |
| ; CHECK-LABEL: delay_alu_waves_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; CHECK-NEXT: v_add_nc_u32_e32 v0, 0xff, v0 |
| ; CHECK-NEXT: v_lshrrev_b32_e32 v0, 8, v0 |
| ; CHECK-NEXT: buffer_store_b32 v0, v0, s[0:3], null offen |
| ; CHECK-NEXT: s_endpgm |
| entry: |
|   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() |
|   %biased = add i32 %tid, 255 |
|   %quot = sdiv i32 %biased, 256 |
|   ; The buffer resource and the offset operand are poison. |
|   tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32( |
|       i32 %quot, ptr addrspace(8) poison, i32 poison, i32 0, i32 0) |
|   ret void |
| } |
| |
| ; Same body as @delay_alu, with both "amdgpu-flat-work-group-size"="1,128" and |
| ; "amdgpu-waves-per-eu"="1,1". This is the only variant whose expected output |
| ; has no s_delay_alu: the v_and is followed directly by the v_add. |
| ; NOTE(review): presumably the hint is dropped because at most one wave can |
| ; occupy an EU under these attributes - confirm against the backend. |
| define amdgpu_kernel void @delay_alu_waves_1_1_wgsize() "amdgpu-flat-work-group-size"="1,128" "amdgpu-waves-per-eu"="1,1" { |
| ; CHECK-LABEL: delay_alu_waves_1_1_wgsize: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 |
| ; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| ; CHECK-NEXT: v_add_nc_u32_e32 v0, 0xff, v0 |
| ; CHECK-NEXT: v_lshrrev_b32_e32 v0, 8, v0 |
| ; CHECK-NEXT: buffer_store_b32 v0, v0, s[0:3], null offen |
| ; CHECK-NEXT: s_endpgm |
| entry: |
|   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() |
|   %biased = add i32 %tid, 255 |
|   %quot = sdiv i32 %biased, 256 |
|   ; The buffer resource and the offset operand are poison. |
|   tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32( |
|       i32 %quot, ptr addrspace(8) poison, i32 poison, i32 0, i32 0) |
|   ret void |
| } |