| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; gfx8 requires knowing that no overflow happens in order to fold the constant offset into the addressing mode |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s |
| |
; No-flags case: neither %gep0 nor %gep1 carries any GEP flag.
; %gep0 indexes a [32 x i32] addrspace(5) alloca by the variable %idx; %gep1
; then steps 4 more i32 elements (16 bytes) before the volatile store of %val.
; The gfx803 checks expect the 16 to be added by a separate v_add_u32, while
; the gfx900 checks expect it folded into the store as offset:16.
| define void @gep_noflags_alloca(i32 %idx, i32 %val) #0 { |
| ; GFX8-LABEL: gep_noflags_alloca: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32 |
| ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 |
| ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0 |
| ; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: gep_noflags_alloca: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32 |
| ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2 |
| ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| %alloca = alloca [32 x i32], addrspace(5) |
| %gep0 = getelementptr [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx |
| %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4 |
| store volatile i32 %val, ptr addrspace(5) %gep1 |
| ret void |
| } |
| |
; `inbounds` case: only %gep0 (the variable-index GEP into the [32 x i32]
; addrspace(5) alloca) carries `inbounds`; %gep1, which adds the constant
; 4 x i32 = 16 bytes, carries no flags. %val is stored volatile to the result.
; The gfx803 checks expect the 16 to be added by a separate v_add_u32, while
; the gfx900 checks expect it folded into the store as offset:16.
| define void @gep_inbounds_alloca(i32 %idx, i32 %val) #0 { |
| ; GFX8-LABEL: gep_inbounds_alloca: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32 |
| ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 |
| ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0 |
| ; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: gep_inbounds_alloca: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32 |
| ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2 |
| ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| %alloca = alloca [32 x i32], addrspace(5) |
| %gep0 = getelementptr inbounds [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx |
| %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4 |
| store volatile i32 %val, ptr addrspace(5) %gep1 |
| ret void |
| } |
| |
; `nuw` case: only %gep0 (the variable-index GEP into the [32 x i32]
; addrspace(5) alloca) carries `nuw`; %gep1, which adds the constant
; 4 x i32 = 16 bytes, carries no flags. %val is stored volatile to the result.
; The gfx803 checks expect the 16 to be added by a separate v_add_u32, while
; the gfx900 checks expect it folded into the store as offset:16.
| define void @gep_nuw_alloca(i32 %idx, i32 %val) #0 { |
| ; GFX8-LABEL: gep_nuw_alloca: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32 |
| ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 |
| ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0 |
| ; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: gep_nuw_alloca: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32 |
| ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2 |
| ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| %alloca = alloca [32 x i32], addrspace(5) |
| %gep0 = getelementptr nuw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx |
| %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4 |
| store volatile i32 %val, ptr addrspace(5) %gep1 |
| ret void |
| } |
| |
; `nusw` case: only %gep0 (the variable-index GEP into the [32 x i32]
; addrspace(5) alloca) carries `nusw`; %gep1, which adds the constant
; 4 x i32 = 16 bytes, carries no flags. %val is stored volatile to the result.
; The gfx803 checks expect the 16 to be added by a separate v_add_u32, while
; the gfx900 checks expect it folded into the store as offset:16.
| define void @gep_nusw_alloca(i32 %idx, i32 %val) #0 { |
| ; GFX8-LABEL: gep_nusw_alloca: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32 |
| ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 |
| ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0 |
| ; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: gep_nusw_alloca: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32 |
| ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2 |
| ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| %alloca = alloca [32 x i32], addrspace(5) |
| %gep0 = getelementptr nusw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx |
| %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4 |
| store volatile i32 %val, ptr addrspace(5) %gep1 |
| ret void |
| } |
| |
; `inbounds nuw` case: only %gep0 (the variable-index GEP into the [32 x i32]
; addrspace(5) alloca) carries both flags; %gep1, which adds the constant
; 4 x i32 = 16 bytes, carries no flags. %val is stored volatile to the result.
; The gfx803 checks expect the 16 to be added by a separate v_add_u32, while
; the gfx900 checks expect it folded into the store as offset:16.
| define void @gep_inbounds_nuw_alloca(i32 %idx, i32 %val) #0 { |
| ; GFX8-LABEL: gep_inbounds_nuw_alloca: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32 |
| ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 |
| ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0 |
| ; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: gep_inbounds_nuw_alloca: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32 |
| ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2 |
| ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| %alloca = alloca [32 x i32], addrspace(5) |
| %gep0 = getelementptr inbounds nuw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx |
| %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4 |
| store volatile i32 %val, ptr addrspace(5) %gep1 |
| ret void |
| } |
| |
; `nusw nuw` case: only %gep0 (the variable-index GEP into the [32 x i32]
; addrspace(5) alloca) carries both flags; %gep1, which adds the constant
; 4 x i32 = 16 bytes, carries no flags. %val is stored volatile to the result.
; The gfx803 checks expect the 16 to be added by a separate v_add_u32, while
; the gfx900 checks expect it folded into the store as offset:16.
| define void @gep_nusw_nuw_alloca(i32 %idx, i32 %val) #0 { |
| ; GFX8-LABEL: gep_nusw_nuw_alloca: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32 |
| ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 |
| ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0 |
| ; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: gep_nusw_nuw_alloca: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32 |
| ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2 |
| ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| %alloca = alloca [32 x i32], addrspace(5) |
| %gep0 = getelementptr nusw nuw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx |
| %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4 |
| store volatile i32 %val, ptr addrspace(5) %gep1 |
| ret void |
| } |
| |
; Single `inbounds nuw` GEP into a [5 x [33 x i32]] addrspace(5) alloca with a
; variable row index %idx and a constant column index 4; %val is stored
; volatile to the result.
; One row is 33 * 4 = 132 (0x84) bytes, which is not a power of two, so both
; check prefixes expect a v_mul_lo_u32 by 0x84 rather than a shift.
; The constant part (4 * 4 = 16 bytes) is expected as a separate v_add_u32 in
; the gfx803 checks and as offset:16 on the store in the gfx900 checks.
| define void @gep_inbounds_nuw_alloca_nonpow2_scale(i32 %idx, i32 %val) #0 { |
| ; GFX8-LABEL: gep_inbounds_nuw_alloca_nonpow2_scale: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_movk_i32 s4, 0x84 |
| ; GFX8-NEXT: v_mul_lo_u32 v0, v0, s4 |
| ; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32 |
| ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 |
| ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0 |
| ; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: gep_inbounds_nuw_alloca_nonpow2_scale: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_movk_i32 s4, 0x84 |
| ; GFX9-NEXT: v_mul_lo_u32 v0, v0, s4 |
| ; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32 |
| ; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 |
| ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| %alloca = alloca [5 x [33 x i32]], align 4, addrspace(5) |
| %gep1 = getelementptr inbounds nuw [5 x [33 x i32]], ptr addrspace(5) %alloca, i32 0, i32 %idx, i32 4 |
| store volatile i32 %val, ptr addrspace(5) %gep1, align 4 |
| ret void |
| } |
| |
; Attribute group #0 (nounwind), referenced by the function definitions in this file.
| attributes #0 = { nounwind } |