| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s |
| |
| ; LDS is allocated per-kernel. Module scope variables are gathered into a struct which is |
| ; allocated at address zero, if used by the kernel. Kernel scope variables are gathered into |
| ; a per-kernel struct and allocated immediately after the module scope. |
| ; This test checks that the module and kernel scope variables are allocated in deterministic |
| ; order without spurious alignment padding between the two. |
| |
| ; External LDS is checked because it influences LDS padding in general and because it will |
| ; not be moved into either module or kernel struct |
| |
| ; i16 (2-byte) addrspace(3) variable; written by @use_module and by the module_1_* kernels. |
| @module_variable = addrspace(3) global i16 poison |
| |
| ; Variables are allocated into module scope block when used by a non-kernel function |
| ; Non-kernel helper that stores i16 0 to @module_variable (attribute #0 = noinline). |
| ; In the expected ISA, v0 = 0 serves as both the LDS address and the stored value. |
| define void @use_module() #0 { |
| ; CHECK-LABEL: use_module: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0 |
| ; CHECK-NEXT: ds_write_b16 v0, v0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| store i16 0, ptr addrspace(3) @module_variable |
| ret void |
| } |
| |
| ; Variables only used by kernels are specialised and allocated per-kernel |
| ; Both are i16 addrspace(3) variables; @kernel_overalign additionally requests 4-byte alignment. |
| @kernel_normal = addrspace(3) global i16 poison |
| @kernel_overalign = addrspace(3) global i16 poison, align 4 |
| |
| ; External LDS shall not introduce padding between module and kernel scope variables |
| ; Zero-length external float arrays in addrspace(3); @extern_overalign requests 8-byte alignment. |
| @extern_normal = external addrspace(3) global [0 x float] |
| @extern_overalign = external addrspace(3) global [0 x float], align 8 |
| |
| |
| ; External LDS does not influence the frame when called indirectly either |
| ; Non-kernel helper (noinline via #0) that stores float 0x40091EB860000000 to element 0 of |
| ; @extern_normal; the expected ISA materialises that value as the 32-bit pattern 0x4048f5c3. |
| ; The store address is loaded from llvm.amdgcn.dynlds.offset.table, indexed by s15 (scaled by 4). |
| define void @use_extern_normal() #0 { |
| ; CHECK-LABEL: use_extern_normal: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_getpc_b64 s[6:7] |
| ; CHECK-NEXT: s_add_u32 s6, s6, llvm.amdgcn.dynlds.offset.table@rel32@lo+4 |
| ; CHECK-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.dynlds.offset.table@rel32@hi+12 |
| ; CHECK-NEXT: s_mov_b32 s4, s15 |
| ; CHECK-NEXT: s_ashr_i32 s5, s15, 31 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0x4048f5c3 |
| ; CHECK-NEXT: s_lshl_b64 s[4:5], s[4:5], 2 |
| ; CHECK-NEXT: s_add_u32 s4, s4, s6 |
| ; CHECK-NEXT: s_addc_u32 s5, s5, s7 |
| ; CHECK-NEXT: s_load_dword s4, s[4:5], 0x0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s4 |
| ; CHECK-NEXT: ds_write_b32 v1, v0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| %arrayidx = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 0 |
| store float 0x40091EB860000000, ptr addrspace(3) %arrayidx |
| ret void |
| } |
| |
| ; Non-kernel helper (noinline via #0) that stores float 42.0 to element 1 of @extern_overalign. |
| ; The expected ISA loads the base address from llvm.amdgcn.dynlds.offset.table, indexed by s15, |
| ; and applies the element-1 displacement as offset:4 on the LDS write. |
| define void @use_extern_overalign() #0 { |
| ; CHECK-LABEL: use_extern_overalign: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_getpc_b64 s[6:7] |
| ; CHECK-NEXT: s_add_u32 s6, s6, llvm.amdgcn.dynlds.offset.table@rel32@lo+4 |
| ; CHECK-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.dynlds.offset.table@rel32@hi+12 |
| ; CHECK-NEXT: s_mov_b32 s4, s15 |
| ; CHECK-NEXT: s_ashr_i32 s5, s15, 31 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0x42280000 |
| ; CHECK-NEXT: s_lshl_b64 s[4:5], s[4:5], 2 |
| ; CHECK-NEXT: s_add_u32 s4, s4, s6 |
| ; CHECK-NEXT: s_addc_u32 s5, s5, s7 |
| ; CHECK-NEXT: s_load_dword s4, s[4:5], 0x0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s4 |
| ; CHECK-NEXT: ds_write_b32 v1, v0 offset:4 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| %arrayidx = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 1 |
| store float 4.200000e+01, ptr addrspace(3) %arrayidx |
| ret void |
| } |
| |
| |
| ; First 2^3 of 2^4 cases encoded into function names: |
| ; no use of extern variable from nested function (the kernel accesses it directly) |
| ; module_variable not-used/used (module_0/module_1) |
| ; kernel variable normal/overaligned |
| ; extern variable normal/overaligned |
| |
| ; Stores i16 2 to @kernel_normal and float 0.0 to @extern_normal[%idx]; @module_variable is not used. |
| ; Expected ISA: kernel variable at LDS address 0, extern array addressed as (%idx << 2) + 4. |
| define amdgpu_kernel void @module_0_kernel_normal_extern_normal(i32 %idx) { |
| ; CHECK-LABEL: module_0_kernel_normal_extern_normal: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dword s0, s[8:9], 0x0 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 2 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_lshl_b32 s0, s0, 2 |
| ; CHECK-NEXT: s_add_i32 s0, s0, 4 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, s0 |
| ; CHECK-NEXT: ds_write_b16 v1, v0 |
| ; CHECK-NEXT: ds_write_b32 v2, v1 |
| ; CHECK-NEXT: s_endpgm |
| store i16 2, ptr addrspace(3) @kernel_normal |
| |
| %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 %idx |
| store float 0.0, ptr addrspace(3) %arrayidx1 |
| ret void |
| } |
| |
| ; Calls @use_module, then stores i16 1 to @module_variable, i16 2 to @kernel_normal and |
| ; float 0.0 to @extern_normal[%idx]. |
| ; Expected ISA: module variable at LDS address 0, kernel variable at offset 2, |
| ; extern array addressed as (%idx << 2) + 4. |
| define amdgpu_kernel void @module_1_kernel_normal_extern_normal(i32 %idx) { |
| ; CHECK-LABEL: module_1_kernel_normal_extern_normal: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_add_u32 s12, s12, s17 |
| ; CHECK-NEXT: s_mov_b32 s32, 0 |
| ; CHECK-NEXT: s_addc_u32 s13, s13, 0 |
| ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12 |
| ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 |
| ; CHECK-NEXT: s_add_u32 s0, s0, s17 |
| ; CHECK-NEXT: s_addc_u32 s1, s1, 0 |
| ; CHECK-NEXT: s_add_u32 s12, s8, 8 |
| ; CHECK-NEXT: s_addc_u32 s13, s9, 0 |
| ; CHECK-NEXT: s_getpc_b64 s[18:19] |
| ; CHECK-NEXT: s_add_u32 s18, s18, use_module@gotpcrel32@lo+4 |
| ; CHECK-NEXT: s_addc_u32 s19, s19, use_module@gotpcrel32@hi+12 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
| ; CHECK-NEXT: s_load_dwordx2 s[20:21], s[18:19], 0x0 |
| ; CHECK-NEXT: s_load_dword s17, s[8:9], 0x0 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; CHECK-NEXT: s_mov_b64 s[8:9], s[12:13] |
| ; CHECK-NEXT: s_mov_b32 s12, s14 |
| ; CHECK-NEXT: s_mov_b32 s13, s15 |
| ; CHECK-NEXT: s_mov_b32 s14, s16 |
| ; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_swappc_b64 s[30:31], s[20:21] |
| ; CHECK-NEXT: s_lshl_b32 s4, s17, 2 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 1 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 0 |
| ; CHECK-NEXT: s_add_i32 s4, s4, 4 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 2 |
| ; CHECK-NEXT: v_mov_b32_e32 v3, s4 |
| ; CHECK-NEXT: ds_write_b16 v1, v0 |
| ; CHECK-NEXT: ds_write_b16 v1, v2 offset:2 |
| ; CHECK-NEXT: ds_write_b32 v3, v1 |
| ; CHECK-NEXT: s_endpgm |
| call void @use_module() |
| store i16 1, ptr addrspace(3) @module_variable |
| |
| store i16 2, ptr addrspace(3) @kernel_normal |
| |
| %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 %idx |
| store float 0.0, ptr addrspace(3) %arrayidx1 |
| ret void |
| } |
| |
| ; Stores i16 2 to @kernel_overalign and float 0.0 to @extern_normal[%idx]; @module_variable is not used. |
| ; Expected ISA: kernel variable at LDS address 0, extern array addressed as (%idx << 2) + 4. |
| define amdgpu_kernel void @module_0_kernel_overalign_extern_normal(i32 %idx) { |
| ; CHECK-LABEL: module_0_kernel_overalign_extern_normal: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dword s0, s[8:9], 0x0 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 2 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_lshl_b32 s0, s0, 2 |
| ; CHECK-NEXT: s_add_i32 s0, s0, 4 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, s0 |
| ; CHECK-NEXT: ds_write_b16 v1, v0 |
| ; CHECK-NEXT: ds_write_b32 v2, v1 |
| ; CHECK-NEXT: s_endpgm |
| store i16 2, ptr addrspace(3) @kernel_overalign |
| |
| %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 %idx |
| store float 0.0, ptr addrspace(3) %arrayidx1 |
| ret void |
| } |
| |
| ; Calls @use_module, then stores i16 1 to @module_variable, i16 2 to @kernel_overalign and |
| ; float 0.0 to @extern_normal[%idx]. |
| ; Expected ISA: module variable at LDS address 0, kernel variable at offset 4, |
| ; extern array addressed as (%idx << 2) + 8. |
| define amdgpu_kernel void @module_1_kernel_overalign_extern_normal(i32 %idx) { |
| ; CHECK-LABEL: module_1_kernel_overalign_extern_normal: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_add_u32 s12, s12, s17 |
| ; CHECK-NEXT: s_mov_b32 s32, 0 |
| ; CHECK-NEXT: s_addc_u32 s13, s13, 0 |
| ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12 |
| ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 |
| ; CHECK-NEXT: s_add_u32 s0, s0, s17 |
| ; CHECK-NEXT: s_addc_u32 s1, s1, 0 |
| ; CHECK-NEXT: s_add_u32 s12, s8, 8 |
| ; CHECK-NEXT: s_addc_u32 s13, s9, 0 |
| ; CHECK-NEXT: s_getpc_b64 s[18:19] |
| ; CHECK-NEXT: s_add_u32 s18, s18, use_module@gotpcrel32@lo+4 |
| ; CHECK-NEXT: s_addc_u32 s19, s19, use_module@gotpcrel32@hi+12 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
| ; CHECK-NEXT: s_load_dwordx2 s[20:21], s[18:19], 0x0 |
| ; CHECK-NEXT: s_load_dword s17, s[8:9], 0x0 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; CHECK-NEXT: s_mov_b64 s[8:9], s[12:13] |
| ; CHECK-NEXT: s_mov_b32 s12, s14 |
| ; CHECK-NEXT: s_mov_b32 s13, s15 |
| ; CHECK-NEXT: s_mov_b32 s14, s16 |
| ; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_swappc_b64 s[30:31], s[20:21] |
| ; CHECK-NEXT: s_lshl_b32 s4, s17, 2 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 1 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 0 |
| ; CHECK-NEXT: s_add_i32 s4, s4, 8 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 2 |
| ; CHECK-NEXT: v_mov_b32_e32 v3, s4 |
| ; CHECK-NEXT: ds_write_b16 v1, v0 |
| ; CHECK-NEXT: ds_write_b16 v1, v2 offset:4 |
| ; CHECK-NEXT: ds_write_b32 v3, v1 |
| ; CHECK-NEXT: s_endpgm |
| call void @use_module() |
| store i16 1, ptr addrspace(3) @module_variable |
| |
| store i16 2, ptr addrspace(3) @kernel_overalign |
| |
| %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 %idx |
| store float 0.0, ptr addrspace(3) %arrayidx1 |
| ret void |
| } |
| |
| ; Stores i16 2 to @kernel_normal and float 0.0 to @extern_overalign[%idx]; @module_variable is not used. |
| ; Expected ISA: kernel variable at LDS address 0, extern array addressed as (%idx << 2) + 8. |
| define amdgpu_kernel void @module_0_kernel_normal_extern_overalign(i32 %idx) { |
| ; CHECK-LABEL: module_0_kernel_normal_extern_overalign: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dword s0, s[8:9], 0x0 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 2 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_lshl_b32 s0, s0, 2 |
| ; CHECK-NEXT: s_add_i32 s0, s0, 8 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, s0 |
| ; CHECK-NEXT: ds_write_b16 v1, v0 |
| ; CHECK-NEXT: ds_write_b32 v2, v1 |
| ; CHECK-NEXT: s_endpgm |
| store i16 2, ptr addrspace(3) @kernel_normal |
| |
| %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 %idx |
| store float 0.0, ptr addrspace(3) %arrayidx1 |
| ret void |
| } |
| |
| ; Calls @use_module, then stores i16 1 to @module_variable, i16 2 to @kernel_normal and |
| ; float 0.0 to @extern_overalign[%idx]. |
| ; Expected ISA: module variable at LDS address 0, kernel variable at offset 2, |
| ; extern array addressed as (%idx << 2) + 8. |
| define amdgpu_kernel void @module_1_kernel_normal_extern_overalign(i32 %idx) { |
| ; CHECK-LABEL: module_1_kernel_normal_extern_overalign: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_add_u32 s12, s12, s17 |
| ; CHECK-NEXT: s_mov_b32 s32, 0 |
| ; CHECK-NEXT: s_addc_u32 s13, s13, 0 |
| ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12 |
| ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 |
| ; CHECK-NEXT: s_add_u32 s0, s0, s17 |
| ; CHECK-NEXT: s_addc_u32 s1, s1, 0 |
| ; CHECK-NEXT: s_add_u32 s12, s8, 8 |
| ; CHECK-NEXT: s_addc_u32 s13, s9, 0 |
| ; CHECK-NEXT: s_getpc_b64 s[18:19] |
| ; CHECK-NEXT: s_add_u32 s18, s18, use_module@gotpcrel32@lo+4 |
| ; CHECK-NEXT: s_addc_u32 s19, s19, use_module@gotpcrel32@hi+12 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
| ; CHECK-NEXT: s_load_dwordx2 s[20:21], s[18:19], 0x0 |
| ; CHECK-NEXT: s_load_dword s17, s[8:9], 0x0 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; CHECK-NEXT: s_mov_b64 s[8:9], s[12:13] |
| ; CHECK-NEXT: s_mov_b32 s12, s14 |
| ; CHECK-NEXT: s_mov_b32 s13, s15 |
| ; CHECK-NEXT: s_mov_b32 s14, s16 |
| ; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_swappc_b64 s[30:31], s[20:21] |
| ; CHECK-NEXT: s_lshl_b32 s4, s17, 2 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 1 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 0 |
| ; CHECK-NEXT: s_add_i32 s4, s4, 8 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 2 |
| ; CHECK-NEXT: v_mov_b32_e32 v3, s4 |
| ; CHECK-NEXT: ds_write_b16 v1, v0 |
| ; CHECK-NEXT: ds_write_b16 v1, v2 offset:2 |
| ; CHECK-NEXT: ds_write_b32 v3, v1 |
| ; CHECK-NEXT: s_endpgm |
| call void @use_module() |
| store i16 1, ptr addrspace(3) @module_variable |
| |
| store i16 2, ptr addrspace(3) @kernel_normal |
| |
| %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 %idx |
| store float 0.0, ptr addrspace(3) %arrayidx1 |
| ret void |
| } |
| |
| ; Stores i16 2 to @kernel_overalign and float 0.0 to @extern_overalign[%idx]; @module_variable is not used. |
| ; Expected ISA: kernel variable at LDS address 0, extern array addressed as (%idx << 2) + 8. |
| define amdgpu_kernel void @module_0_kernel_overalign_extern_overalign(i32 %idx) { |
| ; CHECK-LABEL: module_0_kernel_overalign_extern_overalign: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_load_dword s0, s[8:9], 0x0 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 2 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_lshl_b32 s0, s0, 2 |
| ; CHECK-NEXT: s_add_i32 s0, s0, 8 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, s0 |
| ; CHECK-NEXT: ds_write_b16 v1, v0 |
| ; CHECK-NEXT: ds_write_b32 v2, v1 |
| ; CHECK-NEXT: s_endpgm |
| store i16 2, ptr addrspace(3) @kernel_overalign |
| |
| %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 %idx |
| store float 0.0, ptr addrspace(3) %arrayidx1 |
| ret void |
| } |
| |
| ; Calls @use_module, then stores i16 1 to @module_variable, i16 2 to @kernel_overalign and |
| ; float 0.0 to @extern_overalign[%idx]. |
| ; Expected ISA: module variable at LDS address 0, kernel variable at offset 4, |
| ; extern array addressed as (%idx << 2) + 8. |
| define amdgpu_kernel void @module_1_kernel_overalign_extern_overalign(i32 %idx) { |
| ; CHECK-LABEL: module_1_kernel_overalign_extern_overalign: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_add_u32 s12, s12, s17 |
| ; CHECK-NEXT: s_mov_b32 s32, 0 |
| ; CHECK-NEXT: s_addc_u32 s13, s13, 0 |
| ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12 |
| ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 |
| ; CHECK-NEXT: s_add_u32 s0, s0, s17 |
| ; CHECK-NEXT: s_addc_u32 s1, s1, 0 |
| ; CHECK-NEXT: s_add_u32 s12, s8, 8 |
| ; CHECK-NEXT: s_addc_u32 s13, s9, 0 |
| ; CHECK-NEXT: s_getpc_b64 s[18:19] |
| ; CHECK-NEXT: s_add_u32 s18, s18, use_module@gotpcrel32@lo+4 |
| ; CHECK-NEXT: s_addc_u32 s19, s19, use_module@gotpcrel32@hi+12 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
| ; CHECK-NEXT: s_load_dwordx2 s[20:21], s[18:19], 0x0 |
| ; CHECK-NEXT: s_load_dword s17, s[8:9], 0x0 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; CHECK-NEXT: s_mov_b64 s[8:9], s[12:13] |
| ; CHECK-NEXT: s_mov_b32 s12, s14 |
| ; CHECK-NEXT: s_mov_b32 s13, s15 |
| ; CHECK-NEXT: s_mov_b32 s14, s16 |
| ; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_swappc_b64 s[30:31], s[20:21] |
| ; CHECK-NEXT: s_lshl_b32 s4, s17, 2 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 1 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 0 |
| ; CHECK-NEXT: s_add_i32 s4, s4, 8 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 2 |
| ; CHECK-NEXT: v_mov_b32_e32 v3, s4 |
| ; CHECK-NEXT: ds_write_b16 v1, v0 |
| ; CHECK-NEXT: ds_write_b16 v1, v2 offset:4 |
| ; CHECK-NEXT: ds_write_b32 v3, v1 |
| ; CHECK-NEXT: s_endpgm |
| call void @use_module() |
| store i16 1, ptr addrspace(3) @module_variable |
| |
| store i16 2, ptr addrspace(3) @kernel_overalign |
| |
| %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 %idx |
| store float 0.0, ptr addrspace(3) %arrayidx1 |
| ret void |
| } |
| |
| |
| ; Second 2^3 of 2^4 cases encoded into function names: |
| ; with extern variable used from a nested function ("indirect" in the name) |
| ; module_variable not-used/used (module_0/module_1) |
| ; kernel variable normal/overaligned |
| ; extern variable normal/overaligned |
| |
| ; Stores i16 2 to @kernel_normal, then calls @use_extern_normal; %idx is unused. |
| ; Expected ISA: kernel variable written at LDS address 0 and s15 set to 0 before the call. |
| define amdgpu_kernel void @module_0_kernel_normal_indirect_extern_normal(i32 %idx) { |
| ; CHECK-LABEL: module_0_kernel_normal_indirect_extern_normal: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_add_u32 s12, s12, s17 |
| ; CHECK-NEXT: s_mov_b32 s32, 0 |
| ; CHECK-NEXT: s_addc_u32 s13, s13, 0 |
| ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12 |
| ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 |
| ; CHECK-NEXT: s_add_u32 s0, s0, s17 |
| ; CHECK-NEXT: s_addc_u32 s1, s1, 0 |
| ; CHECK-NEXT: s_add_u32 s8, s8, 8 |
| ; CHECK-NEXT: s_addc_u32 s9, s9, 0 |
| ; CHECK-NEXT: s_mov_b32 s13, s15 |
| ; CHECK-NEXT: s_mov_b32 s12, s14 |
| ; CHECK-NEXT: s_getpc_b64 s[14:15] |
| ; CHECK-NEXT: s_add_u32 s14, s14, use_extern_normal@gotpcrel32@lo+4 |
| ; CHECK-NEXT: s_addc_u32 s15, s15, use_extern_normal@gotpcrel32@hi+12 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
| ; CHECK-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; CHECK-NEXT: v_mov_b32_e32 v3, 2 |
| ; CHECK-NEXT: v_mov_b32_e32 v4, 0 |
| ; CHECK-NEXT: s_mov_b32 s14, s16 |
| ; CHECK-NEXT: s_mov_b32 s15, 0 |
| ; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2 |
| ; CHECK-NEXT: ds_write_b16 v4, v3 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19] |
| ; CHECK-NEXT: s_endpgm |
| store i16 2, ptr addrspace(3) @kernel_normal |
| |
| call void @use_extern_normal() |
| ret void |
| } |
| |
| ; Calls @use_module, stores i16 1 to @module_variable and i16 2 to @kernel_normal, then calls |
| ; @use_extern_normal; %idx is unused. |
| ; Expected ISA: module variable at LDS address 0, kernel variable at offset 2, and s15 set to 4 |
| ; before each of the two calls. |
| define amdgpu_kernel void @module_1_kernel_normal_indirect_extern_normal(i32 %idx) { |
| ; CHECK-LABEL: module_1_kernel_normal_indirect_extern_normal: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_add_u32 s12, s12, s17 |
| ; CHECK-NEXT: s_mov_b32 s32, 0 |
| ; CHECK-NEXT: s_addc_u32 s13, s13, 0 |
| ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12 |
| ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 |
| ; CHECK-NEXT: s_add_u32 s0, s0, s17 |
| ; CHECK-NEXT: s_addc_u32 s1, s1, 0 |
| ; CHECK-NEXT: s_add_u32 s8, s8, 8 |
| ; CHECK-NEXT: s_addc_u32 s9, s9, 0 |
| ; CHECK-NEXT: s_mov_b32 s13, s15 |
| ; CHECK-NEXT: s_mov_b32 s12, s14 |
| ; CHECK-NEXT: s_getpc_b64 s[14:15] |
| ; CHECK-NEXT: s_add_u32 s14, s14, use_module@gotpcrel32@lo+4 |
| ; CHECK-NEXT: s_addc_u32 s15, s15, use_module@gotpcrel32@hi+12 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
| ; CHECK-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; CHECK-NEXT: s_mov_b32 s14, s16 |
| ; CHECK-NEXT: s_mov_b32 s15, 4 |
| ; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19] |
| ; CHECK-NEXT: s_getpc_b64 s[14:15] |
| ; CHECK-NEXT: s_add_u32 s14, s14, use_extern_normal@gotpcrel32@lo+4 |
| ; CHECK-NEXT: s_addc_u32 s15, s15, use_extern_normal@gotpcrel32@hi+12 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 1 |
| ; CHECK-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 2 |
| ; CHECK-NEXT: s_mov_b32 s14, s16 |
| ; CHECK-NEXT: s_mov_b32 s15, 4 |
| ; CHECK-NEXT: ds_write_b16 v1, v0 |
| ; CHECK-NEXT: ds_write_b16 v1, v2 offset:2 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19] |
| ; CHECK-NEXT: s_endpgm |
| call void @use_module() |
| store i16 1, ptr addrspace(3) @module_variable |
| |
| store i16 2, ptr addrspace(3) @kernel_normal |
| |
| call void @use_extern_normal() |
| ret void |
| } |
| |
| ; Stores i16 2 to @kernel_overalign, then calls @use_extern_normal; %idx is unused. |
| ; Expected ISA: kernel variable written at LDS address 0 and s15 set to 2 before the call. |
| define amdgpu_kernel void @module_0_kernel_overalign_indirect_extern_normal(i32 %idx) { |
| ; CHECK-LABEL: module_0_kernel_overalign_indirect_extern_normal: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_add_u32 s12, s12, s17 |
| ; CHECK-NEXT: s_mov_b32 s32, 0 |
| ; CHECK-NEXT: s_addc_u32 s13, s13, 0 |
| ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12 |
| ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 |
| ; CHECK-NEXT: s_add_u32 s0, s0, s17 |
| ; CHECK-NEXT: s_addc_u32 s1, s1, 0 |
| ; CHECK-NEXT: s_add_u32 s8, s8, 8 |
| ; CHECK-NEXT: s_addc_u32 s9, s9, 0 |
| ; CHECK-NEXT: s_mov_b32 s13, s15 |
| ; CHECK-NEXT: s_mov_b32 s12, s14 |
| ; CHECK-NEXT: s_getpc_b64 s[14:15] |
| ; CHECK-NEXT: s_add_u32 s14, s14, use_extern_normal@gotpcrel32@lo+4 |
| ; CHECK-NEXT: s_addc_u32 s15, s15, use_extern_normal@gotpcrel32@hi+12 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
| ; CHECK-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; CHECK-NEXT: v_mov_b32_e32 v3, 2 |
| ; CHECK-NEXT: v_mov_b32_e32 v4, 0 |
| ; CHECK-NEXT: s_mov_b32 s14, s16 |
| ; CHECK-NEXT: s_mov_b32 s15, 2 |
| ; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2 |
| ; CHECK-NEXT: ds_write_b16 v4, v3 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19] |
| ; CHECK-NEXT: s_endpgm |
| store i16 2, ptr addrspace(3) @kernel_overalign |
| |
| call void @use_extern_normal() |
| ret void |
| } |
| |
| ; Calls @use_module, stores i16 1 to @module_variable and i16 2 to @kernel_overalign, then calls |
| ; @use_extern_normal; %idx is unused. |
| ; Expected ISA: module variable at LDS address 0, kernel variable at offset 4, and s15 set to 6 |
| ; before each of the two calls. |
| define amdgpu_kernel void @module_1_kernel_overalign_indirect_extern_normal(i32 %idx) { |
| ; CHECK-LABEL: module_1_kernel_overalign_indirect_extern_normal: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_add_u32 s12, s12, s17 |
| ; CHECK-NEXT: s_mov_b32 s32, 0 |
| ; CHECK-NEXT: s_addc_u32 s13, s13, 0 |
| ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12 |
| ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 |
| ; CHECK-NEXT: s_add_u32 s0, s0, s17 |
| ; CHECK-NEXT: s_addc_u32 s1, s1, 0 |
| ; CHECK-NEXT: s_add_u32 s8, s8, 8 |
| ; CHECK-NEXT: s_addc_u32 s9, s9, 0 |
| ; CHECK-NEXT: s_mov_b32 s13, s15 |
| ; CHECK-NEXT: s_mov_b32 s12, s14 |
| ; CHECK-NEXT: s_getpc_b64 s[14:15] |
| ; CHECK-NEXT: s_add_u32 s14, s14, use_module@gotpcrel32@lo+4 |
| ; CHECK-NEXT: s_addc_u32 s15, s15, use_module@gotpcrel32@hi+12 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
| ; CHECK-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; CHECK-NEXT: s_mov_b32 s14, s16 |
| ; CHECK-NEXT: s_mov_b32 s15, 6 |
| ; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19] |
| ; CHECK-NEXT: s_getpc_b64 s[14:15] |
| ; CHECK-NEXT: s_add_u32 s14, s14, use_extern_normal@gotpcrel32@lo+4 |
| ; CHECK-NEXT: s_addc_u32 s15, s15, use_extern_normal@gotpcrel32@hi+12 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 1 |
| ; CHECK-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 2 |
| ; CHECK-NEXT: s_mov_b32 s14, s16 |
| ; CHECK-NEXT: s_mov_b32 s15, 6 |
| ; CHECK-NEXT: ds_write_b16 v1, v0 |
| ; CHECK-NEXT: ds_write_b16 v1, v2 offset:4 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19] |
| ; CHECK-NEXT: s_endpgm |
| call void @use_module() |
| store i16 1, ptr addrspace(3) @module_variable |
| |
| store i16 2, ptr addrspace(3) @kernel_overalign |
| |
| call void @use_extern_normal() |
| ret void |
| } |
| |
| ; Stores i16 2 to @kernel_normal, then calls @use_extern_overalign; %idx is unused. |
| ; Expected ISA: kernel variable written at LDS address 0 and s15 set to 1 before the call. |
| define amdgpu_kernel void @module_0_kernel_normal_indirect_extern_overalign(i32 %idx) { |
| ; CHECK-LABEL: module_0_kernel_normal_indirect_extern_overalign: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_add_u32 s12, s12, s17 |
| ; CHECK-NEXT: s_mov_b32 s32, 0 |
| ; CHECK-NEXT: s_addc_u32 s13, s13, 0 |
| ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12 |
| ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 |
| ; CHECK-NEXT: s_add_u32 s0, s0, s17 |
| ; CHECK-NEXT: s_addc_u32 s1, s1, 0 |
| ; CHECK-NEXT: s_add_u32 s8, s8, 8 |
| ; CHECK-NEXT: s_addc_u32 s9, s9, 0 |
| ; CHECK-NEXT: s_mov_b32 s13, s15 |
| ; CHECK-NEXT: s_mov_b32 s12, s14 |
| ; CHECK-NEXT: s_getpc_b64 s[14:15] |
| ; CHECK-NEXT: s_add_u32 s14, s14, use_extern_overalign@gotpcrel32@lo+4 |
| ; CHECK-NEXT: s_addc_u32 s15, s15, use_extern_overalign@gotpcrel32@hi+12 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
| ; CHECK-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; CHECK-NEXT: v_mov_b32_e32 v3, 2 |
| ; CHECK-NEXT: v_mov_b32_e32 v4, 0 |
| ; CHECK-NEXT: s_mov_b32 s14, s16 |
| ; CHECK-NEXT: s_mov_b32 s15, 1 |
| ; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2 |
| ; CHECK-NEXT: ds_write_b16 v4, v3 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19] |
| ; CHECK-NEXT: s_endpgm |
| store i16 2, ptr addrspace(3) @kernel_normal |
| |
| call void @use_extern_overalign() |
| ret void |
| } |
| |
| ; Calls @use_module, stores i16 1 to @module_variable and i16 2 to @kernel_normal, then calls |
| ; @use_extern_overalign; %idx is unused. |
| ; Expected ISA: module variable at LDS address 0, kernel variable at offset 2, and s15 set to 5 |
| ; before each of the two calls. |
| define amdgpu_kernel void @module_1_kernel_normal_indirect_extern_overalign(i32 %idx) { |
| ; CHECK-LABEL: module_1_kernel_normal_indirect_extern_overalign: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_add_u32 s12, s12, s17 |
| ; CHECK-NEXT: s_mov_b32 s32, 0 |
| ; CHECK-NEXT: s_addc_u32 s13, s13, 0 |
| ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12 |
| ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 |
| ; CHECK-NEXT: s_add_u32 s0, s0, s17 |
| ; CHECK-NEXT: s_addc_u32 s1, s1, 0 |
| ; CHECK-NEXT: s_add_u32 s8, s8, 8 |
| ; CHECK-NEXT: s_addc_u32 s9, s9, 0 |
| ; CHECK-NEXT: s_mov_b32 s13, s15 |
| ; CHECK-NEXT: s_mov_b32 s12, s14 |
| ; CHECK-NEXT: s_getpc_b64 s[14:15] |
| ; CHECK-NEXT: s_add_u32 s14, s14, use_module@gotpcrel32@lo+4 |
| ; CHECK-NEXT: s_addc_u32 s15, s15, use_module@gotpcrel32@hi+12 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
| ; CHECK-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; CHECK-NEXT: s_mov_b32 s14, s16 |
| ; CHECK-NEXT: s_mov_b32 s15, 5 |
| ; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19] |
| ; CHECK-NEXT: s_getpc_b64 s[14:15] |
| ; CHECK-NEXT: s_add_u32 s14, s14, use_extern_overalign@gotpcrel32@lo+4 |
| ; CHECK-NEXT: s_addc_u32 s15, s15, use_extern_overalign@gotpcrel32@hi+12 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 1 |
| ; CHECK-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 2 |
| ; CHECK-NEXT: s_mov_b32 s14, s16 |
| ; CHECK-NEXT: s_mov_b32 s15, 5 |
| ; CHECK-NEXT: ds_write_b16 v1, v0 |
| ; CHECK-NEXT: ds_write_b16 v1, v2 offset:2 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19] |
| ; CHECK-NEXT: s_endpgm |
| call void @use_module() |
| store i16 1, ptr addrspace(3) @module_variable |
| |
| store i16 2, ptr addrspace(3) @kernel_normal |
| |
| call void @use_extern_overalign() |
| ret void |
| } |
| |
| ; Stores i16 2 to @kernel_overalign, then calls @use_extern_overalign; %idx is unused. |
| ; Expected ISA: kernel variable written at LDS address 0 and s15 set to 3 before the call. |
| define amdgpu_kernel void @module_0_kernel_overalign_indirect_extern_overalign(i32 %idx) { |
| ; CHECK-LABEL: module_0_kernel_overalign_indirect_extern_overalign: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_add_u32 s12, s12, s17 |
| ; CHECK-NEXT: s_mov_b32 s32, 0 |
| ; CHECK-NEXT: s_addc_u32 s13, s13, 0 |
| ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12 |
| ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 |
| ; CHECK-NEXT: s_add_u32 s0, s0, s17 |
| ; CHECK-NEXT: s_addc_u32 s1, s1, 0 |
| ; CHECK-NEXT: s_add_u32 s8, s8, 8 |
| ; CHECK-NEXT: s_addc_u32 s9, s9, 0 |
| ; CHECK-NEXT: s_mov_b32 s13, s15 |
| ; CHECK-NEXT: s_mov_b32 s12, s14 |
| ; CHECK-NEXT: s_getpc_b64 s[14:15] |
| ; CHECK-NEXT: s_add_u32 s14, s14, use_extern_overalign@gotpcrel32@lo+4 |
| ; CHECK-NEXT: s_addc_u32 s15, s15, use_extern_overalign@gotpcrel32@hi+12 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
| ; CHECK-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; CHECK-NEXT: v_mov_b32_e32 v3, 2 |
| ; CHECK-NEXT: v_mov_b32_e32 v4, 0 |
| ; CHECK-NEXT: s_mov_b32 s14, s16 |
| ; CHECK-NEXT: s_mov_b32 s15, 3 |
| ; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2 |
| ; CHECK-NEXT: ds_write_b16 v4, v3 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19] |
| ; CHECK-NEXT: s_endpgm |
| store i16 2, ptr addrspace(3) @kernel_overalign |
| |
| call void @use_extern_overalign() |
| ret void |
| } |
| |
| ; Calls @use_module, stores i16 1 to @module_variable and i16 2 to @kernel_overalign, then calls |
| ; @use_extern_overalign; %idx is unused. |
| ; Expected ISA: module variable at LDS address 0, kernel variable at offset 4, and s15 set to 7 |
| ; before each of the two calls. |
| define amdgpu_kernel void @module_1_kernel_overalign_indirect_extern_overalign(i32 %idx) { |
| ; CHECK-LABEL: module_1_kernel_overalign_indirect_extern_overalign: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_add_u32 s12, s12, s17 |
| ; CHECK-NEXT: s_mov_b32 s32, 0 |
| ; CHECK-NEXT: s_addc_u32 s13, s13, 0 |
| ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12 |
| ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 |
| ; CHECK-NEXT: s_add_u32 s0, s0, s17 |
| ; CHECK-NEXT: s_addc_u32 s1, s1, 0 |
| ; CHECK-NEXT: s_add_u32 s8, s8, 8 |
| ; CHECK-NEXT: s_addc_u32 s9, s9, 0 |
| ; CHECK-NEXT: s_mov_b32 s13, s15 |
| ; CHECK-NEXT: s_mov_b32 s12, s14 |
| ; CHECK-NEXT: s_getpc_b64 s[14:15] |
| ; CHECK-NEXT: s_add_u32 s14, s14, use_module@gotpcrel32@lo+4 |
| ; CHECK-NEXT: s_addc_u32 s15, s15, use_module@gotpcrel32@hi+12 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
| ; CHECK-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; CHECK-NEXT: s_mov_b32 s14, s16 |
| ; CHECK-NEXT: s_mov_b32 s15, 7 |
| ; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19] |
| ; CHECK-NEXT: s_getpc_b64 s[14:15] |
| ; CHECK-NEXT: s_add_u32 s14, s14, use_extern_overalign@gotpcrel32@lo+4 |
| ; CHECK-NEXT: s_addc_u32 s15, s15, use_extern_overalign@gotpcrel32@hi+12 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 1 |
| ; CHECK-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, 0 |
| ; CHECK-NEXT: v_mov_b32_e32 v2, 2 |
| ; CHECK-NEXT: s_mov_b32 s14, s16 |
| ; CHECK-NEXT: s_mov_b32 s15, 7 |
| ; CHECK-NEXT: ds_write_b16 v1, v0 |
| ; CHECK-NEXT: ds_write_b16 v1, v2 offset:4 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19] |
| ; CHECK-NEXT: s_endpgm |
| call void @use_module() |
| store i16 1, ptr addrspace(3) @module_variable |
| |
| store i16 2, ptr addrspace(3) @kernel_overalign |
| |
| call void @use_extern_overalign() |
| ret void |
| } |
| |
| |
| ; #0 is attached to the three non-kernel helpers (@use_module, @use_extern_normal, |
| ; @use_extern_overalign). |
| attributes #0 = { noinline } |
| |
| ; Module flag setting amdhsa_code_object_version to 500. |
| !llvm.module.flags = !{!0} |
| !0 = !{i32 1, !"amdhsa_code_object_version", i32 500} |