| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: opt -passes=amdgpu-attributor -mcpu=kaveri < %s | llc -mcpu=kaveri -enable-ipra=0 | FileCheck -enable-var-scope -check-prefixes=GCN,GFX7 %s |
| ; RUN: opt -passes=amdgpu-attributor -mcpu=gfx90a -mattr=-xnack < %s | llc -mcpu=gfx90a -mattr=-xnack -enable-ipra=0 | FileCheck -enable-var-scope -check-prefixes=GCN,GFX90A %s |
| |
| target triple = "amdgcn-amd-amdhsa" |
| |
| ; Callable (non-kernel) function: reads llvm.amdgcn.workitem.id.x and |
| ; volatile-stores it to a poison addrspace(1) pointer. |
| ; Both check sets expect the ID to be recovered from v31 by masking with 0x3ff. |
| ; The GFX7 run stores with flat_store_dword, the GFX90A run with global_store_dword. |
| define void @use_workitem_id_x() #1 { |
| ; GFX7-LABEL: use_workitem_id_x: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_and_b32_e32 v0, 0x3ff, v31 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v0 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX90A-LABEL: use_workitem_id_x: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX90A-NEXT: v_and_b32_e32 v0, 0x3ff, v31 |
| ; GFX90A-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: s_setpc_b64 s[30:31] |
| %val = call i32 @llvm.amdgcn.workitem.id.x() |
| store volatile i32 %val, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Callable (non-kernel) function: reads llvm.amdgcn.workitem.id.y and |
| ; volatile-stores it to a poison addrspace(1) pointer. |
| ; Both check sets expect the ID to be extracted from v31 with v_bfe_u32 |
| ; (bit offset 10, width 10). |
| define void @use_workitem_id_y() #1 { |
| ; GFX7-LABEL: use_workitem_id_y: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_bfe_u32 v0, v31, 10, 10 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v0 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX90A-LABEL: use_workitem_id_y: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX90A-NEXT: v_bfe_u32 v0, v31, 10, 10 |
| ; GFX90A-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: s_setpc_b64 s[30:31] |
| %val = call i32 @llvm.amdgcn.workitem.id.y() |
| store volatile i32 %val, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Callable (non-kernel) function: reads llvm.amdgcn.workitem.id.z and |
| ; volatile-stores it to a poison addrspace(1) pointer. |
| ; Both check sets expect the ID to be extracted from v31 with v_bfe_u32 |
| ; (bit offset 20, width 10). |
| define void @use_workitem_id_z() #1 { |
| ; GFX7-LABEL: use_workitem_id_z: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_bfe_u32 v0, v31, 20, 10 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v0 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX90A-LABEL: use_workitem_id_z: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX90A-NEXT: v_bfe_u32 v0, v31, 20, 10 |
| ; GFX90A-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: s_setpc_b64 s[30:31] |
| %val = call i32 @llvm.amdgcn.workitem.id.z() |
| store volatile i32 %val, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Callable (non-kernel) function: reads the X and Y workitem IDs and |
| ; volatile-stores them, X first, to a poison addrspace(1) pointer. |
| ; Expected code extracts X from v31 with a 0x3ff mask and Y with v_bfe_u32 |
| ; (offset 10, width 10); each store is followed by its own s_waitcnt vmcnt(0). |
| define void @use_workitem_id_xy() #1 { |
| ; GFX7-LABEL: use_workitem_id_xy: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_and_b32_e32 v0, 0x3ff, v31 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v0 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: v_bfe_u32 v0, v31, 10, 10 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v0 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX90A-LABEL: use_workitem_id_xy: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX90A-NEXT: v_and_b32_e32 v0, 0x3ff, v31 |
| ; GFX90A-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: v_bfe_u32 v0, v31, 10, 10 |
| ; GFX90A-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: s_setpc_b64 s[30:31] |
| %val0 = call i32 @llvm.amdgcn.workitem.id.x() |
| %val1 = call i32 @llvm.amdgcn.workitem.id.y() |
| store volatile i32 %val0, ptr addrspace(1) poison |
| store volatile i32 %val1, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Callable (non-kernel) function: reads the X, Y and Z workitem IDs and |
| ; volatile-stores them in that order to a poison addrspace(1) pointer. |
| ; Expected code extracts X with a 0x3ff mask, Y with v_bfe_u32 (offset 10, |
| ; width 10) and Z with v_bfe_u32 (offset 20, width 10), all sourced from v31. |
| define void @use_workitem_id_xyz() #1 { |
| ; GFX7-LABEL: use_workitem_id_xyz: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_and_b32_e32 v0, 0x3ff, v31 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v0 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: v_bfe_u32 v0, v31, 10, 10 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v0 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: v_bfe_u32 v0, v31, 20, 10 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v0 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX90A-LABEL: use_workitem_id_xyz: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX90A-NEXT: v_and_b32_e32 v0, 0x3ff, v31 |
| ; GFX90A-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: v_bfe_u32 v0, v31, 10, 10 |
| ; GFX90A-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: v_bfe_u32 v0, v31, 20, 10 |
| ; GFX90A-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: s_setpc_b64 s[30:31] |
| %val0 = call i32 @llvm.amdgcn.workitem.id.x() |
| %val1 = call i32 @llvm.amdgcn.workitem.id.y() |
| %val2 = call i32 @llvm.amdgcn.workitem.id.z() |
| store volatile i32 %val0, ptr addrspace(1) poison |
| store volatile i32 %val1, ptr addrspace(1) poison |
| store volatile i32 %val2, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Callable (non-kernel) function: reads the X and Z workitem IDs (Y is not |
| ; used) and volatile-stores them, X first, to a poison addrspace(1) pointer. |
| ; Expected code extracts X from v31 with a 0x3ff mask and Z with v_bfe_u32 |
| ; (offset 20, width 10). |
| define void @use_workitem_id_xz() #1 { |
| ; GFX7-LABEL: use_workitem_id_xz: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_and_b32_e32 v0, 0x3ff, v31 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v0 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: v_bfe_u32 v0, v31, 20, 10 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v0 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX90A-LABEL: use_workitem_id_xz: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX90A-NEXT: v_and_b32_e32 v0, 0x3ff, v31 |
| ; GFX90A-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: v_bfe_u32 v0, v31, 20, 10 |
| ; GFX90A-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: s_setpc_b64 s[30:31] |
| %val0 = call i32 @llvm.amdgcn.workitem.id.x() |
| %val1 = call i32 @llvm.amdgcn.workitem.id.z() |
| store volatile i32 %val0, ptr addrspace(1) poison |
| store volatile i32 %val1, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Callable (non-kernel) function: reads the Y and Z workitem IDs (X is not |
| ; used) and volatile-stores them, Y first, to a poison addrspace(1) pointer. |
| ; Expected code extracts both from v31 with v_bfe_u32: offset 10 for Y and |
| ; offset 20 for Z, width 10 each. |
| define void @use_workitem_id_yz() #1 { |
| ; GFX7-LABEL: use_workitem_id_yz: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_bfe_u32 v0, v31, 10, 10 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v0 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: v_bfe_u32 v0, v31, 20, 10 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v0 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX90A-LABEL: use_workitem_id_yz: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX90A-NEXT: v_bfe_u32 v0, v31, 10, 10 |
| ; GFX90A-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: v_bfe_u32 v0, v31, 20, 10 |
| ; GFX90A-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: s_setpc_b64 s[30:31] |
| %val0 = call i32 @llvm.amdgcn.workitem.id.y() |
| %val1 = call i32 @llvm.amdgcn.workitem.id.z() |
| store volatile i32 %val0, ptr addrspace(1) poison |
| store volatile i32 %val1, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Kernel whose only action is a call to use_workitem_id_x. |
| ; The shared GCN checks expect v0 to be copied into v31 before the call and |
| ; the callee address to be loaded through a @gotpcrel32 entry into s[4:5]. |
| ; The trailing check after the body expects .amdhsa_system_vgpr_workitem_id 0. |
| define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 { |
| ; GCN-LABEL: kern_indirect_use_workitem_id_x: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_add_u32 s0, s0, s5 |
| ; GCN-NEXT: s_addc_u32 s1, s1, 0 |
| ; GCN-NEXT: s_getpc_b64 s[4:5] |
| ; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_x@gotpcrel32@lo+4 |
| ; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_x@gotpcrel32@hi+12 |
| ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GCN-NEXT: v_mov_b32_e32 v31, v0 |
| ; GCN-NEXT: s_mov_b32 s32, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GCN-NEXT: s_endpgm |
| call void @use_workitem_id_x() |
| ret void |
| } |
| ; GCN: .amdhsa_system_vgpr_workitem_id 0 |
| |
| ; Kernel whose only action is a call to use_workitem_id_y. |
| ; The GFX7 checks expect v1 to be shifted left by 10 into v31 before the call. |
| ; The GFX90A checks expect a plain copy of v0 into v31 (presumably because the |
| ; IDs already arrive packed in v0 on that target - confirm against the ABI docs). |
| ; The trailing check after the body expects .amdhsa_system_vgpr_workitem_id 1. |
| define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 { |
| ; GFX7-LABEL: kern_indirect_use_workitem_id_y: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_add_u32 s0, s0, s5 |
| ; GFX7-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX7-NEXT: s_getpc_b64 s[4:5] |
| ; GFX7-NEXT: s_add_u32 s4, s4, use_workitem_id_y@gotpcrel32@lo+4 |
| ; GFX7-NEXT: s_addc_u32 s5, s5, use_workitem_id_y@gotpcrel32@hi+12 |
| ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX7-NEXT: v_lshlrev_b32_e32 v31, 10, v1 |
| ; GFX7-NEXT: s_mov_b32 s32, 0 |
| ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX7-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GFX7-NEXT: s_endpgm |
| ; |
| ; GFX90A-LABEL: kern_indirect_use_workitem_id_y: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_add_u32 s0, s0, s5 |
| ; GFX90A-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX90A-NEXT: s_getpc_b64 s[4:5] |
| ; GFX90A-NEXT: s_add_u32 s4, s4, use_workitem_id_y@gotpcrel32@lo+4 |
| ; GFX90A-NEXT: s_addc_u32 s5, s5, use_workitem_id_y@gotpcrel32@hi+12 |
| ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v31, v0 |
| ; GFX90A-NEXT: s_mov_b32 s32, 0 |
| ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GFX90A-NEXT: s_endpgm |
| call void @use_workitem_id_y() |
| ret void |
| } |
| ; GCN: .amdhsa_system_vgpr_workitem_id 1 |
| |
| ; Kernel whose only action is a call to use_workitem_id_z. |
| ; The GFX7 checks expect v2 to be shifted left by 20 into v31 before the call. |
| ; The GFX90A checks expect a plain copy of v0 into v31 (presumably because the |
| ; IDs already arrive packed in v0 on that target - confirm against the ABI docs). |
| ; The trailing check after the body expects .amdhsa_system_vgpr_workitem_id 2. |
| define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 { |
| ; GFX7-LABEL: kern_indirect_use_workitem_id_z: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_add_u32 s0, s0, s5 |
| ; GFX7-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX7-NEXT: s_getpc_b64 s[4:5] |
| ; GFX7-NEXT: s_add_u32 s4, s4, use_workitem_id_z@gotpcrel32@lo+4 |
| ; GFX7-NEXT: s_addc_u32 s5, s5, use_workitem_id_z@gotpcrel32@hi+12 |
| ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX7-NEXT: v_lshlrev_b32_e32 v31, 20, v2 |
| ; GFX7-NEXT: s_mov_b32 s32, 0 |
| ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX7-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GFX7-NEXT: s_endpgm |
| ; |
| ; GFX90A-LABEL: kern_indirect_use_workitem_id_z: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_add_u32 s0, s0, s5 |
| ; GFX90A-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX90A-NEXT: s_getpc_b64 s[4:5] |
| ; GFX90A-NEXT: s_add_u32 s4, s4, use_workitem_id_z@gotpcrel32@lo+4 |
| ; GFX90A-NEXT: s_addc_u32 s5, s5, use_workitem_id_z@gotpcrel32@hi+12 |
| ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v31, v0 |
| ; GFX90A-NEXT: s_mov_b32 s32, 0 |
| ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GFX90A-NEXT: s_endpgm |
| call void @use_workitem_id_z() |
| ret void |
| } |
| ; GCN: .amdhsa_system_vgpr_workitem_id 2 |
| |
| ; Kernel whose only action is a call to use_workitem_id_xy. |
| ; The GFX7 checks expect v31 to be built as v0 | (v1 << 10) before the call. |
| ; The GFX90A checks expect a plain copy of v0 into v31. |
| define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() #1 { |
| ; GFX7-LABEL: kern_indirect_use_workitem_id_xy: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_add_u32 s0, s0, s5 |
| ; GFX7-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX7-NEXT: s_getpc_b64 s[4:5] |
| ; GFX7-NEXT: s_add_u32 s4, s4, use_workitem_id_xy@gotpcrel32@lo+4 |
| ; GFX7-NEXT: s_addc_u32 s5, s5, use_workitem_id_xy@gotpcrel32@hi+12 |
| ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; GFX7-NEXT: v_or_b32_e32 v31, v0, v1 |
| ; GFX7-NEXT: s_mov_b32 s32, 0 |
| ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX7-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GFX7-NEXT: s_endpgm |
| ; |
| ; GFX90A-LABEL: kern_indirect_use_workitem_id_xy: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_add_u32 s0, s0, s5 |
| ; GFX90A-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX90A-NEXT: s_getpc_b64 s[4:5] |
| ; GFX90A-NEXT: s_add_u32 s4, s4, use_workitem_id_xy@gotpcrel32@lo+4 |
| ; GFX90A-NEXT: s_addc_u32 s5, s5, use_workitem_id_xy@gotpcrel32@hi+12 |
| ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v31, v0 |
| ; GFX90A-NEXT: s_mov_b32 s32, 0 |
| ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GFX90A-NEXT: s_endpgm |
| call void @use_workitem_id_xy() |
| ret void |
| } |
| |
| ; Kernel whose only action is a call to use_workitem_id_xz. |
| ; The GFX7 checks expect v31 to be built as v0 | (v2 << 20) before the call. |
| ; The GFX90A checks expect a plain copy of v0 into v31. |
| define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() #1 { |
| ; GFX7-LABEL: kern_indirect_use_workitem_id_xz: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_add_u32 s0, s0, s5 |
| ; GFX7-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX7-NEXT: s_getpc_b64 s[4:5] |
| ; GFX7-NEXT: s_add_u32 s4, s4, use_workitem_id_xz@gotpcrel32@lo+4 |
| ; GFX7-NEXT: s_addc_u32 s5, s5, use_workitem_id_xz@gotpcrel32@hi+12 |
| ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 20, v2 |
| ; GFX7-NEXT: v_or_b32_e32 v31, v0, v1 |
| ; GFX7-NEXT: s_mov_b32 s32, 0 |
| ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX7-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GFX7-NEXT: s_endpgm |
| ; |
| ; GFX90A-LABEL: kern_indirect_use_workitem_id_xz: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_add_u32 s0, s0, s5 |
| ; GFX90A-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX90A-NEXT: s_getpc_b64 s[4:5] |
| ; GFX90A-NEXT: s_add_u32 s4, s4, use_workitem_id_xz@gotpcrel32@lo+4 |
| ; GFX90A-NEXT: s_addc_u32 s5, s5, use_workitem_id_xz@gotpcrel32@hi+12 |
| ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v31, v0 |
| ; GFX90A-NEXT: s_mov_b32 s32, 0 |
| ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GFX90A-NEXT: s_endpgm |
| call void @use_workitem_id_xz() |
| ret void |
| } |
| |
| ; Kernel whose only action is a call to use_workitem_id_yz. |
| ; The GFX7 checks expect v31 to be built as (v1 << 10) | (v2 << 20) before |
| ; the call; v0 does not contribute to v31 in this case. |
| ; The GFX90A checks expect a plain copy of v0 into v31. |
| define amdgpu_kernel void @kern_indirect_use_workitem_id_yz() #1 { |
| ; GFX7-LABEL: kern_indirect_use_workitem_id_yz: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_add_u32 s0, s0, s5 |
| ; GFX7-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX7-NEXT: s_getpc_b64 s[4:5] |
| ; GFX7-NEXT: s_add_u32 s4, s4, use_workitem_id_yz@gotpcrel32@lo+4 |
| ; GFX7-NEXT: s_addc_u32 s5, s5, use_workitem_id_yz@gotpcrel32@hi+12 |
| ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 20, v2 |
| ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; GFX7-NEXT: v_or_b32_e32 v31, v1, v0 |
| ; GFX7-NEXT: s_mov_b32 s32, 0 |
| ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX7-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GFX7-NEXT: s_endpgm |
| ; |
| ; GFX90A-LABEL: kern_indirect_use_workitem_id_yz: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_add_u32 s0, s0, s5 |
| ; GFX90A-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX90A-NEXT: s_getpc_b64 s[4:5] |
| ; GFX90A-NEXT: s_add_u32 s4, s4, use_workitem_id_yz@gotpcrel32@lo+4 |
| ; GFX90A-NEXT: s_addc_u32 s5, s5, use_workitem_id_yz@gotpcrel32@hi+12 |
| ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v31, v0 |
| ; GFX90A-NEXT: s_mov_b32 s32, 0 |
| ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GFX90A-NEXT: s_endpgm |
| call void @use_workitem_id_yz() |
| ret void |
| } |
| |
| ; Kernel whose only action is a call to use_workitem_id_xyz. |
| ; The GFX7 checks expect v31 to be built as v0 | (v1 << 10) | (v2 << 20) |
| ; before the call. |
| ; The GFX90A checks expect a plain copy of v0 into v31. |
| define amdgpu_kernel void @kern_indirect_use_workitem_id_xyz() #1 { |
| ; GFX7-LABEL: kern_indirect_use_workitem_id_xyz: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_add_u32 s0, s0, s5 |
| ; GFX7-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX7-NEXT: s_getpc_b64 s[4:5] |
| ; GFX7-NEXT: s_add_u32 s4, s4, use_workitem_id_xyz@gotpcrel32@lo+4 |
| ; GFX7-NEXT: s_addc_u32 s5, s5, use_workitem_id_xyz@gotpcrel32@hi+12 |
| ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
| ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX7-NEXT: v_or_b32_e32 v31, v0, v2 |
| ; GFX7-NEXT: s_mov_b32 s32, 0 |
| ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX7-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GFX7-NEXT: s_endpgm |
| ; |
| ; GFX90A-LABEL: kern_indirect_use_workitem_id_xyz: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_add_u32 s0, s0, s5 |
| ; GFX90A-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX90A-NEXT: s_getpc_b64 s[4:5] |
| ; GFX90A-NEXT: s_add_u32 s4, s4, use_workitem_id_xyz@gotpcrel32@lo+4 |
| ; GFX90A-NEXT: s_addc_u32 s5, s5, use_workitem_id_xyz@gotpcrel32@hi+12 |
| ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v31, v0 |
| ; GFX90A-NEXT: s_mov_b32 s32, 0 |
| ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GFX90A-NEXT: s_endpgm |
| call void @use_workitem_id_xyz() |
| ret void |
| } |
| |
| ; Callable (non-kernel) function whose only action is a call to |
| ; use_workitem_id_x. Output is identical for both runs (shared GCN checks). |
| ; Expected code keeps the old s33 and the return address s[30:31] in lanes of |
| ; v40 (itself spilled to scratch at s33), bumps s32 by 0x400 around the call, |
| ; and contains no write to v31, so the incoming v31 reaches the callee unchanged. |
| define void @func_indirect_use_workitem_id_x() #1 { |
| ; GCN-LABEL: func_indirect_use_workitem_id_x: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b32 s4, s33 |
| ; GCN-NEXT: s_mov_b32 s33, s32 |
| ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 |
| ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill |
| ; GCN-NEXT: s_mov_b64 exec, s[6:7] |
| ; GCN-NEXT: s_addk_i32 s32, 0x400 |
| ; GCN-NEXT: v_writelane_b32 v40, s4, 2 |
| ; GCN-NEXT: s_getpc_b64 s[4:5] |
| ; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_x@gotpcrel32@lo+4 |
| ; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_x@gotpcrel32@hi+12 |
| ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GCN-NEXT: v_writelane_b32 v40, s30, 0 |
| ; GCN-NEXT: v_writelane_b32 v40, s31, 1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GCN-NEXT: v_readlane_b32 s31, v40, 1 |
| ; GCN-NEXT: v_readlane_b32 s30, v40, 0 |
| ; GCN-NEXT: s_mov_b32 s32, s33 |
| ; GCN-NEXT: v_readlane_b32 s4, v40, 2 |
| ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 |
| ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload |
| ; GCN-NEXT: s_mov_b64 exec, s[6:7] |
| ; GCN-NEXT: s_mov_b32 s33, s4 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| call void @use_workitem_id_x() |
| ret void |
| } |
| |
| ; Callable (non-kernel) function whose only action is a call to |
| ; use_workitem_id_y. Output is identical for both runs (shared GCN checks). |
| ; Expected code keeps the old s33 and the return address s[30:31] in lanes of |
| ; v40 (itself spilled to scratch at s33), bumps s32 by 0x400 around the call, |
| ; and contains no write to v31, so the incoming v31 reaches the callee unchanged. |
| define void @func_indirect_use_workitem_id_y() #1 { |
| ; GCN-LABEL: func_indirect_use_workitem_id_y: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b32 s4, s33 |
| ; GCN-NEXT: s_mov_b32 s33, s32 |
| ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 |
| ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill |
| ; GCN-NEXT: s_mov_b64 exec, s[6:7] |
| ; GCN-NEXT: s_addk_i32 s32, 0x400 |
| ; GCN-NEXT: v_writelane_b32 v40, s4, 2 |
| ; GCN-NEXT: s_getpc_b64 s[4:5] |
| ; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_y@gotpcrel32@lo+4 |
| ; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_y@gotpcrel32@hi+12 |
| ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GCN-NEXT: v_writelane_b32 v40, s30, 0 |
| ; GCN-NEXT: v_writelane_b32 v40, s31, 1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GCN-NEXT: v_readlane_b32 s31, v40, 1 |
| ; GCN-NEXT: v_readlane_b32 s30, v40, 0 |
| ; GCN-NEXT: s_mov_b32 s32, s33 |
| ; GCN-NEXT: v_readlane_b32 s4, v40, 2 |
| ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 |
| ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload |
| ; GCN-NEXT: s_mov_b64 exec, s[6:7] |
| ; GCN-NEXT: s_mov_b32 s33, s4 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| call void @use_workitem_id_y() |
| ret void |
| } |
| |
| ; Callable (non-kernel) function whose only action is a call to |
| ; use_workitem_id_z. Output is identical for both runs (shared GCN checks). |
| ; Expected code keeps the old s33 and the return address s[30:31] in lanes of |
| ; v40 (itself spilled to scratch at s33), bumps s32 by 0x400 around the call, |
| ; and contains no write to v31, so the incoming v31 reaches the callee unchanged. |
| define void @func_indirect_use_workitem_id_z() #1 { |
| ; GCN-LABEL: func_indirect_use_workitem_id_z: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b32 s4, s33 |
| ; GCN-NEXT: s_mov_b32 s33, s32 |
| ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 |
| ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill |
| ; GCN-NEXT: s_mov_b64 exec, s[6:7] |
| ; GCN-NEXT: s_addk_i32 s32, 0x400 |
| ; GCN-NEXT: v_writelane_b32 v40, s4, 2 |
| ; GCN-NEXT: s_getpc_b64 s[4:5] |
| ; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_z@gotpcrel32@lo+4 |
| ; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_z@gotpcrel32@hi+12 |
| ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GCN-NEXT: v_writelane_b32 v40, s30, 0 |
| ; GCN-NEXT: v_writelane_b32 v40, s31, 1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GCN-NEXT: v_readlane_b32 s31, v40, 1 |
| ; GCN-NEXT: v_readlane_b32 s30, v40, 0 |
| ; GCN-NEXT: s_mov_b32 s32, s33 |
| ; GCN-NEXT: v_readlane_b32 s4, v40, 2 |
| ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 |
| ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload |
| ; GCN-NEXT: s_mov_b64 exec, s[6:7] |
| ; GCN-NEXT: s_mov_b32 s33, s4 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| call void @use_workitem_id_z() |
| ret void |
| } |
| |
| ; Callable (non-kernel) function taking one explicit i32 argument in addition |
| ; to using the X workitem ID. It volatile-stores %arg0 first, then the ID. |
| ; Expected code stores v0 (the explicit argument) and only afterwards reuses |
| ; v0 for the ID, which is masked out of v31 with 0x3ff. |
| define void @other_arg_use_workitem_id_x(i32 %arg0) #1 { |
| ; GFX7-LABEL: other_arg_use_workitem_id_x: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v0 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: v_and_b32_e32 v0, 0x3ff, v31 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v0 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX90A-LABEL: other_arg_use_workitem_id_x: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: v_and_b32_e32 v0, 0x3ff, v31 |
| ; GFX90A-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: s_setpc_b64 s[30:31] |
| %val = call i32 @llvm.amdgcn.workitem.id.x() |
| store volatile i32 %arg0, ptr addrspace(1) poison |
| store volatile i32 %val, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Callable (non-kernel) function taking one explicit i32 argument in addition |
| ; to using the Y workitem ID. It volatile-stores %arg0 first, then the ID. |
| ; Expected code stores v0 (the explicit argument) and only afterwards reuses |
| ; v0 for the ID, extracted from v31 with v_bfe_u32 (offset 10, width 10). |
| define void @other_arg_use_workitem_id_y(i32 %arg0) #1 { |
| ; GFX7-LABEL: other_arg_use_workitem_id_y: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v0 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: v_bfe_u32 v0, v31, 10, 10 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v0 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX90A-LABEL: other_arg_use_workitem_id_y: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: v_bfe_u32 v0, v31, 10, 10 |
| ; GFX90A-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: s_setpc_b64 s[30:31] |
| %val = call i32 @llvm.amdgcn.workitem.id.y() |
| store volatile i32 %arg0, ptr addrspace(1) poison |
| store volatile i32 %val, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Callable (non-kernel) function taking one explicit i32 argument in addition |
| ; to using the Z workitem ID. It volatile-stores %arg0 first, then the ID. |
| ; Expected code stores v0 (the explicit argument) and only afterwards reuses |
| ; v0 for the ID, extracted from v31 with v_bfe_u32 (offset 20, width 10). |
| define void @other_arg_use_workitem_id_z(i32 %arg0) #1 { |
| ; GFX7-LABEL: other_arg_use_workitem_id_z: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v0 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: v_bfe_u32 v0, v31, 20, 10 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v0 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX90A-LABEL: other_arg_use_workitem_id_z: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: v_bfe_u32 v0, v31, 20, 10 |
| ; GFX90A-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: s_setpc_b64 s[30:31] |
| %val = call i32 @llvm.amdgcn.workitem.id.z() |
| store volatile i32 %arg0, ptr addrspace(1) poison |
| store volatile i32 %val, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Kernel that calls other_arg_use_workitem_id_x with the constant 555 (0x22b). |
| ; The shared GCN checks expect v0 to be copied into v31 first and only then |
| ; overwritten with the constant argument, since v0 carries both values in turn. |
| ; The trailing check after the body expects .amdhsa_system_vgpr_workitem_id 0. |
| define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 { |
| ; GCN-LABEL: kern_indirect_other_arg_use_workitem_id_x: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_add_u32 s0, s0, s5 |
| ; GCN-NEXT: s_addc_u32 s1, s1, 0 |
| ; GCN-NEXT: s_getpc_b64 s[4:5] |
| ; GCN-NEXT: s_add_u32 s4, s4, other_arg_use_workitem_id_x@gotpcrel32@lo+4 |
| ; GCN-NEXT: s_addc_u32 s5, s5, other_arg_use_workitem_id_x@gotpcrel32@hi+12 |
| ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GCN-NEXT: v_mov_b32_e32 v31, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v0, 0x22b |
| ; GCN-NEXT: s_mov_b32 s32, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GCN-NEXT: s_endpgm |
| call void @other_arg_use_workitem_id_x(i32 555) |
| ret void |
| } |
| ; GCN: .amdhsa_system_vgpr_workitem_id 0 |
| |
| ; Kernel that calls other_arg_use_workitem_id_y with the constant 555 (0x22b). |
| ; The GFX7 checks expect v1 to be shifted left by 10 into v31; the GFX90A |
| ; checks expect a plain copy of v0 into v31. In both, v0 is then loaded with |
| ; the constant argument. |
| ; The trailing check after the body expects .amdhsa_system_vgpr_workitem_id 1. |
| define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 { |
| ; GFX7-LABEL: kern_indirect_other_arg_use_workitem_id_y: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_add_u32 s0, s0, s5 |
| ; GFX7-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX7-NEXT: s_getpc_b64 s[4:5] |
| ; GFX7-NEXT: s_add_u32 s4, s4, other_arg_use_workitem_id_y@gotpcrel32@lo+4 |
| ; GFX7-NEXT: s_addc_u32 s5, s5, other_arg_use_workitem_id_y@gotpcrel32@hi+12 |
| ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX7-NEXT: v_lshlrev_b32_e32 v31, 10, v1 |
| ; GFX7-NEXT: v_mov_b32_e32 v0, 0x22b |
| ; GFX7-NEXT: s_mov_b32 s32, 0 |
| ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX7-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GFX7-NEXT: s_endpgm |
| ; |
| ; GFX90A-LABEL: kern_indirect_other_arg_use_workitem_id_y: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_add_u32 s0, s0, s5 |
| ; GFX90A-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX90A-NEXT: s_getpc_b64 s[4:5] |
| ; GFX90A-NEXT: s_add_u32 s4, s4, other_arg_use_workitem_id_y@gotpcrel32@lo+4 |
| ; GFX90A-NEXT: s_addc_u32 s5, s5, other_arg_use_workitem_id_y@gotpcrel32@hi+12 |
| ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v31, v0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v0, 0x22b |
| ; GFX90A-NEXT: s_mov_b32 s32, 0 |
| ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GFX90A-NEXT: s_endpgm |
| call void @other_arg_use_workitem_id_y(i32 555) |
| ret void |
| } |
| ; GCN: .amdhsa_system_vgpr_workitem_id 1 |
| |
| ; Kernel that calls other_arg_use_workitem_id_z with the constant 555 (0x22b). |
| ; The GFX7 checks expect v2 to be shifted left by 20 into v31; the GFX90A |
| ; checks expect a plain copy of v0 into v31. In both, v0 is then loaded with |
| ; the constant argument. |
| ; The trailing check after the body expects .amdhsa_system_vgpr_workitem_id 2. |
| define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 { |
| ; GFX7-LABEL: kern_indirect_other_arg_use_workitem_id_z: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_add_u32 s0, s0, s5 |
| ; GFX7-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX7-NEXT: s_getpc_b64 s[4:5] |
| ; GFX7-NEXT: s_add_u32 s4, s4, other_arg_use_workitem_id_z@gotpcrel32@lo+4 |
| ; GFX7-NEXT: s_addc_u32 s5, s5, other_arg_use_workitem_id_z@gotpcrel32@hi+12 |
| ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX7-NEXT: v_lshlrev_b32_e32 v31, 20, v2 |
| ; GFX7-NEXT: v_mov_b32_e32 v0, 0x22b |
| ; GFX7-NEXT: s_mov_b32 s32, 0 |
| ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX7-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GFX7-NEXT: s_endpgm |
| ; |
| ; GFX90A-LABEL: kern_indirect_other_arg_use_workitem_id_z: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_add_u32 s0, s0, s5 |
| ; GFX90A-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX90A-NEXT: s_getpc_b64 s[4:5] |
| ; GFX90A-NEXT: s_add_u32 s4, s4, other_arg_use_workitem_id_z@gotpcrel32@lo+4 |
| ; GFX90A-NEXT: s_addc_u32 s5, s5, other_arg_use_workitem_id_z@gotpcrel32@hi+12 |
| ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v31, v0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v0, 0x22b |
| ; GFX90A-NEXT: s_mov_b32 s32, 0 |
| ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GFX90A-NEXT: s_endpgm |
| call void @other_arg_use_workitem_id_z(i32 555) |
| ret void |
| } |
| ; GCN: .amdhsa_system_vgpr_workitem_id 2 |
| |
| define void @too_many_args_use_workitem_id_x( |
| ; GFX7-LABEL: too_many_args_use_workitem_id_x: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_and_b32_e32 v31, 0x3ff, v31 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v31 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v0 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v1 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v2 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v3 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v4 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v5 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v6 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v7 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v8 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v9 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v10 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v11 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v12 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v13 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v14 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v15 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v16 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v17 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v18 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v19 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v20 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v21 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v22 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v23 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v24 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v25 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v26 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v27 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v28 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v29 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v30 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v31 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX90A-LABEL: too_many_args_use_workitem_id_x: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX90A-NEXT: buffer_load_dword v32, off, s[0:3], s32 |
| ; GFX90A-NEXT: v_and_b32_e32 v31, 0x3ff, v31 |
| ; GFX90A-NEXT: global_store_dword v[0:1], v31, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v1, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v2, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v3, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v4, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v5, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v6, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v7, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v8, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v9, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v10, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v11, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v12, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v13, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v14, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v15, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v16, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v17, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v18, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v19, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v20, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v21, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v22, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v23, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v24, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v25, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v26, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v27, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v28, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v29, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v30, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v32, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: s_setpc_b64 s[30:31] |
| i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, |
| i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15, |
| i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23, |
| i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 { |
| %val = call i32 @llvm.amdgcn.workitem.id.x() |
| store volatile i32 %val, ptr addrspace(1) poison |
| |
| store volatile i32 %arg0, ptr addrspace(1) poison |
| store volatile i32 %arg1, ptr addrspace(1) poison |
| store volatile i32 %arg2, ptr addrspace(1) poison |
| store volatile i32 %arg3, ptr addrspace(1) poison |
| store volatile i32 %arg4, ptr addrspace(1) poison |
| store volatile i32 %arg5, ptr addrspace(1) poison |
| store volatile i32 %arg6, ptr addrspace(1) poison |
| store volatile i32 %arg7, ptr addrspace(1) poison |
| |
| store volatile i32 %arg8, ptr addrspace(1) poison |
| store volatile i32 %arg9, ptr addrspace(1) poison |
| store volatile i32 %arg10, ptr addrspace(1) poison |
| store volatile i32 %arg11, ptr addrspace(1) poison |
| store volatile i32 %arg12, ptr addrspace(1) poison |
| store volatile i32 %arg13, ptr addrspace(1) poison |
| store volatile i32 %arg14, ptr addrspace(1) poison |
| store volatile i32 %arg15, ptr addrspace(1) poison |
| |
| store volatile i32 %arg16, ptr addrspace(1) poison |
| store volatile i32 %arg17, ptr addrspace(1) poison |
| store volatile i32 %arg18, ptr addrspace(1) poison |
| store volatile i32 %arg19, ptr addrspace(1) poison |
| store volatile i32 %arg20, ptr addrspace(1) poison |
| store volatile i32 %arg21, ptr addrspace(1) poison |
| store volatile i32 %arg22, ptr addrspace(1) poison |
| store volatile i32 %arg23, ptr addrspace(1) poison |
| |
| store volatile i32 %arg24, ptr addrspace(1) poison |
| store volatile i32 %arg25, ptr addrspace(1) poison |
| store volatile i32 %arg26, ptr addrspace(1) poison |
| store volatile i32 %arg27, ptr addrspace(1) poison |
| store volatile i32 %arg28, ptr addrspace(1) poison |
| store volatile i32 %arg29, ptr addrspace(1) poison |
| store volatile i32 %arg30, ptr addrspace(1) poison |
| store volatile i32 %arg31, ptr addrspace(1) poison |
| |
| ret void |
| } |
| |
| ; Kernel entry point that calls @too_many_args_use_workitem_id_x with 32 |
| ; constant i32 arguments (10, 20, ..., 320). |
| ; The expected output below materializes the first 31 arguments in v0-v30, |
| ; writes the last one (320 = 0x140) to the stack slot at s32 (s32 is set to 0), |
| ; and copies the kernel's incoming v0 into v31 before v0 is overwritten with |
| ; the first argument. The callee masks v31 with 0x3ff to get workitem id x. |
| define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 { |
| ; GCN-LABEL: kern_call_too_many_args_use_workitem_id_x: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_add_u32 s0, s0, s5 |
| ; GCN-NEXT: s_addc_u32 s1, s1, 0 |
| ; GCN-NEXT: s_getpc_b64 s[4:5] |
| ; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4 |
| ; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12 |
| ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GCN-NEXT: s_mov_b32 s32, 0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0x140 |
| ; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 |
| ; GCN-NEXT: v_mov_b32_e32 v31, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v0, 10 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 20 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 30 |
| ; GCN-NEXT: v_mov_b32_e32 v3, 40 |
| ; GCN-NEXT: v_mov_b32_e32 v4, 50 |
| ; GCN-NEXT: v_mov_b32_e32 v5, 60 |
| ; GCN-NEXT: v_mov_b32_e32 v6, 0x46 |
| ; GCN-NEXT: v_mov_b32_e32 v7, 0x50 |
| ; GCN-NEXT: v_mov_b32_e32 v8, 0x5a |
| ; GCN-NEXT: v_mov_b32_e32 v9, 0x64 |
| ; GCN-NEXT: v_mov_b32_e32 v10, 0x6e |
| ; GCN-NEXT: v_mov_b32_e32 v11, 0x78 |
| ; GCN-NEXT: v_mov_b32_e32 v12, 0x82 |
| ; GCN-NEXT: v_mov_b32_e32 v13, 0x8c |
| ; GCN-NEXT: v_mov_b32_e32 v14, 0x96 |
| ; GCN-NEXT: v_mov_b32_e32 v15, 0xa0 |
| ; GCN-NEXT: v_mov_b32_e32 v16, 0xaa |
| ; GCN-NEXT: v_mov_b32_e32 v17, 0xb4 |
| ; GCN-NEXT: v_mov_b32_e32 v18, 0xbe |
| ; GCN-NEXT: v_mov_b32_e32 v19, 0xc8 |
| ; GCN-NEXT: v_mov_b32_e32 v20, 0xd2 |
| ; GCN-NEXT: v_mov_b32_e32 v21, 0xdc |
| ; GCN-NEXT: v_mov_b32_e32 v22, 0xe6 |
| ; GCN-NEXT: v_mov_b32_e32 v23, 0xf0 |
| ; GCN-NEXT: v_mov_b32_e32 v24, 0xfa |
| ; GCN-NEXT: v_mov_b32_e32 v25, 0x104 |
| ; GCN-NEXT: v_mov_b32_e32 v26, 0x10e |
| ; GCN-NEXT: v_mov_b32_e32 v27, 0x118 |
| ; GCN-NEXT: v_mov_b32_e32 v28, 0x122 |
| ; GCN-NEXT: v_mov_b32_e32 v29, 0x12c |
| ; GCN-NEXT: v_mov_b32_e32 v30, 0x136 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GCN-NEXT: s_endpgm |
| ; Argument N (0-based) is the constant 10 * (N + 1); the 32nd value, 320, is |
| ; the one expected on the stack. |
| call void @too_many_args_use_workitem_id_x( |
| i32 10, i32 20, i32 30, i32 40, |
| i32 50, i32 60, i32 70, i32 80, |
| i32 90, i32 100, i32 110, i32 120, |
| i32 130, i32 140, i32 150, i32 160, |
| i32 170, i32 180, i32 190, i32 200, |
| i32 210, i32 220, i32 230, i32 240, |
| i32 250, i32 260, i32 270, i32 280, |
| i32 290, i32 300, i32 310, i32 320) |
| ret void |
| } |
| ; GCN: .amdhsa_system_vgpr_workitem_id 0 |
| |
| ; Non-kernel caller: stores its own i32 argument, then calls |
| ; @too_many_args_use_workitem_id_x with 32 constant i32 arguments. |
| ; The expected output saves the old s33 and the return address (s30/s31) in |
| ; lanes of v40 (itself spilled at s33), bumps s32 by 0x400, and writes the |
| ; 32nd argument (320 = 0x140) to the stack slot at s32. No expected |
| ; instruction writes v31, so the value received in v31 is forwarded unchanged. |
| define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { |
| ; GFX7-LABEL: func_call_too_many_args_use_workitem_id_x: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: s_mov_b32 s4, s33 |
| ; GFX7-NEXT: s_mov_b32 s33, s32 |
| ; GFX7-NEXT: s_or_saveexec_b64 s[6:7], -1 |
| ; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill |
| ; GFX7-NEXT: s_mov_b64 exec, s[6:7] |
| ; GFX7-NEXT: s_addk_i32 s32, 0x400 |
| ; GFX7-NEXT: v_writelane_b32 v40, s4, 2 |
| ; GFX7-NEXT: s_getpc_b64 s[4:5] |
| ; GFX7-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4 |
| ; GFX7-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12 |
| ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v0 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: v_mov_b32_e32 v0, 0x140 |
| ; GFX7-NEXT: v_writelane_b32 v40, s30, 0 |
| ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], s32 |
| ; GFX7-NEXT: v_mov_b32_e32 v0, 10 |
| ; GFX7-NEXT: v_mov_b32_e32 v1, 20 |
| ; GFX7-NEXT: v_mov_b32_e32 v2, 30 |
| ; GFX7-NEXT: v_mov_b32_e32 v3, 40 |
| ; GFX7-NEXT: v_mov_b32_e32 v4, 50 |
| ; GFX7-NEXT: v_mov_b32_e32 v5, 60 |
| ; GFX7-NEXT: v_mov_b32_e32 v6, 0x46 |
| ; GFX7-NEXT: v_mov_b32_e32 v7, 0x50 |
| ; GFX7-NEXT: v_mov_b32_e32 v8, 0x5a |
| ; GFX7-NEXT: v_mov_b32_e32 v9, 0x64 |
| ; GFX7-NEXT: v_mov_b32_e32 v10, 0x6e |
| ; GFX7-NEXT: v_mov_b32_e32 v11, 0x78 |
| ; GFX7-NEXT: v_mov_b32_e32 v12, 0x82 |
| ; GFX7-NEXT: v_mov_b32_e32 v13, 0x8c |
| ; GFX7-NEXT: v_mov_b32_e32 v14, 0x96 |
| ; GFX7-NEXT: v_mov_b32_e32 v15, 0xa0 |
| ; GFX7-NEXT: v_mov_b32_e32 v16, 0xaa |
| ; GFX7-NEXT: v_mov_b32_e32 v17, 0xb4 |
| ; GFX7-NEXT: v_mov_b32_e32 v18, 0xbe |
| ; GFX7-NEXT: v_mov_b32_e32 v19, 0xc8 |
| ; GFX7-NEXT: v_mov_b32_e32 v20, 0xd2 |
| ; GFX7-NEXT: v_mov_b32_e32 v21, 0xdc |
| ; GFX7-NEXT: v_mov_b32_e32 v22, 0xe6 |
| ; GFX7-NEXT: v_mov_b32_e32 v23, 0xf0 |
| ; GFX7-NEXT: v_mov_b32_e32 v24, 0xfa |
| ; GFX7-NEXT: v_mov_b32_e32 v25, 0x104 |
| ; GFX7-NEXT: v_mov_b32_e32 v26, 0x10e |
| ; GFX7-NEXT: v_mov_b32_e32 v27, 0x118 |
| ; GFX7-NEXT: v_mov_b32_e32 v28, 0x122 |
| ; GFX7-NEXT: v_mov_b32_e32 v29, 0x12c |
| ; GFX7-NEXT: v_mov_b32_e32 v30, 0x136 |
| ; GFX7-NEXT: v_writelane_b32 v40, s31, 1 |
| ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX7-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GFX7-NEXT: v_readlane_b32 s31, v40, 1 |
| ; GFX7-NEXT: v_readlane_b32 s30, v40, 0 |
| ; GFX7-NEXT: s_mov_b32 s32, s33 |
| ; GFX7-NEXT: v_readlane_b32 s4, v40, 2 |
| ; GFX7-NEXT: s_or_saveexec_b64 s[6:7], -1 |
| ; GFX7-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload |
| ; GFX7-NEXT: s_mov_b64 exec, s[6:7] |
| ; GFX7-NEXT: s_mov_b32 s33, s4 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX90A-LABEL: func_call_too_many_args_use_workitem_id_x: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX90A-NEXT: s_mov_b32 s4, s33 |
| ; GFX90A-NEXT: s_mov_b32 s33, s32 |
| ; GFX90A-NEXT: s_or_saveexec_b64 s[6:7], -1 |
| ; GFX90A-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill |
| ; GFX90A-NEXT: s_mov_b64 exec, s[6:7] |
| ; GFX90A-NEXT: s_addk_i32 s32, 0x400 |
| ; GFX90A-NEXT: v_writelane_b32 v40, s4, 2 |
| ; GFX90A-NEXT: s_getpc_b64 s[4:5] |
| ; GFX90A-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4 |
| ; GFX90A-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12 |
| ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX90A-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: v_mov_b32_e32 v0, 0x140 |
| ; GFX90A-NEXT: v_writelane_b32 v40, s30, 0 |
| ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 |
| ; GFX90A-NEXT: v_mov_b32_e32 v0, 10 |
| ; GFX90A-NEXT: v_mov_b32_e32 v1, 20 |
| ; GFX90A-NEXT: v_mov_b32_e32 v2, 30 |
| ; GFX90A-NEXT: v_mov_b32_e32 v3, 40 |
| ; GFX90A-NEXT: v_mov_b32_e32 v4, 50 |
| ; GFX90A-NEXT: v_mov_b32_e32 v5, 60 |
| ; GFX90A-NEXT: v_mov_b32_e32 v6, 0x46 |
| ; GFX90A-NEXT: v_mov_b32_e32 v7, 0x50 |
| ; GFX90A-NEXT: v_mov_b32_e32 v8, 0x5a |
| ; GFX90A-NEXT: v_mov_b32_e32 v9, 0x64 |
| ; GFX90A-NEXT: v_mov_b32_e32 v10, 0x6e |
| ; GFX90A-NEXT: v_mov_b32_e32 v11, 0x78 |
| ; GFX90A-NEXT: v_mov_b32_e32 v12, 0x82 |
| ; GFX90A-NEXT: v_mov_b32_e32 v13, 0x8c |
| ; GFX90A-NEXT: v_mov_b32_e32 v14, 0x96 |
| ; GFX90A-NEXT: v_mov_b32_e32 v15, 0xa0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v16, 0xaa |
| ; GFX90A-NEXT: v_mov_b32_e32 v17, 0xb4 |
| ; GFX90A-NEXT: v_mov_b32_e32 v18, 0xbe |
| ; GFX90A-NEXT: v_mov_b32_e32 v19, 0xc8 |
| ; GFX90A-NEXT: v_mov_b32_e32 v20, 0xd2 |
| ; GFX90A-NEXT: v_mov_b32_e32 v21, 0xdc |
| ; GFX90A-NEXT: v_mov_b32_e32 v22, 0xe6 |
| ; GFX90A-NEXT: v_mov_b32_e32 v23, 0xf0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v24, 0xfa |
| ; GFX90A-NEXT: v_mov_b32_e32 v25, 0x104 |
| ; GFX90A-NEXT: v_mov_b32_e32 v26, 0x10e |
| ; GFX90A-NEXT: v_mov_b32_e32 v27, 0x118 |
| ; GFX90A-NEXT: v_mov_b32_e32 v28, 0x122 |
| ; GFX90A-NEXT: v_mov_b32_e32 v29, 0x12c |
| ; GFX90A-NEXT: v_mov_b32_e32 v30, 0x136 |
| ; GFX90A-NEXT: v_writelane_b32 v40, s31, 1 |
| ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GFX90A-NEXT: v_readlane_b32 s31, v40, 1 |
| ; GFX90A-NEXT: v_readlane_b32 s30, v40, 0 |
| ; GFX90A-NEXT: s_mov_b32 s32, s33 |
| ; GFX90A-NEXT: v_readlane_b32 s4, v40, 2 |
| ; GFX90A-NEXT: s_or_saveexec_b64 s[6:7], -1 |
| ; GFX90A-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload |
| ; GFX90A-NEXT: s_mov_b64 exec, s[6:7] |
| ; GFX90A-NEXT: s_mov_b32 s33, s4 |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: s_setpc_b64 s[30:31] |
| ; Use %arg0 before the call; the expected output stores v0 before v0 is |
| ; reused for the outgoing arguments. |
| store volatile i32 %arg0, ptr addrspace(1) poison |
| ; Argument N (0-based) is the constant 10 * (N + 1); the 32nd value, 320, is |
| ; the one expected on the stack. |
| call void @too_many_args_use_workitem_id_x( |
| i32 10, i32 20, i32 30, i32 40, |
| i32 50, i32 60, i32 70, i32 80, |
| i32 90, i32 100, i32 110, i32 120, |
| i32 130, i32 140, i32 150, i32 160, |
| i32 170, i32 180, i32 190, i32 200, |
| i32 210, i32 220, i32 230, i32 240, |
| i32 250, i32 260, i32 270, i32 280, |
| i32 290, i32 300, i32 310, i32 320) |
| ret void |
| } |
| |
| ; Requires loading and storing to stack slot. |
| ; Takes 32 i32 arguments and forwards all of them, in order, to |
| ; @too_many_args_use_workitem_id_x. |
| ; The expected output loads the incoming stack-passed value from the slot at |
| ; s33 into v32 and writes it to the outgoing slot at s32; no v0-v31 register |
| ; is rewritten. v40 is spilled at s33 offset:4, next to the incoming slot. |
| define void @too_many_args_call_too_many_args_use_workitem_id_x( |
| ; GCN-LABEL: too_many_args_call_too_many_args_use_workitem_id_x: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b32 s4, s33 |
| ; GCN-NEXT: s_mov_b32 s33, s32 |
| ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 |
| ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill |
| ; GCN-NEXT: s_mov_b64 exec, s[6:7] |
| ; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s33 |
| ; GCN-NEXT: s_addk_i32 s32, 0x400 |
| ; GCN-NEXT: v_writelane_b32 v40, s4, 2 |
| ; GCN-NEXT: s_getpc_b64 s[4:5] |
| ; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4 |
| ; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12 |
| ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GCN-NEXT: v_writelane_b32 v40, s30, 0 |
| ; GCN-NEXT: v_writelane_b32 v40, s31, 1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GCN-NEXT: v_readlane_b32 s31, v40, 1 |
| ; GCN-NEXT: v_readlane_b32 s30, v40, 0 |
| ; GCN-NEXT: s_mov_b32 s32, s33 |
| ; GCN-NEXT: v_readlane_b32 s4, v40, 2 |
| ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 |
| ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload |
| ; GCN-NEXT: s_mov_b64 exec, s[6:7] |
| ; GCN-NEXT: s_mov_b32 s33, s4 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, |
| i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15, |
| i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23, |
| i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 { |
| ; Pass every incoming argument straight through in the same position. |
| call void @too_many_args_use_workitem_id_x( |
| i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, |
| i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15, |
| i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23, |
| i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) |
| ret void |
| } |
| |
| ; stack layout: |
| ; frame[0] = stack passed arg31 |
| ; frame[1] = byval arg32 |
| ; Callee with 32 i32 arguments plus a byval i32 pointer in addrspace(5). |
| ; It stores workitem id x, then each of %arg0..%arg31, then does a volatile |
| ; load through the byval pointer. |
| ; The expected output masks v31 with 0x3ff for the workitem id, reads the |
| ; last i32 argument from the stack slot at s32, and reads the byval object |
| ; from s32 offset:4. The GFX7 checks reuse v31 for the stack load after the |
| ; id has been stored; the GFX90A checks load it into v32 up front. |
| define void @too_many_args_use_workitem_id_x_byval( |
| ; GFX7-LABEL: too_many_args_use_workitem_id_x_byval: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_and_b32_e32 v31, 0x3ff, v31 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v31 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v0 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v1 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v2 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v3 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v4 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v5 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v6 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v7 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v8 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v9 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v10 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v11 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v12 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v13 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v14 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v15 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v16 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v17 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v18 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v19 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v20 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v21 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v22 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v23 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v24 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v25 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v26 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v27 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v28 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v29 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v30 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v31 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 glc |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX90A-LABEL: too_many_args_use_workitem_id_x_byval: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX90A-NEXT: buffer_load_dword v32, off, s[0:3], s32 |
| ; GFX90A-NEXT: v_and_b32_e32 v31, 0x3ff, v31 |
| ; GFX90A-NEXT: global_store_dword v[0:1], v31, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v1, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v2, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v3, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v4, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v5, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v6, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v7, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v8, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v9, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v10, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v11, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v12, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v13, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v14, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v15, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v16, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v17, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v18, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v19, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v20, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v21, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v22, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v23, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v24, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v25, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v26, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v27, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v28, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v29, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v30, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v32, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 glc |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: s_setpc_b64 s[30:31] |
| i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, |
| i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15, |
| i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23, |
| i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31, ptr addrspace(5) byval(i32) %arg32) #1 { |
| ; The workitem id is stored first, ahead of every argument store. |
| %val = call i32 @llvm.amdgcn.workitem.id.x() |
| store volatile i32 %val, ptr addrspace(1) poison |
| |
| ; Volatile stores keep every argument live and in program order. |
| store volatile i32 %arg0, ptr addrspace(1) poison |
| store volatile i32 %arg1, ptr addrspace(1) poison |
| store volatile i32 %arg2, ptr addrspace(1) poison |
| store volatile i32 %arg3, ptr addrspace(1) poison |
| store volatile i32 %arg4, ptr addrspace(1) poison |
| store volatile i32 %arg5, ptr addrspace(1) poison |
| store volatile i32 %arg6, ptr addrspace(1) poison |
| store volatile i32 %arg7, ptr addrspace(1) poison |
| |
| store volatile i32 %arg8, ptr addrspace(1) poison |
| store volatile i32 %arg9, ptr addrspace(1) poison |
| store volatile i32 %arg10, ptr addrspace(1) poison |
| store volatile i32 %arg11, ptr addrspace(1) poison |
| store volatile i32 %arg12, ptr addrspace(1) poison |
| store volatile i32 %arg13, ptr addrspace(1) poison |
| store volatile i32 %arg14, ptr addrspace(1) poison |
| store volatile i32 %arg15, ptr addrspace(1) poison |
| |
| store volatile i32 %arg16, ptr addrspace(1) poison |
| store volatile i32 %arg17, ptr addrspace(1) poison |
| store volatile i32 %arg18, ptr addrspace(1) poison |
| store volatile i32 %arg19, ptr addrspace(1) poison |
| store volatile i32 %arg20, ptr addrspace(1) poison |
| store volatile i32 %arg21, ptr addrspace(1) poison |
| store volatile i32 %arg22, ptr addrspace(1) poison |
| store volatile i32 %arg23, ptr addrspace(1) poison |
| |
| store volatile i32 %arg24, ptr addrspace(1) poison |
| store volatile i32 %arg25, ptr addrspace(1) poison |
| store volatile i32 %arg26, ptr addrspace(1) poison |
| store volatile i32 %arg27, ptr addrspace(1) poison |
| store volatile i32 %arg28, ptr addrspace(1) poison |
| store volatile i32 %arg29, ptr addrspace(1) poison |
| store volatile i32 %arg30, ptr addrspace(1) poison |
| store volatile i32 %arg31, ptr addrspace(1) poison |
| ; Read the byval object; the result is unused, the load is kept because it |
| ; is volatile. |
| %private = load volatile i32, ptr addrspace(5) %arg32 |
| ret void |
| } |
| |
| ; sp[0] = stack passed %arg31 |
| ; sp[1] = byval |
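| ; The local stack object is initialized first. Offset 0 is the emergency spill slot. |
| ; NOTE(review): the checks below store 999 (0x3e7) at scratch offset 0, so the |
| ; alloca itself appears to live at offset 0 -- confirm whether the spill-slot |
| ; remark is still accurate. |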
| ; Kernel caller for the byval variant: allocates a private i32, stores 999 |
| ; into it, then calls @too_many_args_use_workitem_id_x_byval with 32 constant |
| ; i32 arguments and the alloca as the byval argument. |
| ; The expected output stores 999 (0x3e7) at scratch offset 0, sets s32 to |
| ; 0x400, writes the 32nd argument (320 = 0x140) at s32, and copies the alloca |
| ; contents to s32 offset:4. v0 serves as the temporary for those stores, so |
| ; the first argument (10) is materialized into v0 last; the kernel's incoming |
| ; v0 is copied into v31 at the start. |
| define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1 { |
| ; GCN-LABEL: kern_call_too_many_args_use_workitem_id_x_byval: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_add_u32 s0, s0, s5 |
| ; GCN-NEXT: s_addc_u32 s1, s1, 0 |
| ; GCN-NEXT: v_mov_b32_e32 v31, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v0, 0x3e7 |
| ; GCN-NEXT: s_movk_i32 s32, 0x400 |
| ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_mov_b32_e32 v0, 0x140 |
| ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 |
| ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 |
| ; GCN-NEXT: s_getpc_b64 s[4:5] |
| ; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x_byval@gotpcrel32@lo+4 |
| ; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x_byval@gotpcrel32@hi+12 |
| ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 20 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 30 |
| ; GCN-NEXT: v_mov_b32_e32 v3, 40 |
| ; GCN-NEXT: v_mov_b32_e32 v4, 50 |
| ; GCN-NEXT: v_mov_b32_e32 v5, 60 |
| ; GCN-NEXT: v_mov_b32_e32 v6, 0x46 |
| ; GCN-NEXT: v_mov_b32_e32 v7, 0x50 |
| ; GCN-NEXT: v_mov_b32_e32 v8, 0x5a |
| ; GCN-NEXT: v_mov_b32_e32 v9, 0x64 |
| ; GCN-NEXT: v_mov_b32_e32 v10, 0x6e |
| ; GCN-NEXT: v_mov_b32_e32 v11, 0x78 |
| ; GCN-NEXT: v_mov_b32_e32 v12, 0x82 |
| ; GCN-NEXT: v_mov_b32_e32 v13, 0x8c |
| ; GCN-NEXT: v_mov_b32_e32 v14, 0x96 |
| ; GCN-NEXT: v_mov_b32_e32 v15, 0xa0 |
| ; GCN-NEXT: v_mov_b32_e32 v16, 0xaa |
| ; GCN-NEXT: v_mov_b32_e32 v17, 0xb4 |
| ; GCN-NEXT: v_mov_b32_e32 v18, 0xbe |
| ; GCN-NEXT: v_mov_b32_e32 v19, 0xc8 |
| ; GCN-NEXT: v_mov_b32_e32 v20, 0xd2 |
| ; GCN-NEXT: v_mov_b32_e32 v21, 0xdc |
| ; GCN-NEXT: v_mov_b32_e32 v22, 0xe6 |
| ; GCN-NEXT: v_mov_b32_e32 v23, 0xf0 |
| ; GCN-NEXT: v_mov_b32_e32 v24, 0xfa |
| ; GCN-NEXT: v_mov_b32_e32 v25, 0x104 |
| ; GCN-NEXT: v_mov_b32_e32 v26, 0x10e |
| ; GCN-NEXT: v_mov_b32_e32 v27, 0x118 |
| ; GCN-NEXT: v_mov_b32_e32 v28, 0x122 |
| ; GCN-NEXT: v_mov_b32_e32 v29, 0x12c |
| ; GCN-NEXT: v_mov_b32_e32 v30, 0x136 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 |
| ; GCN-NEXT: v_mov_b32_e32 v0, 10 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GCN-NEXT: s_endpgm |
| ; Private object that is passed byval; the volatile store gives it a known |
| ; value (999) that must be copied into the outgoing argument area. |
| %alloca = alloca i32, align 4, addrspace(5) |
| store volatile i32 999, ptr addrspace(5) %alloca |
| call void @too_many_args_use_workitem_id_x_byval( |
| i32 10, i32 20, i32 30, i32 40, |
| i32 50, i32 60, i32 70, i32 80, |
| i32 90, i32 100, i32 110, i32 120, |
| i32 130, i32 140, i32 150, i32 160, |
| i32 170, i32 180, i32 190, i32 200, |
| i32 210, i32 220, i32 230, i32 240, |
| i32 250, i32 260, i32 270, i32 280, |
| i32 290, i32 300, i32 310, i32 320, |
| ptr addrspace(5) byval(i32) %alloca) |
| ret void |
| } |
| ; GCN: .amdhsa_system_vgpr_workitem_id 0 |
| |
| ; Non-kernel caller for the byval variant: same IR body as the kernel caller |
| ; (private i32 initialized to 999, 32 constant i32 arguments, alloca passed |
| ; byval), but compiled as an ordinary function. |
| ; The expected output places the alloca at s33 offset 0 and the v40 spill at |
| ; s33 offset:4, bumps s32 by 0x400, writes the 32nd argument (320 = 0x140) |
| ; at s32 and the copy of the alloca at s32 offset:4. No expected instruction |
| ; writes v31, so the value received in v31 is forwarded unchanged. |
| define void @func_call_too_many_args_use_workitem_id_x_byval() #1 { |
| ; GCN-LABEL: func_call_too_many_args_use_workitem_id_x_byval: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b32 s4, s33 |
| ; GCN-NEXT: s_mov_b32 s33, s32 |
| ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 |
| ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill |
| ; GCN-NEXT: s_mov_b64 exec, s[6:7] |
| ; GCN-NEXT: v_mov_b32_e32 v0, 0x3e7 |
| ; GCN-NEXT: s_addk_i32 s32, 0x400 |
| ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_mov_b32_e32 v0, 0x140 |
| ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 |
| ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 |
| ; GCN-NEXT: v_writelane_b32 v40, s4, 2 |
| ; GCN-NEXT: s_getpc_b64 s[4:5] |
| ; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x_byval@gotpcrel32@lo+4 |
| ; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x_byval@gotpcrel32@hi+12 |
| ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GCN-NEXT: v_writelane_b32 v40, s30, 0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 20 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 30 |
| ; GCN-NEXT: v_mov_b32_e32 v3, 40 |
| ; GCN-NEXT: v_mov_b32_e32 v4, 50 |
| ; GCN-NEXT: v_mov_b32_e32 v5, 60 |
| ; GCN-NEXT: v_mov_b32_e32 v6, 0x46 |
| ; GCN-NEXT: v_mov_b32_e32 v7, 0x50 |
| ; GCN-NEXT: v_mov_b32_e32 v8, 0x5a |
| ; GCN-NEXT: v_mov_b32_e32 v9, 0x64 |
| ; GCN-NEXT: v_mov_b32_e32 v10, 0x6e |
| ; GCN-NEXT: v_mov_b32_e32 v11, 0x78 |
| ; GCN-NEXT: v_mov_b32_e32 v12, 0x82 |
| ; GCN-NEXT: v_mov_b32_e32 v13, 0x8c |
| ; GCN-NEXT: v_mov_b32_e32 v14, 0x96 |
| ; GCN-NEXT: v_mov_b32_e32 v15, 0xa0 |
| ; GCN-NEXT: v_mov_b32_e32 v16, 0xaa |
| ; GCN-NEXT: v_mov_b32_e32 v17, 0xb4 |
| ; GCN-NEXT: v_mov_b32_e32 v18, 0xbe |
| ; GCN-NEXT: v_mov_b32_e32 v19, 0xc8 |
| ; GCN-NEXT: v_mov_b32_e32 v20, 0xd2 |
| ; GCN-NEXT: v_mov_b32_e32 v21, 0xdc |
| ; GCN-NEXT: v_mov_b32_e32 v22, 0xe6 |
| ; GCN-NEXT: v_mov_b32_e32 v23, 0xf0 |
| ; GCN-NEXT: v_mov_b32_e32 v24, 0xfa |
| ; GCN-NEXT: v_mov_b32_e32 v25, 0x104 |
| ; GCN-NEXT: v_mov_b32_e32 v26, 0x10e |
| ; GCN-NEXT: v_mov_b32_e32 v27, 0x118 |
| ; GCN-NEXT: v_mov_b32_e32 v28, 0x122 |
| ; GCN-NEXT: v_mov_b32_e32 v29, 0x12c |
| ; GCN-NEXT: v_mov_b32_e32 v30, 0x136 |
| ; GCN-NEXT: v_writelane_b32 v40, s31, 1 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 |
| ; GCN-NEXT: v_mov_b32_e32 v0, 10 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GCN-NEXT: v_readlane_b32 s31, v40, 1 |
| ; GCN-NEXT: v_readlane_b32 s30, v40, 0 |
| ; GCN-NEXT: s_mov_b32 s32, s33 |
| ; GCN-NEXT: v_readlane_b32 s4, v40, 2 |
| ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 |
| ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload |
| ; GCN-NEXT: s_mov_b64 exec, s[6:7] |
| ; GCN-NEXT: s_mov_b32 s33, s4 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; Private object that is passed byval; the volatile store gives it a known |
| ; value (999) that must be copied into the outgoing argument area. |
| %alloca = alloca i32, align 4, addrspace(5) |
| store volatile i32 999, ptr addrspace(5) %alloca |
| call void @too_many_args_use_workitem_id_x_byval( |
| i32 10, i32 20, i32 30, i32 40, |
| i32 50, i32 60, i32 70, i32 80, |
| i32 90, i32 100, i32 110, i32 120, |
| i32 130, i32 140, i32 150, i32 160, |
| i32 170, i32 180, i32 190, i32 200, |
| i32 210, i32 220, i32 230, i32 240, |
| i32 250, i32 260, i32 270, i32 280, |
| i32 290, i32 300, i32 310, i32 320, |
| ptr addrspace(5) byval(i32) %alloca) |
| ret void |
| } |
| |
| ; Callee taking 32 explicit i32 arguments (%arg0-%arg31) that also reads the |
| ; workitem ID x, y and z intrinsics; every value is written out with a |
| ; volatile store. |
| ; Per the checks below, %arg0-%arg30 are read from v0-v30, %arg31 is loaded |
| ; from the stack at s32, and the three IDs are unpacked from v31 |
| ; (x = bits 0-9, y = bits 10-19, z = bits 20-29). |
| define void @too_many_args_use_workitem_id_xyz( |
| ; GFX7-LABEL: too_many_args_use_workitem_id_xyz: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_and_b32_e32 v32, 0x3ff, v31 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v32 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: v_bfe_u32 v32, v31, 10, 10 |
| ; GFX7-NEXT: v_bfe_u32 v31, v31, 20, 10 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v32 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v31 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v0 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v1 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v2 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v3 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v4 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v5 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v6 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v7 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v8 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v9 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v10 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v11 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v12 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v13 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v14 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v15 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v16 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v17 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v18 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v19 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v20 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v21 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v22 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v23 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v24 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v25 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v26 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v27 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v28 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v29 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v30 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v31 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX90A-LABEL: too_many_args_use_workitem_id_xyz: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX90A-NEXT: buffer_load_dword v32, off, s[0:3], s32 |
| ; GFX90A-NEXT: v_and_b32_e32 v33, 0x3ff, v31 |
| ; GFX90A-NEXT: global_store_dword v[0:1], v33, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: v_bfe_u32 v33, v31, 10, 10 |
| ; GFX90A-NEXT: v_bfe_u32 v31, v31, 20, 10 |
| ; GFX90A-NEXT: global_store_dword v[0:1], v33, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v31, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v1, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v2, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v3, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v4, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v5, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v6, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v7, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v8, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v9, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v10, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v11, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v12, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v13, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v14, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v15, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v16, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v17, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v18, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v19, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v20, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v21, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v22, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v23, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v24, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v25, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v26, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v27, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v28, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v29, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v30, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v32, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: s_setpc_b64 s[30:31] |
| i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, |
| i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15, |
| i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23, |
| i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 { |
| ; Read each workitem ID (x, y, z) and write it out with a volatile store. |
| %val0 = call i32 @llvm.amdgcn.workitem.id.x() |
| store volatile i32 %val0, ptr addrspace(1) poison |
| %val1 = call i32 @llvm.amdgcn.workitem.id.y() |
| store volatile i32 %val1, ptr addrspace(1) poison |
| %val2 = call i32 @llvm.amdgcn.workitem.id.z() |
| store volatile i32 %val2, ptr addrspace(1) poison |
| |
| ; Write out every explicit argument, in order, with volatile stores. |
| store volatile i32 %arg0, ptr addrspace(1) poison |
| store volatile i32 %arg1, ptr addrspace(1) poison |
| store volatile i32 %arg2, ptr addrspace(1) poison |
| store volatile i32 %arg3, ptr addrspace(1) poison |
| store volatile i32 %arg4, ptr addrspace(1) poison |
| store volatile i32 %arg5, ptr addrspace(1) poison |
| store volatile i32 %arg6, ptr addrspace(1) poison |
| store volatile i32 %arg7, ptr addrspace(1) poison |
| |
| store volatile i32 %arg8, ptr addrspace(1) poison |
| store volatile i32 %arg9, ptr addrspace(1) poison |
| store volatile i32 %arg10, ptr addrspace(1) poison |
| store volatile i32 %arg11, ptr addrspace(1) poison |
| store volatile i32 %arg12, ptr addrspace(1) poison |
| store volatile i32 %arg13, ptr addrspace(1) poison |
| store volatile i32 %arg14, ptr addrspace(1) poison |
| store volatile i32 %arg15, ptr addrspace(1) poison |
| |
| store volatile i32 %arg16, ptr addrspace(1) poison |
| store volatile i32 %arg17, ptr addrspace(1) poison |
| store volatile i32 %arg18, ptr addrspace(1) poison |
| store volatile i32 %arg19, ptr addrspace(1) poison |
| store volatile i32 %arg20, ptr addrspace(1) poison |
| store volatile i32 %arg21, ptr addrspace(1) poison |
| store volatile i32 %arg22, ptr addrspace(1) poison |
| store volatile i32 %arg23, ptr addrspace(1) poison |
| |
| store volatile i32 %arg24, ptr addrspace(1) poison |
| store volatile i32 %arg25, ptr addrspace(1) poison |
| store volatile i32 %arg26, ptr addrspace(1) poison |
| store volatile i32 %arg27, ptr addrspace(1) poison |
| store volatile i32 %arg28, ptr addrspace(1) poison |
| store volatile i32 %arg29, ptr addrspace(1) poison |
| store volatile i32 %arg30, ptr addrspace(1) poison |
| store volatile i32 %arg31, ptr addrspace(1) poison |
| |
| ret void |
| } |
| |
| ; Kernel that calls @too_many_args_use_workitem_id_xyz with the constants |
| ; 10, 20, ..., 320. |
| ; v31 = packed workitem ID { Z, Y, X }: the GFX7 checks build it as |
| ; x | (y << 10) | (z << 20); the GFX90A checks copy v0 into v31 unchanged. |
| ; frame[0] = 32nd argument (320, emitted as 0x140), stored at s32. |
| define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 { |
| ; GFX7-LABEL: kern_call_too_many_args_use_workitem_id_xyz: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_add_u32 s0, s0, s5 |
| ; GFX7-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX7-NEXT: s_getpc_b64 s[4:5] |
| ; GFX7-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_xyz@gotpcrel32@lo+4 |
| ; GFX7-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_xyz@gotpcrel32@hi+12 |
| ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; GFX7-NEXT: s_mov_b32 s32, 0 |
| ; GFX7-NEXT: v_mov_b32_e32 v3, 0x140 |
| ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
| ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX7-NEXT: buffer_store_dword v3, off, s[0:3], s32 |
| ; GFX7-NEXT: v_or_b32_e32 v31, v0, v2 |
| ; GFX7-NEXT: v_mov_b32_e32 v0, 10 |
| ; GFX7-NEXT: v_mov_b32_e32 v1, 20 |
| ; GFX7-NEXT: v_mov_b32_e32 v2, 30 |
| ; GFX7-NEXT: v_mov_b32_e32 v3, 40 |
| ; GFX7-NEXT: v_mov_b32_e32 v4, 50 |
| ; GFX7-NEXT: v_mov_b32_e32 v5, 60 |
| ; GFX7-NEXT: v_mov_b32_e32 v6, 0x46 |
| ; GFX7-NEXT: v_mov_b32_e32 v7, 0x50 |
| ; GFX7-NEXT: v_mov_b32_e32 v8, 0x5a |
| ; GFX7-NEXT: v_mov_b32_e32 v9, 0x64 |
| ; GFX7-NEXT: v_mov_b32_e32 v10, 0x6e |
| ; GFX7-NEXT: v_mov_b32_e32 v11, 0x78 |
| ; GFX7-NEXT: v_mov_b32_e32 v12, 0x82 |
| ; GFX7-NEXT: v_mov_b32_e32 v13, 0x8c |
| ; GFX7-NEXT: v_mov_b32_e32 v14, 0x96 |
| ; GFX7-NEXT: v_mov_b32_e32 v15, 0xa0 |
| ; GFX7-NEXT: v_mov_b32_e32 v16, 0xaa |
| ; GFX7-NEXT: v_mov_b32_e32 v17, 0xb4 |
| ; GFX7-NEXT: v_mov_b32_e32 v18, 0xbe |
| ; GFX7-NEXT: v_mov_b32_e32 v19, 0xc8 |
| ; GFX7-NEXT: v_mov_b32_e32 v20, 0xd2 |
| ; GFX7-NEXT: v_mov_b32_e32 v21, 0xdc |
| ; GFX7-NEXT: v_mov_b32_e32 v22, 0xe6 |
| ; GFX7-NEXT: v_mov_b32_e32 v23, 0xf0 |
| ; GFX7-NEXT: v_mov_b32_e32 v24, 0xfa |
| ; GFX7-NEXT: v_mov_b32_e32 v25, 0x104 |
| ; GFX7-NEXT: v_mov_b32_e32 v26, 0x10e |
| ; GFX7-NEXT: v_mov_b32_e32 v27, 0x118 |
| ; GFX7-NEXT: v_mov_b32_e32 v28, 0x122 |
| ; GFX7-NEXT: v_mov_b32_e32 v29, 0x12c |
| ; GFX7-NEXT: v_mov_b32_e32 v30, 0x136 |
| ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX7-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GFX7-NEXT: s_endpgm |
| ; |
| ; GFX90A-LABEL: kern_call_too_many_args_use_workitem_id_xyz: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_add_u32 s0, s0, s5 |
| ; GFX90A-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX90A-NEXT: s_getpc_b64 s[4:5] |
| ; GFX90A-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_xyz@gotpcrel32@lo+4 |
| ; GFX90A-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_xyz@gotpcrel32@hi+12 |
| ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX90A-NEXT: s_mov_b32 s32, 0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v1, 0x140 |
| ; GFX90A-NEXT: buffer_store_dword v1, off, s[0:3], s32 |
| ; GFX90A-NEXT: v_mov_b32_e32 v31, v0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v0, 10 |
| ; GFX90A-NEXT: v_mov_b32_e32 v1, 20 |
| ; GFX90A-NEXT: v_mov_b32_e32 v2, 30 |
| ; GFX90A-NEXT: v_mov_b32_e32 v3, 40 |
| ; GFX90A-NEXT: v_mov_b32_e32 v4, 50 |
| ; GFX90A-NEXT: v_mov_b32_e32 v5, 60 |
| ; GFX90A-NEXT: v_mov_b32_e32 v6, 0x46 |
| ; GFX90A-NEXT: v_mov_b32_e32 v7, 0x50 |
| ; GFX90A-NEXT: v_mov_b32_e32 v8, 0x5a |
| ; GFX90A-NEXT: v_mov_b32_e32 v9, 0x64 |
| ; GFX90A-NEXT: v_mov_b32_e32 v10, 0x6e |
| ; GFX90A-NEXT: v_mov_b32_e32 v11, 0x78 |
| ; GFX90A-NEXT: v_mov_b32_e32 v12, 0x82 |
| ; GFX90A-NEXT: v_mov_b32_e32 v13, 0x8c |
| ; GFX90A-NEXT: v_mov_b32_e32 v14, 0x96 |
| ; GFX90A-NEXT: v_mov_b32_e32 v15, 0xa0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v16, 0xaa |
| ; GFX90A-NEXT: v_mov_b32_e32 v17, 0xb4 |
| ; GFX90A-NEXT: v_mov_b32_e32 v18, 0xbe |
| ; GFX90A-NEXT: v_mov_b32_e32 v19, 0xc8 |
| ; GFX90A-NEXT: v_mov_b32_e32 v20, 0xd2 |
| ; GFX90A-NEXT: v_mov_b32_e32 v21, 0xdc |
| ; GFX90A-NEXT: v_mov_b32_e32 v22, 0xe6 |
| ; GFX90A-NEXT: v_mov_b32_e32 v23, 0xf0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v24, 0xfa |
| ; GFX90A-NEXT: v_mov_b32_e32 v25, 0x104 |
| ; GFX90A-NEXT: v_mov_b32_e32 v26, 0x10e |
| ; GFX90A-NEXT: v_mov_b32_e32 v27, 0x118 |
| ; GFX90A-NEXT: v_mov_b32_e32 v28, 0x122 |
| ; GFX90A-NEXT: v_mov_b32_e32 v29, 0x12c |
| ; GFX90A-NEXT: v_mov_b32_e32 v30, 0x136 |
| ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GFX90A-NEXT: s_endpgm |
| call void @too_many_args_use_workitem_id_xyz( |
| i32 10, i32 20, i32 30, i32 40, |
| i32 50, i32 60, i32 70, i32 80, |
| i32 90, i32 100, i32 110, i32 120, |
| i32 130, i32 140, i32 150, i32 160, |
| i32 170, i32 180, i32 190, i32 200, |
| i32 210, i32 220, i32 230, i32 240, |
| i32 250, i32 260, i32 270, i32 280, |
| i32 290, i32 300, i32 310, i32 320) |
| ret void |
| } |
| ; GCN: .amdhsa_system_vgpr_workitem_id 2 |
| |
| ; Callee taking 31 explicit i32 arguments (%arg0-%arg30) that also reads the |
| ; workitem ID x, y and z intrinsics; every value is written out with a |
| ; volatile store. |
| ; Despite the "x_stack_yz" name, the checks below take all three IDs from v31 |
| ; (x = bits 0-9, y = bits 10-19, z = bits 20-29), read %arg0-%arg30 from |
| ; v0-v30, and contain no stack loads; the check that follows the function |
| ; expects ScratchSize: 0. |
| define void @too_many_args_use_workitem_id_x_stack_yz( |
| ; GFX7-LABEL: too_many_args_use_workitem_id_x_stack_yz: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_and_b32_e32 v32, 0x3ff, v31 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v32 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: v_bfe_u32 v32, v31, 10, 10 |
| ; GFX7-NEXT: v_bfe_u32 v31, v31, 20, 10 |
| ; GFX7-NEXT: flat_store_dword v[0:1], v32 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v31 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v0 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v1 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v2 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v3 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v4 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v5 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v6 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v7 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v8 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v9 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v10 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v11 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v12 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v13 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v14 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v15 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v16 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v17 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v18 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v19 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v20 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v21 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v22 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v23 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v24 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v25 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v26 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v27 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v28 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v29 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: flat_store_dword v[0:1], v30 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX90A-LABEL: too_many_args_use_workitem_id_x_stack_yz: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX90A-NEXT: v_and_b32_e32 v32, 0x3ff, v31 |
| ; GFX90A-NEXT: global_store_dword v[0:1], v32, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: v_bfe_u32 v32, v31, 10, 10 |
| ; GFX90A-NEXT: v_bfe_u32 v31, v31, 20, 10 |
| ; GFX90A-NEXT: global_store_dword v[0:1], v32, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v31, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v1, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v2, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v3, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v4, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v5, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v6, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v7, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v8, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v9, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v10, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v11, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v12, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v13, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v14, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v15, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v16, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v17, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v18, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v19, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v20, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v21, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v22, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v23, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v24, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v25, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v26, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v27, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v28, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v29, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v[0:1], v30, off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: s_setpc_b64 s[30:31] |
| i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, |
| i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15, |
| i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23, |
| i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30) #1 { |
| ; Read each workitem ID (x, y, z) and write it out with a volatile store. |
| %val0 = call i32 @llvm.amdgcn.workitem.id.x() |
| store volatile i32 %val0, ptr addrspace(1) poison |
| %val1 = call i32 @llvm.amdgcn.workitem.id.y() |
| store volatile i32 %val1, ptr addrspace(1) poison |
| %val2 = call i32 @llvm.amdgcn.workitem.id.z() |
| store volatile i32 %val2, ptr addrspace(1) poison |
| |
| ; Write out every explicit argument, in order, with volatile stores. |
| store volatile i32 %arg0, ptr addrspace(1) poison |
| store volatile i32 %arg1, ptr addrspace(1) poison |
| store volatile i32 %arg2, ptr addrspace(1) poison |
| store volatile i32 %arg3, ptr addrspace(1) poison |
| store volatile i32 %arg4, ptr addrspace(1) poison |
| store volatile i32 %arg5, ptr addrspace(1) poison |
| store volatile i32 %arg6, ptr addrspace(1) poison |
| store volatile i32 %arg7, ptr addrspace(1) poison |
| |
| store volatile i32 %arg8, ptr addrspace(1) poison |
| store volatile i32 %arg9, ptr addrspace(1) poison |
| store volatile i32 %arg10, ptr addrspace(1) poison |
| store volatile i32 %arg11, ptr addrspace(1) poison |
| store volatile i32 %arg12, ptr addrspace(1) poison |
| store volatile i32 %arg13, ptr addrspace(1) poison |
| store volatile i32 %arg14, ptr addrspace(1) poison |
| store volatile i32 %arg15, ptr addrspace(1) poison |
| |
| store volatile i32 %arg16, ptr addrspace(1) poison |
| store volatile i32 %arg17, ptr addrspace(1) poison |
| store volatile i32 %arg18, ptr addrspace(1) poison |
| store volatile i32 %arg19, ptr addrspace(1) poison |
| store volatile i32 %arg20, ptr addrspace(1) poison |
| store volatile i32 %arg21, ptr addrspace(1) poison |
| store volatile i32 %arg22, ptr addrspace(1) poison |
| store volatile i32 %arg23, ptr addrspace(1) poison |
| |
| store volatile i32 %arg24, ptr addrspace(1) poison |
| store volatile i32 %arg25, ptr addrspace(1) poison |
| store volatile i32 %arg26, ptr addrspace(1) poison |
| store volatile i32 %arg27, ptr addrspace(1) poison |
| store volatile i32 %arg28, ptr addrspace(1) poison |
| store volatile i32 %arg29, ptr addrspace(1) poison |
| store volatile i32 %arg30, ptr addrspace(1) poison |
| |
| ret void |
| } |
| ; GCN: ScratchSize: 0 |
| |
| ; Kernel that calls @too_many_args_use_workitem_id_x_stack_yz with the |
| ; constants 10, 20, ..., 310. |
| ; Per the checks below, all 31 arguments are materialized in v0-v30 and v31 |
| ; carries the workitem ID (GFX7: x | (y << 10) | (z << 20); GFX90A: a copy of |
| ; v0); no buffer_store to the stack is expected before the call. |
| define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() #1 { |
| ; GFX7-LABEL: kern_call_too_many_args_use_workitem_id_x_stack_yz: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_add_u32 s0, s0, s5 |
| ; GFX7-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX7-NEXT: s_getpc_b64 s[4:5] |
| ; GFX7-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x_stack_yz@gotpcrel32@lo+4 |
| ; GFX7-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x_stack_yz@gotpcrel32@hi+12 |
| ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
| ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX7-NEXT: v_or_b32_e32 v31, v0, v2 |
| ; GFX7-NEXT: v_mov_b32_e32 v0, 10 |
| ; GFX7-NEXT: v_mov_b32_e32 v1, 20 |
| ; GFX7-NEXT: v_mov_b32_e32 v2, 30 |
| ; GFX7-NEXT: v_mov_b32_e32 v3, 40 |
| ; GFX7-NEXT: v_mov_b32_e32 v4, 50 |
| ; GFX7-NEXT: v_mov_b32_e32 v5, 60 |
| ; GFX7-NEXT: v_mov_b32_e32 v6, 0x46 |
| ; GFX7-NEXT: v_mov_b32_e32 v7, 0x50 |
| ; GFX7-NEXT: v_mov_b32_e32 v8, 0x5a |
| ; GFX7-NEXT: v_mov_b32_e32 v9, 0x64 |
| ; GFX7-NEXT: v_mov_b32_e32 v10, 0x6e |
| ; GFX7-NEXT: v_mov_b32_e32 v11, 0x78 |
| ; GFX7-NEXT: v_mov_b32_e32 v12, 0x82 |
| ; GFX7-NEXT: v_mov_b32_e32 v13, 0x8c |
| ; GFX7-NEXT: v_mov_b32_e32 v14, 0x96 |
| ; GFX7-NEXT: v_mov_b32_e32 v15, 0xa0 |
| ; GFX7-NEXT: v_mov_b32_e32 v16, 0xaa |
| ; GFX7-NEXT: v_mov_b32_e32 v17, 0xb4 |
| ; GFX7-NEXT: v_mov_b32_e32 v18, 0xbe |
| ; GFX7-NEXT: v_mov_b32_e32 v19, 0xc8 |
| ; GFX7-NEXT: v_mov_b32_e32 v20, 0xd2 |
| ; GFX7-NEXT: v_mov_b32_e32 v21, 0xdc |
| ; GFX7-NEXT: v_mov_b32_e32 v22, 0xe6 |
| ; GFX7-NEXT: v_mov_b32_e32 v23, 0xf0 |
| ; GFX7-NEXT: v_mov_b32_e32 v24, 0xfa |
| ; GFX7-NEXT: v_mov_b32_e32 v25, 0x104 |
| ; GFX7-NEXT: v_mov_b32_e32 v26, 0x10e |
| ; GFX7-NEXT: v_mov_b32_e32 v27, 0x118 |
| ; GFX7-NEXT: v_mov_b32_e32 v28, 0x122 |
| ; GFX7-NEXT: v_mov_b32_e32 v29, 0x12c |
| ; GFX7-NEXT: v_mov_b32_e32 v30, 0x136 |
| ; GFX7-NEXT: s_mov_b32 s32, 0 |
| ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX7-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GFX7-NEXT: s_endpgm |
| ; |
| ; GFX90A-LABEL: kern_call_too_many_args_use_workitem_id_x_stack_yz: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_add_u32 s0, s0, s5 |
| ; GFX90A-NEXT: s_addc_u32 s1, s1, 0 |
| ; GFX90A-NEXT: s_getpc_b64 s[4:5] |
| ; GFX90A-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x_stack_yz@gotpcrel32@lo+4 |
| ; GFX90A-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x_stack_yz@gotpcrel32@hi+12 |
| ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v31, v0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v0, 10 |
| ; GFX90A-NEXT: v_mov_b32_e32 v1, 20 |
| ; GFX90A-NEXT: v_mov_b32_e32 v2, 30 |
| ; GFX90A-NEXT: v_mov_b32_e32 v3, 40 |
| ; GFX90A-NEXT: v_mov_b32_e32 v4, 50 |
| ; GFX90A-NEXT: v_mov_b32_e32 v5, 60 |
| ; GFX90A-NEXT: v_mov_b32_e32 v6, 0x46 |
| ; GFX90A-NEXT: v_mov_b32_e32 v7, 0x50 |
| ; GFX90A-NEXT: v_mov_b32_e32 v8, 0x5a |
| ; GFX90A-NEXT: v_mov_b32_e32 v9, 0x64 |
| ; GFX90A-NEXT: v_mov_b32_e32 v10, 0x6e |
| ; GFX90A-NEXT: v_mov_b32_e32 v11, 0x78 |
| ; GFX90A-NEXT: v_mov_b32_e32 v12, 0x82 |
| ; GFX90A-NEXT: v_mov_b32_e32 v13, 0x8c |
| ; GFX90A-NEXT: v_mov_b32_e32 v14, 0x96 |
| ; GFX90A-NEXT: v_mov_b32_e32 v15, 0xa0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v16, 0xaa |
| ; GFX90A-NEXT: v_mov_b32_e32 v17, 0xb4 |
| ; GFX90A-NEXT: v_mov_b32_e32 v18, 0xbe |
| ; GFX90A-NEXT: v_mov_b32_e32 v19, 0xc8 |
| ; GFX90A-NEXT: v_mov_b32_e32 v20, 0xd2 |
| ; GFX90A-NEXT: v_mov_b32_e32 v21, 0xdc |
| ; GFX90A-NEXT: v_mov_b32_e32 v22, 0xe6 |
| ; GFX90A-NEXT: v_mov_b32_e32 v23, 0xf0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v24, 0xfa |
| ; GFX90A-NEXT: v_mov_b32_e32 v25, 0x104 |
| ; GFX90A-NEXT: v_mov_b32_e32 v26, 0x10e |
| ; GFX90A-NEXT: v_mov_b32_e32 v27, 0x118 |
| ; GFX90A-NEXT: v_mov_b32_e32 v28, 0x122 |
| ; GFX90A-NEXT: v_mov_b32_e32 v29, 0x12c |
| ; GFX90A-NEXT: v_mov_b32_e32 v30, 0x136 |
| ; GFX90A-NEXT: s_mov_b32 s32, 0 |
| ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| ; GFX90A-NEXT: s_endpgm |
| call void @too_many_args_use_workitem_id_x_stack_yz( |
| i32 10, i32 20, i32 30, i32 40, |
| i32 50, i32 60, i32 70, i32 80, |
| i32 90, i32 100, i32 110, i32 120, |
| i32 130, i32 140, i32 150, i32 160, |
| i32 170, i32 180, i32 190, i32 200, |
| i32 210, i32 220, i32 230, i32 240, |
| i32 250, i32 260, i32 270, i32 280, |
| i32 290, i32 300, i32 310) |
| ret void |
| } |
| ; GCN: .amdhsa_system_vgpr_workitem_id 2 |
| |
| ; Workitem ID intrinsics read by the test functions; each returns an i32 and |
| ; carries attribute group #0. |
| declare i32 @llvm.amdgcn.workitem.id.x() #0 |
| declare i32 @llvm.amdgcn.workitem.id.y() #0 |
| declare i32 @llvm.amdgcn.workitem.id.z() #0 |
| |
| ; #0 is attached to the intrinsic declarations above; #1 is attached to the |
| ; test functions and kernels. Both set "amdgpu-flat-work-group-size" to "1,512". |
| attributes #0 = { nounwind readnone speculatable "amdgpu-flat-work-group-size"="1,512" } |
| attributes #1 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" } |