| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-remove-redundant-endcf -amdgpu-codegenprepare-break-large-phis=0 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s |
| |
| ; Disabled endcf collapse at -O0. |
| ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -O0 -amdgpu-remove-redundant-endcf -amdgpu-codegenprepare-break-large-phis=0 < %s | FileCheck -enable-var-scope -check-prefix=GCN-O0 %s |
| |
; Note: Breaking large PHIs is disabled to prevent branches from being eliminated (in scc_liveness)
| |
; Two nested ifs whose "then" regions both exit directly to bb.outer.end, so
; the inner exec restore is redundant. In the -O1 run the collapse fires: the
; inner guard is applied with a plain s_and_b64 (no saveexec) and a single
; s_or_b64 at .LBB0_3 restores exec for both levels. The -O0 run disables the
; collapse, so %Flow (.LBB0_3) and .LBB0_4 each restore exec separately.
define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
; GCN-LABEL: simple_nested_if:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: v_cmp_lt_u32_e32 vcc, 1, v0
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT: s_cbranch_execz .LBB0_3
; GCN-NEXT: ; %bb.1: ; %bb.outer.then
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: s_mov_b32 s2, 0
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 2, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_store_dword v2, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_and_b64 exec, exec, vcc
; GCN-NEXT: s_cbranch_execz .LBB0_3
; GCN-NEXT: ; %bb.2: ; %bb.inner.then
; GCN-NEXT: s_waitcnt expcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, s1
; GCN-NEXT: v_add_i32_e32 v0, vcc, s0, v1
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v2, vcc
; GCN-NEXT: s_mov_b32 s0, s2
; GCN-NEXT: s_mov_b32 s1, s2
; GCN-NEXT: v_mov_b32_e32 v2, 1
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:4
; GCN-NEXT: .LBB0_3: ; %bb.outer.end
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, 3
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v1, v0
; GCN-NEXT: s_endpgm
;
; GCN-O0-LABEL: simple_nested_if:
; GCN-O0: ; %bb.0: ; %bb
; GCN-O0-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; GCN-O0-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
; GCN-O0-NEXT: s_mov_b32 s10, -1
; GCN-O0-NEXT: s_mov_b32 s11, 0xe8f000
; GCN-O0-NEXT: s_add_u32 s8, s8, s3
; GCN-O0-NEXT: s_addc_u32 s9, s9, 0
; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GCN-O0-NEXT: s_waitcnt lgkmcnt(0)
; GCN-O0-NEXT: v_writelane_b32 v1, s0, 0
; GCN-O0-NEXT: v_writelane_b32 v1, s1, 1
; GCN-O0-NEXT: v_mov_b32_e32 v2, v0
; GCN-O0-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b32 s0, 1
; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v0, s0
; GCN-O0-NEXT: s_mov_b64 s[0:1], exec
; GCN-O0-NEXT: v_writelane_b32 v1, s0, 2
; GCN-O0-NEXT: v_writelane_b32 v1, s1, 3
; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB0_4
; GCN-O0-NEXT: ; %bb.1: ; %bb.outer.then
; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: v_readlane_b32 s4, v1, 0
; GCN-O0-NEXT: v_readlane_b32 s5, v1, 1
; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
; GCN-O0-NEXT: s_mov_b32 s0, 0
; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1
; GCN-O0-NEXT: s_mov_b32 s1, s2
; GCN-O0-NEXT: ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7
; GCN-O0-NEXT: s_mov_b64 s[6:7], s[0:1]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: v_ashrrev_i32_e64 v4, 31, v0
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: v_mov_b32_e32 v2, v0
; GCN-O0-NEXT: v_mov_b32_e32 v3, v4
; GCN-O0-NEXT: s_mov_b32 s0, 2
; GCN-O0-NEXT: v_lshl_b64 v[3:4], v[2:3], s0
; GCN-O0-NEXT: v_mov_b32_e32 v2, 0
; GCN-O0-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[2:3], v0, s0
; GCN-O0-NEXT: s_mov_b64 s[0:1], exec
; GCN-O0-NEXT: v_writelane_b32 v1, s0, 4
; GCN-O0-NEXT: v_writelane_b32 v1, s1, 5
; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB0_3
; GCN-O0-NEXT: ; %bb.2: ; %bb.inner.then
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: v_readlane_b32 s0, v1, 0
; GCN-O0-NEXT: v_readlane_b32 s1, v1, 1
; GCN-O0-NEXT: v_mov_b32_e32 v0, 1
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: v_add_i32_e64 v2, s[2:3], v2, v0
; GCN-O0-NEXT: v_ashrrev_i32_e64 v4, 31, v2
; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GCN-O0-NEXT: v_mov_b32_e32 v3, v4
; GCN-O0-NEXT: s_mov_b32 s2, 2
; GCN-O0-NEXT: v_lshl_b64 v[2:3], v[2:3], s2
; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
; GCN-O0-NEXT: s_mov_b32 s4, 0
; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
; GCN-O0-NEXT: s_mov_b32 s5, s2
; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5]
; GCN-O0-NEXT: buffer_store_dword v0, v[2:3], s[0:3], 0 addr64
; GCN-O0-NEXT: .LBB0_3: ; %Flow
; GCN-O0-NEXT: v_readlane_b32 s0, v1, 4
; GCN-O0-NEXT: v_readlane_b32 s1, v1, 5
; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-O0-NEXT: .LBB0_4: ; %bb.outer.end
; GCN-O0-NEXT: v_readlane_b32 s0, v1, 2
; GCN-O0-NEXT: v_readlane_b32 s1, v1, 3
; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: v_mov_b32_e32 v2, 3
; GCN-O0-NEXT: v_mov_b32_e32 v0, 0
; GCN-O0-NEXT: s_mov_b32 m0, -1
; GCN-O0-NEXT: ds_write_b32 v0, v2
; GCN-O0-NEXT: s_endpgm
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
; Outer guard: only lanes with tid > 1 enter bb.outer.then.
%tmp1 = icmp ugt i32 %tmp, 1
br i1 %tmp1, label %bb.outer.then, label %bb.outer.end

bb.outer.then: ; preds = %bb
%tmp4 = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %tmp
store i32 0, ptr addrspace(1) %tmp4, align 4
; Inner guard: lanes with tid == 2 branch straight to the common join block,
; so both if regions end at bb.outer.end.
%tmp5 = icmp eq i32 %tmp, 2
br i1 %tmp5, label %bb.outer.end, label %bb.inner.then

bb.inner.then: ; preds = %bb.outer.then
%tmp7 = add i32 %tmp, 1
%tmp9 = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %tmp7
store i32 1, ptr addrspace(1) %tmp9, align 4
br label %bb.outer.end

bb.outer.end: ; preds = %bb.outer.then, %bb.inner.then, %bb
; LDS store executed by all lanes after exec is restored.
store i32 3, ptr addrspace(3) null
ret void
}
| |
; Same nesting shape as @simple_nested_if, but the inner if rejoins at
; bb.inner.end, which performs another store before reaching bb.outer.end.
; The inner endcf is therefore not redundant and cannot be collapsed: the
; -O1 output keeps two separate exec restores (s_or_b64 at .LBB1_3 and
; .LBB1_4).
define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %arg) {
; GCN-LABEL: uncollapsable_nested_if:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: v_cmp_lt_u32_e32 vcc, 1, v0
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT: s_cbranch_execz .LBB1_4
; GCN-NEXT: ; %bb.1: ; %bb.outer.then
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GCN-NEXT: v_lshlrev_b32_e32 v3, 2, v0
; GCN-NEXT: v_mov_b32_e32 v4, 0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: s_mov_b32 s2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, s1
; GCN-NEXT: v_add_i32_e32 v1, vcc, s0, v3
; GCN-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 2, v0
; GCN-NEXT: buffer_store_dword v4, v[3:4], s[0:3], 0 addr64
; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
; GCN-NEXT: s_cbranch_execz .LBB1_3
; GCN-NEXT: ; %bb.2: ; %bb.inner.then
; GCN-NEXT: s_mov_b32 s0, s2
; GCN-NEXT: s_mov_b32 s1, s2
; GCN-NEXT: v_mov_b32_e32 v0, 1
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 offset:4
; GCN-NEXT: .LBB1_3: ; %bb.inner.end
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
; GCN-NEXT: s_mov_b32 s0, s2
; GCN-NEXT: s_mov_b32 s1, s2
; GCN-NEXT: s_waitcnt expcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, 2
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 offset:8
; GCN-NEXT: .LBB1_4: ; %Flow
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: s_waitcnt expcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, 3
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v1, v0
; GCN-NEXT: s_endpgm
;
; GCN-O0-LABEL: uncollapsable_nested_if:
; GCN-O0: ; %bb.0: ; %bb
; GCN-O0-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; GCN-O0-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
; GCN-O0-NEXT: s_mov_b32 s10, -1
; GCN-O0-NEXT: s_mov_b32 s11, 0xe8f000
; GCN-O0-NEXT: s_add_u32 s8, s8, s3
; GCN-O0-NEXT: s_addc_u32 s9, s9, 0
; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GCN-O0-NEXT: s_waitcnt lgkmcnt(0)
; GCN-O0-NEXT: v_writelane_b32 v1, s0, 0
; GCN-O0-NEXT: v_writelane_b32 v1, s1, 1
; GCN-O0-NEXT: v_mov_b32_e32 v2, v0
; GCN-O0-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b32 s0, 1
; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v0, s0
; GCN-O0-NEXT: s_mov_b64 s[0:1], exec
; GCN-O0-NEXT: v_writelane_b32 v1, s0, 2
; GCN-O0-NEXT: v_writelane_b32 v1, s1, 3
; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB1_3
; GCN-O0-NEXT: ; %bb.1: ; %bb.outer.then
; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: v_readlane_b32 s4, v1, 0
; GCN-O0-NEXT: v_readlane_b32 s5, v1, 1
; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
; GCN-O0-NEXT: s_mov_b32 s0, 0
; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1
; GCN-O0-NEXT: s_mov_b32 s1, s2
; GCN-O0-NEXT: ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7
; GCN-O0-NEXT: s_mov_b64 s[6:7], s[0:1]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: v_ashrrev_i32_e64 v4, 31, v0
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: v_mov_b32_e32 v2, v0
; GCN-O0-NEXT: v_mov_b32_e32 v3, v4
; GCN-O0-NEXT: s_mov_b32 s0, 2
; GCN-O0-NEXT: v_lshl_b64 v[3:4], v[2:3], s0
; GCN-O0-NEXT: v_mov_b32_e32 v2, 0
; GCN-O0-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[2:3], v0, s0
; GCN-O0-NEXT: s_mov_b64 s[0:1], exec
; GCN-O0-NEXT: v_writelane_b32 v1, s0, 4
; GCN-O0-NEXT: v_writelane_b32 v1, s1, 5
; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB1_4
; GCN-O0-NEXT: ; %bb.2: ; %bb.inner.then
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: v_readlane_b32 s0, v1, 0
; GCN-O0-NEXT: v_readlane_b32 s1, v1, 1
; GCN-O0-NEXT: v_mov_b32_e32 v0, 1
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: v_add_i32_e64 v2, s[2:3], v2, v0
; GCN-O0-NEXT: v_ashrrev_i32_e64 v4, 31, v2
; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GCN-O0-NEXT: v_mov_b32_e32 v3, v4
; GCN-O0-NEXT: s_mov_b32 s2, 2
; GCN-O0-NEXT: v_lshl_b64 v[2:3], v[2:3], s2
; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
; GCN-O0-NEXT: s_mov_b32 s4, 0
; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
; GCN-O0-NEXT: s_mov_b32 s5, s2
; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5]
; GCN-O0-NEXT: buffer_store_dword v0, v[2:3], s[0:3], 0 addr64
; GCN-O0-NEXT: s_branch .LBB1_4
; GCN-O0-NEXT: .LBB1_3: ; %Flow
; GCN-O0-NEXT: v_readlane_b32 s0, v1, 2
; GCN-O0-NEXT: v_readlane_b32 s1, v1, 3
; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-O0-NEXT: s_branch .LBB1_5
; GCN-O0-NEXT: .LBB1_4: ; %bb.inner.end
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: v_readlane_b32 s2, v1, 4
; GCN-O0-NEXT: v_readlane_b32 s3, v1, 5
; GCN-O0-NEXT: s_or_b64 exec, exec, s[2:3]
; GCN-O0-NEXT: v_readlane_b32 s0, v1, 0
; GCN-O0-NEXT: v_readlane_b32 s1, v1, 1
; GCN-O0-NEXT: v_mov_b32_e32 v0, 2
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: v_add_i32_e64 v2, s[2:3], v2, v0
; GCN-O0-NEXT: v_ashrrev_i32_e64 v4, 31, v2
; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GCN-O0-NEXT: v_mov_b32_e32 v3, v4
; GCN-O0-NEXT: v_lshl_b64 v[2:3], v[2:3], v0
; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
; GCN-O0-NEXT: s_mov_b32 s4, 0
; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
; GCN-O0-NEXT: s_mov_b32 s5, s2
; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5]
; GCN-O0-NEXT: buffer_store_dword v0, v[2:3], s[0:3], 0 addr64
; GCN-O0-NEXT: s_branch .LBB1_3
; GCN-O0-NEXT: .LBB1_5: ; %bb.outer.end
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: v_mov_b32_e32 v2, 3
; GCN-O0-NEXT: v_mov_b32_e32 v0, 0
; GCN-O0-NEXT: s_mov_b32 m0, -1
; GCN-O0-NEXT: ds_write_b32 v0, v2
; GCN-O0-NEXT: s_endpgm
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
; Outer guard: only lanes with tid > 1 enter bb.outer.then.
%tmp1 = icmp ugt i32 %tmp, 1
br i1 %tmp1, label %bb.outer.then, label %bb.outer.end

bb.outer.then: ; preds = %bb
%tmp4 = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %tmp
store i32 0, ptr addrspace(1) %tmp4, align 4
; Inner guard: joins at bb.inner.end (not bb.outer.end), which makes the
; inner endcf non-redundant.
%tmp5 = icmp eq i32 %tmp, 2
br i1 %tmp5, label %bb.inner.end, label %bb.inner.then

bb.inner.then: ; preds = %bb.outer.then
%tmp7 = add i32 %tmp, 1
%tmp8 = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %tmp7
store i32 1, ptr addrspace(1) %tmp8, align 4
br label %bb.inner.end

bb.inner.end: ; preds = %bb.inner.then, %bb.outer.then
; Work done between the inner and outer join points.
%tmp9 = add i32 %tmp, 2
%tmp10 = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %tmp9
store i32 2, ptr addrspace(1) %tmp10, align 4
br label %bb.outer.end

bb.outer.end: ; preds = %bb.inner.end, %bb
store i32 3, ptr addrspace(3) null
ret void
}
| |
; An outer if whose body contains a full if/else diamond. The -O1 output
; lowers the inner diamond with s_and_saveexec + s_xor (else arm first at
; %bb.2, then arm after s_andn2_saveexec at %bb.4); both arms and the outer
; region all rejoin at the single exec restore in .LBB2_5.
define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
; GCN-LABEL: nested_if_if_else:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: s_mov_b32 s2, 0
; GCN-NEXT: v_cmp_lt_u32_e32 vcc, 1, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_store_dword v2, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GCN-NEXT: s_cbranch_execz .LBB2_5
; GCN-NEXT: ; %bb.1: ; %bb.outer.then
; GCN-NEXT: v_mov_b32_e32 v4, s1
; GCN-NEXT: v_add_i32_e32 v3, vcc, s0, v1
; GCN-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 2, v0
; GCN-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GCN-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GCN-NEXT: s_cbranch_execz .LBB2_3
; GCN-NEXT: ; %bb.2: ; %bb.else
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s4, s6
; GCN-NEXT: s_mov_b32 s5, s6
; GCN-NEXT: v_mov_b32_e32 v0, 2
; GCN-NEXT: buffer_store_dword v0, v[3:4], s[4:7], 0 addr64 offset:8
; GCN-NEXT: ; implicit-def: $vgpr3_vgpr4
; GCN-NEXT: .LBB2_3: ; %Flow
; GCN-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
; GCN-NEXT: s_cbranch_execz .LBB2_5
; GCN-NEXT: ; %bb.4: ; %bb.then
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s4, s6
; GCN-NEXT: s_mov_b32 s5, s6
; GCN-NEXT: s_waitcnt expcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, 1
; GCN-NEXT: buffer_store_dword v0, v[3:4], s[4:7], 0 addr64 offset:4
; GCN-NEXT: .LBB2_5: ; %bb.outer.end
; GCN-NEXT: s_or_b64 exec, exec, s[2:3]
; GCN-NEXT: s_waitcnt expcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, 3
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v2, v0
; GCN-NEXT: s_endpgm
;
; GCN-O0-LABEL: nested_if_if_else:
; GCN-O0: ; %bb.0: ; %bb
; GCN-O0-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; GCN-O0-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
; GCN-O0-NEXT: s_mov_b32 s10, -1
; GCN-O0-NEXT: s_mov_b32 s11, 0xe8f000
; GCN-O0-NEXT: s_add_u32 s8, s8, s3
; GCN-O0-NEXT: s_addc_u32 s9, s9, 0
; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GCN-O0-NEXT: s_waitcnt lgkmcnt(0)
; GCN-O0-NEXT: s_mov_b64 s[2:3], s[0:1]
; GCN-O0-NEXT: v_writelane_b32 v1, s2, 0
; GCN-O0-NEXT: v_writelane_b32 v1, s3, 1
; GCN-O0-NEXT: v_mov_b32_e32 v2, v0
; GCN-O0-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
; GCN-O0-NEXT: s_mov_b32 s4, 0
; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
; GCN-O0-NEXT: s_mov_b32 s5, s2
; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5]
; GCN-O0-NEXT: s_mov_b32 s4, 0
; GCN-O0-NEXT: ; implicit-def: $sgpr4
; GCN-O0-NEXT: v_mov_b32_e32 v4, 0
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: v_mov_b32_e32 v2, v0
; GCN-O0-NEXT: v_mov_b32_e32 v3, v4
; GCN-O0-NEXT: s_mov_b32 s4, 2
; GCN-O0-NEXT: v_lshl_b64 v[3:4], v[2:3], s4
; GCN-O0-NEXT: v_mov_b32_e32 v2, 0
; GCN-O0-NEXT: buffer_store_dword v2, v[3:4], s[0:3], 0 addr64
; GCN-O0-NEXT: s_mov_b32 s0, 1
; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v0, s0
; GCN-O0-NEXT: s_mov_b64 s[0:1], exec
; GCN-O0-NEXT: v_writelane_b32 v1, s0, 2
; GCN-O0-NEXT: v_writelane_b32 v1, s1, 3
; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB2_6
; GCN-O0-NEXT: ; %bb.1: ; %bb.outer.then
; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b32 s0, 2
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[0:1], v0, s0
; GCN-O0-NEXT: s_mov_b64 s[2:3], exec
; GCN-O0-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
; GCN-O0-NEXT: s_xor_b64 s[2:3], s[0:1], s[2:3]
; GCN-O0-NEXT: v_writelane_b32 v1, s2, 4
; GCN-O0-NEXT: v_writelane_b32 v1, s3, 5
; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB2_2
; GCN-O0-NEXT: s_branch .LBB2_4
; GCN-O0-NEXT: .LBB2_2: ; %Flow
; GCN-O0-NEXT: v_readlane_b32 s0, v1, 4
; GCN-O0-NEXT: v_readlane_b32 s1, v1, 5
; GCN-O0-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GCN-O0-NEXT: s_and_b64 s[0:1], exec, s[0:1]
; GCN-O0-NEXT: v_writelane_b32 v1, s0, 6
; GCN-O0-NEXT: v_writelane_b32 v1, s1, 7
; GCN-O0-NEXT: s_xor_b64 exec, exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB2_5
; GCN-O0-NEXT: ; %bb.3: ; %bb.then
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: v_readlane_b32 s0, v1, 0
; GCN-O0-NEXT: v_readlane_b32 s1, v1, 1
; GCN-O0-NEXT: v_mov_b32_e32 v0, 1
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: v_add_i32_e64 v2, s[2:3], v2, v0
; GCN-O0-NEXT: v_ashrrev_i32_e64 v4, 31, v2
; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GCN-O0-NEXT: v_mov_b32_e32 v3, v4
; GCN-O0-NEXT: s_mov_b32 s2, 2
; GCN-O0-NEXT: v_lshl_b64 v[2:3], v[2:3], s2
; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
; GCN-O0-NEXT: s_mov_b32 s4, 0
; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
; GCN-O0-NEXT: s_mov_b32 s5, s2
; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5]
; GCN-O0-NEXT: buffer_store_dword v0, v[2:3], s[0:3], 0 addr64
; GCN-O0-NEXT: s_branch .LBB2_5
; GCN-O0-NEXT: .LBB2_4: ; %bb.else
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: v_readlane_b32 s0, v1, 0
; GCN-O0-NEXT: v_readlane_b32 s1, v1, 1
; GCN-O0-NEXT: v_mov_b32_e32 v0, 2
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: v_add_i32_e64 v2, s[2:3], v2, v0
; GCN-O0-NEXT: v_ashrrev_i32_e64 v4, 31, v2
; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GCN-O0-NEXT: v_mov_b32_e32 v3, v4
; GCN-O0-NEXT: v_lshl_b64 v[2:3], v[2:3], v0
; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
; GCN-O0-NEXT: s_mov_b32 s4, 0
; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
; GCN-O0-NEXT: s_mov_b32 s5, s2
; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5]
; GCN-O0-NEXT: buffer_store_dword v0, v[2:3], s[0:3], 0 addr64
; GCN-O0-NEXT: s_branch .LBB2_2
; GCN-O0-NEXT: .LBB2_5: ; %Flow1
; GCN-O0-NEXT: v_readlane_b32 s0, v1, 6
; GCN-O0-NEXT: v_readlane_b32 s1, v1, 7
; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-O0-NEXT: .LBB2_6: ; %bb.outer.end
; GCN-O0-NEXT: v_readlane_b32 s0, v1, 2
; GCN-O0-NEXT: v_readlane_b32 s1, v1, 3
; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: v_mov_b32_e32 v2, 3
; GCN-O0-NEXT: v_mov_b32_e32 v0, 0
; GCN-O0-NEXT: s_mov_b32 m0, -1
; GCN-O0-NEXT: ds_write_b32 v0, v2
; GCN-O0-NEXT: s_endpgm
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %tmp
store i32 0, ptr addrspace(1) %tmp1, align 4
; Outer guard: lanes with tid > 1 enter the if/else region.
%tmp2 = icmp ugt i32 %tmp, 1
br i1 %tmp2, label %bb.outer.then, label %bb.outer.end

bb.outer.then: ; preds = %bb
; Inner diamond: both arms branch back to the outer join block.
%tmp5 = icmp eq i32 %tmp, 2
br i1 %tmp5, label %bb.then, label %bb.else

bb.then: ; preds = %bb.outer.then
%tmp3 = add i32 %tmp, 1
%tmp4 = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %tmp3
store i32 1, ptr addrspace(1) %tmp4, align 4
br label %bb.outer.end

bb.else: ; preds = %bb.outer.then
%tmp7 = add i32 %tmp, 2
%tmp9 = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %tmp7
store i32 2, ptr addrspace(1) %tmp9, align 4
br label %bb.outer.end

bb.outer.end: ; preds = %bb, %bb.then, %bb.else
store i32 3, ptr addrspace(3) null
ret void
}
| |
; An outer if/else where each arm contains its own inner if (bb.inner.then
; under bb.outer.then, bb.inner.then2 under bb.outer.else). The -O1 output
; lowers the outer diamond with s_and_saveexec + s_xor (else arm first), and
; each inner if keeps its own saveexec/restore pair (.LBB3_3 and .LBB3_7);
; everything rejoins at the outer restore in .LBB3_8.
define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
; GCN-LABEL: nested_if_else_if:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GCN-NEXT: v_lshlrev_b32_e32 v3, 2, v0
; GCN-NEXT: v_mov_b32_e32 v4, 0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: s_mov_b32 s2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, s1
; GCN-NEXT: v_add_i32_e32 v1, vcc, s0, v3
; GCN-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc
; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 2, v0
; GCN-NEXT: buffer_store_dword v4, v[3:4], s[0:3], 0 addr64
; GCN-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[0:1]
; GCN-NEXT: s_cbranch_execz .LBB3_4
; GCN-NEXT: ; %bb.1: ; %bb.outer.else
; GCN-NEXT: s_mov_b32 s0, s2
; GCN-NEXT: s_mov_b32 s1, s2
; GCN-NEXT: v_mov_b32_e32 v3, 3
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN-NEXT: buffer_store_dword v3, v[1:2], s[0:3], 0 addr64 offset:12
; GCN-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GCN-NEXT: s_cbranch_execz .LBB3_3
; GCN-NEXT: ; %bb.2: ; %bb.inner.then2
; GCN-NEXT: s_mov_b32 s10, 0
; GCN-NEXT: s_mov_b32 s11, 0xf000
; GCN-NEXT: s_mov_b32 s8, s10
; GCN-NEXT: s_mov_b32 s9, s10
; GCN-NEXT: v_mov_b32_e32 v0, 4
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[8:11], 0 addr64 offset:16
; GCN-NEXT: .LBB3_3: ; %Flow
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2
; GCN-NEXT: ; implicit-def: $vgpr0
; GCN-NEXT: .LBB3_4: ; %Flow2
; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
; GCN-NEXT: s_cbranch_execz .LBB3_8
; GCN-NEXT: ; %bb.5: ; %bb.outer.then
; GCN-NEXT: s_mov_b32 s0, s2
; GCN-NEXT: s_mov_b32 s1, s2
; GCN-NEXT: s_waitcnt expcnt(0)
; GCN-NEXT: v_mov_b32_e32 v3, 1
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT: buffer_store_dword v3, v[1:2], s[0:3], 0 addr64 offset:4
; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
; GCN-NEXT: s_cbranch_execz .LBB3_7
; GCN-NEXT: ; %bb.6: ; %bb.inner.then
; GCN-NEXT: v_mov_b32_e32 v0, 2
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 offset:8
; GCN-NEXT: .LBB3_7: ; %Flow1
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
; GCN-NEXT: .LBB3_8: ; %bb.outer.end
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: s_waitcnt expcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, 3
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v1, v0
; GCN-NEXT: s_endpgm
;
; GCN-O0-LABEL: nested_if_else_if:
; GCN-O0: ; %bb.0: ; %bb
; GCN-O0-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; GCN-O0-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
; GCN-O0-NEXT: s_mov_b32 s10, -1
; GCN-O0-NEXT: s_mov_b32 s11, 0xe8f000
; GCN-O0-NEXT: s_add_u32 s8, s8, s3
; GCN-O0-NEXT: s_addc_u32 s9, s9, 0
; GCN-O0-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; GCN-O0-NEXT: v_mov_b32_e32 v2, v0
; GCN-O0-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:12 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b32 s0, 0
; GCN-O0-NEXT: ; implicit-def: $sgpr0
; GCN-O0-NEXT: v_mov_b32_e32 v4, 0
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: v_mov_b32_e32 v2, v0
; GCN-O0-NEXT: v_mov_b32_e32 v3, v4
; GCN-O0-NEXT: s_mov_b32 s0, 2
; GCN-O0-NEXT: s_mov_b32 s1, s0
; GCN-O0-NEXT: v_lshl_b64 v[3:4], v[2:3], s1
; GCN-O0-NEXT: s_waitcnt lgkmcnt(0)
; GCN-O0-NEXT: s_mov_b32 s2, s4
; GCN-O0-NEXT: v_mov_b32_e32 v2, v3
; GCN-O0-NEXT: s_mov_b32 s1, s5
; GCN-O0-NEXT: v_mov_b32_e32 v6, v4
; GCN-O0-NEXT: v_add_i32_e64 v5, s[2:3], s2, v2
; GCN-O0-NEXT: v_mov_b32_e32 v2, s1
; GCN-O0-NEXT: v_addc_u32_e64 v2, s[2:3], v2, v6, s[2:3]
; GCN-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
; GCN-O0-NEXT: v_mov_b32_e32 v6, v2
; GCN-O0-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:8 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b32 s1, 0xf000
; GCN-O0-NEXT: s_mov_b32 s2, 0
; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3
; GCN-O0-NEXT: s_mov_b32 s3, s1
; GCN-O0-NEXT: ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7
; GCN-O0-NEXT: s_mov_b64 s[6:7], s[2:3]
; GCN-O0-NEXT: v_mov_b32_e32 v2, 0
; GCN-O0-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
; GCN-O0-NEXT: v_cmp_lt_u32_e64 s[0:1], v0, s0
; GCN-O0-NEXT: s_mov_b64 s[2:3], exec
; GCN-O0-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
; GCN-O0-NEXT: s_xor_b64 s[2:3], s[0:1], s[2:3]
; GCN-O0-NEXT: v_writelane_b32 v1, s2, 0
; GCN-O0-NEXT: v_writelane_b32 v1, s3, 1
; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB3_1
; GCN-O0-NEXT: s_branch .LBB3_4
; GCN-O0-NEXT: .LBB3_1: ; %Flow2
; GCN-O0-NEXT: v_readlane_b32 s0, v1, 0
; GCN-O0-NEXT: v_readlane_b32 s1, v1, 1
; GCN-O0-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GCN-O0-NEXT: s_and_b64 s[0:1], exec, s[0:1]
; GCN-O0-NEXT: v_writelane_b32 v1, s0, 2
; GCN-O0-NEXT: v_writelane_b32 v1, s1, 3
; GCN-O0-NEXT: s_xor_b64 exec, exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB3_8
; GCN-O0-NEXT: ; %bb.2: ; %bb.outer.then
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:12 ; 4-byte Folded Reload
; GCN-O0-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: buffer_load_dword v4, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b32 s0, 0xf000
; GCN-O0-NEXT: s_mov_b32 s2, 0
; GCN-O0-NEXT: s_mov_b32 s4, s2
; GCN-O0-NEXT: s_mov_b32 s5, s0
; GCN-O0-NEXT: s_mov_b32 s0, s2
; GCN-O0-NEXT: s_mov_b32 s1, s2
; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5]
; GCN-O0-NEXT: v_mov_b32_e32 v2, 1
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: buffer_store_dword v2, v[3:4], s[0:3], 0 addr64 offset:4
; GCN-O0-NEXT: s_mov_b32 s0, 2
; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[2:3], v0, s0
; GCN-O0-NEXT: s_mov_b64 s[0:1], exec
; GCN-O0-NEXT: v_writelane_b32 v1, s0, 4
; GCN-O0-NEXT: v_writelane_b32 v1, s1, 5
; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB3_7
; GCN-O0-NEXT: ; %bb.3: ; %bb.inner.then
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b32 s0, 0xf000
; GCN-O0-NEXT: s_mov_b32 s2, 0
; GCN-O0-NEXT: s_mov_b32 s4, s2
; GCN-O0-NEXT: s_mov_b32 s5, s0
; GCN-O0-NEXT: s_mov_b32 s0, s2
; GCN-O0-NEXT: s_mov_b32 s1, s2
; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5]
; GCN-O0-NEXT: v_mov_b32_e32 v0, 2
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: buffer_store_dword v0, v[2:3], s[0:3], 0 addr64 offset:8
; GCN-O0-NEXT: s_branch .LBB3_7
; GCN-O0-NEXT: .LBB3_4: ; %bb.outer.else
; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:12 ; 4-byte Folded Reload
; GCN-O0-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: buffer_load_dword v4, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b32 s1, 0xf000
; GCN-O0-NEXT: s_mov_b32 s0, 0
; GCN-O0-NEXT: s_mov_b32 s2, s0
; GCN-O0-NEXT: s_mov_b32 s3, s1
; GCN-O0-NEXT: s_mov_b32 s4, s0
; GCN-O0-NEXT: s_mov_b32 s5, s0
; GCN-O0-NEXT: ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7
; GCN-O0-NEXT: s_mov_b64 s[6:7], s[2:3]
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: v_mov_b32_e32 v2, 3
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64 offset:12
; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[2:3], v0, s0
; GCN-O0-NEXT: s_mov_b64 s[0:1], exec
; GCN-O0-NEXT: v_writelane_b32 v1, s0, 6
; GCN-O0-NEXT: v_writelane_b32 v1, s1, 7
; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB3_6
; GCN-O0-NEXT: ; %bb.5: ; %bb.inner.then2
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b32 s0, 0xf000
; GCN-O0-NEXT: s_mov_b32 s2, 0
; GCN-O0-NEXT: s_mov_b32 s4, s2
; GCN-O0-NEXT: s_mov_b32 s5, s0
; GCN-O0-NEXT: s_mov_b32 s0, s2
; GCN-O0-NEXT: s_mov_b32 s1, s2
; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5]
; GCN-O0-NEXT: v_mov_b32_e32 v0, 4
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: buffer_store_dword v0, v[2:3], s[0:3], 0 addr64 offset:16
; GCN-O0-NEXT: .LBB3_6: ; %Flow
; GCN-O0-NEXT: v_readlane_b32 s0, v1, 6
; GCN-O0-NEXT: v_readlane_b32 s1, v1, 7
; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-O0-NEXT: s_branch .LBB3_1
; GCN-O0-NEXT: .LBB3_7: ; %Flow1
; GCN-O0-NEXT: v_readlane_b32 s0, v1, 4
; GCN-O0-NEXT: v_readlane_b32 s1, v1, 5
; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-O0-NEXT: .LBB3_8: ; %bb.outer.end
; GCN-O0-NEXT: v_readlane_b32 s0, v1, 2
; GCN-O0-NEXT: v_readlane_b32 s1, v1, 3
; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: v_mov_b32_e32 v2, 3
; GCN-O0-NEXT: v_mov_b32_e32 v0, 0
; GCN-O0-NEXT: s_mov_b32 m0, -1
; GCN-O0-NEXT: ds_write_b32 v0, v2
; GCN-O0-NEXT: s_endpgm
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %tmp
store i32 0, ptr addrspace(1) %tmp1, align 4
; Outer diamond: tid > 1 takes bb.outer.then, otherwise bb.outer.else.
%cc1 = icmp ugt i32 %tmp, 1
br i1 %cc1, label %bb.outer.then, label %bb.outer.else

bb.outer.then:
%tmp2 = getelementptr inbounds i32, ptr addrspace(1) %tmp1, i32 1
store i32 1, ptr addrspace(1) %tmp2, align 4
; Inner if inside the "then" arm.
%cc2 = icmp eq i32 %tmp, 2
br i1 %cc2, label %bb.inner.then, label %bb.outer.end

bb.inner.then:
%tmp3 = getelementptr inbounds i32, ptr addrspace(1) %tmp1, i32 2
store i32 2, ptr addrspace(1) %tmp3, align 4
br label %bb.outer.end

bb.outer.else:
%tmp4 = getelementptr inbounds i32, ptr addrspace(1) %tmp1, i32 3
store i32 3, ptr addrspace(1) %tmp4, align 4
; Inner if inside the "else" arm.
%cc3 = icmp eq i32 %tmp, 0 ; avoid being optimized away through the domination
br i1 %cc3, label %bb.inner.then2, label %bb.outer.end

bb.inner.then2:
%tmp5 = getelementptr inbounds i32, ptr addrspace(1) %tmp1, i32 4
store i32 4, ptr addrspace(1) %tmp5, align 4
br label %bb.outer.end

bb.outer.end:
store i32 3, ptr addrspace(3) null
ret void
}
| |
define amdgpu_kernel void @s_endpgm_unsafe_barrier(ptr addrspace(1) nocapture %arg) {
; GCN-LABEL: s_endpgm_unsafe_barrier:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: v_cmp_lt_u32_e32 vcc, 1, v0
; GCN-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GCN-NEXT: s_cbranch_execz .LBB4_2
; GCN-NEXT: ; %bb.1: ; %bb.then
; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_store_dword v1, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: .LBB4_2: ; %bb.end
; GCN-NEXT: s_or_b64 exec, exec, s[2:3]
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT: s_barrier
; GCN-NEXT: s_endpgm
;
; GCN-O0-LABEL: s_endpgm_unsafe_barrier:
; GCN-O0: ; %bb.0: ; %bb
; GCN-O0-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; GCN-O0-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
; GCN-O0-NEXT: s_mov_b32 s10, -1
; GCN-O0-NEXT: s_mov_b32 s11, 0xe8f000
; GCN-O0-NEXT: s_add_u32 s8, s8, s3
; GCN-O0-NEXT: s_addc_u32 s9, s9, 0
; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GCN-O0-NEXT: s_waitcnt lgkmcnt(0)
; GCN-O0-NEXT: v_writelane_b32 v1, s0, 0
; GCN-O0-NEXT: v_writelane_b32 v1, s1, 1
; GCN-O0-NEXT: v_mov_b32_e32 v2, v0
; GCN-O0-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b32 s0, 1
; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v0, s0
; GCN-O0-NEXT: s_mov_b64 s[0:1], exec
; GCN-O0-NEXT: v_writelane_b32 v1, s0, 2
; GCN-O0-NEXT: v_writelane_b32 v1, s1, 3
; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB4_2
; GCN-O0-NEXT: ; %bb.1: ; %bb.then
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: v_readlane_b32 s0, v1, 0
; GCN-O0-NEXT: v_readlane_b32 s1, v1, 1
; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
; GCN-O0-NEXT: s_mov_b32 s4, 0
; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
; GCN-O0-NEXT: s_mov_b32 s5, s2
; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: v_ashrrev_i32_e64 v0, 31, v2
; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GCN-O0-NEXT: v_mov_b32_e32 v3, v0
; GCN-O0-NEXT: s_mov_b32 s4, 2
; GCN-O0-NEXT: v_lshl_b64 v[2:3], v[2:3], s4
; GCN-O0-NEXT: v_mov_b32_e32 v0, 0
; GCN-O0-NEXT: buffer_store_dword v0, v[2:3], s[0:3], 0 addr64
; GCN-O0-NEXT: .LBB4_2: ; %bb.end
; GCN-O0-NEXT: v_readlane_b32 s0, v1, 2
; GCN-O0-NEXT: v_readlane_b32 s1, v1, 3
; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GCN-O0-NEXT: s_barrier
; GCN-O0-NEXT: s_endpgm
; Simple one-sided if guarding a store, followed by a convergent barrier in the
; join block. The checks above verify that the exec-restore (s_or_b64 exec, ...)
; for the if-region is emitted BEFORE the s_barrier, so all lanes have rejoined
; when the barrier executes; collapsing/moving the endcf past the barrier would
; be unsafe (hence the test name).
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x() ; tid.x for this lane
%tmp1 = icmp ugt i32 %tmp, 1 ; divergent condition: lanes with tid > 1
br i1 %tmp1, label %bb.then, label %bb.end

bb.then: ; preds = %bb
; Guarded store: arg[tid] = 0 only for lanes that took the branch.
%tmp4 = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %tmp
store i32 0, ptr addrspace(1) %tmp4, align 4
br label %bb.end

bb.end: ; preds = %bb.then, %bb
; Convergent barrier in the join block — must run with full exec restored.
call void @llvm.amdgcn.s.barrier()
ret void
}
| |
define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
; GCN-LABEL: scc_liveness:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_movk_i32 s4, 0x207
; GCN-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0
; GCN-NEXT: s_mov_b32 s8, 0
; GCN-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
; GCN-NEXT: s_mov_b64 s[12:13], 0
; GCN-NEXT: s_mov_b64 s[6:7], 0
; GCN-NEXT: s_branch .LBB5_3
; GCN-NEXT: .LBB5_1: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[10:11]
; GCN-NEXT: .LBB5_2: ; %bb10
; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[14:15]
; GCN-NEXT: s_and_b64 s[6:7], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[12:13], s[6:7], s[12:13]
; GCN-NEXT: s_mov_b64 s[6:7], 0
; GCN-NEXT: s_andn2_b64 exec, exec, s[12:13]
; GCN-NEXT: s_cbranch_execz .LBB5_7
; GCN-NEXT: .LBB5_3: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_and_b64 s[10:11], exec, vcc
; GCN-NEXT: s_or_b64 s[6:7], s[10:11], s[6:7]
; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7]
; GCN-NEXT: s_cbranch_execnz .LBB5_3
; GCN-NEXT: ; %bb.4: ; %bb2
; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
; GCN-NEXT: s_mov_b32 s9, s8
; GCN-NEXT: s_mov_b32 s10, s8
; GCN-NEXT: s_mov_b32 s11, s8
; GCN-NEXT: v_mov_b32_e32 v0, s8
; GCN-NEXT: v_mov_b32_e32 v1, s9
; GCN-NEXT: v_mov_b32_e32 v2, s10
; GCN-NEXT: v_mov_b32_e32 v3, s11
; GCN-NEXT: s_and_saveexec_b64 s[14:15], s[4:5]
; GCN-NEXT: s_cbranch_execz .LBB5_2
; GCN-NEXT: ; %bb.5: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1
; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_f32_e64 s[6:7], 0, v0
; GCN-NEXT: v_mov_b32_e32 v0, s8
; GCN-NEXT: v_mov_b32_e32 v1, s9
; GCN-NEXT: v_mov_b32_e32 v2, s10
; GCN-NEXT: v_mov_b32_e32 v3, s11
; GCN-NEXT: s_and_saveexec_b64 s[10:11], s[6:7]
; GCN-NEXT: s_cbranch_execz .LBB5_1
; GCN-NEXT: ; %bb.6: ; %bb8
; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1
; GCN-NEXT: s_mov_b32 s9, s8
; GCN-NEXT: v_mov_b32_e32 v0, s8
; GCN-NEXT: v_mov_b32_e32 v1, s9
; GCN-NEXT: v_mov_b32_e32 v2, s10
; GCN-NEXT: v_mov_b32_e32 v3, s11
; GCN-NEXT: s_branch .LBB5_1
; GCN-NEXT: .LBB5_7: ; %bb12
; GCN-NEXT: s_or_b64 exec, exec, s[12:13]
; GCN-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: buffer_store_dword v0, v0, s[0:3], 0 offen
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GCN-O0-LABEL: scc_liveness:
; GCN-O0: ; %bb.0: ; %bb
; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[4:5]
; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 s[4:5], 0
; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5]
; GCN-O0-NEXT: s_waitcnt expcnt(1)
; GCN-O0-NEXT: v_writelane_b32 v1, s6, 0
; GCN-O0-NEXT: v_writelane_b32 v1, s7, 1
; GCN-O0-NEXT: v_writelane_b32 v1, s4, 2
; GCN-O0-NEXT: v_writelane_b32 v1, s5, 3
; GCN-O0-NEXT: .LBB5_1: ; %bb1
; GCN-O0-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-O0-NEXT: v_readlane_b32 s8, v1, 2
; GCN-O0-NEXT: v_readlane_b32 s9, v1, 3
; GCN-O0-NEXT: v_readlane_b32 s6, v1, 0
; GCN-O0-NEXT: v_readlane_b32 s7, v1, 1
; GCN-O0-NEXT: v_writelane_b32 v1, s6, 4
; GCN-O0-NEXT: v_writelane_b32 v1, s7, 5
; GCN-O0-NEXT: s_mov_b32 s4, 0x207
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: v_cmp_lt_i32_e64 s[4:5], v0, s4
; GCN-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-O0-NEXT: v_writelane_b32 v1, s4, 6
; GCN-O0-NEXT: v_writelane_b32 v1, s5, 7
; GCN-O0-NEXT: v_writelane_b32 v1, s6, 0
; GCN-O0-NEXT: v_writelane_b32 v1, s7, 1
; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5]
; GCN-O0-NEXT: v_writelane_b32 v1, s6, 2
; GCN-O0-NEXT: v_writelane_b32 v1, s7, 3
; GCN-O0-NEXT: s_andn2_b64 exec, exec, s[4:5]
; GCN-O0-NEXT: s_cbranch_execnz .LBB5_1
; GCN-O0-NEXT: ; %bb.2: ; %bb2
; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1
; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-O0-NEXT: v_readlane_b32 s4, v1, 6
; GCN-O0-NEXT: v_readlane_b32 s5, v1, 7
; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-O0-NEXT: s_mov_b32 s6, 0
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[4:5], v0, s6
; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v0, s6
; GCN-O0-NEXT: v_writelane_b32 v1, s4, 8
; GCN-O0-NEXT: v_writelane_b32 v1, s5, 9
; GCN-O0-NEXT: s_mov_b32 s4, 0
; GCN-O0-NEXT: s_mov_b32 s8, s4
; GCN-O0-NEXT: s_mov_b32 s9, s4
; GCN-O0-NEXT: s_mov_b32 s10, s4
; GCN-O0-NEXT: s_mov_b32 s11, s4
; GCN-O0-NEXT: v_mov_b32_e32 v2, s8
; GCN-O0-NEXT: v_mov_b32_e32 v3, s9
; GCN-O0-NEXT: v_mov_b32_e32 v4, s10
; GCN-O0-NEXT: v_mov_b32_e32 v5, s11
; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 s[4:5], exec
; GCN-O0-NEXT: v_writelane_b32 v1, s4, 10
; GCN-O0-NEXT: v_writelane_b32 v1, s5, 11
; GCN-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; GCN-O0-NEXT: s_mov_b64 exec, s[4:5]
; GCN-O0-NEXT: s_cbranch_execz .LBB5_5
; GCN-O0-NEXT: ; %bb.3: ; %bb4
; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1
; GCN-O0-NEXT: ; implicit-def: $sgpr4
; GCN-O0-NEXT: v_mov_b32_e32 v0, s4
; GCN-O0-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
; GCN-O0-NEXT: s_mov_b32 s4, 0
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: v_cmp_lt_f32_e64 s[6:7], v0, s4
; GCN-O0-NEXT: s_mov_b32 s8, s4
; GCN-O0-NEXT: s_mov_b32 s9, s4
; GCN-O0-NEXT: s_mov_b32 s10, s4
; GCN-O0-NEXT: s_mov_b32 s11, s4
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: v_mov_b32_e32 v2, s8
; GCN-O0-NEXT: v_mov_b32_e32 v3, s9
; GCN-O0-NEXT: v_mov_b32_e32 v4, s10
; GCN-O0-NEXT: v_mov_b32_e32 v5, s11
; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 s[4:5], exec
; GCN-O0-NEXT: v_writelane_b32 v1, s4, 12
; GCN-O0-NEXT: v_writelane_b32 v1, s5, 13
; GCN-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; GCN-O0-NEXT: s_mov_b64 exec, s[4:5]
; GCN-O0-NEXT: s_cbranch_execz .LBB5_6
; GCN-O0-NEXT: ; %bb.4: ; %bb8
; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1
; GCN-O0-NEXT: s_mov_b32 s10, 0
; GCN-O0-NEXT: ; implicit-def: $sgpr4
; GCN-O0-NEXT: ; implicit-def: $sgpr5
; GCN-O0-NEXT: ; implicit-def: $sgpr9
; GCN-O0-NEXT: ; implicit-def: $sgpr5
; GCN-O0-NEXT: ; implicit-def: $sgpr8
; GCN-O0-NEXT: ; implicit-def: $sgpr5
; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; GCN-O0-NEXT: s_mov_b32 s5, s10
; GCN-O0-NEXT: s_mov_b32 s6, s9
; GCN-O0-NEXT: s_mov_b32 s7, s8
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: v_mov_b32_e32 v2, s4
; GCN-O0-NEXT: v_mov_b32_e32 v3, s5
; GCN-O0-NEXT: v_mov_b32_e32 v4, s6
; GCN-O0-NEXT: v_mov_b32_e32 v5, s7
; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_branch .LBB5_6
; GCN-O0-NEXT: .LBB5_5: ; %Flow2
; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
; GCN-O0-NEXT: v_readlane_b32 s4, v1, 10
; GCN-O0-NEXT: v_readlane_b32 s5, v1, 11
; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_branch .LBB5_7
; GCN-O0-NEXT: .LBB5_6: ; %Flow
; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; GCN-O0-NEXT: v_readlane_b32 s4, v1, 12
; GCN-O0-NEXT: v_readlane_b32 s5, v1, 13
; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_branch .LBB5_5
; GCN-O0-NEXT: .LBB5_7: ; %bb10
; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1
; GCN-O0-NEXT: v_readlane_b32 s6, v1, 8
; GCN-O0-NEXT: v_readlane_b32 s7, v1, 9
; GCN-O0-NEXT: s_mov_b64 s[4:5], -1
; GCN-O0-NEXT: v_writelane_b32 v1, s4, 14
; GCN-O0-NEXT: v_writelane_b32 v1, s5, 15
; GCN-O0-NEXT: s_mov_b64 s[4:5], exec
; GCN-O0-NEXT: v_writelane_b32 v1, s4, 16
; GCN-O0-NEXT: v_writelane_b32 v1, s5, 17
; GCN-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; GCN-O0-NEXT: s_mov_b64 exec, s[4:5]
; GCN-O0-NEXT: s_cbranch_execz .LBB5_9
; GCN-O0-NEXT: ; %bb.8: ; %Flow1
; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1
; GCN-O0-NEXT: s_mov_b64 s[4:5], 0
; GCN-O0-NEXT: s_xor_b64 s[4:5], exec, -1
; GCN-O0-NEXT: v_writelane_b32 v1, s4, 14
; GCN-O0-NEXT: v_writelane_b32 v1, s5, 15
; GCN-O0-NEXT: .LBB5_9: ; %Flow3
; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
; GCN-O0-NEXT: v_readlane_b32 s8, v1, 16
; GCN-O0-NEXT: v_readlane_b32 s9, v1, 17
; GCN-O0-NEXT: s_or_b64 exec, exec, s[8:9]
; GCN-O0-NEXT: v_readlane_b32 s6, v1, 4
; GCN-O0-NEXT: v_readlane_b32 s7, v1, 5
; GCN-O0-NEXT: v_readlane_b32 s4, v1, 14
; GCN-O0-NEXT: v_readlane_b32 s5, v1, 15
; GCN-O0-NEXT: s_and_b64 s[4:5], exec, s[4:5]
; GCN-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
; GCN-O0-NEXT: s_mov_b64 s[6:7], 0
; GCN-O0-NEXT: s_mov_b64 s[8:9], s[4:5]
; GCN-O0-NEXT: v_writelane_b32 v1, s8, 0
; GCN-O0-NEXT: v_writelane_b32 v1, s9, 1
; GCN-O0-NEXT: v_writelane_b32 v1, s6, 2
; GCN-O0-NEXT: v_writelane_b32 v1, s7, 3
; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5]
; GCN-O0-NEXT: v_writelane_b32 v1, s6, 18
; GCN-O0-NEXT: v_writelane_b32 v1, s7, 19
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_andn2_b64 exec, exec, s[4:5]
; GCN-O0-NEXT: s_cbranch_execnz .LBB5_1
; GCN-O0-NEXT: ; %bb.10: ; %bb12
; GCN-O0-NEXT: v_readlane_b32 s4, v1, 18
; GCN-O0-NEXT: v_readlane_b32 s5, v1, 19
; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-O0-NEXT: ; %bb.11: ; %bb12
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: v_mov_b32_e32 v0, v5
; GCN-O0-NEXT: ; implicit-def: $sgpr4
; GCN-O0-NEXT: v_mov_b32_e32 v6, s4
; GCN-O0-NEXT: buffer_store_dword v0, v6, s[0:3], 0 offen
; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GCN-O0-NEXT: v_mov_b32_e32 v0, v4
; GCN-O0-NEXT: ; implicit-def: $sgpr4
; GCN-O0-NEXT: v_mov_b32_e32 v6, s4
; GCN-O0-NEXT: buffer_store_dword v0, v6, s[0:3], 0 offen
; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GCN-O0-NEXT: v_mov_b32_e32 v0, v3
; GCN-O0-NEXT: ; implicit-def: $sgpr4
; GCN-O0-NEXT: v_mov_b32_e32 v6, s4
; GCN-O0-NEXT: buffer_store_dword v0, v6, s[0:3], 0 offen
; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GCN-O0-NEXT: v_mov_b32_e32 v0, v2
; GCN-O0-NEXT: ; implicit-def: $sgpr4
; GCN-O0-NEXT: v_mov_b32_e32 v2, s4
; GCN-O0-NEXT: buffer_store_dword v0, v2, s[0:3], 0 offen
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[4:5]
; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GCN-O0-NEXT: s_setpc_b64 s[30:31]
; Outer loop (%bb1 back-edges from %bb1 itself and %Flow1) containing nested
; divergent ifs (%bb4 guarded by %tmp3, %bb8 guarded by %tmp7) whose results
; merge through %Flow/%bb10 phis. The structurizer-produced Flow blocks force
; the exec-mask save/restore chains checked above; the test guards against
; mishandling SCC liveness and redundant-endcf removal across this CFG.
; Note: breaking the large <4 x float> phis is disabled via the RUN line so the
; branches here are not eliminated (see file header note).
bb:
br label %bb1

bb1: ; preds = %Flow1, %bb1, %bb
; Uniform self-loop: spins until %arg < 519; also the outer loop header.
%tmp = icmp slt i32 %arg, 519
br i1 %tmp, label %bb2, label %bb1

bb2: ; preds = %bb1
; Divergent condition reused later in %bb10 — keeps s[4:5]/vcc live across
; the inner control flow.
%tmp3 = icmp eq i32 %arg, 0
br i1 %tmp3, label %bb4, label %bb10

bb4: ; preds = %bb2
; Inner if: load from scratch and test < 0.0 (divergent FP compare).
%tmp6 = load float, ptr addrspace(5) undef
%tmp7 = fcmp olt float %tmp6, 0.0
br i1 %tmp7, label %bb8, label %Flow

bb8: ; preds = %bb4
%tmp9 = insertelement <4 x float> undef, float 0.0, i32 1
br label %Flow

Flow: ; preds = %bb8, %bb4
; Merge of the innermost if — large vector phi (kept intact, see RUN line).
%tmp8 = phi <4 x float> [ %tmp9, %bb8 ], [ zeroinitializer, %bb4 ]
br label %bb10

bb10: ; preds = %Flow, %bb2
%tmp11 = phi <4 x float> [ zeroinitializer, %bb2 ], [ %tmp8, %Flow ]
; %tmp3 reused here: exits the loop only for the %arg == 0 lanes.
br i1 %tmp3, label %bb12, label %Flow1

Flow1: ; preds = %bb10
br label %bb1

bb12: ; preds = %bb10
; Volatile store keeps %tmp11 (and thus the whole phi chain) alive.
store volatile <4 x float> %tmp11, ptr addrspace(5) undef, align 16
ret void
}
| |
; Intrinsic declarations used by the kernels above.
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare void @llvm.amdgcn.s.barrier() #1

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind convergent }
; NOTE(review): #2 is not referenced in this chunk; presumably used elsewhere
; in the file — confirm before removing.
attributes #2 = { nounwind }