| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -global-isel=1 -O2 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefix=GISEL12 %s |
| ; RUN: llc -global-isel=0 -O2 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefix=DAGISEL12 %s |
| ; RUN: llc -global-isel=1 -O2 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck --check-prefix=GISEL10 %s |
| ; RUN: llc -global-isel=0 -O2 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck --check-prefix=DAGISEL10 %s |
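| ; Test lowering of the llvm.amdgcn.init.whole.wave intrinsic in amdgpu_cs_chain |
| ; functions, for both GlobalISel and SelectionDAG, on gfx12 and gfx10. |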
| |
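| ; Straight-line case: %shader updates some of the VGPR arguments, and %tail picks |
| ; between the original and the updated values via phis before the chain call. |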
| define amdgpu_cs_chain void @basic(<3 x i32> inreg %sgpr, ptr inreg %callee, i32 inreg %exec, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 %x, i32 %y) { |
| ; GISEL12-LABEL: basic: |
| ; GISEL12: ; %bb.0: ; %entry |
| ; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GISEL12-NEXT: s_wait_expcnt 0x0 |
| ; GISEL12-NEXT: s_wait_samplecnt 0x0 |
| ; GISEL12-NEXT: s_wait_bvhcnt 0x0 |
| ; GISEL12-NEXT: s_wait_kmcnt 0x0 |
| ; GISEL12-NEXT: s_or_saveexec_b32 s8, -1 |
| ; GISEL12-NEXT: s_mov_b32 s6, s3 |
| ; GISEL12-NEXT: s_mov_b32 s7, s4 |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: s_and_saveexec_b32 s3, s8 |
| ; GISEL12-NEXT: ; %bb.1: ; %shader |
| ; GISEL12-NEXT: v_add_nc_u32_e32 v12, 42, v12 |
| ; GISEL12-NEXT: v_add_nc_u32_e32 v8, 5, v8 |
| ; GISEL12-NEXT: ; %bb.2: ; %tail |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_2) |
| ; GISEL12-NEXT: v_add_nc_u32_e32 v11, 32, v12 |
| ; GISEL12-NEXT: s_mov_b32 exec_lo, s5 |
| ; GISEL12-NEXT: s_setpc_b64 s[6:7] |
| ; |
| ; DAGISEL12-LABEL: basic: |
| ; DAGISEL12: ; %bb.0: ; %entry |
| ; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_expcnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_samplecnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_bvhcnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_kmcnt 0x0 |
| ; DAGISEL12-NEXT: s_or_saveexec_b32 s8, -1 |
| ; DAGISEL12-NEXT: s_mov_b32 s7, s4 |
| ; DAGISEL12-NEXT: s_mov_b32 s6, s3 |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: s_and_saveexec_b32 s3, s8 |
| ; DAGISEL12-NEXT: ; %bb.1: ; %shader |
| ; DAGISEL12-NEXT: v_add_nc_u32_e32 v12, 42, v12 |
| ; DAGISEL12-NEXT: v_add_nc_u32_e32 v8, 5, v8 |
| ; DAGISEL12-NEXT: ; %bb.2: ; %tail |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; DAGISEL12-NEXT: s_delay_alu instid0(VALU_DEP_2) |
| ; DAGISEL12-NEXT: v_add_nc_u32_e32 v11, 32, v12 |
| ; DAGISEL12-NEXT: s_mov_b32 exec_lo, s5 |
| ; DAGISEL12-NEXT: s_setpc_b64 s[6:7] |
| ; |
| ; GISEL10-LABEL: basic: |
| ; GISEL10: ; %bb.0: ; %entry |
| ; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GISEL10-NEXT: s_or_saveexec_b32 s8, -1 |
| ; GISEL10-NEXT: s_mov_b32 s6, s3 |
| ; GISEL10-NEXT: s_mov_b32 s7, s4 |
| ; GISEL10-NEXT: s_and_saveexec_b32 s3, s8 |
| ; GISEL10-NEXT: ; %bb.1: ; %shader |
| ; GISEL10-NEXT: v_add_nc_u32_e32 v12, 42, v12 |
| ; GISEL10-NEXT: v_add_nc_u32_e32 v8, 5, v8 |
| ; GISEL10-NEXT: ; %bb.2: ; %tail |
| ; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; GISEL10-NEXT: v_add_nc_u32_e32 v11, 32, v12 |
| ; GISEL10-NEXT: s_mov_b32 exec_lo, s5 |
| ; GISEL10-NEXT: s_setpc_b64 s[6:7] |
| ; |
| ; DAGISEL10-LABEL: basic: |
| ; DAGISEL10: ; %bb.0: ; %entry |
| ; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; DAGISEL10-NEXT: s_or_saveexec_b32 s8, -1 |
| ; DAGISEL10-NEXT: s_mov_b32 s7, s4 |
| ; DAGISEL10-NEXT: s_mov_b32 s6, s3 |
| ; DAGISEL10-NEXT: s_and_saveexec_b32 s3, s8 |
| ; DAGISEL10-NEXT: ; %bb.1: ; %shader |
| ; DAGISEL10-NEXT: v_add_nc_u32_e32 v12, 42, v12 |
| ; DAGISEL10-NEXT: v_add_nc_u32_e32 v8, 5, v8 |
| ; DAGISEL10-NEXT: ; %bb.2: ; %tail |
| ; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; DAGISEL10-NEXT: v_add_nc_u32_e32 v11, 32, v12 |
| ; DAGISEL10-NEXT: s_mov_b32 exec_lo, s5 |
| ; DAGISEL10-NEXT: s_setpc_b64 s[6:7] |
| entry: |
| %entry_exec = call i1 @llvm.amdgcn.init.whole.wave() |
| br i1 %entry_exec, label %shader, label %tail |
| |
| shader: |
| %newx = add i32 %x, 42 |
| %oldval = extractvalue { i32, ptr addrspace(5), i32, i32 } %vgpr, 0 |
| %newval = add i32 %oldval, 5 |
| %newvgpr = insertvalue { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 %newval, 0 |
| |
| br label %tail |
| |
| tail: |
| %full.x = phi i32 [%x, %entry], [%newx, %shader] |
| %full.vgpr = phi { i32, ptr addrspace(5), i32, i32 } [%vgpr, %entry], [%newvgpr, %shader] |
| %modified.x = add i32 %full.x, 32 |
| %vgpr.args = insertvalue { i32, ptr addrspace(5), i32, i32 } %full.vgpr, i32 %modified.x, 3 |
| call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i32 %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr.args, i32 0) |
| unreachable |
| } |
| |
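| ; Use WWM operations (set.inactive + ballot + strict.wwm) inside %shader and pass |
| ; the result to the chain call through a phi in %tail. |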
| define amdgpu_cs_chain void @wwm_in_shader(<3 x i32> inreg %sgpr, ptr inreg %callee, i32 inreg %exec, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 %x, i32 %y) { |
| ; GISEL12-LABEL: wwm_in_shader: |
| ; GISEL12: ; %bb.0: ; %entry |
| ; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GISEL12-NEXT: s_wait_expcnt 0x0 |
| ; GISEL12-NEXT: s_wait_samplecnt 0x0 |
| ; GISEL12-NEXT: s_wait_bvhcnt 0x0 |
| ; GISEL12-NEXT: s_wait_kmcnt 0x0 |
| ; GISEL12-NEXT: s_or_saveexec_b32 s8, -1 |
| ; GISEL12-NEXT: v_dual_mov_b32 v10, v12 :: v_dual_mov_b32 v11, v13 |
| ; GISEL12-NEXT: s_mov_b32 s6, s3 |
| ; GISEL12-NEXT: s_mov_b32 s7, s4 |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: s_and_saveexec_b32 s3, s8 |
| ; GISEL12-NEXT: ; %bb.1: ; %shader |
| ; GISEL12-NEXT: s_or_saveexec_b32 s4, -1 |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: v_cndmask_b32_e64 v0, 0x47, v10, s4 |
| ; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) |
| ; GISEL12-NEXT: v_cmp_ne_u32_e64 s8, 0, v0 |
| ; GISEL12-NEXT: s_wait_alu 0xf1ff |
| ; GISEL12-NEXT: v_mov_b32_e32 v0, s8 |
| ; GISEL12-NEXT: s_mov_b32 exec_lo, s4 |
| ; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GISEL12-NEXT: v_dual_mov_b32 v11, v0 :: v_dual_add_nc_u32 v10, 42, v10 |
| ; GISEL12-NEXT: ; %bb.2: ; %tail |
| ; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; GISEL12-NEXT: s_mov_b32 exec_lo, s5 |
| ; GISEL12-NEXT: s_setpc_b64 s[6:7] |
| ; |
| ; DAGISEL12-LABEL: wwm_in_shader: |
| ; DAGISEL12: ; %bb.0: ; %entry |
| ; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_expcnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_samplecnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_bvhcnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_kmcnt 0x0 |
| ; DAGISEL12-NEXT: s_or_saveexec_b32 s8, -1 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v11, v13 :: v_dual_mov_b32 v10, v12 |
| ; DAGISEL12-NEXT: s_mov_b32 s7, s4 |
| ; DAGISEL12-NEXT: s_mov_b32 s6, s3 |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: s_and_saveexec_b32 s3, s8 |
| ; DAGISEL12-NEXT: ; %bb.1: ; %shader |
| ; DAGISEL12-NEXT: s_or_saveexec_b32 s4, -1 |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: v_cndmask_b32_e64 v0, 0x47, v10, s4 |
| ; DAGISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; DAGISEL12-NEXT: v_cmp_ne_u32_e64 s8, 0, v0 |
| ; DAGISEL12-NEXT: s_mov_b32 exec_lo, s4 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v11, s8 :: v_dual_add_nc_u32 v10, 42, v10 |
| ; DAGISEL12-NEXT: ; %bb.2: ; %tail |
| ; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; DAGISEL12-NEXT: s_mov_b32 exec_lo, s5 |
| ; DAGISEL12-NEXT: s_setpc_b64 s[6:7] |
| ; |
| ; GISEL10-LABEL: wwm_in_shader: |
| ; GISEL10: ; %bb.0: ; %entry |
| ; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GISEL10-NEXT: s_or_saveexec_b32 s8, -1 |
| ; GISEL10-NEXT: v_mov_b32_e32 v10, v12 |
| ; GISEL10-NEXT: v_mov_b32_e32 v11, v13 |
| ; GISEL10-NEXT: s_mov_b32 s6, s3 |
| ; GISEL10-NEXT: s_mov_b32 s7, s4 |
| ; GISEL10-NEXT: s_and_saveexec_b32 s3, s8 |
| ; GISEL10-NEXT: ; %bb.1: ; %shader |
| ; GISEL10-NEXT: s_or_saveexec_b32 s4, -1 |
| ; GISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v10, s4 |
| ; GISEL10-NEXT: v_cmp_ne_u32_e64 s8, 0, v0 |
| ; GISEL10-NEXT: v_mov_b32_e32 v0, s8 |
| ; GISEL10-NEXT: s_mov_b32 exec_lo, s4 |
| ; GISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v10 |
| ; GISEL10-NEXT: v_mov_b32_e32 v11, v0 |
| ; GISEL10-NEXT: ; %bb.2: ; %tail |
| ; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; GISEL10-NEXT: s_mov_b32 exec_lo, s5 |
| ; GISEL10-NEXT: s_setpc_b64 s[6:7] |
| ; |
| ; DAGISEL10-LABEL: wwm_in_shader: |
| ; DAGISEL10: ; %bb.0: ; %entry |
| ; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; DAGISEL10-NEXT: s_or_saveexec_b32 s8, -1 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v11, v13 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v10, v12 |
| ; DAGISEL10-NEXT: s_mov_b32 s7, s4 |
| ; DAGISEL10-NEXT: s_mov_b32 s6, s3 |
| ; DAGISEL10-NEXT: s_and_saveexec_b32 s3, s8 |
| ; DAGISEL10-NEXT: ; %bb.1: ; %shader |
| ; DAGISEL10-NEXT: s_or_saveexec_b32 s4, -1 |
| ; DAGISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v10, s4 |
| ; DAGISEL10-NEXT: v_cmp_ne_u32_e64 s8, 0, v0 |
| ; DAGISEL10-NEXT: s_mov_b32 exec_lo, s4 |
| ; DAGISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v10 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v11, s8 |
| ; DAGISEL10-NEXT: ; %bb.2: ; %tail |
| ; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; DAGISEL10-NEXT: s_mov_b32 exec_lo, s5 |
| ; DAGISEL10-NEXT: s_setpc_b64 s[6:7] |
| entry: |
| %entry_exec = call i1 @llvm.amdgcn.init.whole.wave() |
| br i1 %entry_exec, label %shader, label %tail |
| |
| shader: |
| %nonwwm = add i32 %x, 42 |
| |
| %full.vgpr = call i32 @llvm.amdgcn.set.inactive.i32(i32 %x, i32 71) |
| %non.zero = icmp ne i32 %full.vgpr, 0 |
| %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %non.zero) |
| %wwm = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %ballot) |
| |
| br label %tail |
| |
| tail: |
| %full.nonwwm = phi i32 [%x, %entry], [%nonwwm, %shader] |
| %full.wwm = phi i32 [%y, %entry], [%wwm, %shader] |
| %vgpr.1 = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr, i32 %full.nonwwm, 2 |
| %vgpr.2 = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr.1, i32 %full.wwm, 3 |
| call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i32 %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr.2, i32 0) |
| unreachable |
| } |
| |
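| ; Same as above, but the phi in %tail covers the whole argument struct instead of |
| ; the individual values. |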
| define amdgpu_cs_chain void @phi_whole_struct(<3 x i32> inreg %sgpr, ptr inreg %callee, i32 inreg %exec, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 %x, i32 %y) { |
| ; GISEL12-LABEL: phi_whole_struct: |
| ; GISEL12: ; %bb.0: ; %entry |
| ; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GISEL12-NEXT: s_wait_expcnt 0x0 |
| ; GISEL12-NEXT: s_wait_samplecnt 0x0 |
| ; GISEL12-NEXT: s_wait_bvhcnt 0x0 |
| ; GISEL12-NEXT: s_wait_kmcnt 0x0 |
| ; GISEL12-NEXT: s_or_saveexec_b32 s8, -1 |
| ; GISEL12-NEXT: s_mov_b32 s6, s3 |
| ; GISEL12-NEXT: s_mov_b32 s7, s4 |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: s_and_saveexec_b32 s3, s8 |
| ; GISEL12-NEXT: ; %bb.1: ; %shader |
| ; GISEL12-NEXT: s_or_saveexec_b32 s4, -1 |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: v_cndmask_b32_e64 v0, 0x47, v12, s4 |
| ; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) |
| ; GISEL12-NEXT: v_cmp_ne_u32_e64 s8, 0, v0 |
| ; GISEL12-NEXT: s_wait_alu 0xf1ff |
| ; GISEL12-NEXT: v_mov_b32_e32 v0, s8 |
| ; GISEL12-NEXT: s_mov_b32 exec_lo, s4 |
| ; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GISEL12-NEXT: v_dual_mov_b32 v11, v0 :: v_dual_add_nc_u32 v10, 42, v12 |
| ; GISEL12-NEXT: ; %bb.2: ; %tail |
| ; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; GISEL12-NEXT: s_mov_b32 exec_lo, s5 |
| ; GISEL12-NEXT: s_setpc_b64 s[6:7] |
| ; |
| ; DAGISEL12-LABEL: phi_whole_struct: |
| ; DAGISEL12: ; %bb.0: ; %entry |
| ; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_expcnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_samplecnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_bvhcnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_kmcnt 0x0 |
| ; DAGISEL12-NEXT: s_or_saveexec_b32 s8, -1 |
| ; DAGISEL12-NEXT: s_mov_b32 s7, s4 |
| ; DAGISEL12-NEXT: s_mov_b32 s6, s3 |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: s_and_saveexec_b32 s3, s8 |
| ; DAGISEL12-NEXT: ; %bb.1: ; %shader |
| ; DAGISEL12-NEXT: s_or_saveexec_b32 s4, -1 |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: v_cndmask_b32_e64 v0, 0x47, v12, s4 |
| ; DAGISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; DAGISEL12-NEXT: v_cmp_ne_u32_e64 s8, 0, v0 |
| ; DAGISEL12-NEXT: s_mov_b32 exec_lo, s4 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v11, s8 :: v_dual_add_nc_u32 v10, 42, v12 |
| ; DAGISEL12-NEXT: ; %bb.2: ; %tail |
| ; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; DAGISEL12-NEXT: s_mov_b32 exec_lo, s5 |
| ; DAGISEL12-NEXT: s_setpc_b64 s[6:7] |
| ; |
| ; GISEL10-LABEL: phi_whole_struct: |
| ; GISEL10: ; %bb.0: ; %entry |
| ; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GISEL10-NEXT: s_or_saveexec_b32 s8, -1 |
| ; GISEL10-NEXT: s_mov_b32 s6, s3 |
| ; GISEL10-NEXT: s_mov_b32 s7, s4 |
| ; GISEL10-NEXT: s_and_saveexec_b32 s3, s8 |
| ; GISEL10-NEXT: ; %bb.1: ; %shader |
| ; GISEL10-NEXT: s_or_saveexec_b32 s4, -1 |
| ; GISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v12, s4 |
| ; GISEL10-NEXT: v_cmp_ne_u32_e64 s8, 0, v0 |
| ; GISEL10-NEXT: v_mov_b32_e32 v0, s8 |
| ; GISEL10-NEXT: s_mov_b32 exec_lo, s4 |
| ; GISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v12 |
| ; GISEL10-NEXT: v_mov_b32_e32 v11, v0 |
| ; GISEL10-NEXT: ; %bb.2: ; %tail |
| ; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; GISEL10-NEXT: s_mov_b32 exec_lo, s5 |
| ; GISEL10-NEXT: s_setpc_b64 s[6:7] |
| ; |
| ; DAGISEL10-LABEL: phi_whole_struct: |
| ; DAGISEL10: ; %bb.0: ; %entry |
| ; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; DAGISEL10-NEXT: s_or_saveexec_b32 s8, -1 |
| ; DAGISEL10-NEXT: s_mov_b32 s7, s4 |
| ; DAGISEL10-NEXT: s_mov_b32 s6, s3 |
| ; DAGISEL10-NEXT: s_and_saveexec_b32 s3, s8 |
| ; DAGISEL10-NEXT: ; %bb.1: ; %shader |
| ; DAGISEL10-NEXT: s_or_saveexec_b32 s4, -1 |
| ; DAGISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v12, s4 |
| ; DAGISEL10-NEXT: v_cmp_ne_u32_e64 s8, 0, v0 |
| ; DAGISEL10-NEXT: s_mov_b32 exec_lo, s4 |
| ; DAGISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v12 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v11, s8 |
| ; DAGISEL10-NEXT: ; %bb.2: ; %tail |
| ; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; DAGISEL10-NEXT: s_mov_b32 exec_lo, s5 |
| ; DAGISEL10-NEXT: s_setpc_b64 s[6:7] |
| entry: |
| %entry_exec = call i1 @llvm.amdgcn.init.whole.wave() |
| br i1 %entry_exec, label %shader, label %tail |
| |
| shader: |
| %nonwwm = add i32 %x, 42 |
| %vgpr.1 = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr, i32 %nonwwm, 2 |
| |
| %full.vgpr = call i32 @llvm.amdgcn.set.inactive.i32(i32 %x, i32 71) |
| %non.zero = icmp ne i32 %full.vgpr, 0 |
| %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %non.zero) |
| %wwm = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %ballot) |
| %vgpr.2 = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr.1, i32 %wwm, 3 |
| |
| br label %tail |
| |
| tail: |
| %vgpr.args = phi { i32, ptr addrspace(5), i32, i32} [%vgpr, %entry], [%vgpr.2, %shader] |
| call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i32 %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr.args, i32 0) |
| unreachable |
| } |
| |
| ; Introduce more complex control flow - %shader contains a simple loop, and %tail contains an if. |
| define amdgpu_cs_chain void @control_flow(<3 x i32> inreg %sgpr, ptr inreg %callee, i32 inreg %exec, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 %x, i32 %y) { |
| ; GISEL12-LABEL: control_flow: |
| ; GISEL12: ; %bb.0: ; %entry |
| ; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GISEL12-NEXT: s_wait_expcnt 0x0 |
| ; GISEL12-NEXT: s_wait_samplecnt 0x0 |
| ; GISEL12-NEXT: s_wait_bvhcnt 0x0 |
| ; GISEL12-NEXT: s_wait_kmcnt 0x0 |
| ; GISEL12-NEXT: s_or_saveexec_b32 s8, -1 |
| ; GISEL12-NEXT: s_mov_b32 s6, s3 |
| ; GISEL12-NEXT: s_mov_b32 s7, s4 |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: s_and_saveexec_b32 s3, s8 |
| ; GISEL12-NEXT: s_cbranch_execz .LBB3_4 |
| ; GISEL12-NEXT: ; %bb.1: ; %shader.preheader |
| ; GISEL12-NEXT: v_add_nc_u32_e32 v1, -1, v12 |
| ; GISEL12-NEXT: s_mov_b32 s4, 0 |
| ; GISEL12-NEXT: .LBB3_2: ; %shader |
| ; GISEL12-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) |
| ; GISEL12-NEXT: v_add_nc_u32_e32 v1, 1, v1 |
| ; GISEL12-NEXT: s_or_saveexec_b32 s8, -1 |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: v_cndmask_b32_e64 v0, 0x47, v1, s8 |
| ; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) |
| ; GISEL12-NEXT: v_cmp_ne_u32_e64 s9, 0, v0 |
| ; GISEL12-NEXT: s_wait_alu 0xf1ff |
| ; GISEL12-NEXT: v_mov_b32_e32 v0, s9 |
| ; GISEL12-NEXT: s_mov_b32 exec_lo, s8 |
| ; GISEL12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v13, v1 |
| ; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_2) |
| ; GISEL12-NEXT: v_mov_b32_e32 v11, v0 |
| ; GISEL12-NEXT: s_or_b32 s4, vcc_lo, s4 |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 |
| ; GISEL12-NEXT: s_cbranch_execnz .LBB3_2 |
| ; GISEL12-NEXT: ; %bb.3: ; %tail.loopexit |
| ; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s4 |
| ; GISEL12-NEXT: v_add_nc_u32_e32 v10, 42, v1 |
| ; GISEL12-NEXT: .LBB3_4: ; %Flow1 |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; GISEL12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GISEL12-NEXT: s_mov_b32 s3, exec_lo |
| ; GISEL12-NEXT: ; implicit-def: $vgpr8 |
| ; GISEL12-NEXT: v_cmpx_lt_i32_e64 v12, v13 |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: s_xor_b32 s3, exec_lo, s3 |
| ; GISEL12-NEXT: ; %bb.5: ; %tail.else |
| ; GISEL12-NEXT: s_or_saveexec_b32 s4, -1 |
| ; GISEL12-NEXT: v_mov_b32_e32 v0, 15 |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: s_mov_b32 exec_lo, s4 |
| ; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GISEL12-NEXT: v_mov_b32_e32 v8, v0 |
| ; GISEL12-NEXT: ; %bb.6: ; %Flow |
| ; GISEL12-NEXT: s_and_not1_saveexec_b32 s3, s3 |
| ; GISEL12-NEXT: ; %bb.7: ; %tail.then |
| ; GISEL12-NEXT: s_mov_b32 s4, 44 |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: v_mov_b32_e32 v8, s4 |
| ; GISEL12-NEXT: ; %bb.8: ; %tail.end |
| ; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; GISEL12-NEXT: s_mov_b32 exec_lo, s5 |
| ; GISEL12-NEXT: s_setpc_b64 s[6:7] |
| ; |
| ; DAGISEL12-LABEL: control_flow: |
| ; DAGISEL12: ; %bb.0: ; %entry |
| ; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_expcnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_samplecnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_bvhcnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_kmcnt 0x0 |
| ; DAGISEL12-NEXT: s_or_saveexec_b32 s8, -1 |
| ; DAGISEL12-NEXT: s_mov_b32 s7, s4 |
| ; DAGISEL12-NEXT: s_mov_b32 s6, s3 |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: s_and_saveexec_b32 s3, s8 |
| ; DAGISEL12-NEXT: s_cbranch_execz .LBB3_4 |
| ; DAGISEL12-NEXT: ; %bb.1: ; %shader.preheader |
| ; DAGISEL12-NEXT: v_add_nc_u32_e32 v1, -1, v12 |
| ; DAGISEL12-NEXT: s_mov_b32 s4, 0 |
| ; DAGISEL12-NEXT: .LBB3_2: ; %shader |
| ; DAGISEL12-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; DAGISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) |
| ; DAGISEL12-NEXT: v_add_nc_u32_e32 v1, 1, v1 |
| ; DAGISEL12-NEXT: s_or_saveexec_b32 s8, -1 |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: v_cndmask_b32_e64 v0, 0x47, v1, s8 |
| ; DAGISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; DAGISEL12-NEXT: v_cmp_ne_u32_e64 s9, 0, v0 |
| ; DAGISEL12-NEXT: s_mov_b32 exec_lo, s8 |
| ; DAGISEL12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v13, v1 |
| ; DAGISEL12-NEXT: v_mov_b32_e32 v11, s9 |
| ; DAGISEL12-NEXT: s_or_b32 s4, vcc_lo, s4 |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 |
| ; DAGISEL12-NEXT: s_cbranch_execnz .LBB3_2 |
| ; DAGISEL12-NEXT: ; %bb.3: ; %tail.loopexit |
| ; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s4 |
| ; DAGISEL12-NEXT: v_add_nc_u32_e32 v10, 42, v1 |
| ; DAGISEL12-NEXT: .LBB3_4: ; %Flow1 |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; DAGISEL12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; DAGISEL12-NEXT: s_mov_b32 s3, exec_lo |
| ; DAGISEL12-NEXT: ; implicit-def: $vgpr8 |
| ; DAGISEL12-NEXT: v_cmpx_lt_i32_e64 v12, v13 |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: s_xor_b32 s3, exec_lo, s3 |
| ; DAGISEL12-NEXT: ; %bb.5: ; %tail.else |
| ; DAGISEL12-NEXT: s_mov_b32 s4, 15 |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: v_mov_b32_e32 v8, s4 |
| ; DAGISEL12-NEXT: ; %bb.6: ; %Flow |
| ; DAGISEL12-NEXT: s_and_not1_saveexec_b32 s3, s3 |
| ; DAGISEL12-NEXT: ; %bb.7: ; %tail.then |
| ; DAGISEL12-NEXT: v_mov_b32_e32 v8, 44 |
| ; DAGISEL12-NEXT: ; %bb.8: ; %tail.end |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; DAGISEL12-NEXT: s_mov_b32 exec_lo, s5 |
| ; DAGISEL12-NEXT: s_setpc_b64 s[6:7] |
| ; |
| ; GISEL10-LABEL: control_flow: |
| ; GISEL10: ; %bb.0: ; %entry |
| ; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GISEL10-NEXT: s_or_saveexec_b32 s8, -1 |
| ; GISEL10-NEXT: s_mov_b32 s6, s3 |
| ; GISEL10-NEXT: s_mov_b32 s7, s4 |
| ; GISEL10-NEXT: s_and_saveexec_b32 s3, s8 |
| ; GISEL10-NEXT: s_cbranch_execz .LBB3_4 |
| ; GISEL10-NEXT: ; %bb.1: ; %shader.preheader |
| ; GISEL10-NEXT: v_add_nc_u32_e32 v1, -1, v12 |
| ; GISEL10-NEXT: s_mov_b32 s4, 0 |
| ; GISEL10-NEXT: .LBB3_2: ; %shader |
| ; GISEL10-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GISEL10-NEXT: v_add_nc_u32_e32 v1, 1, v1 |
| ; GISEL10-NEXT: s_or_saveexec_b32 s8, -1 |
| ; GISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v1, s8 |
| ; GISEL10-NEXT: v_cmp_ne_u32_e64 s9, 0, v0 |
| ; GISEL10-NEXT: v_mov_b32_e32 v0, s9 |
| ; GISEL10-NEXT: s_mov_b32 exec_lo, s8 |
| ; GISEL10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v13, v1 |
| ; GISEL10-NEXT: v_mov_b32_e32 v11, v0 |
| ; GISEL10-NEXT: s_or_b32 s4, vcc_lo, s4 |
| ; GISEL10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4 |
| ; GISEL10-NEXT: s_cbranch_execnz .LBB3_2 |
| ; GISEL10-NEXT: ; %bb.3: ; %tail.loopexit |
| ; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s4 |
| ; GISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v1 |
| ; GISEL10-NEXT: .LBB3_4: ; %Flow1 |
| ; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; GISEL10-NEXT: s_mov_b32 s3, exec_lo |
| ; GISEL10-NEXT: ; implicit-def: $vgpr8 |
| ; GISEL10-NEXT: v_cmpx_lt_i32_e64 v12, v13 |
| ; GISEL10-NEXT: s_xor_b32 s3, exec_lo, s3 |
| ; GISEL10-NEXT: ; %bb.5: ; %tail.else |
| ; GISEL10-NEXT: s_or_saveexec_b32 s4, -1 |
| ; GISEL10-NEXT: v_mov_b32_e32 v0, 15 |
| ; GISEL10-NEXT: s_mov_b32 exec_lo, s4 |
| ; GISEL10-NEXT: v_mov_b32_e32 v8, v0 |
| ; GISEL10-NEXT: ; %bb.6: ; %Flow |
| ; GISEL10-NEXT: s_andn2_saveexec_b32 s3, s3 |
| ; GISEL10-NEXT: ; %bb.7: ; %tail.then |
| ; GISEL10-NEXT: s_mov_b32 s4, 44 |
| ; GISEL10-NEXT: v_mov_b32_e32 v8, s4 |
| ; GISEL10-NEXT: ; %bb.8: ; %tail.end |
| ; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; GISEL10-NEXT: s_mov_b32 exec_lo, s5 |
| ; GISEL10-NEXT: s_setpc_b64 s[6:7] |
| ; |
| ; DAGISEL10-LABEL: control_flow: |
| ; DAGISEL10: ; %bb.0: ; %entry |
| ; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; DAGISEL10-NEXT: s_or_saveexec_b32 s8, -1 |
| ; DAGISEL10-NEXT: s_mov_b32 s7, s4 |
| ; DAGISEL10-NEXT: s_mov_b32 s6, s3 |
| ; DAGISEL10-NEXT: s_and_saveexec_b32 s3, s8 |
| ; DAGISEL10-NEXT: s_cbranch_execz .LBB3_4 |
| ; DAGISEL10-NEXT: ; %bb.1: ; %shader.preheader |
| ; DAGISEL10-NEXT: v_add_nc_u32_e32 v1, -1, v12 |
| ; DAGISEL10-NEXT: s_mov_b32 s4, 0 |
| ; DAGISEL10-NEXT: .LBB3_2: ; %shader |
| ; DAGISEL10-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; DAGISEL10-NEXT: v_add_nc_u32_e32 v1, 1, v1 |
| ; DAGISEL10-NEXT: s_or_saveexec_b32 s8, -1 |
| ; DAGISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v1, s8 |
| ; DAGISEL10-NEXT: v_cmp_ne_u32_e64 s9, 0, v0 |
| ; DAGISEL10-NEXT: s_mov_b32 exec_lo, s8 |
| ; DAGISEL10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v13, v1 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v11, s9 |
| ; DAGISEL10-NEXT: s_or_b32 s4, vcc_lo, s4 |
| ; DAGISEL10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4 |
| ; DAGISEL10-NEXT: s_cbranch_execnz .LBB3_2 |
| ; DAGISEL10-NEXT: ; %bb.3: ; %tail.loopexit |
| ; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s4 |
| ; DAGISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v1 |
| ; DAGISEL10-NEXT: .LBB3_4: ; %Flow1 |
| ; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; DAGISEL10-NEXT: s_mov_b32 s3, exec_lo |
| ; DAGISEL10-NEXT: ; implicit-def: $vgpr8 |
| ; DAGISEL10-NEXT: v_cmpx_lt_i32_e64 v12, v13 |
| ; DAGISEL10-NEXT: s_xor_b32 s3, exec_lo, s3 |
| ; DAGISEL10-NEXT: ; %bb.5: ; %tail.else |
| ; DAGISEL10-NEXT: s_mov_b32 s4, 15 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v8, s4 |
| ; DAGISEL10-NEXT: ; %bb.6: ; %Flow |
| ; DAGISEL10-NEXT: s_andn2_saveexec_b32 s3, s3 |
| ; DAGISEL10-NEXT: ; %bb.7: ; %tail.then |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v8, 44 |
| ; DAGISEL10-NEXT: ; %bb.8: ; %tail.end |
| ; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; DAGISEL10-NEXT: s_mov_b32 exec_lo, s5 |
| ; DAGISEL10-NEXT: s_setpc_b64 s[6:7] |
| entry: |
| %entry_exec = call i1 @llvm.amdgcn.init.whole.wave() |
| br i1 %entry_exec, label %shader, label %tail |
| |
| shader: |
| %i = phi i32 [%x, %entry], [%i.inc, %shader] |
| |
| %nonwwm = add i32 %i, 42 |
| %vgpr.1 = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr, i32 %nonwwm, 2 |
| |
| %full.vgpr = call i32 @llvm.amdgcn.set.inactive.i32(i32 %i, i32 71) |
| %non.zero = icmp ne i32 %full.vgpr, 0 |
| %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %non.zero) |
| %wwm = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %ballot) |
| %vgpr.2 = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr.1, i32 %wwm, 3 |
| |
| %i.inc = add i32 %i, 1 |
| %loop.cond = icmp ne i32 %i, %y |
| br i1 %loop.cond, label %shader, label %tail |
| |
| tail: |
| %vgpr.tail = phi { i32, ptr addrspace(5), i32, i32} [%vgpr, %entry], [%vgpr.2, %shader] |
| |
| %if.cond = icmp sge i32 %x, %y |
| br i1 %if.cond, label %tail.then, label %tail.else |
| |
| tail.then: |
| %vgpr.then = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr.tail, i32 44, 0 |
| br label %tail.end |
| |
| tail.else: |
| %wwm.tail = call i32 @llvm.amdgcn.strict.wwm.i32(i32 15) |
| %vgpr.else = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr.tail, i32 %wwm.tail, 0 |
| br label %tail.end |
| |
| tail.end: |
| %vgpr.args = phi { i32, ptr addrspace(5), i32, i32 } [%vgpr.then, %tail.then], [%vgpr.else, %tail.else] |
| call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i32 %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr.args, i32 0) |
| unreachable |
| } |
| |
| ; Try with v0-v7 occupied - this will force us to use higher registers for temporaries. Make sure we don't preserve them. |
| define amdgpu_cs_chain void @use_v0_7(<3 x i32> inreg %sgpr, ptr inreg %callee, i32 inreg %exec, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 %x, i32 %y) { |
| ; GISEL12-LABEL: use_v0_7: |
| ; GISEL12: ; %bb.0: ; %entry |
| ; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GISEL12-NEXT: s_wait_expcnt 0x0 |
| ; GISEL12-NEXT: s_wait_samplecnt 0x0 |
| ; GISEL12-NEXT: s_wait_bvhcnt 0x0 |
| ; GISEL12-NEXT: s_wait_kmcnt 0x0 |
| ; GISEL12-NEXT: s_or_saveexec_b32 s8, -1 |
| ; GISEL12-NEXT: s_mov_b32 s6, s3 |
| ; GISEL12-NEXT: s_mov_b32 s7, s4 |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: s_and_saveexec_b32 s3, s8 |
| ; GISEL12-NEXT: s_cbranch_execz .LBB4_2 |
| ; GISEL12-NEXT: ; %bb.1: ; %shader |
| ; GISEL12-NEXT: s_or_saveexec_b32 s4, -1 |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: v_cndmask_b32_e64 v13, 0x47, v12, s4 |
| ; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) |
| ; GISEL12-NEXT: v_cmp_ne_u32_e64 s8, 0, v13 |
| ; GISEL12-NEXT: s_wait_alu 0xf1ff |
| ; GISEL12-NEXT: v_mov_b32_e32 v13, s8 |
| ; GISEL12-NEXT: s_mov_b32 exec_lo, s4 |
| ; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GISEL12-NEXT: v_dual_mov_b32 v11, v13 :: v_dual_add_nc_u32 v10, 42, v12 |
| ; GISEL12-NEXT: ;;#ASMSTART |
| ; GISEL12-NEXT: ; use v0-7 |
| ; GISEL12-NEXT: ;;#ASMEND |
| ; GISEL12-NEXT: .LBB4_2: ; %tail |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; GISEL12-NEXT: s_mov_b32 exec_lo, s5 |
| ; GISEL12-NEXT: s_setpc_b64 s[6:7] |
| ; |
| ; DAGISEL12-LABEL: use_v0_7: |
| ; DAGISEL12: ; %bb.0: ; %entry |
| ; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_expcnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_samplecnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_bvhcnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_kmcnt 0x0 |
| ; DAGISEL12-NEXT: s_or_saveexec_b32 s8, -1 |
| ; DAGISEL12-NEXT: s_mov_b32 s7, s4 |
| ; DAGISEL12-NEXT: s_mov_b32 s6, s3 |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: s_and_saveexec_b32 s3, s8 |
| ; DAGISEL12-NEXT: s_cbranch_execz .LBB4_2 |
| ; DAGISEL12-NEXT: ; %bb.1: ; %shader |
| ; DAGISEL12-NEXT: s_or_saveexec_b32 s4, -1 |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: v_cndmask_b32_e64 v13, 0x47, v12, s4 |
| ; DAGISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; DAGISEL12-NEXT: v_cmp_ne_u32_e64 s8, 0, v13 |
| ; DAGISEL12-NEXT: s_mov_b32 exec_lo, s4 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v11, s8 :: v_dual_add_nc_u32 v10, 42, v12 |
| ; DAGISEL12-NEXT: ;;#ASMSTART |
| ; DAGISEL12-NEXT: ; use v0-7 |
| ; DAGISEL12-NEXT: ;;#ASMEND |
| ; DAGISEL12-NEXT: .LBB4_2: ; %tail |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; DAGISEL12-NEXT: s_mov_b32 exec_lo, s5 |
| ; DAGISEL12-NEXT: s_setpc_b64 s[6:7] |
| ; |
| ; GISEL10-LABEL: use_v0_7: |
| ; GISEL10: ; %bb.0: ; %entry |
| ; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GISEL10-NEXT: s_or_saveexec_b32 s8, -1 |
| ; GISEL10-NEXT: s_mov_b32 s6, s3 |
| ; GISEL10-NEXT: s_mov_b32 s7, s4 |
| ; GISEL10-NEXT: s_and_saveexec_b32 s3, s8 |
| ; GISEL10-NEXT: s_cbranch_execz .LBB4_2 |
| ; GISEL10-NEXT: ; %bb.1: ; %shader |
| ; GISEL10-NEXT: s_or_saveexec_b32 s4, -1 |
| ; GISEL10-NEXT: v_cndmask_b32_e64 v13, 0x47, v12, s4 |
| ; GISEL10-NEXT: v_cmp_ne_u32_e64 s8, 0, v13 |
| ; GISEL10-NEXT: v_mov_b32_e32 v13, s8 |
| ; GISEL10-NEXT: s_mov_b32 exec_lo, s4 |
| ; GISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v12 |
| ; GISEL10-NEXT: v_mov_b32_e32 v11, v13 |
| ; GISEL10-NEXT: ;;#ASMSTART |
| ; GISEL10-NEXT: ; use v0-7 |
| ; GISEL10-NEXT: ;;#ASMEND |
| ; GISEL10-NEXT: .LBB4_2: ; %tail |
| ; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; GISEL10-NEXT: s_mov_b32 exec_lo, s5 |
| ; GISEL10-NEXT: s_setpc_b64 s[6:7] |
| ; |
| ; DAGISEL10-LABEL: use_v0_7: |
| ; DAGISEL10: ; %bb.0: ; %entry |
| ; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; DAGISEL10-NEXT: s_or_saveexec_b32 s8, -1 |
| ; DAGISEL10-NEXT: s_mov_b32 s7, s4 |
| ; DAGISEL10-NEXT: s_mov_b32 s6, s3 |
| ; DAGISEL10-NEXT: s_and_saveexec_b32 s3, s8 |
| ; DAGISEL10-NEXT: s_cbranch_execz .LBB4_2 |
| ; DAGISEL10-NEXT: ; %bb.1: ; %shader |
| ; DAGISEL10-NEXT: s_or_saveexec_b32 s4, -1 |
| ; DAGISEL10-NEXT: v_cndmask_b32_e64 v13, 0x47, v12, s4 |
| ; DAGISEL10-NEXT: v_cmp_ne_u32_e64 s8, 0, v13 |
| ; DAGISEL10-NEXT: s_mov_b32 exec_lo, s4 |
| ; DAGISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v12 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v11, s8 |
| ; DAGISEL10-NEXT: ;;#ASMSTART |
| ; DAGISEL10-NEXT: ; use v0-7 |
| ; DAGISEL10-NEXT: ;;#ASMEND |
| ; DAGISEL10-NEXT: .LBB4_2: ; %tail |
| ; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; DAGISEL10-NEXT: s_mov_b32 exec_lo, s5 |
| ; DAGISEL10-NEXT: s_setpc_b64 s[6:7] |
| entry: |
| %entry_exec = call i1 @llvm.amdgcn.init.whole.wave() |
| br i1 %entry_exec, label %shader, label %tail |
| |
| shader: |
| call void asm sideeffect "; use v0-7", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() |
| |
| %nonwwm = add i32 %x, 42 |
| %vgpr.1 = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr, i32 %nonwwm, 2 |
| |
| %full.vgpr = call i32 @llvm.amdgcn.set.inactive.i32(i32 %x, i32 71) |
| %non.zero = icmp ne i32 %full.vgpr, 0 |
| %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %non.zero) |
| %wwm = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %ballot) |
| %vgpr.2 = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr.1, i32 %wwm, 3 |
| |
| br label %tail |
| |
| tail: |
| %vgpr.args = phi { i32, ptr addrspace(5), i32, i32} [%vgpr, %entry], [%vgpr.2, %shader] |
| call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i32 %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr.args, i32 0) |
| unreachable |
| } |
| |
| |
| ; Check that the inactive lanes of v8-v15 are correctly preserved even across a |
| ; WWM call that reads and writes them. |
| ; FIXME: The GlobalISel path hits a pre-existing issue, so the inactive lanes do get overwritten. |
| define amdgpu_cs_chain void @wwm_write_to_arg_reg(<3 x i32> inreg %sgpr, ptr inreg %callee, i32 inreg %exec, <16 x i32> %vgpr, i32 %x, i32 %y) { |
| ; GISEL12-LABEL: wwm_write_to_arg_reg: |
| ; GISEL12: ; %bb.0: ; %entry |
| ; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GISEL12-NEXT: s_wait_expcnt 0x0 |
| ; GISEL12-NEXT: s_wait_samplecnt 0x0 |
| ; GISEL12-NEXT: s_wait_bvhcnt 0x0 |
| ; GISEL12-NEXT: s_wait_kmcnt 0x0 |
| ; GISEL12-NEXT: s_mov_b32 s32, 0 |
| ; GISEL12-NEXT: s_or_saveexec_b32 s9, -1 |
| ; GISEL12-NEXT: s_or_saveexec_b32 s12, -1 |
| ; GISEL12-NEXT: s_mov_b32 s6, s0 |
| ; GISEL12-NEXT: s_mov_b32 s7, s1 |
| ; GISEL12-NEXT: s_mov_b32 s8, s2 |
| ; GISEL12-NEXT: s_mov_b32 s10, s3 |
| ; GISEL12-NEXT: s_mov_b32 s11, s4 |
| ; GISEL12-NEXT: v_dual_mov_b32 v24, v8 :: v_dual_mov_b32 v25, v9 |
| ; GISEL12-NEXT: v_dual_mov_b32 v26, v10 :: v_dual_mov_b32 v27, v11 |
| ; GISEL12-NEXT: v_dual_mov_b32 v28, v12 :: v_dual_mov_b32 v29, v13 |
| ; GISEL12-NEXT: v_dual_mov_b32 v30, v14 :: v_dual_mov_b32 v31, v15 |
| ; GISEL12-NEXT: v_dual_mov_b32 v32, v16 :: v_dual_mov_b32 v33, v17 |
| ; GISEL12-NEXT: v_dual_mov_b32 v34, v18 :: v_dual_mov_b32 v35, v19 |
| ; GISEL12-NEXT: v_dual_mov_b32 v36, v20 :: v_dual_mov_b32 v37, v21 |
| ; GISEL12-NEXT: v_dual_mov_b32 v38, v22 :: v_dual_mov_b32 v39, v23 |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: s_mov_b32 exec_lo, s12 |
| ; GISEL12-NEXT: s_and_saveexec_b32 s4, s9 |
| ; GISEL12-NEXT: s_cbranch_execz .LBB5_2 |
| ; GISEL12-NEXT: ; %bb.1: ; %shader |
| ; GISEL12-NEXT: s_or_saveexec_b32 s9, -1 |
| ; GISEL12-NEXT: s_getpc_b64 s[0:1] |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: s_sext_i32_i16 s1, s1 |
| ; GISEL12-NEXT: s_add_co_u32 s0, s0, write_v0_v15@gotpcrel32@lo+12 |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: s_add_co_ci_u32 s1, s1, write_v0_v15@gotpcrel32@hi+24 |
| ; GISEL12-NEXT: v_dual_mov_b32 v0, v24 :: v_dual_mov_b32 v1, v25 |
| ; GISEL12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 |
| ; GISEL12-NEXT: v_dual_mov_b32 v2, v26 :: v_dual_mov_b32 v3, v27 |
| ; GISEL12-NEXT: v_dual_mov_b32 v4, v28 :: v_dual_mov_b32 v5, v29 |
| ; GISEL12-NEXT: v_dual_mov_b32 v6, v30 :: v_dual_mov_b32 v7, v31 |
| ; GISEL12-NEXT: v_dual_mov_b32 v8, v32 :: v_dual_mov_b32 v9, v33 |
| ; GISEL12-NEXT: v_dual_mov_b32 v10, v34 :: v_dual_mov_b32 v11, v35 |
| ; GISEL12-NEXT: v_dual_mov_b32 v12, v36 :: v_dual_mov_b32 v13, v37 |
| ; GISEL12-NEXT: v_dual_mov_b32 v14, v38 :: v_dual_mov_b32 v15, v39 |
| ; GISEL12-NEXT: s_wait_kmcnt 0x0 |
| ; GISEL12-NEXT: s_swappc_b64 s[30:31], s[0:1] |
| ; GISEL12-NEXT: v_dual_mov_b32 v24, v0 :: v_dual_mov_b32 v25, v1 |
| ; GISEL12-NEXT: v_dual_mov_b32 v26, v2 :: v_dual_mov_b32 v27, v3 |
| ; GISEL12-NEXT: v_dual_mov_b32 v28, v4 :: v_dual_mov_b32 v29, v5 |
| ; GISEL12-NEXT: v_dual_mov_b32 v30, v6 :: v_dual_mov_b32 v31, v7 |
| ; GISEL12-NEXT: v_dual_mov_b32 v32, v8 :: v_dual_mov_b32 v33, v9 |
| ; GISEL12-NEXT: v_dual_mov_b32 v34, v10 :: v_dual_mov_b32 v35, v11 |
| ; GISEL12-NEXT: v_dual_mov_b32 v36, v12 :: v_dual_mov_b32 v37, v13 |
| ; GISEL12-NEXT: v_dual_mov_b32 v38, v14 :: v_dual_mov_b32 v39, v15 |
| ; GISEL12-NEXT: s_mov_b32 exec_lo, s9 |
| ; GISEL12-NEXT: ; kill: def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 killed $exec |
| ; GISEL12-NEXT: .LBB5_2: ; %tail |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s4 |
| ; GISEL12-NEXT: v_dual_mov_b32 v8, v24 :: v_dual_mov_b32 v9, v25 |
| ; GISEL12-NEXT: v_dual_mov_b32 v10, v26 :: v_dual_mov_b32 v11, v27 |
| ; GISEL12-NEXT: v_dual_mov_b32 v12, v28 :: v_dual_mov_b32 v13, v29 |
| ; GISEL12-NEXT: v_dual_mov_b32 v14, v30 :: v_dual_mov_b32 v15, v31 |
| ; GISEL12-NEXT: v_dual_mov_b32 v16, v32 :: v_dual_mov_b32 v17, v33 |
| ; GISEL12-NEXT: v_dual_mov_b32 v18, v34 :: v_dual_mov_b32 v19, v35 |
| ; GISEL12-NEXT: v_dual_mov_b32 v20, v36 :: v_dual_mov_b32 v21, v37 |
| ; GISEL12-NEXT: v_dual_mov_b32 v22, v38 :: v_dual_mov_b32 v23, v39 |
| ; GISEL12-NEXT: s_mov_b32 s0, s6 |
| ; GISEL12-NEXT: s_mov_b32 s1, s7 |
| ; GISEL12-NEXT: s_mov_b32 s2, s8 |
| ; GISEL12-NEXT: s_mov_b32 exec_lo, s5 |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: s_setpc_b64 s[10:11] |
| ; |
| ; DAGISEL12-LABEL: wwm_write_to_arg_reg: |
| ; DAGISEL12: ; %bb.0: ; %entry |
| ; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_expcnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_samplecnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_bvhcnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_kmcnt 0x0 |
| ; DAGISEL12-NEXT: s_mov_b32 s32, 0 |
| ; DAGISEL12-NEXT: s_or_saveexec_b32 s11, -1 |
| ; DAGISEL12-NEXT: s_or_saveexec_b32 s6, -1 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v39, v23 :: v_dual_mov_b32 v38, v22 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v37, v21 :: v_dual_mov_b32 v36, v20 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v35, v19 :: v_dual_mov_b32 v34, v18 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v33, v17 :: v_dual_mov_b32 v32, v16 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v31, v15 :: v_dual_mov_b32 v30, v14 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v29, v13 :: v_dual_mov_b32 v28, v12 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v27, v11 :: v_dual_mov_b32 v26, v10 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v25, v9 :: v_dual_mov_b32 v24, v8 |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: s_mov_b32 exec_lo, s6 |
| ; DAGISEL12-NEXT: s_mov_b32 s9, s4 |
| ; DAGISEL12-NEXT: s_mov_b32 s8, s3 |
| ; DAGISEL12-NEXT: s_mov_b32 s4, s2 |
| ; DAGISEL12-NEXT: s_mov_b32 s6, s1 |
| ; DAGISEL12-NEXT: s_mov_b32 s7, s0 |
| ; DAGISEL12-NEXT: s_and_saveexec_b32 s10, s11 |
| ; DAGISEL12-NEXT: s_cbranch_execz .LBB5_2 |
| ; DAGISEL12-NEXT: ; %bb.1: ; %shader |
| ; DAGISEL12-NEXT: s_or_saveexec_b32 s11, -1 |
| ; DAGISEL12-NEXT: s_getpc_b64 s[0:1] |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: s_sext_i32_i16 s1, s1 |
| ; DAGISEL12-NEXT: s_add_co_u32 s0, s0, write_v0_v15@gotpcrel32@lo+12 |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: s_add_co_ci_u32 s1, s1, write_v0_v15@gotpcrel32@hi+24 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v0, v24 :: v_dual_mov_b32 v1, v25 |
| ; DAGISEL12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v2, v26 :: v_dual_mov_b32 v3, v27 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v4, v28 :: v_dual_mov_b32 v5, v29 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v6, v30 :: v_dual_mov_b32 v7, v31 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v8, v32 :: v_dual_mov_b32 v9, v33 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v10, v34 :: v_dual_mov_b32 v11, v35 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v12, v36 :: v_dual_mov_b32 v13, v37 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v14, v38 :: v_dual_mov_b32 v15, v39 |
| ; DAGISEL12-NEXT: s_wait_kmcnt 0x0 |
| ; DAGISEL12-NEXT: s_swappc_b64 s[30:31], s[0:1] |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v40, v0 :: v_dual_mov_b32 v41, v1 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v42, v2 :: v_dual_mov_b32 v43, v3 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v44, v4 :: v_dual_mov_b32 v45, v5 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v46, v6 :: v_dual_mov_b32 v47, v7 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v48, v8 :: v_dual_mov_b32 v49, v9 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v50, v10 :: v_dual_mov_b32 v51, v11 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v52, v12 :: v_dual_mov_b32 v53, v13 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v54, v14 :: v_dual_mov_b32 v55, v15 |
| ; DAGISEL12-NEXT: s_mov_b32 exec_lo, s11 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v24, v40 :: v_dual_mov_b32 v25, v41 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v26, v42 :: v_dual_mov_b32 v27, v43 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v28, v44 :: v_dual_mov_b32 v29, v45 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v30, v46 :: v_dual_mov_b32 v31, v47 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v32, v48 :: v_dual_mov_b32 v33, v49 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v34, v50 :: v_dual_mov_b32 v35, v51 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v36, v52 :: v_dual_mov_b32 v37, v53 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v38, v54 :: v_dual_mov_b32 v39, v55 |
| ; DAGISEL12-NEXT: .LBB5_2: ; %tail |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s10 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v8, v24 :: v_dual_mov_b32 v9, v25 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v10, v26 :: v_dual_mov_b32 v11, v27 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v12, v28 :: v_dual_mov_b32 v13, v29 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v14, v30 :: v_dual_mov_b32 v15, v31 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v16, v32 :: v_dual_mov_b32 v17, v33 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v18, v34 :: v_dual_mov_b32 v19, v35 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v20, v36 :: v_dual_mov_b32 v21, v37 |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v22, v38 :: v_dual_mov_b32 v23, v39 |
| ; DAGISEL12-NEXT: s_mov_b32 s0, s7 |
| ; DAGISEL12-NEXT: s_mov_b32 s1, s6 |
| ; DAGISEL12-NEXT: s_mov_b32 s2, s4 |
| ; DAGISEL12-NEXT: s_mov_b32 exec_lo, s5 |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: s_setpc_b64 s[8:9] |
| ; |
| ; GISEL10-LABEL: wwm_write_to_arg_reg: |
| ; GISEL10: ; %bb.0: ; %entry |
| ; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GISEL10-NEXT: s_mov_b32 s32, 0 |
| ; GISEL10-NEXT: s_or_saveexec_b32 s9, -1 |
| ; GISEL10-NEXT: s_or_saveexec_b32 s12, -1 |
| ; GISEL10-NEXT: s_mov_b32 s6, s0 |
| ; GISEL10-NEXT: s_mov_b32 s7, s1 |
| ; GISEL10-NEXT: s_mov_b32 s8, s2 |
| ; GISEL10-NEXT: s_mov_b32 s10, s3 |
| ; GISEL10-NEXT: s_mov_b32 s11, s4 |
| ; GISEL10-NEXT: v_mov_b32_e32 v24, v8 |
| ; GISEL10-NEXT: v_mov_b32_e32 v25, v9 |
| ; GISEL10-NEXT: v_mov_b32_e32 v26, v10 |
| ; GISEL10-NEXT: v_mov_b32_e32 v27, v11 |
| ; GISEL10-NEXT: v_mov_b32_e32 v28, v12 |
| ; GISEL10-NEXT: v_mov_b32_e32 v29, v13 |
| ; GISEL10-NEXT: v_mov_b32_e32 v30, v14 |
| ; GISEL10-NEXT: v_mov_b32_e32 v31, v15 |
| ; GISEL10-NEXT: v_mov_b32_e32 v32, v16 |
| ; GISEL10-NEXT: v_mov_b32_e32 v33, v17 |
| ; GISEL10-NEXT: v_mov_b32_e32 v34, v18 |
| ; GISEL10-NEXT: v_mov_b32_e32 v35, v19 |
| ; GISEL10-NEXT: v_mov_b32_e32 v36, v20 |
| ; GISEL10-NEXT: v_mov_b32_e32 v37, v21 |
| ; GISEL10-NEXT: v_mov_b32_e32 v38, v22 |
| ; GISEL10-NEXT: v_mov_b32_e32 v39, v23 |
| ; GISEL10-NEXT: s_mov_b32 exec_lo, s12 |
| ; GISEL10-NEXT: s_and_saveexec_b32 s4, s9 |
| ; GISEL10-NEXT: s_cbranch_execz .LBB5_2 |
| ; GISEL10-NEXT: ; %bb.1: ; %shader |
| ; GISEL10-NEXT: s_or_saveexec_b32 s9, -1 |
| ; GISEL10-NEXT: s_getpc_b64 s[0:1] |
| ; GISEL10-NEXT: s_add_u32 s0, s0, write_v0_v15@gotpcrel32@lo+4 |
| ; GISEL10-NEXT: s_addc_u32 s1, s1, write_v0_v15@gotpcrel32@hi+12 |
| ; GISEL10-NEXT: v_mov_b32_e32 v0, v24 |
| ; GISEL10-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x0 |
| ; GISEL10-NEXT: v_mov_b32_e32 v1, v25 |
| ; GISEL10-NEXT: v_mov_b32_e32 v2, v26 |
| ; GISEL10-NEXT: v_mov_b32_e32 v3, v27 |
| ; GISEL10-NEXT: v_mov_b32_e32 v4, v28 |
| ; GISEL10-NEXT: v_mov_b32_e32 v5, v29 |
| ; GISEL10-NEXT: v_mov_b32_e32 v6, v30 |
| ; GISEL10-NEXT: v_mov_b32_e32 v7, v31 |
| ; GISEL10-NEXT: v_mov_b32_e32 v8, v32 |
| ; GISEL10-NEXT: v_mov_b32_e32 v9, v33 |
| ; GISEL10-NEXT: v_mov_b32_e32 v10, v34 |
| ; GISEL10-NEXT: v_mov_b32_e32 v11, v35 |
| ; GISEL10-NEXT: v_mov_b32_e32 v12, v36 |
| ; GISEL10-NEXT: v_mov_b32_e32 v13, v37 |
| ; GISEL10-NEXT: v_mov_b32_e32 v14, v38 |
| ; GISEL10-NEXT: v_mov_b32_e32 v15, v39 |
| ; GISEL10-NEXT: s_mov_b64 s[0:1], s[48:49] |
| ; GISEL10-NEXT: s_mov_b64 s[2:3], s[50:51] |
| ; GISEL10-NEXT: s_waitcnt lgkmcnt(0) |
| ; GISEL10-NEXT: s_swappc_b64 s[30:31], s[12:13] |
| ; GISEL10-NEXT: v_mov_b32_e32 v24, v0 |
| ; GISEL10-NEXT: v_mov_b32_e32 v25, v1 |
| ; GISEL10-NEXT: v_mov_b32_e32 v26, v2 |
| ; GISEL10-NEXT: v_mov_b32_e32 v27, v3 |
| ; GISEL10-NEXT: v_mov_b32_e32 v28, v4 |
| ; GISEL10-NEXT: v_mov_b32_e32 v29, v5 |
| ; GISEL10-NEXT: v_mov_b32_e32 v30, v6 |
| ; GISEL10-NEXT: v_mov_b32_e32 v31, v7 |
| ; GISEL10-NEXT: v_mov_b32_e32 v32, v8 |
| ; GISEL10-NEXT: v_mov_b32_e32 v33, v9 |
| ; GISEL10-NEXT: v_mov_b32_e32 v34, v10 |
| ; GISEL10-NEXT: v_mov_b32_e32 v35, v11 |
| ; GISEL10-NEXT: v_mov_b32_e32 v36, v12 |
| ; GISEL10-NEXT: v_mov_b32_e32 v37, v13 |
| ; GISEL10-NEXT: v_mov_b32_e32 v38, v14 |
| ; GISEL10-NEXT: v_mov_b32_e32 v39, v15 |
| ; GISEL10-NEXT: s_mov_b32 exec_lo, s9 |
| ; GISEL10-NEXT: ; kill: def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 killed $exec |
| ; GISEL10-NEXT: .LBB5_2: ; %tail |
| ; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s4 |
| ; GISEL10-NEXT: v_mov_b32_e32 v8, v24 |
| ; GISEL10-NEXT: v_mov_b32_e32 v9, v25 |
| ; GISEL10-NEXT: v_mov_b32_e32 v10, v26 |
| ; GISEL10-NEXT: v_mov_b32_e32 v11, v27 |
| ; GISEL10-NEXT: v_mov_b32_e32 v12, v28 |
| ; GISEL10-NEXT: v_mov_b32_e32 v13, v29 |
| ; GISEL10-NEXT: v_mov_b32_e32 v14, v30 |
| ; GISEL10-NEXT: v_mov_b32_e32 v15, v31 |
| ; GISEL10-NEXT: v_mov_b32_e32 v16, v32 |
| ; GISEL10-NEXT: v_mov_b32_e32 v17, v33 |
| ; GISEL10-NEXT: v_mov_b32_e32 v18, v34 |
| ; GISEL10-NEXT: v_mov_b32_e32 v19, v35 |
| ; GISEL10-NEXT: v_mov_b32_e32 v20, v36 |
| ; GISEL10-NEXT: v_mov_b32_e32 v21, v37 |
| ; GISEL10-NEXT: v_mov_b32_e32 v22, v38 |
| ; GISEL10-NEXT: v_mov_b32_e32 v23, v39 |
| ; GISEL10-NEXT: s_mov_b32 s0, s6 |
| ; GISEL10-NEXT: s_mov_b32 s1, s7 |
| ; GISEL10-NEXT: s_mov_b32 s2, s8 |
| ; GISEL10-NEXT: s_mov_b32 exec_lo, s5 |
| ; GISEL10-NEXT: s_setpc_b64 s[10:11] |
| ; |
| ; DAGISEL10-LABEL: wwm_write_to_arg_reg: |
| ; DAGISEL10: ; %bb.0: ; %entry |
| ; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; DAGISEL10-NEXT: s_mov_b32 s32, 0 |
| ; DAGISEL10-NEXT: s_or_saveexec_b32 s11, -1 |
| ; DAGISEL10-NEXT: s_or_saveexec_b32 s6, -1 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v39, v23 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v38, v22 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v37, v21 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v36, v20 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v35, v19 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v34, v18 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v33, v17 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v32, v16 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v31, v15 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v30, v14 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v29, v13 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v28, v12 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v27, v11 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v26, v10 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v25, v9 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v24, v8 |
| ; DAGISEL10-NEXT: s_mov_b32 exec_lo, s6 |
| ; DAGISEL10-NEXT: s_mov_b32 s9, s4 |
| ; DAGISEL10-NEXT: s_mov_b32 s8, s3 |
| ; DAGISEL10-NEXT: s_mov_b32 s4, s2 |
| ; DAGISEL10-NEXT: s_mov_b32 s6, s1 |
| ; DAGISEL10-NEXT: s_mov_b32 s7, s0 |
| ; DAGISEL10-NEXT: s_and_saveexec_b32 s10, s11 |
| ; DAGISEL10-NEXT: s_cbranch_execz .LBB5_2 |
| ; DAGISEL10-NEXT: ; %bb.1: ; %shader |
| ; DAGISEL10-NEXT: s_or_saveexec_b32 s11, -1 |
| ; DAGISEL10-NEXT: s_getpc_b64 s[0:1] |
| ; DAGISEL10-NEXT: s_add_u32 s0, s0, write_v0_v15@gotpcrel32@lo+4 |
| ; DAGISEL10-NEXT: s_addc_u32 s1, s1, write_v0_v15@gotpcrel32@hi+12 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v0, v24 |
| ; DAGISEL10-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x0 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v1, v25 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v2, v26 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v3, v27 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v4, v28 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v5, v29 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v6, v30 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v7, v31 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v8, v32 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v9, v33 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v10, v34 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v11, v35 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v12, v36 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v13, v37 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v14, v38 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v15, v39 |
| ; DAGISEL10-NEXT: s_mov_b64 s[0:1], s[48:49] |
| ; DAGISEL10-NEXT: s_mov_b64 s[2:3], s[50:51] |
| ; DAGISEL10-NEXT: s_waitcnt lgkmcnt(0) |
| ; DAGISEL10-NEXT: s_swappc_b64 s[30:31], s[12:13] |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v40, v0 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v41, v1 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v42, v2 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v43, v3 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v44, v4 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v45, v5 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v46, v6 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v47, v7 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v48, v8 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v49, v9 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v50, v10 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v51, v11 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v52, v12 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v53, v13 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v54, v14 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v55, v15 |
| ; DAGISEL10-NEXT: s_mov_b32 exec_lo, s11 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v24, v40 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v25, v41 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v26, v42 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v27, v43 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v28, v44 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v29, v45 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v30, v46 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v31, v47 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v32, v48 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v33, v49 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v34, v50 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v35, v51 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v36, v52 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v37, v53 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v38, v54 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v39, v55 |
| ; DAGISEL10-NEXT: .LBB5_2: ; %tail |
| ; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s10 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v8, v24 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v9, v25 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v10, v26 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v11, v27 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v12, v28 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v13, v29 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v14, v30 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v15, v31 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v16, v32 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v17, v33 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v18, v34 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v19, v35 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v20, v36 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v21, v37 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v22, v38 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v23, v39 |
| ; DAGISEL10-NEXT: s_mov_b32 s0, s7 |
| ; DAGISEL10-NEXT: s_mov_b32 s1, s6 |
| ; DAGISEL10-NEXT: s_mov_b32 s2, s4 |
| ; DAGISEL10-NEXT: s_mov_b32 exec_lo, s5 |
| ; DAGISEL10-NEXT: s_setpc_b64 s[8:9] |
| entry: |
| %entry_exec = call i1 @llvm.amdgcn.init.whole.wave() |
| br i1 %entry_exec, label %shader, label %tail |
| |
| shader: |
| %v0.15 = call amdgpu_gfx <16 x i32> @write_v0_v15(<16 x i32> %vgpr) |
| %vgpr.wwm = call <16 x i32> @llvm.amdgcn.strict.wwm.v16i32(<16 x i32> %v0.15) |
| |
| br label %tail |
| |
| tail: |
| %vgpr.args = phi <16 x i32> [%vgpr, %entry], [%vgpr.wwm, %shader] |
| call void(ptr, i32, <3 x i32>, <16 x i32>, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i32 %exec, <3 x i32> inreg %sgpr, <16 x i32> %vgpr.args, i32 0) |
| unreachable |
| } |
| |
| ; Functions that contain amdgcn.init.whole.wave do not preserve the inactive lanes |
| ; of any VGPRs, so the middle end will explicitly preserve them when needed by adding |
| ; dummy VGPR arguments. Since only the inactive lanes matter, we need to make it |
| ; clear to the backend that it's safe to allocate v9's active lanes inside %shader. |
| ; This is achieved by using the llvm.amdgcn.dead intrinsic. |
| define amdgpu_cs_chain void @with_inactive_vgprs(ptr inreg %callee, i32 inreg %exec, i32 inreg %sgpr, i32 %active.vgpr, i32 %inactive.vgpr) { |
| ; GISEL12-LABEL: with_inactive_vgprs: |
| ; GISEL12: ; %bb.0: ; %entry |
| ; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GISEL12-NEXT: s_wait_expcnt 0x0 |
| ; GISEL12-NEXT: s_wait_samplecnt 0x0 |
| ; GISEL12-NEXT: s_wait_bvhcnt 0x0 |
| ; GISEL12-NEXT: s_wait_kmcnt 0x0 |
| ; GISEL12-NEXT: s_or_saveexec_b32 s6, -1 |
| ; GISEL12-NEXT: s_mov_b32 s4, s0 |
| ; GISEL12-NEXT: s_mov_b32 s5, s1 |
| ; GISEL12-NEXT: s_mov_b32 s0, s3 |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: s_and_saveexec_b32 s1, s6 |
| ; GISEL12-NEXT: s_cbranch_execz .LBB6_2 |
| ; GISEL12-NEXT: ; %bb.1: ; %shader |
| ; GISEL12-NEXT: v_dual_mov_b32 v10, s5 :: v_dual_mov_b32 v9, s4 |
| ; GISEL12-NEXT: flat_load_b32 v11, v[9:10] |
| ; GISEL12-NEXT: ;;#ASMSTART |
| ; GISEL12-NEXT: ; use v0-7 |
| ; GISEL12-NEXT: ;;#ASMEND |
| ; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GISEL12-NEXT: v_add_nc_u32_e32 v8, v8, v11 |
| ; GISEL12-NEXT: flat_store_b32 v[9:10], v11 |
| ; GISEL12-NEXT: ; implicit-def: $vgpr9 |
| ; GISEL12-NEXT: .LBB6_2: ; %tail.block |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s1 |
| ; GISEL12-NEXT: s_mov_b32 exec_lo, s2 |
| ; GISEL12-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; DAGISEL12-LABEL: with_inactive_vgprs: |
| ; DAGISEL12: ; %bb.0: ; %entry |
| ; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_expcnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_samplecnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_bvhcnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_kmcnt 0x0 |
| ; DAGISEL12-NEXT: s_or_saveexec_b32 s6, -1 |
| ; DAGISEL12-NEXT: s_mov_b32 s5, s1 |
| ; DAGISEL12-NEXT: s_mov_b32 s4, s0 |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: s_and_saveexec_b32 s0, s6 |
| ; DAGISEL12-NEXT: s_cbranch_execz .LBB6_2 |
| ; DAGISEL12-NEXT: ; %bb.1: ; %shader |
| ; DAGISEL12-NEXT: v_dual_mov_b32 v10, s5 :: v_dual_mov_b32 v9, s4 |
| ; DAGISEL12-NEXT: flat_load_b32 v11, v[9:10] |
| ; DAGISEL12-NEXT: ;;#ASMSTART |
| ; DAGISEL12-NEXT: ; use v0-7 |
| ; DAGISEL12-NEXT: ;;#ASMEND |
| ; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; DAGISEL12-NEXT: v_add_nc_u32_e32 v8, v8, v11 |
| ; DAGISEL12-NEXT: flat_store_b32 v[9:10], v11 |
| ; DAGISEL12-NEXT: ; implicit-def: $vgpr9 |
| ; DAGISEL12-NEXT: .LBB6_2: ; %tail.block |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s0 |
| ; DAGISEL12-NEXT: s_mov_b32 s0, s3 |
| ; DAGISEL12-NEXT: s_mov_b32 exec_lo, s2 |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; GISEL10-LABEL: with_inactive_vgprs: |
| ; GISEL10: ; %bb.0: ; %entry |
| ; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GISEL10-NEXT: s_or_saveexec_b32 s6, -1 |
| ; GISEL10-NEXT: s_mov_b32 s4, s0 |
| ; GISEL10-NEXT: s_mov_b32 s5, s1 |
| ; GISEL10-NEXT: s_mov_b32 s0, s3 |
| ; GISEL10-NEXT: s_and_saveexec_b32 s1, s6 |
| ; GISEL10-NEXT: s_cbranch_execz .LBB6_2 |
| ; GISEL10-NEXT: ; %bb.1: ; %shader |
| ; GISEL10-NEXT: v_mov_b32_e32 v10, s5 |
| ; GISEL10-NEXT: v_mov_b32_e32 v9, s4 |
| ; GISEL10-NEXT: flat_load_dword v11, v[9:10] |
| ; GISEL10-NEXT: ;;#ASMSTART |
| ; GISEL10-NEXT: ; use v0-7 |
| ; GISEL10-NEXT: ;;#ASMEND |
| ; GISEL10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GISEL10-NEXT: v_add_nc_u32_e32 v8, v8, v11 |
| ; GISEL10-NEXT: flat_store_dword v[9:10], v11 |
| ; GISEL10-NEXT: ; implicit-def: $vgpr9 |
| ; GISEL10-NEXT: .LBB6_2: ; %tail.block |
| ; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s1 |
| ; GISEL10-NEXT: s_mov_b32 exec_lo, s2 |
| ; GISEL10-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; DAGISEL10-LABEL: with_inactive_vgprs: |
| ; DAGISEL10: ; %bb.0: ; %entry |
| ; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; DAGISEL10-NEXT: s_or_saveexec_b32 s6, -1 |
| ; DAGISEL10-NEXT: s_mov_b32 s5, s1 |
| ; DAGISEL10-NEXT: s_mov_b32 s4, s0 |
| ; DAGISEL10-NEXT: s_and_saveexec_b32 s0, s6 |
| ; DAGISEL10-NEXT: s_cbranch_execz .LBB6_2 |
| ; DAGISEL10-NEXT: ; %bb.1: ; %shader |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v10, s5 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v9, s4 |
| ; DAGISEL10-NEXT: flat_load_dword v11, v[9:10] |
| ; DAGISEL10-NEXT: ;;#ASMSTART |
| ; DAGISEL10-NEXT: ; use v0-7 |
| ; DAGISEL10-NEXT: ;;#ASMEND |
| ; DAGISEL10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; DAGISEL10-NEXT: v_add_nc_u32_e32 v8, v8, v11 |
| ; DAGISEL10-NEXT: flat_store_dword v[9:10], v11 |
| ; DAGISEL10-NEXT: ; implicit-def: $vgpr9 |
| ; DAGISEL10-NEXT: .LBB6_2: ; %tail.block |
| ; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s0 |
| ; DAGISEL10-NEXT: s_mov_b32 s0, s3 |
| ; DAGISEL10-NEXT: s_mov_b32 exec_lo, s2 |
| ; DAGISEL10-NEXT: s_setpc_b64 s[4:5] |
| entry: |
| %imp.def = call i32 @llvm.amdgcn.dead() |
| %initial.exec = call i1 @llvm.amdgcn.init.whole.wave() |
| br i1 %initial.exec, label %shader, label %tail.block |
| |
| shader: ; preds = %entry |
| %use.another.vgpr = load i32, ptr %callee ; something that won't be moved past the inline asm |
| call void asm sideeffect "; use v0-7", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() |
| store i32 %use.another.vgpr, ptr %callee |
| %active.vgpr.new = add i32 %active.vgpr, %use.another.vgpr |
| br label %tail.block |
| |
| tail.block: ; preds = %entry, %shader |
| %active.vgpr.arg = phi i32 [ %active.vgpr, %entry ], [ %active.vgpr.new, %shader ] |
| %inactive.vgpr.arg = phi i32 [ %inactive.vgpr, %entry ], [ %imp.def, %shader ] |
| %vgprs.0 = insertvalue { i32, i32 } poison, i32 %active.vgpr.arg, 0 |
| %vgprs = insertvalue { i32, i32 } %vgprs.0, i32 %inactive.vgpr.arg, 1 |
| call void (ptr, i32, i32, { i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.i32.sl_i32i32(ptr inreg %callee, i32 inreg %exec, i32 inreg %sgpr, { i32, i32} %vgprs, i32 0) |
| unreachable |
| } |
| |
| declare amdgpu_gfx <16 x i32> @write_v0_v15(<16 x i32>) |