| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s |
| ; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=FLAT %s |
| |
; Kernel with a single-block loop (ENDIF) whose exit condition %1 is computed
; once in main_body, before the loop is entered. ENDIF branches on %1 either
; to ENDLOOP (exit) or back to itself.
; Parameters: %out - global (addrspace 1) pointer written once after the loop;
;             %a   - i32 ANDed with the mbcnt.lo result to form the condition.
; The expected output for both run lines computes vcc ahead of the loop label,
; accumulates it into s[2:3] inside the loop (s_and_b64 / s_or_b64), loops via
; s_cbranch_execnz, and restores exec with s_or_b64 at the top of ENDLOOP.
; NOTE(review): judging by the function name this guards against the break
; being placed outside the loop - confirm against the original commit.
| define amdgpu_kernel void @break_inserted_outside_of_loop(ptr addrspace(1) %out, i32 %a) { |
| ; SI-LABEL: break_inserted_outside_of_loop: |
| ; SI: ; %bb.0: ; %main_body |
| ; SI-NEXT: s_load_dword s2, s[0:1], 0xb |
| ; SI-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_and_b32_e32 v0, s2, v0 |
| ; SI-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 |
| ; SI-NEXT: s_mov_b64 s[2:3], 0 |
| ; SI-NEXT: .LBB0_1: ; %ENDIF |
| ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; SI-NEXT: s_and_b64 s[4:5], exec, vcc |
| ; SI-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3] |
| ; SI-NEXT: s_andn2_b64 exec, exec, s[2:3] |
| ; SI-NEXT: s_cbranch_execnz .LBB0_1 |
| ; SI-NEXT: ; %bb.2: ; %ENDLOOP |
| ; SI-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_mov_b32_e32 v0, 0 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; FLAT-LABEL: break_inserted_outside_of_loop: |
| ; FLAT: ; %bb.0: ; %main_body |
| ; FLAT-NEXT: s_load_dword s2, s[0:1], 0x2c |
| ; FLAT-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0 |
| ; FLAT-NEXT: s_waitcnt lgkmcnt(0) |
| ; FLAT-NEXT: v_and_b32_e32 v0, s2, v0 |
| ; FLAT-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; FLAT-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 |
| ; FLAT-NEXT: s_mov_b64 s[2:3], 0 |
| ; FLAT-NEXT: .LBB0_1: ; %ENDIF |
| ; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; FLAT-NEXT: s_and_b64 s[4:5], exec, vcc |
| ; FLAT-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3] |
| ; FLAT-NEXT: s_andn2_b64 exec, exec, s[2:3] |
| ; FLAT-NEXT: s_cbranch_execnz .LBB0_1 |
| ; FLAT-NEXT: ; %bb.2: ; %ENDLOOP |
| ; FLAT-NEXT: s_or_b64 exec, exec, s[2:3] |
| ; FLAT-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 |
| ; FLAT-NEXT: s_mov_b32 s3, 0xf000 |
| ; FLAT-NEXT: s_mov_b32 s2, -1 |
| ; FLAT-NEXT: v_mov_b32_e32 v0, 0 |
| ; FLAT-NEXT: s_waitcnt lgkmcnt(0) |
| ; FLAT-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| ; FLAT-NEXT: s_endpgm |
| main_body: |
; The loop condition is built here, ahead of the loop: bit 0 of (%a & %tid).
| %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 |
| %0 = and i32 %a, %tid |
| %1 = trunc i32 %0 to i1 |
| br label %ENDIF |
| |
; Loop exit block (listed before the loop block in the IR): the only store in
; this kernel writes 0 to %out.
| ENDLOOP: |
| store i32 0, ptr addrspace(1) %out |
| ret void |
| |
; Single-block loop. %1 is defined in main_body and never changes here.
| ENDIF: |
| br i1 %1, label %ENDLOOP, label %ENDIF |
| } |
| |
; Kernel whose loop-exit condition %2 is a phi defined in %endif, outside the
; single-block loop that consumes it. The phi merges constant false (from %if)
; with "%b == 0" (from %else); which arm runs depends on whether the mbcnt.lo
; result is zero.
; Parameter: %b - i32 compared against 0 in the %else arm.
; The expected output for both run lines zero-initialises s[4:5], overwrites
; it only in the %else block (s_cmp_eq_u32 / s_cselect_b64 / s_and_b64 with
; exec), and then uses s[4:5] as the loop's break condition.
| define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) { |
| ; SI-LABEL: phi_cond_outside_loop: |
| ; SI: ; %bb.0: ; %entry |
| ; SI-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0 |
| ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 |
| ; SI-NEXT: s_mov_b64 s[2:3], 0 |
| ; SI-NEXT: s_mov_b64 s[4:5], 0 |
| ; SI-NEXT: s_and_saveexec_b64 s[6:7], vcc |
| ; SI-NEXT: s_cbranch_execz .LBB1_2 |
| ; SI-NEXT: ; %bb.1: ; %else |
| ; SI-NEXT: s_load_dword s0, s[0:1], 0x9 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: s_cmp_eq_u32 s0, 0 |
| ; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 |
| ; SI-NEXT: s_and_b64 s[4:5], s[0:1], exec |
| ; SI-NEXT: .LBB1_2: ; %endif |
| ; SI-NEXT: s_or_b64 exec, exec, s[6:7] |
| ; SI-NEXT: .LBB1_3: ; %loop |
| ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; SI-NEXT: s_and_b64 s[0:1], exec, s[4:5] |
| ; SI-NEXT: s_or_b64 s[2:3], s[0:1], s[2:3] |
| ; SI-NEXT: s_andn2_b64 exec, exec, s[2:3] |
| ; SI-NEXT: s_cbranch_execnz .LBB1_3 |
| ; SI-NEXT: ; %bb.4: ; %exit |
| ; SI-NEXT: s_endpgm |
| ; |
| ; FLAT-LABEL: phi_cond_outside_loop: |
| ; FLAT: ; %bb.0: ; %entry |
| ; FLAT-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0 |
| ; FLAT-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 |
| ; FLAT-NEXT: s_mov_b64 s[2:3], 0 |
| ; FLAT-NEXT: s_mov_b64 s[4:5], 0 |
| ; FLAT-NEXT: s_and_saveexec_b64 s[6:7], vcc |
| ; FLAT-NEXT: s_cbranch_execz .LBB1_2 |
| ; FLAT-NEXT: ; %bb.1: ; %else |
| ; FLAT-NEXT: s_load_dword s0, s[0:1], 0x24 |
| ; FLAT-NEXT: s_waitcnt lgkmcnt(0) |
| ; FLAT-NEXT: s_cmp_eq_u32 s0, 0 |
| ; FLAT-NEXT: s_cselect_b64 s[0:1], -1, 0 |
| ; FLAT-NEXT: s_and_b64 s[4:5], s[0:1], exec |
| ; FLAT-NEXT: .LBB1_2: ; %endif |
| ; FLAT-NEXT: s_or_b64 exec, exec, s[6:7] |
| ; FLAT-NEXT: .LBB1_3: ; %loop |
| ; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; FLAT-NEXT: s_and_b64 s[0:1], exec, s[4:5] |
| ; FLAT-NEXT: s_or_b64 s[2:3], s[0:1], s[2:3] |
| ; FLAT-NEXT: s_andn2_b64 exec, exec, s[2:3] |
| ; FLAT-NEXT: s_cbranch_execnz .LBB1_3 |
| ; FLAT-NEXT: ; %bb.4: ; %exit |
| ; FLAT-NEXT: s_endpgm |
| entry: |
; Select the arm from the mbcnt.lo result: zero goes to %if, non-zero to %else.
| %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 |
| %0 = icmp eq i32 %tid , 0 |
| br i1 %0, label %if, label %else |
| |
; Empty arm: contributes the constant 0 (false) to the phi in %endif.
| if: |
| br label %endif |
| |
; This arm contributes "%b == 0" to the phi in %endif.
| else: |
| %1 = icmp eq i32 %b, 0 |
| br label %endif |
| |
; The loop condition is materialised here, before the loop is entered.
| endif: |
| %2 = phi i1 [0, %if], [%1, %else] |
| br label %loop |
| |
; Single-block loop: exits when %2 is true, otherwise branches back to itself.
| loop: |
| br i1 %2, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
; Kernel whose entry block is a switch on %x where every successor (both case
; targets and the default) ends in "unreachable". The only block containing
; "ret void", sw.epilog, is not the target of any branch in this function.
; Parameters: %g and %l are not referenced by the body; %x is the switch value.
; The assertions for both run lines only pin the function label and the
; "%bb.0 ... %centry" comment line; no instruction lines are checked.
| define amdgpu_kernel void @switch_unreachable(ptr addrspace(1) %g, ptr addrspace(3) %l, i32 %x) nounwind { |
| ; SI-LABEL: switch_unreachable: |
| ; SI: ; %bb.0: ; %centry |
| ; |
| ; FLAT-LABEL: switch_unreachable: |
| ; FLAT: ; %bb.0: ; %centry |
| centry: |
; Cases 0 and 60 share the %sw.bb target; everything else goes to %sw.default.
| switch i32 %x, label %sw.default [ |
| i32 0, label %sw.bb |
| i32 60, label %sw.bb |
| ] |
| |
| sw.bb: |
| unreachable |
| |
| sw.default: |
| unreachable |
| |
; No predecessors: nothing in this function branches to sw.epilog.
| sw.epilog: |
| ret void |
| } |
| |
| declare float @llvm.fabs.f32(float) nounwind readnone |
| |
; Control-flow-heavy kernel: an outer loop (while.cond.outer) that reloads
; %tmp, an inner loop (while.cond) with several exits, a second region rooted
; at for.cond, and a block (self.loop) that branches only to itself.
; Parameters: %c0, %c1 and %c3 feed the integer compares; %arg is used only in
; if.end.2; %c2, %x and %y are not referenced by the body.
; The load and the volatile store both use an undef global pointer.
; The expected output for both run lines hoists the three loop-invariant
; conditions into s[2:3], s[4:5] and s[6:7] in the entry block, contains
; compiler-created blocks (%Flow, %Flow6, %loop.exit.guard, %loop.exit.guard4)
; and ends in %DummyReturnBlock.
; NOTE(review): the name suggests this is a regression test for an assertion
; failure involving loop "land" info - confirm against the original commit.
| define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind { |
| ; SI-LABEL: loop_land_info_assert: |
| ; SI: ; %bb.0: ; %entry |
| ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 |
| ; SI-NEXT: s_load_dword s8, s[0:1], 0x0 |
| ; SI-NEXT: v_bfrev_b32_e32 v0, 44 |
| ; SI-NEXT: s_mov_b32 s11, 0xf000 |
| ; SI-NEXT: s_mov_b32 s10, -1 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: s_cmp_lt_i32 s2, 1 |
| ; SI-NEXT: s_cselect_b64 s[4:5], -1, 0 |
| ; SI-NEXT: s_cmp_lt_i32 s3, 4 |
| ; SI-NEXT: s_cselect_b64 s[6:7], -1, 0 |
| ; SI-NEXT: s_cmp_gt_i32 s3, 3 |
| ; SI-NEXT: s_cselect_b64 s[2:3], -1, 0 |
| ; SI-NEXT: s_and_b64 s[4:5], s[4:5], s[2:3] |
| ; SI-NEXT: v_cmp_lt_f32_e64 s[8:9], |s8|, v0 |
| ; SI-NEXT: s_and_b64 s[2:3], exec, s[6:7] |
| ; SI-NEXT: s_and_b64 s[4:5], exec, s[4:5] |
| ; SI-NEXT: s_and_b64 s[6:7], exec, s[8:9] |
| ; SI-NEXT: v_mov_b32_e32 v0, 3 |
| ; SI-NEXT: s_branch .LBB3_3 |
| ; SI-NEXT: .LBB3_1: ; in Loop: Header=BB3_3 Depth=1 |
| ; SI-NEXT: s_mov_b64 s[8:9], 0 |
| ; SI-NEXT: .LBB3_2: ; %Flow |
| ; SI-NEXT: ; in Loop: Header=BB3_3 Depth=1 |
| ; SI-NEXT: s_and_b64 vcc, exec, s[14:15] |
| ; SI-NEXT: s_cbranch_vccnz .LBB3_8 |
| ; SI-NEXT: .LBB3_3: ; %while.cond |
| ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; SI-NEXT: s_mov_b64 s[12:13], -1 |
| ; SI-NEXT: s_mov_b64 s[8:9], -1 |
| ; SI-NEXT: s_mov_b64 s[14:15], -1 |
| ; SI-NEXT: s_mov_b64 vcc, s[2:3] |
| ; SI-NEXT: s_cbranch_vccz .LBB3_2 |
| ; SI-NEXT: ; %bb.4: ; %convex.exit |
| ; SI-NEXT: ; in Loop: Header=BB3_3 Depth=1 |
| ; SI-NEXT: s_mov_b64 vcc, s[4:5] |
| ; SI-NEXT: s_cbranch_vccz .LBB3_1 |
| ; SI-NEXT: ; %bb.5: ; %if.end |
| ; SI-NEXT: ; in Loop: Header=BB3_3 Depth=1 |
| ; SI-NEXT: s_mov_b64 vcc, s[6:7] |
| ; SI-NEXT: s_cbranch_vccz .LBB3_7 |
| ; SI-NEXT: ; %bb.6: ; %if.else |
| ; SI-NEXT: ; in Loop: Header=BB3_3 Depth=1 |
| ; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0 |
| ; SI-NEXT: s_waitcnt vmcnt(0) |
| ; SI-NEXT: s_mov_b64 s[14:15], 0 |
| ; SI-NEXT: .LBB3_7: ; %Flow6 |
| ; SI-NEXT: ; in Loop: Header=BB3_3 Depth=1 |
| ; SI-NEXT: s_mov_b64 s[12:13], 0 |
| ; SI-NEXT: ; implicit-def: $sgpr8_sgpr9 |
| ; SI-NEXT: s_branch .LBB3_2 |
| ; SI-NEXT: .LBB3_8: ; %loop.exit.guard4 |
| ; SI-NEXT: ; in Loop: Header=BB3_3 Depth=1 |
| ; SI-NEXT: s_and_b64 vcc, exec, s[12:13] |
| ; SI-NEXT: s_cbranch_vccz .LBB3_3 |
| ; SI-NEXT: ; %bb.9: ; %loop.exit.guard |
| ; SI-NEXT: s_and_b64 vcc, exec, s[8:9] |
| ; SI-NEXT: s_cbranch_vccz .LBB3_13 |
| ; SI-NEXT: ; %bb.10: ; %for.cond.preheader |
| ; SI-NEXT: s_load_dword s0, s[0:1], 0xc |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: s_cmpk_lt_i32 s0, 0x3e8 |
| ; SI-NEXT: s_cbranch_scc0 .LBB3_13 |
| ; SI-NEXT: ; %bb.11: ; %for.body |
| ; SI-NEXT: s_and_b64 vcc, exec, 0 |
| ; SI-NEXT: .LBB3_12: ; %self.loop |
| ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; SI-NEXT: s_mov_b64 vcc, vcc |
| ; SI-NEXT: s_cbranch_vccz .LBB3_12 |
| ; SI-NEXT: .LBB3_13: ; %DummyReturnBlock |
| ; SI-NEXT: s_endpgm |
| ; |
| ; FLAT-LABEL: loop_land_info_assert: |
| ; FLAT: ; %bb.0: ; %entry |
| ; FLAT-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| ; FLAT-NEXT: s_load_dword s8, s[0:1], 0x0 |
| ; FLAT-NEXT: v_bfrev_b32_e32 v0, 44 |
| ; FLAT-NEXT: s_mov_b32 s11, 0xf000 |
| ; FLAT-NEXT: s_mov_b32 s10, -1 |
| ; FLAT-NEXT: s_waitcnt lgkmcnt(0) |
| ; FLAT-NEXT: s_cmp_lt_i32 s2, 1 |
| ; FLAT-NEXT: s_cselect_b64 s[4:5], -1, 0 |
| ; FLAT-NEXT: s_cmp_lt_i32 s3, 4 |
| ; FLAT-NEXT: s_cselect_b64 s[6:7], -1, 0 |
| ; FLAT-NEXT: s_cmp_gt_i32 s3, 3 |
| ; FLAT-NEXT: s_cselect_b64 s[2:3], -1, 0 |
| ; FLAT-NEXT: s_and_b64 s[4:5], s[4:5], s[2:3] |
| ; FLAT-NEXT: v_cmp_lt_f32_e64 s[8:9], |s8|, v0 |
| ; FLAT-NEXT: s_and_b64 s[2:3], exec, s[6:7] |
| ; FLAT-NEXT: s_and_b64 s[4:5], exec, s[4:5] |
| ; FLAT-NEXT: s_and_b64 s[6:7], exec, s[8:9] |
| ; FLAT-NEXT: v_mov_b32_e32 v0, 3 |
| ; FLAT-NEXT: s_branch .LBB3_3 |
| ; FLAT-NEXT: .LBB3_1: ; in Loop: Header=BB3_3 Depth=1 |
| ; FLAT-NEXT: s_mov_b64 s[8:9], 0 |
| ; FLAT-NEXT: .LBB3_2: ; %Flow |
| ; FLAT-NEXT: ; in Loop: Header=BB3_3 Depth=1 |
| ; FLAT-NEXT: s_and_b64 vcc, exec, s[14:15] |
| ; FLAT-NEXT: s_cbranch_vccnz .LBB3_8 |
| ; FLAT-NEXT: .LBB3_3: ; %while.cond |
| ; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; FLAT-NEXT: s_mov_b64 s[12:13], -1 |
| ; FLAT-NEXT: s_mov_b64 s[8:9], -1 |
| ; FLAT-NEXT: s_mov_b64 s[14:15], -1 |
| ; FLAT-NEXT: s_mov_b64 vcc, s[2:3] |
| ; FLAT-NEXT: s_cbranch_vccz .LBB3_2 |
| ; FLAT-NEXT: ; %bb.4: ; %convex.exit |
| ; FLAT-NEXT: ; in Loop: Header=BB3_3 Depth=1 |
| ; FLAT-NEXT: s_mov_b64 vcc, s[4:5] |
| ; FLAT-NEXT: s_cbranch_vccz .LBB3_1 |
| ; FLAT-NEXT: ; %bb.5: ; %if.end |
| ; FLAT-NEXT: ; in Loop: Header=BB3_3 Depth=1 |
| ; FLAT-NEXT: s_mov_b64 vcc, s[6:7] |
| ; FLAT-NEXT: s_cbranch_vccz .LBB3_7 |
| ; FLAT-NEXT: ; %bb.6: ; %if.else |
| ; FLAT-NEXT: ; in Loop: Header=BB3_3 Depth=1 |
| ; FLAT-NEXT: buffer_store_dword v0, off, s[8:11], 0 |
| ; FLAT-NEXT: s_waitcnt vmcnt(0) |
| ; FLAT-NEXT: s_mov_b64 s[14:15], 0 |
| ; FLAT-NEXT: .LBB3_7: ; %Flow6 |
| ; FLAT-NEXT: ; in Loop: Header=BB3_3 Depth=1 |
| ; FLAT-NEXT: s_mov_b64 s[12:13], 0 |
| ; FLAT-NEXT: ; implicit-def: $sgpr8_sgpr9 |
| ; FLAT-NEXT: s_branch .LBB3_2 |
| ; FLAT-NEXT: .LBB3_8: ; %loop.exit.guard4 |
| ; FLAT-NEXT: ; in Loop: Header=BB3_3 Depth=1 |
| ; FLAT-NEXT: s_and_b64 vcc, exec, s[12:13] |
| ; FLAT-NEXT: s_cbranch_vccz .LBB3_3 |
| ; FLAT-NEXT: ; %bb.9: ; %loop.exit.guard |
| ; FLAT-NEXT: s_and_b64 vcc, exec, s[8:9] |
| ; FLAT-NEXT: s_cbranch_vccz .LBB3_13 |
| ; FLAT-NEXT: ; %bb.10: ; %for.cond.preheader |
| ; FLAT-NEXT: s_load_dword s0, s[0:1], 0x30 |
| ; FLAT-NEXT: s_waitcnt lgkmcnt(0) |
| ; FLAT-NEXT: s_cmpk_lt_i32 s0, 0x3e8 |
| ; FLAT-NEXT: s_cbranch_scc0 .LBB3_13 |
| ; FLAT-NEXT: ; %bb.11: ; %for.body |
| ; FLAT-NEXT: s_and_b64 vcc, exec, 0 |
| ; FLAT-NEXT: .LBB3_12: ; %self.loop |
| ; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; FLAT-NEXT: s_mov_b64 vcc, vcc |
| ; FLAT-NEXT: s_cbranch_vccz .LBB3_12 |
| ; FLAT-NEXT: .LBB3_13: ; %DummyReturnBlock |
| ; FLAT-NEXT: s_endpgm |
| entry: |
; %cmp depends only on a kernel argument, so it is invariant across all loops.
| %cmp = icmp sgt i32 %c0, 0 |
| br label %while.cond.outer |
| |
; Outer loop header: re-executes the load (through an undef pointer) every
; time control comes back from if.end.
| while.cond.outer: |
| %tmp = load float, ptr addrspace(1) undef |
| br label %while.cond |
| |
; Inner loop header: %c1 < 4 continues into the loop body, otherwise control
; leaves for the for.cond region.
| while.cond: |
| %cmp1 = icmp slt i32 %c1, 4 |
| br i1 %cmp1, label %convex.exit, label %for.cond |
| |
; Reached only when %cmp1 is true (sole predecessor is while.cond's true edge).
| convex.exit: |
| %or = or i1 %cmp, %cmp1 |
| br i1 %or, label %return, label %if.end |
| |
; 0x3E80000000000000 is 2^-23 written in LLVM's hexadecimal double notation.
; True continues the inner loop via if.else; false restarts the outer loop.
| if.end: |
| %tmp3 = call float @llvm.fabs.f32(float %tmp) nounwind readnone |
| %cmp2 = fcmp olt float %tmp3, 0x3E80000000000000 |
| br i1 %cmp2, label %if.else, label %while.cond.outer |
| |
; Volatile store of the constant 3 through an undef pointer, then back to the
; inner loop header.
| if.else: |
| store volatile i32 3, ptr addrspace(1) undef, align 4 |
| br label %while.cond |
| |
| for.cond: |
| %cmp3 = icmp slt i32 %c3, 1000 |
| br i1 %cmp3, label %for.body, label %return |
| |
; Branches on %cmp3 again. for.body is entered only from for.cond's true edge,
; so %cmp3 is already true here and the if.end.2 edge is never taken at run time.
| for.body: |
| br i1 %cmp3, label %self.loop, label %if.end.2 |
| |
; Sole predecessor is for.body's false edge; this is the only user of %arg.
| if.end.2: |
| %or.cond2 = or i1 %cmp3, %arg |
| br i1 %or.cond2, label %return, label %for.cond |
| |
; Branches unconditionally to itself; there is no exit edge from this block.
| self.loop: |
| br label %self.loop |
| |
| return: |
| ret void |
| } |
| |
| declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0 |
| |
| attributes #0 = { nounwind readnone } |