| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s |
| |
| ; Make sure the branch targets are correct after lowering llvm.amdgcn.if |
| |
| ; Branches on (%value != 0) to %if.true, which performs a volatile load, and |
| ; joins at %endif through a phi whose other incoming value is undef. |
| ; In this variant %if.true is laid out before %endif in the IR. The expected |
| ; output saves exec with s_and_saveexec_b64, skips the load with |
| ; s_cbranch_execz to the %endif label, and ORs the saved mask back into exec. |
| define i32 @divergent_if_swap_brtarget_order0(i32 %value) { |
| ; CHECK-LABEL: divergent_if_swap_brtarget_order0: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 |
| ; CHECK-NEXT: ; implicit-def: $vgpr0 |
| ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_2 |
| ; CHECK-NEXT: ; %bb.1: ; %if.true |
| ; CHECK-NEXT: global_load_dword v0, v[0:1], off glc |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: .LBB0_2: ; %endif |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %c = icmp ne i32 %value, 0 |
| br i1 %c, label %if.true, label %endif |
| |
| if.true: |
| ; NOTE(review): the volatile load from an undef pointer is presumably only a |
| ; placeholder memory access that has to stay inside %if.true - confirm. |
| %val = load volatile i32, i32 addrspace(1)* undef |
| br label %endif |
| |
| endif: |
| %v = phi i32 [ %val, %if.true ], [ undef, %entry ] |
| ret i32 %v |
| } |
| |
| ; Same control flow as @divergent_if_swap_brtarget_order0, except that %endif |
| ; is laid out before %if.true in the IR. The expected output is the same |
| ; instruction sequence, with the branch still targeting the %endif label |
| ; (only the .LBB label numbers differ). |
| define i32 @divergent_if_swap_brtarget_order1(i32 %value) { |
| ; CHECK-LABEL: divergent_if_swap_brtarget_order1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 |
| ; CHECK-NEXT: ; implicit-def: $vgpr0 |
| ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc |
| ; CHECK-NEXT: s_cbranch_execz .LBB1_2 |
| ; CHECK-NEXT: ; %bb.1: ; %if.true |
| ; CHECK-NEXT: global_load_dword v0, v[0:1], off glc |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: .LBB1_2: ; %endif |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %c = icmp ne i32 %value, 0 |
| br i1 %c, label %if.true, label %endif |
| |
| ; The join block is deliberately placed ahead of the conditional block here. |
| endif: |
| %v = phi i32 [ %val, %if.true ], [ undef, %entry ] |
| ret i32 %v |
| |
| if.true: |
| %val = load volatile i32, i32 addrspace(1)* undef |
| br label %endif |
| } |
| |
| ; Make sure an AND with 1 is inserted on the condition when lowering |
| ; llvm.amdgcn.if. The branch condition is an i32 argument truncated to i1, so |
| ; the expected output masks v0 with v_and_b32 before comparing it against 0. |
| define i32 @divergent_if_nonboolean_condition0(i32 %value) { |
| ; CHECK-LABEL: divergent_if_nonboolean_condition0: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 |
| ; CHECK-NEXT: ; implicit-def: $vgpr0 |
| ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc |
| ; CHECK-NEXT: s_cbranch_execz .LBB2_2 |
| ; CHECK-NEXT: ; %bb.1: ; %if.true |
| ; CHECK-NEXT: global_load_dword v0, v[0:1], off glc |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: .LBB2_2: ; %endif |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| ; Only bit 0 of %value forms the condition; the upper bits are discarded. |
| %c = trunc i32 %value to i1 |
| br i1 %c, label %if.true, label %endif |
| |
| if.true: |
| %val = load volatile i32, i32 addrspace(1)* undef |
| br label %endif |
| |
| endif: |
| %v = phi i32 [ %val, %if.true ], [ undef, %entry ] |
| ret i32 %v |
| } |
| |
| ; Make sure an AND with 1 is inserted on the condition when lowering |
| ; llvm.amdgcn.if. Unlike @divergent_if_nonboolean_condition0, the i32 that is |
| ; truncated to i1 comes from a load through %ptr rather than directly from an |
| ; argument; the expected output still masks it with v_and_b32 before the |
| ; compare. |
| define i32 @divergent_if_nonboolean_condition1(i32 addrspace(1)* %ptr) { |
| ; CHECK-LABEL: divergent_if_nonboolean_condition1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dword v0, v[0:1], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 |
| ; CHECK-NEXT: ; implicit-def: $vgpr0 |
| ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc |
| ; CHECK-NEXT: s_cbranch_execz .LBB3_2 |
| ; CHECK-NEXT: ; %bb.1: ; %if.true |
| ; CHECK-NEXT: global_load_dword v0, v[0:1], off glc |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: .LBB3_2: ; %endif |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| ; The condition source is a (non-volatile) load from the pointer argument. |
| %value = load i32, i32 addrspace(1)* %ptr |
| %c = trunc i32 %value to i1 |
| br i1 %c, label %if.true, label %endif |
| |
| if.true: |
| %val = load volatile i32, i32 addrspace(1)* undef |
| br label %endif |
| |
| endif: |
| %v = phi i32 [ %val, %if.true ], [ undef, %entry ] |
| ret i32 %v |
| } |
| |
| ; External addrspace(4) constants read by @constrained_if_register_class. The |
| ; expected output reaches both of them through gotpcrel32 relocations. |
| @external_constant = external addrspace(4) constant i32, align 4 |
| @const.ptr = external addrspace(4) constant float*, align 4 |
| |
| ; Make sure this case compiles. G_ICMP was mis-mapped due to having |
| ; the result register class constrained by llvm.amdgcn.if lowering. |
| ; |
| ; The function takes no arguments: every branch condition is derived from |
| ; values loaded through the two external constants above. The expected output |
| ; uses scalar compares and s_cbranch_scc* / s_cbranch_vccnz branches, with no |
| ; exec-mask save or restore. |
| define void @constrained_if_register_class() { |
| ; CHECK-LABEL: constrained_if_register_class: |
| ; CHECK: ; %bb.0: ; %bb |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_getpc_b64 s[4:5] |
| ; CHECK-NEXT: s_add_u32 s4, s4, external_constant@gotpcrel32@lo+4 |
| ; CHECK-NEXT: s_addc_u32 s5, s5, external_constant@gotpcrel32@hi+12 |
| ; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_load_dword s4, s[4:5], 0x0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_cmp_lg_u32 s4, 0 |
| ; CHECK-NEXT: s_cbranch_scc1 .LBB4_4 |
| ; CHECK-NEXT: ; %bb.1: ; %bb2 |
| ; CHECK-NEXT: s_getpc_b64 s[6:7] |
| ; CHECK-NEXT: s_add_u32 s6, s6, const.ptr@gotpcrel32@lo+4 |
| ; CHECK-NEXT: s_addc_u32 s7, s7, const.ptr@gotpcrel32@hi+12 |
| ; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0 |
| ; CHECK-NEXT: s_mov_b32 s4, -1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dword v0, v0, s[6:7] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, 1.0, v0 |
| ; CHECK-NEXT: s_cbranch_vccnz .LBB4_3 |
| ; CHECK-NEXT: ; %bb.2: ; %bb7 |
| ; CHECK-NEXT: s_mov_b32 s4, 0 |
| ; CHECK-NEXT: .LBB4_3: ; %bb8 |
| ; CHECK-NEXT: s_cmp_lg_u32 s4, 0 |
| ; CHECK-NEXT: s_cbranch_scc0 .LBB4_5 |
| ; CHECK-NEXT: .LBB4_4: ; %bb12 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| ; CHECK-NEXT: .LBB4_5: ; %bb11 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 4.0 |
| ; CHECK-NEXT: buffer_store_dword v0, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| bb: |
| ; Both globals are loaded up front; a non-zero %tmp goes straight to the exit. |
| %tmp = load i32, i32 addrspace(4)* @external_constant |
| %ptr = load float*, float* addrspace(4)* @const.ptr |
| %tmp1 = icmp ne i32 %tmp, 0 |
| br i1 %tmp1, label %bb12, label %bb2 |
| |
| bb2: |
| %tmp4 = load float, float* %ptr, align 4 |
| %tmp5 = fcmp olt float %tmp4, 1.0 |
| ; NOTE(review): the OR with false is a no-op on the value; presumably it is |
| ; kept from the original reproducer to preserve the instruction shape - confirm. |
| %tmp6 = or i1 %tmp5, false |
| br i1 %tmp6, label %bb8, label %bb7 |
| |
| bb7: |
| br label %bb8 |
| |
| bb8: |
| ; %tmp9 is 0 when control arrived through %bb7 and -1 when it came directly |
| ; from %bb2, so %bb11 is reached only via %bb7. |
| %tmp9 = phi i32 [ 0, %bb7 ], [ -1, %bb2 ] |
| %tmp10 = icmp eq i32 %tmp9, 0 |
| br i1 %tmp10, label %bb11, label %bb12 |
| |
| bb11: |
| store float 4.0, float addrspace(5)* undef, align 4 |
| br label %bb12 |
| |
| bb12: |
| ret void |
| } |
| |
| ; Kernel containing a loop (%bb1 -> %bb4 -> %bb1) that can exit to %bb9 from |
| ; either block. %bb1 exits when the incremented counter is not negative; %bb4 |
| ; exits when (workitem.id.x - %arg) is not less than a volatile-loaded value. |
| ; The expected output ORs the per-iteration exit mask into s[0:1], removes |
| ; those lanes from exec with s_andn2_b64, and leaves the loop through |
| ; s_cbranch_execz. |
| define amdgpu_kernel void @break_loop(i32 %arg) { |
| ; CHECK-LABEL: break_loop: |
| ; CHECK: ; %bb.0: ; %bb |
| ; CHECK-NEXT: s_load_dword s2, s[4:5], 0x0 |
| ; CHECK-NEXT: s_mov_b64 s[0:1], 0 |
| ; CHECK-NEXT: ; implicit-def: $vgpr1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: v_subrev_u32_e32 v0, s2, v0 |
| ; CHECK-NEXT: s_branch .LBB5_2 |
| ; CHECK-NEXT: .LBB5_1: ; %Flow |
| ; CHECK-NEXT: ; in Loop: Header=BB5_2 Depth=1 |
| ; CHECK-NEXT: s_and_b64 s[2:3], exec, s[2:3] |
| ; CHECK-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] |
| ; CHECK-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; CHECK-NEXT: s_cbranch_execz .LBB5_4 |
| ; CHECK-NEXT: .LBB5_2: ; %bb1 |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: v_add_u32_e32 v1, 1, v1 |
| ; CHECK-NEXT: v_cmp_le_i32_e32 vcc, 0, v1 |
| ; CHECK-NEXT: s_mov_b64 s[2:3], -1 |
| ; CHECK-NEXT: s_cbranch_vccnz .LBB5_1 |
| ; CHECK-NEXT: ; %bb.3: ; %bb4 |
| ; CHECK-NEXT: ; in Loop: Header=BB5_2 Depth=1 |
| ; CHECK-NEXT: global_load_dword v2, v[0:1], off glc |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: v_cmp_ge_i32_e64 s[2:3], v0, v2 |
| ; CHECK-NEXT: s_branch .LBB5_1 |
| ; CHECK-NEXT: .LBB5_4: ; %bb9 |
| ; CHECK-NEXT: s_endpgm |
| bb: |
| %id = call i32 @llvm.amdgcn.workitem.id.x() |
| %tmp = sub i32 %id, %arg |
| br label %bb1 |
| |
| bb1: |
| ; The counter enters the loop as undef, so only its increment and sign test |
| ; are meaningful. NOTE(review): presumably only the loop shape matters here. |
| %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ] |
| %lsr.iv.next = add i32 %lsr.iv, 1 |
| %cmp0 = icmp slt i32 %lsr.iv.next, 0 |
| br i1 %cmp0, label %bb4, label %bb9 |
| |
| bb4: |
| ; Second exit condition: compares the loop-invariant %tmp with a fresh |
| ; volatile load on every iteration. |
| %load = load volatile i32, i32 addrspace(1)* undef, align 4 |
| %cmp1 = icmp slt i32 %tmp, %load |
| br i1 %cmp1, label %bb1, label %bb9 |
| |
| bb9: |
| ret void |
| } |
| |
| ; Intrinsic called in @break_loop to obtain %id. |
| declare i32 @llvm.amdgcn.workitem.id.x() |