| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck %s |
| |
| ; Make sure that the 16-bit constant 0x3c00 isn't folded as 0 into |
| ; v_bfi_b32. |
| ; Computes select(%arg1 == 0, 1.0, copysign(1.0, %arg)) as a half, places it in |
| ; element 0 of an otherwise-zero <2 x half>, and returns the vector bits as i32. |
| ; The expected output below keeps 0x3c00 in v2.l (written by v_mov_b16) and |
| ; passes v2 to v_bfi_b32 as a register operand rather than an inline constant. |
| define i32 @mov16_bfi_fold_regression(half %arg, i32 %arg1) { |
| ; CHECK-LABEL: mov16_bfi_fold_regression: |
| ; CHECK: ; %bb.0: ; %bb |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: v_mov_b16_e32 v2.l, 0x3c00 |
| ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 |
| ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) |
| ; CHECK-NEXT: v_bfi_b32 v0, 0x7fff, v2, v0 |
| ; CHECK-NEXT: v_mov_b16_e32 v0.h, 0 |
| ; CHECK-NEXT: v_cndmask_b16 v0.l, 0x3c00, v0.l, vcc_lo |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| bb: |
| ; True when the integer argument is zero. |
| %cmp = icmp eq i32 %arg1, 0 |
| ; 0xH3C00 is half 1.0; the result is 1.0 carrying the sign of %arg. |
| %call = call half @llvm.copysign.f16(half 0xH3C00, half %arg) |
| ; A zero %arg1 selects plain 1.0; any other value selects the copysign result. |
| %select = select i1 %cmp, half 0xH3C00, half %call |
| ; Element 0 receives the selected half; element 1 keeps the zero initializer. |
| %insertelement = insertelement <2 x half> zeroinitializer, half %select, i64 0 |
| ; Reinterpret the 32 bits of the vector as the i32 return value. |
| %bitcast = bitcast <2 x half> %insertelement to i32 |
| ret i32 %bitcast |
| } |
| |
| ; Declaration of the half-precision copysign intrinsic called by the test above. |
| declare half @llvm.copysign.f16(half, half) #0 |
| |
| ; Attribute group #0, referenced by the intrinsic declaration. |
| attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } |