; blob: 27840c4f582d9f9693310839939ec30bc9162617 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck %s
; Make sure that the 16-bit constant 0x3c00 isn't folded as 0 into
; v_bfi_b32.
define i32 @mov16_bfi_fold_regression(half %arg, i32 %arg1) {
; CHECK-LABEL: mov16_bfi_fold_regression:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b16_e32 v2.l, 0x3c00
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; CHECK-NEXT: v_bfi_b32 v0, 0x7fff, v2, v0
; CHECK-NEXT: v_mov_b16_e32 v0.h, 0
; CHECK-NEXT: v_cndmask_b16 v0.l, 0x3c00, v0.l, vcc_lo
; CHECK-NEXT: s_setpc_b64 s[30:31]
bb:
; True when %arg1 is zero.
%cmp = icmp eq i32 %arg1, 0
; 0xH3C00 is half 1.0, so this produces 1.0 carrying the sign of %arg.
; The assertions above expect 0x3c00 to be materialized in v2.l and handed
; to v_bfi_b32 as a register operand (with mask 0x7fff), not folded into it.
%call = call half @llvm.copysign.f16(half 0xH3C00, half %arg)
; Use plain 1.0 when %arg1 is zero, otherwise the copysign result.
%select = select i1 %cmp, half 0xH3C00, half %call
; Put the selected half into element 0 of an otherwise-zero <2 x half> and
; return the vector's bits as an i32; the expected output writes the value
; to v0.l and zero to v0.h.
%insertelement = insertelement <2 x half> zeroinitializer, half %select, i64 0
%bitcast = bitcast <2 x half> %insertelement to i32
ret i32 %bitcast
}
; Declaration of the half-precision copysign intrinsic called by
; @mov16_bfi_fold_regression, followed by the attribute group it refers to.
declare half @llvm.copysign.f16(half, half) #0
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }