; blob: f788803a3c3585c33c2898c0320c919739bc9da7 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s
; fadd of %first with a negated %second on <4 x half>.
; The checks (shared by both selectors on gfx950 and gfx1250) expect two
; v_pk_add_f16 instructions with the negation carried as neg_lo/neg_hi on the
; second source operand, and no separate negate instruction.
define <4 x half> @fadd_v4f16_neg(<4 x half> %first, <4 x half> %second) {
; GFX950-LABEL: fadd_v4f16_neg:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_add_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX950-NEXT: v_pk_add_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: fadd_v4f16_neg:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_pk_add_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX1250-NEXT: v_pk_add_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%neg = fneg <4 x half> %second
%add = fadd <4 x half> %first, %neg
ret <4 x half> %add
}
; <8 x half> variant of the fadd-with-negated-operand test.
; The checks expect four v_pk_add_f16 instructions, each with
; neg_lo/neg_hi set on the second source operand.
define <8 x half> @fadd_v8f16_neg(<8 x half> %first, <8 x half> %second) {
; GFX950-LABEL: fadd_v8f16_neg:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_add_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1]
; GFX950-NEXT: v_pk_add_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX950-NEXT: v_pk_add_f16 v2, v2, v6 neg_lo:[0,1] neg_hi:[0,1]
; GFX950-NEXT: v_pk_add_f16 v3, v3, v7 neg_lo:[0,1] neg_hi:[0,1]
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: fadd_v8f16_neg:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_pk_add_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1]
; GFX1250-NEXT: v_pk_add_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX1250-NEXT: v_pk_add_f16 v2, v2, v6 neg_lo:[0,1] neg_hi:[0,1]
; GFX1250-NEXT: v_pk_add_f16 v3, v3, v7 neg_lo:[0,1] neg_hi:[0,1]
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%neg = fneg <8 x half> %second
%add = fadd <8 x half> %first, %neg
ret <8 x half> %add
}
; Plain fsub on <4 x half>.
; The checks expect the same instruction sequence as fadd_v4f16_neg:
; v_pk_add_f16 with neg_lo/neg_hi on the second source operand.
define <4 x half> @fsub_v4f16(<4 x half> %first, <4 x half> %second) {
; GFX950-LABEL: fsub_v4f16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_add_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX950-NEXT: v_pk_add_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: fsub_v4f16:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_pk_add_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX1250-NEXT: v_pk_add_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%sub = fsub <4 x half> %first, %second
ret <4 x half> %sub
}
; Plain fsub on <8 x half>.
; The checks expect four v_pk_add_f16 instructions with neg_lo/neg_hi on the
; second source operand, matching the output expected for fadd_v8f16_neg.
define <8 x half> @fsub_v8f16(<8 x half> %first, <8 x half> %second) {
; GFX950-LABEL: fsub_v8f16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_add_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1]
; GFX950-NEXT: v_pk_add_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX950-NEXT: v_pk_add_f16 v2, v2, v6 neg_lo:[0,1] neg_hi:[0,1]
; GFX950-NEXT: v_pk_add_f16 v3, v3, v7 neg_lo:[0,1] neg_hi:[0,1]
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: fsub_v8f16:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_pk_add_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1]
; GFX1250-NEXT: v_pk_add_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX1250-NEXT: v_pk_add_f16 v2, v2, v6 neg_lo:[0,1] neg_hi:[0,1]
; GFX1250-NEXT: v_pk_add_f16 v3, v3, v7 neg_lo:[0,1] neg_hi:[0,1]
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%sub = fsub <8 x half> %first, %second
ret <8 x half> %sub
}
; Standalone fneg on <2 x half> with no consuming arithmetic instruction.
; The checks expect a single v_xor_b32 with 0x80008000, i.e. bit 15 of each
; 16-bit half of the register is flipped.
define <2 x half> @fneg_v2f16(<2 x half> %first) {
; GFX950-LABEL: fneg_v2f16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: fneg_v2f16:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%neg = fneg <2 x half> %first
ret <2 x half> %neg
}
; Standalone fneg on <4 x half>.
; The checks expect one v_xor_b32 with 0x80008000 for each of the two
; registers (v0 and v1) holding the vector.
define <4 x half> @fneg_v4f16(<4 x half> %first) {
; GFX950-LABEL: fneg_v4f16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX950-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: fneg_v4f16:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX1250-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%neg = fneg <4 x half> %first
ret <4 x half> %neg
}
; fmul of %first with a negated %second on <4 x half>.
; The checks expect two v_pk_mul_f16 instructions with neg_lo/neg_hi on the
; second source operand and no separate negate instruction.
define <4 x half> @fmul_v4f16_neg(<4 x half> %first, <4 x half> %second) {
; GFX950-LABEL: fmul_v4f16_neg:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX950-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: fmul_v4f16_neg:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX1250-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%neg = fneg <4 x half> %second
%mul = fmul <4 x half> %first, %neg
ret <4 x half> %mul
}
; Standalone llvm.fabs on <2 x half>.
; The checks expect a single v_and_b32 with 0x7fff7fff, i.e. bit 15 of each
; 16-bit half of the register is cleared.
define <2 x half> @fabs_v2f16(<2 x half> %first) {
; GFX950-LABEL: fabs_v2f16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: fabs_v2f16:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%abs = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> %first)
ret <2 x half> %abs
}
; Standalone llvm.fabs on <4 x half>.
; The checks expect one v_and_b32 with 0x7fff7fff for each of the two
; registers (v0 and v1) holding the vector.
define <4 x half> @fabs_v4f16(<4 x half> %first) {
; GFX950-LABEL: fabs_v4f16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
; GFX950-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: fabs_v4f16:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
; GFX1250-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%abs = tail call <4 x half> @llvm.fabs.v4f16(<4 x half> %first)
ret <4 x half> %abs
}
; fneg applied to the result of llvm.fabs on <2 x half>.
; The checks expect the pair to collapse into a single v_or_b32 with
; 0x80008000, i.e. bit 15 of each 16-bit half of the register is set.
define <2 x half> @fneg_fabs_v2f16(<2 x half> %first) {
; GFX950-LABEL: fneg_fabs_v2f16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_or_b32_e32 v0, 0x80008000, v0
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: fneg_fabs_v2f16:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_or_b32_e32 v0, 0x80008000, v0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%abs = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> %first)
%neg = fneg <2 x half> %abs
ret <2 x half> %neg
}
; fneg applied to the result of llvm.fabs on <4 x half>.
; The checks expect one v_or_b32 with 0x80008000 for each of the two
; registers (v0 and v1) holding the vector.
define <4 x half> @fneg_fabs_v4f16(<4 x half> %first) {
; GFX950-LABEL: fneg_fabs_v4f16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_or_b32_e32 v0, 0x80008000, v0
; GFX950-NEXT: v_or_b32_e32 v1, 0x80008000, v1
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: fneg_fabs_v4f16:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_or_b32_e32 v0, 0x80008000, v0
; GFX1250-NEXT: v_or_b32_e32 v1, 0x80008000, v1
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%abs = tail call <4 x half> @llvm.fabs.v4f16(<4 x half> %first)
%neg = fneg <4 x half> %abs
ret <4 x half> %neg
}
; <8 x half> variant of the fmul-with-negated-operand test.
; The checks expect four v_pk_mul_f16 instructions, each with
; neg_lo/neg_hi set on the second source operand.
define <8 x half> @fmul_v8f16_neg(<8 x half> %first, <8 x half> %second) {
; GFX950-LABEL: fmul_v8f16_neg:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_mul_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1]
; GFX950-NEXT: v_pk_mul_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX950-NEXT: v_pk_mul_f16 v2, v2, v6 neg_lo:[0,1] neg_hi:[0,1]
; GFX950-NEXT: v_pk_mul_f16 v3, v3, v7 neg_lo:[0,1] neg_hi:[0,1]
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: fmul_v8f16_neg:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_pk_mul_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1]
; GFX1250-NEXT: v_pk_mul_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX1250-NEXT: v_pk_mul_f16 v2, v2, v6 neg_lo:[0,1] neg_hi:[0,1]
; GFX1250-NEXT: v_pk_mul_f16 v3, v3, v7 neg_lo:[0,1] neg_hi:[0,1]
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%neg = fneg <8 x half> %second
%mul = fmul <8 x half> %first, %neg
ret <8 x half> %mul
}
; llvm.fma on <4 x half> where the second multiplicand (%second) is negated.
; The checks expect two v_pk_fma_f16 instructions with neg_lo/neg_hi set only
; for the middle source operand ([0,1,0]) and no separate negate instruction.
define <4 x half> @fma_v4f16_neg(<4 x half> %first, <4 x half> %second, <4 x half> %third) {
; GFX950-LABEL: fma_v4f16_neg:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX950-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: fma_v4f16_neg:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX1250-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%neg = fneg <4 x half> %second
%fma = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %first, <4 x half> %neg, <4 x half> %third)
ret <4 x half> %fma
}
; <8 x half> variant of the fma test with a negated second multiplicand.
; The checks expect four v_pk_fma_f16 instructions, each with neg_lo/neg_hi
; set only for the middle source operand ([0,1,0]).
define <8 x half> @fma_v8f16_neg(<8 x half> %first, <8 x half> %second, <8 x half> %third) {
; GFX950-LABEL: fma_v8f16_neg:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_pk_fma_f16 v0, v0, v4, v8 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX950-NEXT: v_pk_fma_f16 v1, v1, v5, v9 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX950-NEXT: v_pk_fma_f16 v2, v2, v6, v10 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX950-NEXT: v_pk_fma_f16 v3, v3, v7, v11 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: fma_v8f16_neg:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_pk_fma_f16 v0, v0, v4, v8 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX1250-NEXT: v_pk_fma_f16 v1, v1, v5, v9 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX1250-NEXT: v_pk_fma_f16 v2, v2, v6, v10 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX1250-NEXT: v_pk_fma_f16 v3, v3, v7, v11 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%neg = fneg <8 x half> %second
%fma = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %first, <8 x half> %neg, <8 x half> %third)
ret <8 x half> %fma
}
; llvm.minnum on <4 x half> where %second is negated.
; In the expected output each input register is first passed through a packed
; max of the register with itself, and the negation appears as
; neg_lo:[1,1] neg_hi:[1,1] on that self-max of the %second registers rather
; than on the final packed min.
; NOTE(review): the self-max is presumably input canonicalization required by
; minnum semantics - confirm against the AMDGPU lowering.
; SelectionDAG and GlobalISel emit the same instructions in a different order,
; which is why this function uses the per-selector check prefixes. gfx1250
; uses the v_pk_max_num_f16 / v_pk_min_num_f16 mnemonics.
define <4 x half> @fminnum_v4f16_neg(<4 x half> %first, <4 x half> %second) {
; GFX950-SDAG-LABEL: fminnum_v4f16_neg:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 neg_lo:[1,1] neg_hi:[1,1]
; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v3, v3 neg_lo:[1,1] neg_hi:[1,1]
; GFX950-SDAG-NEXT: s_nop 0
; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v2
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-GISEL-LABEL: fminnum_v4f16_neg:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 neg_lo:[1,1] neg_hi:[1,1]
; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
; GFX950-GISEL-NEXT: v_pk_min_f16 v0, v0, v2
; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v3, v3 neg_lo:[1,1] neg_hi:[1,1]
; GFX950-GISEL-NEXT: s_nop 0
; GFX950-GISEL-NEXT: v_pk_min_f16 v1, v1, v2
; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: fminnum_v4f16_neg:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 neg_lo:[1,1] neg_hi:[1,1]
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 neg_lo:[1,1] neg_hi:[1,1]
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v2
; GFX1250-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v3
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-GISEL-LABEL: fminnum_v4f16_neg:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 neg_lo:[1,1] neg_hi:[1,1]
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 neg_lo:[1,1] neg_hi:[1,1]
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v2
; GFX1250-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v3
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
%neg = fneg <4 x half> %second
%fmin = tail call <4 x half> @llvm.minnum.v4f16(<4 x half> %first, <4 x half> %neg)
ret <4 x half> %fmin
}
; <8 x half> variant of the minnum test with a negated %second.
; In the expected output every input register goes through a packed self-max
; before the final packed min, and the negation appears as
; neg_lo:[1,1] neg_hi:[1,1] on the self-max of the %second registers.
; NOTE(review): the self-max is presumably input canonicalization required by
; minnum semantics - confirm against the AMDGPU lowering.
; SelectionDAG and GlobalISel differ only in instruction order, hence the
; per-selector check prefixes.
define <8 x half> @fminnum_v8f16_neg(<8 x half> %first, <8 x half> %second) {
; GFX950-SDAG-LABEL: fminnum_v8f16_neg:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 neg_lo:[1,1] neg_hi:[1,1]
; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v4
; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v5, v5 neg_lo:[1,1] neg_hi:[1,1]
; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v4
; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v6, v6 neg_lo:[1,1] neg_hi:[1,1]
; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
; GFX950-SDAG-NEXT: v_pk_min_f16 v2, v2, v4
; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v7, v7 neg_lo:[1,1] neg_hi:[1,1]
; GFX950-SDAG-NEXT: s_nop 0
; GFX950-SDAG-NEXT: v_pk_min_f16 v3, v3, v4
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-GISEL-LABEL: fminnum_v8f16_neg:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 neg_lo:[1,1] neg_hi:[1,1]
; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
; GFX950-GISEL-NEXT: v_pk_min_f16 v0, v0, v4
; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v5, v5 neg_lo:[1,1] neg_hi:[1,1]
; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
; GFX950-GISEL-NEXT: v_pk_min_f16 v1, v1, v4
; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v6, v6 neg_lo:[1,1] neg_hi:[1,1]
; GFX950-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
; GFX950-GISEL-NEXT: v_pk_min_f16 v2, v2, v4
; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v7, v7 neg_lo:[1,1] neg_hi:[1,1]
; GFX950-GISEL-NEXT: s_nop 0
; GFX950-GISEL-NEXT: v_pk_min_f16 v3, v3, v4
; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: fminnum_v8f16_neg:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4 neg_lo:[1,1] neg_hi:[1,1]
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5 neg_lo:[1,1] neg_hi:[1,1]
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6 neg_lo:[1,1] neg_hi:[1,1]
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7 neg_lo:[1,1] neg_hi:[1,1]
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
; GFX1250-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v4
; GFX1250-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v5
; GFX1250-SDAG-NEXT: v_pk_min_num_f16 v2, v2, v6
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1250-SDAG-NEXT: v_pk_min_num_f16 v3, v3, v7
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-GISEL-LABEL: fminnum_v8f16_neg:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4 neg_lo:[1,1] neg_hi:[1,1]
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5 neg_lo:[1,1] neg_hi:[1,1]
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6 neg_lo:[1,1] neg_hi:[1,1]
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7 neg_lo:[1,1] neg_hi:[1,1]
; GFX1250-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v4
; GFX1250-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v5
; GFX1250-GISEL-NEXT: v_pk_min_num_f16 v2, v2, v6
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1250-GISEL-NEXT: v_pk_min_num_f16 v3, v3, v7
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
%neg = fneg <8 x half> %second
%fmin = tail call <8 x half> @llvm.minnum.v8f16(<8 x half> %first, <8 x half> %neg)
ret <8 x half> %fmin
}
; llvm.maxnum on <4 x half> where %second is negated.
; In the expected output each input register is first passed through a packed
; self-max, then combined with a final packed max; the negation appears as
; neg_lo:[1,1] neg_hi:[1,1] on the self-max of the %second registers.
; NOTE(review): the self-max is presumably input canonicalization required by
; maxnum semantics - confirm against the AMDGPU lowering.
; SelectionDAG and GlobalISel emit the same instructions in a different order,
; which is why this function uses the per-selector check prefixes. gfx1250
; uses the v_pk_max_num_f16 mnemonic.
define <4 x half> @fmaxnum_v4f16_neg(<4 x half> %first, <4 x half> %second) {
; GFX950-SDAG-LABEL: fmaxnum_v4f16_neg:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 neg_lo:[1,1] neg_hi:[1,1]
; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v3, v3 neg_lo:[1,1] neg_hi:[1,1]
; GFX950-SDAG-NEXT: s_nop 0
; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v2
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-GISEL-LABEL: fmaxnum_v4f16_neg:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 neg_lo:[1,1] neg_hi:[1,1]
; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v2
; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v3, v3 neg_lo:[1,1] neg_hi:[1,1]
; GFX950-GISEL-NEXT: s_nop 0
; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v2
; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: fmaxnum_v4f16_neg:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 neg_lo:[1,1] neg_hi:[1,1]
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 neg_lo:[1,1] neg_hi:[1,1]
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v2
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v3
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-GISEL-LABEL: fmaxnum_v4f16_neg:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 neg_lo:[1,1] neg_hi:[1,1]
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 neg_lo:[1,1] neg_hi:[1,1]
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v2
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v3
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
%neg = fneg <4 x half> %second
%fmax = tail call <4 x half> @llvm.maxnum.v4f16(<4 x half> %first, <4 x half> %neg)
ret <4 x half> %fmax
}
; <8 x half> variant of the maxnum test with a negated %second.
; In the expected output every input register goes through a packed self-max
; before the final packed max, and the negation appears as
; neg_lo:[1,1] neg_hi:[1,1] on the self-max of the %second registers.
; NOTE(review): the self-max is presumably input canonicalization required by
; maxnum semantics - confirm against the AMDGPU lowering.
; SelectionDAG and GlobalISel differ only in instruction order, hence the
; per-selector check prefixes.
define <8 x half> @fmaxnum_v8f16_neg(<8 x half> %first, <8 x half> %second) {
; GFX950-SDAG-LABEL: fmaxnum_v8f16_neg:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 neg_lo:[1,1] neg_hi:[1,1]
; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v4
; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v5, v5 neg_lo:[1,1] neg_hi:[1,1]
; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v4
; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v6, v6 neg_lo:[1,1] neg_hi:[1,1]
; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v4
; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v7, v7 neg_lo:[1,1] neg_hi:[1,1]
; GFX950-SDAG-NEXT: s_nop 0
; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v4
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-GISEL-LABEL: fmaxnum_v8f16_neg:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0
; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 neg_lo:[1,1] neg_hi:[1,1]
; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1
; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v4
; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v5, v5 neg_lo:[1,1] neg_hi:[1,1]
; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v2, v2
; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v4
; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v6, v6 neg_lo:[1,1] neg_hi:[1,1]
; GFX950-GISEL-NEXT: v_pk_max_f16 v3, v3, v3
; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v2, v4
; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v7, v7 neg_lo:[1,1] neg_hi:[1,1]
; GFX950-GISEL-NEXT: s_nop 0
; GFX950-GISEL-NEXT: v_pk_max_f16 v3, v3, v4
; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: fmaxnum_v8f16_neg:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4 neg_lo:[1,1] neg_hi:[1,1]
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5 neg_lo:[1,1] neg_hi:[1,1]
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6 neg_lo:[1,1] neg_hi:[1,1]
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7 neg_lo:[1,1] neg_hi:[1,1]
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v4
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v5
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v6
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v7
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-GISEL-LABEL: fmaxnum_v8f16_neg:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4 neg_lo:[1,1] neg_hi:[1,1]
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5 neg_lo:[1,1] neg_hi:[1,1]
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6 neg_lo:[1,1] neg_hi:[1,1]
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7 neg_lo:[1,1] neg_hi:[1,1]
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v4
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v5
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v6
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v7
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
%neg = fneg <8 x half> %second
%fmax = tail call <8 x half> @llvm.maxnum.v8f16(<8 x half> %first, <8 x half> %neg)
ret <8 x half> %fmax
}