| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-SDAG %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-GISEL %s |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s |
| |
; fadd of %first and fneg(%second) on <4 x half>.
; Expected on both targets: no separate negate instruction; each of the two
; v_pk_add_f16 carries neg_lo:[0,1] neg_hi:[0,1] on its second source operand.
| define <4 x half> @fadd_v4f16_neg(<4 x half> %first, <4 x half> %second) { |
| ; GFX950-LABEL: fadd_v4f16_neg: |
| ; GFX950: ; %bb.0: |
| ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-NEXT: v_pk_add_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX950-NEXT: v_pk_add_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX950-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-LABEL: fadd_v4f16_neg: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: v_pk_add_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX1250-NEXT: v_pk_add_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX1250-NEXT: s_set_pc_i64 s[30:31] |
| %neg = fneg <4 x half> %second |
| %add = fadd <4 x half> %first, %neg |
| ret <4 x half> %add |
| } |
| |
; fadd of %first and fneg(%second) on <8 x half>.
; Expected on both targets: four v_pk_add_f16, each with neg_lo:[0,1] neg_hi:[0,1]
; on the second source operand, and no separate negate instruction.
| define <8 x half> @fadd_v8f16_neg(<8 x half> %first, <8 x half> %second) { |
| ; GFX950-LABEL: fadd_v8f16_neg: |
| ; GFX950: ; %bb.0: |
| ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-NEXT: v_pk_add_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX950-NEXT: v_pk_add_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX950-NEXT: v_pk_add_f16 v2, v2, v6 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX950-NEXT: v_pk_add_f16 v3, v3, v7 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX950-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-LABEL: fadd_v8f16_neg: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: v_pk_add_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX1250-NEXT: v_pk_add_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX1250-NEXT: v_pk_add_f16 v2, v2, v6 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX1250-NEXT: v_pk_add_f16 v3, v3, v7 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX1250-NEXT: s_set_pc_i64 s[30:31] |
| %neg = fneg <8 x half> %second |
| %add = fadd <8 x half> %first, %neg |
| ret <8 x half> %add |
| } |
| |
; Plain fsub on <4 x half> (no explicit fneg in the IR).
; Expected on both targets: two v_pk_add_f16 whose second source operand is negated
; through neg_lo:[0,1] neg_hi:[0,1].
| define <4 x half> @fsub_v4f16(<4 x half> %first, <4 x half> %second) { |
| ; GFX950-LABEL: fsub_v4f16: |
| ; GFX950: ; %bb.0: |
| ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-NEXT: v_pk_add_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX950-NEXT: v_pk_add_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX950-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-LABEL: fsub_v4f16: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: v_pk_add_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX1250-NEXT: v_pk_add_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX1250-NEXT: s_set_pc_i64 s[30:31] |
| %sub = fsub <4 x half> %first, %second |
| ret <4 x half> %sub |
| } |
| |
; Plain fsub on <8 x half> (no explicit fneg in the IR).
; Expected on both targets: four v_pk_add_f16 whose second source operand is negated
; through neg_lo:[0,1] neg_hi:[0,1].
| define <8 x half> @fsub_v8f16(<8 x half> %first, <8 x half> %second) { |
| ; GFX950-LABEL: fsub_v8f16: |
| ; GFX950: ; %bb.0: |
| ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-NEXT: v_pk_add_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX950-NEXT: v_pk_add_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX950-NEXT: v_pk_add_f16 v2, v2, v6 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX950-NEXT: v_pk_add_f16 v3, v3, v7 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX950-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-LABEL: fsub_v8f16: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: v_pk_add_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX1250-NEXT: v_pk_add_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX1250-NEXT: v_pk_add_f16 v2, v2, v6 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX1250-NEXT: v_pk_add_f16 v3, v3, v7 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX1250-NEXT: s_set_pc_i64 s[30:31] |
| %sub = fsub <8 x half> %first, %second |
| ret <8 x half> %sub |
| } |
| |
; Standalone fneg on <2 x half> whose result is returned directly.
; Expected on both targets: a single v_xor_b32 with 0x80008000, i.e. bit 15 of each
; 16-bit half of the register is flipped.
| define <2 x half> @fneg_v2f16(<2 x half> %first) { |
| ; GFX950-LABEL: fneg_v2f16: |
| ; GFX950: ; %bb.0: |
| ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 |
| ; GFX950-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-LABEL: fneg_v2f16: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 |
| ; GFX1250-NEXT: s_set_pc_i64 s[30:31] |
| %neg = fneg <2 x half> %first |
| ret <2 x half> %neg |
| } |
| |
; Standalone fneg on <4 x half> whose result is returned directly.
; Expected on both targets: one v_xor_b32 with 0x80008000 per 32-bit register
; (v0 and v1), i.e. bit 15 of each 16-bit half is flipped.
| define <4 x half> @fneg_v4f16(<4 x half> %first) { |
| ; GFX950-LABEL: fneg_v4f16: |
| ; GFX950: ; %bb.0: |
| ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 |
| ; GFX950-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 |
| ; GFX950-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-LABEL: fneg_v4f16: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 |
| ; GFX1250-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 |
| ; GFX1250-NEXT: s_set_pc_i64 s[30:31] |
| %neg = fneg <4 x half> %first |
| ret <4 x half> %neg |
| } |
| |
; fmul of %first and fneg(%second) on <4 x half>.
; Expected on both targets: two v_pk_mul_f16, each with neg_lo:[0,1] neg_hi:[0,1]
; on the second source operand, and no separate negate instruction.
| define <4 x half> @fmul_v4f16_neg(<4 x half> %first, <4 x half> %second) { |
| ; GFX950-LABEL: fmul_v4f16_neg: |
| ; GFX950: ; %bb.0: |
| ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX950-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX950-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-LABEL: fmul_v4f16_neg: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX1250-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX1250-NEXT: s_set_pc_i64 s[30:31] |
| %neg = fneg <4 x half> %second |
| %mul = fmul <4 x half> %first, %neg |
| ret <4 x half> %mul |
| } |
| |
; Standalone llvm.fabs on <2 x half> whose result is returned directly.
; Expected on both targets: a single v_and_b32 with 0x7fff7fff, i.e. bit 15 of each
; 16-bit half of the register is cleared.
| define <2 x half> @fabs_v2f16(<2 x half> %first) { |
| ; GFX950-LABEL: fabs_v2f16: |
| ; GFX950: ; %bb.0: |
| ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 |
| ; GFX950-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-LABEL: fabs_v2f16: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 |
| ; GFX1250-NEXT: s_set_pc_i64 s[30:31] |
| %abs = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> %first) |
| ret <2 x half> %abs |
| } |
| |
; Standalone llvm.fabs on <4 x half> whose result is returned directly.
; Expected on both targets: one v_and_b32 with 0x7fff7fff per 32-bit register
; (v0 and v1), i.e. bit 15 of each 16-bit half is cleared.
| define <4 x half> @fabs_v4f16(<4 x half> %first) { |
| ; GFX950-LABEL: fabs_v4f16: |
| ; GFX950: ; %bb.0: |
| ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 |
| ; GFX950-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1 |
| ; GFX950-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-LABEL: fabs_v4f16: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 |
| ; GFX1250-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1 |
| ; GFX1250-NEXT: s_set_pc_i64 s[30:31] |
| %abs = tail call <4 x half> @llvm.fabs.v4f16(<4 x half> %first) |
| ret <4 x half> %abs |
| } |
| |
; fneg(llvm.fabs(%first)) on <2 x half>.
; Expected on both targets: the two operations collapse into a single v_or_b32 with
; 0x80008000, i.e. bit 15 of each 16-bit half of the register is set.
| define <2 x half> @fneg_fabs_v2f16(<2 x half> %first) { |
| ; GFX950-LABEL: fneg_fabs_v2f16: |
| ; GFX950: ; %bb.0: |
| ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-NEXT: v_or_b32_e32 v0, 0x80008000, v0 |
| ; GFX950-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-LABEL: fneg_fabs_v2f16: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: v_or_b32_e32 v0, 0x80008000, v0 |
| ; GFX1250-NEXT: s_set_pc_i64 s[30:31] |
| %abs = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> %first) |
| %neg = fneg <2 x half> %abs |
| ret <2 x half> %neg |
| } |
| |
; fneg(llvm.fabs(%first)) on <4 x half>.
; Expected on both targets: one v_or_b32 with 0x80008000 per 32-bit register
; (v0 and v1), i.e. bit 15 of each 16-bit half is set; no separate and/xor pair.
| define <4 x half> @fneg_fabs_v4f16(<4 x half> %first) { |
| ; GFX950-LABEL: fneg_fabs_v4f16: |
| ; GFX950: ; %bb.0: |
| ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-NEXT: v_or_b32_e32 v0, 0x80008000, v0 |
| ; GFX950-NEXT: v_or_b32_e32 v1, 0x80008000, v1 |
| ; GFX950-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-LABEL: fneg_fabs_v4f16: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: v_or_b32_e32 v0, 0x80008000, v0 |
| ; GFX1250-NEXT: v_or_b32_e32 v1, 0x80008000, v1 |
| ; GFX1250-NEXT: s_set_pc_i64 s[30:31] |
| %abs = tail call <4 x half> @llvm.fabs.v4f16(<4 x half> %first) |
| %neg = fneg <4 x half> %abs |
| ret <4 x half> %neg |
| } |
| |
; fmul of %first and fneg(%second) on <8 x half>.
; Expected on both targets: four v_pk_mul_f16, each with neg_lo:[0,1] neg_hi:[0,1]
; on the second source operand, and no separate negate instruction.
| define <8 x half> @fmul_v8f16_neg(<8 x half> %first, <8 x half> %second) { |
| ; GFX950-LABEL: fmul_v8f16_neg: |
| ; GFX950: ; %bb.0: |
| ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-NEXT: v_pk_mul_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX950-NEXT: v_pk_mul_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX950-NEXT: v_pk_mul_f16 v2, v2, v6 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX950-NEXT: v_pk_mul_f16 v3, v3, v7 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX950-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-LABEL: fmul_v8f16_neg: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: v_pk_mul_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX1250-NEXT: v_pk_mul_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX1250-NEXT: v_pk_mul_f16 v2, v2, v6 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX1250-NEXT: v_pk_mul_f16 v3, v3, v7 neg_lo:[0,1] neg_hi:[0,1] |
| ; GFX1250-NEXT: s_set_pc_i64 s[30:31] |
| %neg = fneg <8 x half> %second |
| %mul = fmul <8 x half> %first, %neg |
| ret <8 x half> %mul |
| } |
| |
; llvm.fma(%first, fneg(%second), %third) on <4 x half>.
; Expected on both targets: two v_pk_fma_f16, each with neg_lo:[0,1,0] neg_hi:[0,1,0]
; (only the second of the three sources negated), and no separate negate instruction.
| define <4 x half> @fma_v4f16_neg(<4 x half> %first, <4 x half> %second, <4 x half> %third) { |
| ; GFX950-LABEL: fma_v4f16_neg: |
| ; GFX950: ; %bb.0: |
| ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,1,0] neg_hi:[0,1,0] |
| ; GFX950-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,1,0] neg_hi:[0,1,0] |
| ; GFX950-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-LABEL: fma_v4f16_neg: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,1,0] neg_hi:[0,1,0] |
| ; GFX1250-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,1,0] neg_hi:[0,1,0] |
| ; GFX1250-NEXT: s_set_pc_i64 s[30:31] |
| %neg = fneg <4 x half> %second |
| %fma = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %first, <4 x half> %neg, <4 x half> %third) |
| ret <4 x half> %fma |
| } |
| |
; llvm.fma(%first, fneg(%second), %third) on <8 x half>.
; Expected on both targets: four v_pk_fma_f16, each with neg_lo:[0,1,0] neg_hi:[0,1,0]
; (only the second of the three sources negated), and no separate negate instruction.
| define <8 x half> @fma_v8f16_neg(<8 x half> %first, <8 x half> %second, <8 x half> %third) { |
| ; GFX950-LABEL: fma_v8f16_neg: |
| ; GFX950: ; %bb.0: |
| ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-NEXT: v_pk_fma_f16 v0, v0, v4, v8 neg_lo:[0,1,0] neg_hi:[0,1,0] |
| ; GFX950-NEXT: v_pk_fma_f16 v1, v1, v5, v9 neg_lo:[0,1,0] neg_hi:[0,1,0] |
| ; GFX950-NEXT: v_pk_fma_f16 v2, v2, v6, v10 neg_lo:[0,1,0] neg_hi:[0,1,0] |
| ; GFX950-NEXT: v_pk_fma_f16 v3, v3, v7, v11 neg_lo:[0,1,0] neg_hi:[0,1,0] |
| ; GFX950-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-LABEL: fma_v8f16_neg: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: v_pk_fma_f16 v0, v0, v4, v8 neg_lo:[0,1,0] neg_hi:[0,1,0] |
| ; GFX1250-NEXT: v_pk_fma_f16 v1, v1, v5, v9 neg_lo:[0,1,0] neg_hi:[0,1,0] |
| ; GFX1250-NEXT: v_pk_fma_f16 v2, v2, v6, v10 neg_lo:[0,1,0] neg_hi:[0,1,0] |
| ; GFX1250-NEXT: v_pk_fma_f16 v3, v3, v7, v11 neg_lo:[0,1,0] neg_hi:[0,1,0] |
| ; GFX1250-NEXT: s_set_pc_i64 s[30:31] |
| %neg = fneg <8 x half> %second |
| %fma = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %first, <8 x half> %neg, <8 x half> %third) |
| ret <8 x half> %fma |
| } |
| |
; llvm.minnum(%first, fneg(%second)) on <4 x half>.
; Expected: every input register first goes through a packed max with itself; for the
; %second registers that self-max carries neg_lo:[1,1] neg_hi:[1,1], so the negation
; is applied there and the final packed min instructions have no modifiers.
; The self-max is presumably input canonicalization/quieting (to be confirmed).
; gfx950 uses v_pk_max_f16/v_pk_min_f16, gfx1250 uses v_pk_max_num_f16/v_pk_min_num_f16.
; SelectionDAG and GlobalISel are checked separately because their instruction order differs.
| define <4 x half> @fminnum_v4f16_neg(<4 x half> %first, <4 x half> %second) { |
| ; GFX950-SDAG-LABEL: fminnum_v4f16_neg: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v3, v3 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX950-SDAG-NEXT: s_nop 0 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v2 |
| ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-GISEL-LABEL: fminnum_v4f16_neg: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v3, v3 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX950-GISEL-NEXT: s_nop 0 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v1, v1, v2 |
| ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: fminnum_v4f16_neg: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX1250-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v2 |
| ; GFX1250-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v3 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX1250-GISEL-LABEL: fminnum_v4f16_neg: |
| ; GFX1250-GISEL: ; %bb.0: |
| ; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX1250-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v2 |
| ; GFX1250-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v3 |
| ; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] |
| %neg = fneg <4 x half> %second |
| %fmin = tail call <4 x half> @llvm.minnum.v4f16(<4 x half> %first, <4 x half> %neg) |
| ret <4 x half> %fmin |
| } |
| |
; llvm.minnum(%first, fneg(%second)) on <8 x half>.
; Expected: every input register first goes through a packed max with itself; for the
; %second registers that self-max carries neg_lo:[1,1] neg_hi:[1,1], so the negation
; is applied there and the four final packed min instructions have no modifiers.
; The self-max is presumably input canonicalization/quieting (to be confirmed).
; gfx950 uses v_pk_max_f16/v_pk_min_f16, gfx1250 uses v_pk_max_num_f16/v_pk_min_num_f16.
; SelectionDAG and GlobalISel are checked separately because their instruction order differs.
| define <8 x half> @fminnum_v8f16_neg(<8 x half> %first, <8 x half> %second) { |
| ; GFX950-SDAG-LABEL: fminnum_v8f16_neg: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v4 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v5, v5 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v4 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v6, v6 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v2, v2, v4 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v7, v7 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX950-SDAG-NEXT: s_nop 0 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v3, v3, v4 |
| ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-GISEL-LABEL: fminnum_v8f16_neg: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v0, v0, v4 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v5, v5 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v1, v1, v4 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v6, v6 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v2, v2, v4 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v7, v7 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX950-GISEL-NEXT: s_nop 0 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v3, v3, v4 |
| ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: fminnum_v8f16_neg: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 |
| ; GFX1250-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v4 |
| ; GFX1250-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v5 |
| ; GFX1250-SDAG-NEXT: v_pk_min_num_f16 v2, v2, v6 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) |
| ; GFX1250-SDAG-NEXT: v_pk_min_num_f16 v3, v3, v7 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX1250-GISEL-LABEL: fminnum_v8f16_neg: |
| ; GFX1250-GISEL: ; %bb.0: |
| ; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX1250-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v4 |
| ; GFX1250-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v5 |
| ; GFX1250-GISEL-NEXT: v_pk_min_num_f16 v2, v2, v6 |
| ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) |
| ; GFX1250-GISEL-NEXT: v_pk_min_num_f16 v3, v3, v7 |
| ; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] |
| %neg = fneg <8 x half> %second |
| %fmin = tail call <8 x half> @llvm.minnum.v8f16(<8 x half> %first, <8 x half> %neg) |
| ret <8 x half> %fmin |
| } |
| |
; llvm.maxnum(%first, fneg(%second)) on <4 x half>.
; Expected: every input register first goes through a packed max with itself; for the
; %second registers that self-max carries neg_lo:[1,1] neg_hi:[1,1], so the negation
; is applied there and the final packed max instructions have no modifiers.
; The self-max is presumably input canonicalization/quieting (to be confirmed).
; gfx950 uses v_pk_max_f16, gfx1250 uses v_pk_max_num_f16.
; SelectionDAG and GlobalISel are checked separately because their instruction order differs.
| define <4 x half> @fmaxnum_v4f16_neg(<4 x half> %first, <4 x half> %second) { |
| ; GFX950-SDAG-LABEL: fmaxnum_v4f16_neg: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v2 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v3, v3 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX950-SDAG-NEXT: s_nop 0 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v2 |
| ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-GISEL-LABEL: fmaxnum_v4f16_neg: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v2 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v3, v3 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX950-GISEL-NEXT: s_nop 0 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v2 |
| ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: fmaxnum_v4f16_neg: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v2 |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v3 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX1250-GISEL-LABEL: fmaxnum_v4f16_neg: |
| ; GFX1250-GISEL: ; %bb.0: |
| ; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v2 |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v3 |
| ; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] |
| %neg = fneg <4 x half> %second |
| %fmax = tail call <4 x half> @llvm.maxnum.v4f16(<4 x half> %first, <4 x half> %neg) |
| ret <4 x half> %fmax |
| } |
| |
; llvm.maxnum(%first, fneg(%second)) on <8 x half>.
; Expected: every input register first goes through a packed max with itself; for the
; %second registers that self-max carries neg_lo:[1,1] neg_hi:[1,1], so the negation
; is applied there and the four final packed max instructions have no modifiers.
; The self-max is presumably input canonicalization/quieting (to be confirmed).
; gfx950 uses v_pk_max_f16, gfx1250 uses v_pk_max_num_f16.
; SelectionDAG and GlobalISel are checked separately because their instruction order differs.
| define <8 x half> @fmaxnum_v8f16_neg(<8 x half> %first, <8 x half> %second) { |
| ; GFX950-SDAG-LABEL: fmaxnum_v8f16_neg: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v4 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v5, v5 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v4 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v6, v6 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v4 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v7, v7 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX950-SDAG-NEXT: s_nop 0 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v4 |
| ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-GISEL-LABEL: fmaxnum_v8f16_neg: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v4 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v5, v5 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v4 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v6, v6 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v2, v4 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v7, v7 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX950-GISEL-NEXT: s_nop 0 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v3, v3, v4 |
| ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-SDAG-LABEL: fmaxnum_v8f16_neg: |
| ; GFX1250-SDAG: ; %bb.0: |
| ; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v4 |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v5 |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v6 |
| ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) |
| ; GFX1250-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v7 |
| ; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] |
| ; |
| ; GFX1250-GISEL-LABEL: fmaxnum_v8f16_neg: |
| ; GFX1250-GISEL: ; %bb.0: |
| ; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7 neg_lo:[1,1] neg_hi:[1,1] |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v4 |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v5 |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v6 |
| ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) |
| ; GFX1250-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v7 |
| ; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] |
| %neg = fneg <8 x half> %second |
| %fmax = tail call <8 x half> @llvm.maxnum.v8f16(<8 x half> %first, <8 x half> %neg) |
| ret <8 x half> %fmax |
| } |