| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 |
| ; Test no legal f16. Should just keep the casts to f32 and use the f32 med3. |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-SDAG %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-GISEL %s |
| |
| ; Test legal f16, no f16 fmed3. Should expand to min/max sequence |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s |
| |
| ; Legal f16 med3. InstCombine ought to shrink the f32 op to f16 so the codegen doesn't really matter for this. |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s |
| |
| |
| ; Intrinsics called by the test functions in this file: the f32 med3 under test |
| ; plus the f32/f16 fabs used to build source-modifier variants. |
| declare float @llvm.amdgcn.fmed3.f32(float, float, float) #0 |
| declare float @llvm.fabs.f32(float) #0 |
| declare half @llvm.fabs.f16(half) #0 |
| |
| ; Base case: all three half operands are fpext'ed to float, passed to |
| ; llvm.amdgcn.fmed3.f32, and the single-use result is fptrunc'ed back to half. |
| ; Per the checks, only GFX8 lowers this to an f16 min/max sequence; GFX7 and |
| ; GFX9 keep v_med3_f32 together with f16<->f32 conversions. |
| define half @fmed3_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v0, v1 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v3, v0 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: fmed3_f32_fpext_f16: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| %arg0.ext = fpext half %arg0 to float |
| %arg1.ext = fpext half %arg1 to float |
| %arg2.ext = fpext half %arg2 to float |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext) |
| %cast = fptrunc float %med3 to half |
| ret half %cast |
| } |
| |
| ; Same fpext -> fmed3.f32 -> fptrunc pattern, but the fmed3 call carries the |
| ; nsz fast-math flag. The checked instruction sequences are the same as for |
| ; the flag-free fmed3_f32_fpext_f16 test. |
| define half @fmed3_f32_fpext_f16_flags(half %arg0, half %arg1, half %arg2) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_flags: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_flags: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_flags: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v0, v1 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v3, v0 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_flags: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: fmed3_f32_fpext_f16_flags: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| %arg0.ext = fpext half %arg0 to float |
| %arg1.ext = fpext half %arg1 to float |
| %arg2.ext = fpext half %arg2 to float |
| %med3 = call nsz float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext) |
| %cast = fptrunc float %med3 to half |
| ret half %cast |
| } |
| |
| ; The f32 med3 result has a second use: it is stored to %ptr as a float in |
| ; addition to being fptrunc'ed to half. Per the checks, every target |
| ; (including GFX8) keeps v_med3_f32 and stores its f32 result. |
| define half @fmed3_f32_fpext_f16_multi_use(half %arg0, half %arg1, half %arg2, ptr addrspace(1) %ptr) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_med3_f32 v1, v0, v1, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v1 |
| ; GFX7-SDAG-NEXT: flat_store_dword v[3:4], v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_med3_f32 v1, v0, v1, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1 |
| ; GFX7-GISEL-NEXT: flat_store_dword v[3:4], v1 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: fmed3_f32_fpext_f16_multi_use: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX8-NEXT: v_med3_f32 v1, v0, v1, v2 |
| ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v1 |
| ; GFX8-NEXT: flat_store_dword v[3:4], v1 |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: fmed3_f32_fpext_f16_multi_use: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX9-NEXT: v_med3_f32 v1, v0, v1, v2 |
| ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v1 |
| ; GFX9-NEXT: global_store_dword v[3:4], v1, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| %arg0.ext = fpext half %arg0 to float |
| %arg1.ext = fpext half %arg1 to float |
| %arg2.ext = fpext half %arg2 to float |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext) |
| store float %med3, ptr addrspace(1) %ptr |
| %cast = fptrunc float %med3 to half |
| ret half %cast |
| } |
| |
| ; Constant in operand 0: the med3 takes the f32 constant 2.0 plus two fpext'ed |
| ; half arguments. In the checks the constant appears as an inline 2.0 operand, |
| ; on the f16 min/max for GFX8 and on v_med3_f32 for GFX7/GFX9. |
| define half @fmed3_f32_fpext_f16_k0(half %arg1, half %arg2) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k0: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_med3_f32 v0, 2.0, v0, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_k0: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_med3_f32 v0, 2.0, v0, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_k0: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v2, 2.0, v0 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, 2.0, v0 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v2, v0 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_k0: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v2, 2.0, v0 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, 2.0, v0 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v2, v1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: fmed3_f32_fpext_f16_k0: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-NEXT: v_med3_f32 v0, 2.0, v0, v1 |
| ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| %arg1.ext = fpext half %arg1 to float |
| %arg2.ext = fpext half %arg2 to float |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float 2.0, float %arg1.ext, float %arg2.ext) |
| %cast = fptrunc float %med3 to half |
| ret half %cast |
| } |
| |
| ; Constant in operand 1: the med3 takes an fpext'ed half, the f32 constant 2.0, |
| ; and another fpext'ed half. The constant stays an inline 2.0 operand in all |
| ; checked outputs. |
| define half @fmed3_f32_fpext_f16_k1(half %arg0, half %arg2) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k1: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, 2.0, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_k1: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, 2.0, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_k1: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v2, 2.0, v0 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, 2.0, v0 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v2, v0 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_k1: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v2, 2.0, v0 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, 2.0, v0 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v2, v1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: fmed3_f32_fpext_f16_k1: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-NEXT: v_med3_f32 v0, v0, 2.0, v1 |
| ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| %arg0.ext = fpext half %arg0 to float |
| %arg2.ext = fpext half %arg2 to float |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float 2.0, float %arg2.ext) |
| %cast = fptrunc float %med3 to half |
| ret half %cast |
| } |
| |
| ; Constant in operand 2: the med3 takes two fpext'ed half arguments followed by |
| ; the f32 constant 2.0. The constant stays an inline 2.0 operand in all checked |
| ; outputs. |
| define half @fmed3_f32_fpext_f16_k2(half %arg0, half %arg1) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k2: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, 2.0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_k2: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, 2.0 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_k2: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v0, v1 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, 2.0, v0 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v2, v0 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_k2: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v2, v0, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, 2.0, v2 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: fmed3_f32_fpext_f16_k2: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-NEXT: v_med3_f32 v0, v0, v1, 2.0 |
| ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| %arg0.ext = fpext half %arg0 to float |
| %arg1.ext = fpext half %arg1 to float |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float 2.0) |
| %cast = fptrunc float %med3 to half |
| ret half %cast |
| } |
| |
| ; Two constant operands (0.0 and 16.0 in positions 0 and 1); only operand 2 is |
| ; an fpext'ed half. In the checks 0.0 is an inline operand, while 16.0 is |
| ; materialized with v_mov_b32 (0x41800000 as f32, 0x4c00 as f16 on GFX8). |
| define half @fmed3_f32_fpext_f16_k0_k1(half %arg2) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k0_k1: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_mov_b32_e32 v1, 0x41800000 |
| ; GFX7-SDAG-NEXT: v_med3_f32 v0, 0, v1, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_k0_k1: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, 0x41800000 |
| ; GFX7-GISEL-NEXT: v_med3_f32 v0, 0, v1, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_k0_k1: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_mov_b32_e32 v1, 0x4c00 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v2, 0, v1 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v1, 0, v1 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v1, v0 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v2, v0 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_k0_k1: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0x4c00 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v2, 0, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, 0, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v2, v0 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v1, v0 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: fmed3_f32_fpext_f16_k0_k1: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, 0x41800000 |
| ; GFX9-NEXT: v_med3_f32 v0, 0, v1, v0 |
| ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| %arg2.ext = fpext half %arg2 to float |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 16.0, float %arg2.ext) |
| %cast = fptrunc float %med3 to half |
| ret half %cast |
| } |
| |
| ; Two constant operands (0.0 in position 0, 2.0 in position 2) around a single |
| ; fpext'ed half in position 1. Both constants appear as inline operands in the |
| ; checked output, so no v_mov_b32 is needed. |
| define half @fmed3_f32_fpext_f16_k0_k2(half %arg1) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k0_k2: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_med3_f32 v0, 0, v0, 2.0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_k0_k2: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_med3_f32 v0, 0, v0, 2.0 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_k0_k2: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v1, 0, v0 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, 0, v0 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, 2.0, v0 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v1, v0 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_k0_k2: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v1, 0, v0 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, 0, v0 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, 2.0, v1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: fmed3_f32_fpext_f16_k0_k2: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-NEXT: v_med3_f32 v0, 0, v0, 2.0 |
| ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| %arg1.ext = fpext half %arg1 to float |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %arg1.ext, float 2.0) |
| %cast = fptrunc float %med3 to half |
| ret half %cast |
| } |
| |
| ; fabs on the f16 side: llvm.fabs.f16 is applied to each half operand before |
| ; the fpext. In the checks the fabs becomes a |v| source modifier, on the f16 |
| ; min/max instructions for GFX8 and on v_cvt_f32_f16 for GFX7/GFX9. |
| define half @fmed3_f32_fpext_f16_fabs(half %arg0, half %arg1, half %arg2) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_fabs: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1| |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v2, |v2| |
| ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_fabs: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, |v0| |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, |v1| |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v2, |v2| |
| ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_fabs: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f16_e64 v3, |v0|, |v1| |
| ; GFX8-SDAG-NEXT: v_min_f16_e64 v0, |v0|, |v1| |
| ; GFX8-SDAG-NEXT: v_max_f16_e64 v0, v0, |v2| |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v3, v0 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_fabs: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_min_f16_e64 v3, |v0|, |v1| |
| ; GFX8-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v1| |
| ; GFX8-GISEL-NEXT: v_max_f16_e64 v1, v3, |v2| |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: fmed3_f32_fpext_f16_fabs: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_cvt_f32_f16_e64 v0, |v0| |
| ; GFX9-NEXT: v_cvt_f32_f16_e64 v1, |v1| |
| ; GFX9-NEXT: v_cvt_f32_f16_e64 v2, |v2| |
| ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| %fabs.arg0 = call half @llvm.fabs.f16(half %arg0) |
| %fabs.arg1 = call half @llvm.fabs.f16(half %arg1) |
| %fabs.arg2 = call half @llvm.fabs.f16(half %arg2) |
| %arg0.ext = fpext half %fabs.arg0 to float |
| %arg1.ext = fpext half %fabs.arg1 to float |
| %arg2.ext = fpext half %fabs.arg2 to float |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext) |
| %cast = fptrunc float %med3 to half |
| ret half %cast |
| } |
| |
| ; fabs on the f32 side: llvm.fabs.f32 is applied after the fpext, directly on |
| ; the med3 operands. Per the checks, every target (including GFX8) keeps |
| ; v_med3_f32 and folds the fabs into |v| source modifiers on it. |
| define half @fmed3_fabs_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_fabs_f32_fpext_f16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_med3_f32 v0, |v0|, |v1|, |v2| |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_fabs_f32_fpext_f16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_med3_f32 v0, |v0|, |v1|, |v2| |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: fmed3_fabs_f32_fpext_f16: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX8-NEXT: v_med3_f32 v0, |v0|, |v1|, |v2| |
| ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: fmed3_fabs_f32_fpext_f16: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX9-NEXT: v_med3_f32 v0, |v0|, |v1|, |v2| |
| ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| %arg0.ext = fpext half %arg0 to float |
| %arg1.ext = fpext half %arg1 to float |
| %arg2.ext = fpext half %arg2 to float |
| %fabs.ext.arg0 = call float @llvm.fabs.f32(float %arg0.ext) |
| %fabs.ext.arg1 = call float @llvm.fabs.f32(float %arg1.ext) |
| %fabs.ext.arg2 = call float @llvm.fabs.f32(float %arg2.ext) |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %fabs.ext.arg0, float %fabs.ext.arg1, float %fabs.ext.arg2) |
| %cast = fptrunc float %med3 to half |
| ret half %cast |
| } |
| |
| ; fneg on the f16 side: each half operand is negated before the fpext. In the |
| ; checks the negation becomes a -v source modifier: on the f16 min/max for |
| ; GFX8, on v_med3_f32 for GFX7-SDAG, and on v_cvt_f32_f16 for GFX7-GISEL/GFX9. |
| define half @fmed3_f32_fpext_f16_fneg(half %arg0, half %arg1, half %arg2) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_fneg: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_med3_f32 v0, -v0, -v1, -v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_fneg: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, -v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v2, -v2 |
| ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_fneg: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f16_e64 v3, -v0, -v1 |
| ; GFX8-SDAG-NEXT: v_min_f16_e64 v0, -v0, -v1 |
| ; GFX8-SDAG-NEXT: v_max_f16_e64 v0, v0, -v2 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v3, v0 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_fneg: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_min_f16_e64 v3, -v0, -v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e64 v1, v3, -v2 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: fmed3_f32_fpext_f16_fneg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_cvt_f32_f16_e64 v0, -v0 |
| ; GFX9-NEXT: v_cvt_f32_f16_e64 v1, -v1 |
| ; GFX9-NEXT: v_cvt_f32_f16_e64 v2, -v2 |
| ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| %fneg.arg0 = fneg half %arg0 |
| %fneg.arg1 = fneg half %arg1 |
| %fneg.arg2 = fneg half %arg2 |
| %arg0.ext = fpext half %fneg.arg0 to float |
| %arg1.ext = fpext half %fneg.arg1 to float |
| %arg2.ext = fpext half %fneg.arg2 to float |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext) |
| %cast = fptrunc float %med3 to half |
| ret half %cast |
| } |
| |
| ; fneg on the f32 side: each operand is negated after the fpext, directly |
| ; before the med3. Per the checks, every target (including GFX8) keeps |
| ; v_med3_f32 and folds the negation into -v source modifiers on it. |
| define half @fmed3_fneg_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_fneg_f32_fpext_f16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_med3_f32 v0, -v0, -v1, -v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_fneg_f32_fpext_f16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_med3_f32 v0, -v0, -v1, -v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: fmed3_fneg_f32_fpext_f16: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX8-NEXT: v_med3_f32 v0, -v0, -v1, -v2 |
| ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: fmed3_fneg_f32_fpext_f16: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX9-NEXT: v_med3_f32 v0, -v0, -v1, -v2 |
| ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| %arg0.ext = fpext half %arg0 to float |
| %arg1.ext = fpext half %arg1 to float |
| %arg2.ext = fpext half %arg2 to float |
| %fneg.ext.arg0 = fneg float %arg0.ext |
| %fneg.ext.arg1 = fneg float %arg1.ext |
| %fneg.ext.arg2 = fneg float %arg2.ext |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %fneg.ext.arg0, float %fneg.ext.arg1, float %fneg.ext.arg2) |
| %cast = fptrunc float %med3 to half |
| ret half %cast |
| } |
| |
| ; fneg(fabs(x)) on the f16 side: each half operand goes through llvm.fabs.f16 |
| ; and then fneg before the fpext. In the checks GFX8 folds both into -|v| |
| ; modifiers on the f16 min/max; GFX7-GISEL and GFX9 put -|v| on v_cvt_f32_f16; |
| ; GFX7-SDAG splits them (|v| on the conversion, -v on v_med3_f32). |
| define half @fmed3_f32_fpext_f16_fneg_fabs(half %arg0, half %arg1, half %arg2) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_fneg_fabs: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1| |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v2, |v2| |
| ; GFX7-SDAG-NEXT: v_med3_f32 v0, -v0, -v1, -v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_fneg_fabs: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -|v0| |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, -|v1| |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v2, -|v2| |
| ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_fneg_fabs: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f16_e64 v3, -|v0|, -|v1| |
| ; GFX8-SDAG-NEXT: v_min_f16_e64 v0, -|v0|, -|v1| |
| ; GFX8-SDAG-NEXT: v_max_f16_e64 v0, v0, -|v2| |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v3, v0 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_fneg_fabs: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_min_f16_e64 v3, -|v0|, -|v1| |
| ; GFX8-GISEL-NEXT: v_max_f16_e64 v0, -|v0|, -|v1| |
| ; GFX8-GISEL-NEXT: v_max_f16_e64 v1, v3, -|v2| |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: fmed3_f32_fpext_f16_fneg_fabs: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_cvt_f32_f16_e64 v0, -|v0| |
| ; GFX9-NEXT: v_cvt_f32_f16_e64 v1, -|v1| |
| ; GFX9-NEXT: v_cvt_f32_f16_e64 v2, -|v2| |
| ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| %fabs.arg0 = call half @llvm.fabs.f16(half %arg0) |
| %fabs.arg1 = call half @llvm.fabs.f16(half %arg1) |
| %fabs.arg2 = call half @llvm.fabs.f16(half %arg2) |
| %fneg.fabs.arg0 = fneg half %fabs.arg0 |
| %fneg.fabs.arg1 = fneg half %fabs.arg1 |
| %fneg.fabs.arg2 = fneg half %fabs.arg2 |
| %arg0.ext = fpext half %fneg.fabs.arg0 to float |
| %arg1.ext = fpext half %fneg.fabs.arg1 to float |
| %arg2.ext = fpext half %fneg.fabs.arg2 to float |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext) |
| %cast = fptrunc float %med3 to half |
| ret half %cast |
| } |
| |
| ; fneg(fabs(x)) on the f32 side: llvm.fabs.f32 and fneg are applied after the |
| ; fpext, directly before the med3. Per the checks, every target (including |
| ; GFX8) keeps v_med3_f32 and folds both into -|v| source modifiers on it. |
| define half @fmed3_fneg_fabs_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_fneg_fabs_f32_fpext_f16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_med3_f32 v0, -|v0|, -|v1|, -|v2| |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_fneg_fabs_f32_fpext_f16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_med3_f32 v0, -|v0|, -|v1|, -|v2| |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: fmed3_fneg_fabs_f32_fpext_f16: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX8-NEXT: v_med3_f32 v0, -|v0|, -|v1|, -|v2| |
| ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: fmed3_fneg_fabs_f32_fpext_f16: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX9-NEXT: v_med3_f32 v0, -|v0|, -|v1|, -|v2| |
| ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| %arg0.ext = fpext half %arg0 to float |
| %arg1.ext = fpext half %arg1 to float |
| %arg2.ext = fpext half %arg2 to float |
| %fabs.ext.arg0 = call float @llvm.fabs.f32(float %arg0.ext) |
| %fabs.ext.arg1 = call float @llvm.fabs.f32(float %arg1.ext) |
| %fabs.ext.arg2 = call float @llvm.fabs.f32(float %arg2.ext) |
| %fneg.fabs.ext.arg0 = fneg float %fabs.ext.arg0 |
| %fneg.fabs.ext.arg1 = fneg float %fabs.ext.arg1 |
| %fneg.fabs.ext.arg2 = fneg float %fabs.ext.arg2 |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %fneg.fabs.ext.arg0, float %fneg.fabs.ext.arg1, float %fneg.fabs.ext.arg2) |
| %cast = fptrunc float %med3 to half |
| ret half %cast |
| } |
| ; -------------------------------------------------------------------------------- |
| ; Negative tests |
| ; -------------------------------------------------------------------------------- |
| |
| ; All three operands are fpext'd from half, but the fmed3 result is fptrunc'd to |
| ; bfloat instead of half, so the source and destination types do not match. |
| define bfloat @fmed3_f32_fpext_f16_fptrunc_bf16(half %arg0, half %arg1, half %arg2) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX8-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX8-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX9-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX9-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %arg0.ext = fpext half %arg0 to float |
| %arg1.ext = fpext half %arg1 to float |
| %arg2.ext = fpext half %arg2 to float |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext) |
| %cast = fptrunc float %med3 to bfloat |
| ret bfloat %cast |
| } |
| |
| ; The fpext of %arg0 has a second use: the extended f32 value is stored to %ptr |
| ; in addition to being the first fmed3 operand. |
| define half @fmed3_f32_fpext_f16_multi_use_0(half %arg0, half %arg1, half %arg2, ptr addrspace(1) %ptr) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use_0: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_med3_f32 v1, v0, v1, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: flat_store_dword v[3:4], v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_mov_b32_e32 v0, v1 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use_0: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v2 |
| ; GFX7-GISEL-NEXT: flat_store_dword v[3:4], v5 |
| ; GFX7-GISEL-NEXT: v_med3_f32 v0, v5, v0, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use_0: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v0 |
| ; GFX8-SDAG-NEXT: flat_store_dword v[3:4], v5 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v0, v1 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v3, v0 |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use_0: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v0 |
| ; GFX8-GISEL-NEXT: flat_store_dword v[3:4], v5 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: fmed3_f32_fpext_f16_multi_use_0: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v5, v0 |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v1 |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v2 |
| ; GFX9-NEXT: global_store_dword v[3:4], v5, off |
| ; GFX9-NEXT: v_med3_f32 v0, v5, v0, v1 |
| ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| %arg0.ext = fpext half %arg0 to float |
| store float %arg0.ext, ptr addrspace(1) %ptr |
| %arg1.ext = fpext half %arg1 to float |
| %arg2.ext = fpext half %arg2 to float |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext) |
| %cast = fptrunc float %med3 to half |
| ret half %cast |
| } |
| |
| ; The fpext of %arg1 has a second use: the extended f32 value is stored to %ptr |
| ; in addition to being the second fmed3 operand. |
| define half @fmed3_f32_fpext_f16_multi_use_1(half %arg0, half %arg1, half %arg2, ptr addrspace(1) %ptr) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use_1: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: flat_store_dword v[3:4], v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use_1: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: flat_store_dword v[3:4], v1 |
| ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use_1: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v1 |
| ; GFX8-SDAG-NEXT: flat_store_dword v[3:4], v5 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v0, v1 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v3, v0 |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use_1: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v1 |
| ; GFX8-GISEL-NEXT: flat_store_dword v[3:4], v5 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: fmed3_f32_fpext_f16_multi_use_1: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX9-NEXT: global_store_dword v[3:4], v1, off |
| ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| %arg0.ext = fpext half %arg0 to float |
| %arg1.ext = fpext half %arg1 to float |
| store float %arg1.ext, ptr addrspace(1) %ptr |
| %arg2.ext = fpext half %arg2 to float |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext) |
| %cast = fptrunc float %med3 to half |
| ret half %cast |
| } |
| |
| ; The fpext of %arg2 has a second use: the extended f32 value is stored to %ptr |
| ; in addition to being the third fmed3 operand. |
| define half @fmed3_f32_fpext_f16_multi_use_2(half %arg0, half %arg1, half %arg2, ptr addrspace(1) %ptr) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use_2: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: flat_store_dword v[3:4], v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use_2: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: flat_store_dword v[3:4], v2 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use_2: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v2 |
| ; GFX8-SDAG-NEXT: flat_store_dword v[3:4], v5 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v0, v1 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v3, v0 |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use_2: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v2 |
| ; GFX8-GISEL-NEXT: flat_store_dword v[3:4], v5 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: fmed3_f32_fpext_f16_multi_use_2: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-NEXT: global_store_dword v[3:4], v2, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| %arg0.ext = fpext half %arg0 to float |
| %arg1.ext = fpext half %arg1 to float |
| %arg2.ext = fpext half %arg2 to float |
| store float %arg2.ext, ptr addrspace(1) %ptr |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext) |
| %cast = fptrunc float %med3 to half |
| ret half %cast |
| } |
| |
| ; All three fmed3 operands are fpext'd from bfloat rather than half; the result is |
| ; still fptrunc'd to half. |
| define half @fmed3_f32_fpext_bf16(bfloat %arg0, bfloat %arg1, bfloat %arg2) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_f32_fpext_bf16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_f32_fpext_bf16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: fmed3_f32_fpext_bf16: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX8-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: fmed3_f32_fpext_bf16: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: fmed3_f32_fpext_bf16: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: fmed3_f32_fpext_bf16: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX9-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %arg0.ext = fpext bfloat %arg0 to float |
| %arg1.ext = fpext bfloat %arg1 to float |
| %arg2.ext = fpext bfloat %arg2 to float |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext) |
| %cast = fptrunc float %med3 to half |
| ret half %cast |
| } |
| |
| ; Mixed source types: operand 0 is fpext'd from bfloat, operands 1 and 2 from half. |
| ; The fmed3 result is fptrunc'd to half. |
| define half @fmed3_f32_fpext_f16_bf16_0(bfloat %arg0, half %arg1, half %arg2) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_0: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_0: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_0: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX8-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX8-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_0: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_0: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX9-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_0: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX9-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %arg0.ext = fpext bfloat %arg0 to float |
| %arg1.ext = fpext half %arg1 to float |
| %arg2.ext = fpext half %arg2 to float |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext) |
| %cast = fptrunc float %med3 to half |
| ret half %cast |
| } |
| |
| ; Mixed source types: operand 1 is fpext'd from bfloat, operands 0 and 2 from half. |
| ; The fmed3 result is fptrunc'd to half. |
| define half @fmed3_f32_fpext_f16_bf16_1(half %arg0, bfloat %arg1, half %arg2) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_1: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_1: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_1: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX8-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX8-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_1: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_1: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX9-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_1: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX9-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %arg0.ext = fpext half %arg0 to float |
| %arg1.ext = fpext bfloat %arg1 to float |
| %arg2.ext = fpext half %arg2 to float |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext) |
| %cast = fptrunc float %med3 to half |
| ret half %cast |
| } |
| |
| ; Mixed source types: operand 2 is fpext'd from bfloat, operands 0 and 1 from half. |
| ; The fmed3 result is fptrunc'd to half. |
| define half @fmed3_f32_fpext_f16_bf16_2(half %arg0, half %arg1, bfloat %arg2) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_2: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_2: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_2: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX8-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX8-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_2: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_2: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_2: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX9-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %arg0.ext = fpext half %arg0 to float |
| %arg1.ext = fpext half %arg1 to float |
| %arg2.ext = fpext bfloat %arg2 to float |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext) |
| %cast = fptrunc float %med3 to half |
| ret half %cast |
| } |
| |
| ; Operand 0 is the f32 constant 0x41f0000000000000 (2^32, printed as 0x4f800000 |
| ; in the checks), which is larger than the maximum finite half value (65504). |
| ; Operands 1 and 2 are fpext'd from half. |
| define half @fmed3_f32_fpext_f16_unrepresentable_k0(half %arg1, half %arg2) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, 0x4f800000 |
| ; GFX7-SDAG-NEXT: v_med3_f32 v0, s4, v0, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 |
| ; GFX7-GISEL-NEXT: v_med3_f32 v0, v2, v0, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX8-SDAG-NEXT: s_mov_b32 s4, 0x4f800000 |
| ; GFX8-SDAG-NEXT: v_med3_f32 v0, s4, v0, v1 |
| ; GFX8-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 |
| ; GFX8-GISEL-NEXT: v_med3_f32 v0, v2, v0, v1 |
| ; GFX8-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x4f800000 |
| ; GFX9-SDAG-NEXT: v_med3_f32 v0, s4, v0, v1 |
| ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 |
| ; GFX9-GISEL-NEXT: v_med3_f32 v0, v2, v0, v1 |
| ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %arg1.ext = fpext half %arg1 to float |
| %arg2.ext = fpext half %arg2 to float |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x41f0000000000000, float %arg1.ext, float %arg2.ext) |
| %cast = fptrunc float %med3 to half |
| ret half %cast |
| } |
| |
| ; Operand 1 is the f32 constant 0x41f0000000000000 (2^32, printed as 0x4f800000 |
| ; in the checks), which is larger than the maximum finite half value (65504). |
| ; Operands 0 and 2 are fpext'd from half. |
| define half @fmed3_f32_fpext_f16_unrepresentable_k1(half %arg0, half %arg2) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, 0x4f800000 |
| ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, s4, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 |
| ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v2, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX8-SDAG-NEXT: s_mov_b32 s4, 0x4f800000 |
| ; GFX8-SDAG-NEXT: v_med3_f32 v0, v0, s4, v1 |
| ; GFX8-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 |
| ; GFX8-GISEL-NEXT: v_med3_f32 v0, v0, v2, v1 |
| ; GFX8-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x4f800000 |
| ; GFX9-SDAG-NEXT: v_med3_f32 v0, v0, s4, v1 |
| ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 |
| ; GFX9-GISEL-NEXT: v_med3_f32 v0, v0, v2, v1 |
| ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %arg0.ext = fpext half %arg0 to float |
| %arg2.ext = fpext half %arg2 to float |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float 0x41f0000000000000, float %arg2.ext) |
| %cast = fptrunc float %med3 to half |
| ret half %cast |
| } |
| |
| ; Operand 2 is the f32 constant 0x41f0000000000000 (2^32, printed as 0x4f800000 |
| ; in the checks), which is larger than the maximum finite half value (65504). |
| ; Operands 0 and 1 are fpext'd from half. |
| define half @fmed3_f32_fpext_f16_unrepresentable_k2(half %arg0, half %arg1) #1 { |
| ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, 0x4f800000 |
| ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, s4 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 |
| ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX8-SDAG-NEXT: s_mov_b32 s4, 0x4f800000 |
| ; GFX8-SDAG-NEXT: v_med3_f32 v0, v0, v1, s4 |
| ; GFX8-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 |
| ; GFX8-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX8-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x4f800000 |
| ; GFX9-SDAG-NEXT: v_med3_f32 v0, v0, v1, s4 |
| ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 |
| ; GFX9-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 |
| ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %arg0.ext = fpext half %arg0 to float |
| %arg1.ext = fpext half %arg1 to float |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float 0x41f0000000000000) |
| %cast = fptrunc float %med3 to half |
| ret half %cast |
| } |
| |
| ; #0 is attached to the intrinsic declarations at the top of the file; #1 is |
| ; attached to the test functions and is the same set without memory(none). |
| attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } |
| attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn } |
| ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| ; GCN: {{.*}} |
| ; GFX7: {{.*}} |