| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-SDAG %s |
| ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-GISEL %s |
| |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s |
| ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s |
| |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-SDAG,GFX900-SDAG %s |
| ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-GISEL,GFX900-GISEL %s |
| |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950,GFX9-SDAG,GFX950-SDAG %s |
| ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950,GFX9-GISEL,GFX950-GISEL %s |
| |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s |
| ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s |
| |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16,GFX11-SDAG,GFX11-TRUE16-SDAG %s |
| ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16,GFX11-GISEL,GFX11-TRUE16-GISEL %s |
| |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16,GFX11-SDAG,GFX11-FAKE16-SDAG %s |
| ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16,GFX11-GISEL,GFX11-FAKE16-GISEL %s |
| |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16,GFX12-SDAG,GFX12-TRUE16-SDAG %s |
| ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16,GFX12-GISEL,GFX12-TRUE16-GISEL %s |
| |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16,GFX12-SDAG,GFX12-FAKE16-SDAG %s |
| ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16,GFX12-GISEL,GFX12-FAKE16-GISEL %s |
| |
| ; Basic half-precision case: llvm.minimumnum.f16 on two variable operands with |
| ; no fast-math flags. In the expected output for gfx8 and newer, each input is |
| ; first passed through a self-max (v_max x, x) and the results are fed to the |
| ; min. The gfx7 checks contain no f16 min; they use f16<->f32 conversions around |
| ; a v_min_f32 instead. |
| define half @v_minimumnum_f16(half %x, half %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_f16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_f16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_f16: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_f16: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: v_minimumnum_f16: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 |
| ; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: v_minimumnum_f16: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX9-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 |
| ; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_f16: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 |
| ; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX10-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_f16: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX10-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 |
| ; GFX10-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-TRUE16-SDAG-LABEL: v_minimumnum_f16: |
| ; GFX11-TRUE16-SDAG: ; %bb.0: |
| ; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l |
| ; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l |
| ; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-TRUE16-SDAG-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h |
| ; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-TRUE16-GISEL-LABEL: v_minimumnum_f16: |
| ; GFX11-TRUE16-GISEL: ; %bb.0: |
| ; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l |
| ; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l |
| ; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-TRUE16-GISEL-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h |
| ; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-FAKE16-SDAG-LABEL: v_minimumnum_f16: |
| ; GFX11-FAKE16-SDAG: ; %bb.0: |
| ; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 |
| ; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-FAKE16-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-FAKE16-GISEL-LABEL: v_minimumnum_f16: |
| ; GFX11-FAKE16-GISEL: ; %bb.0: |
| ; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 |
| ; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16: |
| ; GFX12-TRUE16-SDAG: ; %bb.0: |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l |
| ; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l |
| ; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h |
| ; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-TRUE16-GISEL-LABEL: v_minimumnum_f16: |
| ; GFX12-TRUE16-GISEL: ; %bb.0: |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l |
| ; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l |
| ; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h |
| ; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-FAKE16-SDAG-LABEL: v_minimumnum_f16: |
| ; GFX12-FAKE16-SDAG: ; %bb.0: |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v1, v1, v1 |
| ; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1 |
| ; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-FAKE16-GISEL-LABEL: v_minimumnum_f16: |
| ; GFX12-FAKE16-GISEL: ; %bb.0: |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 |
| ; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v1, v1, v1 |
| ; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1 |
| ; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call half @llvm.minimumnum.f16(half %x, half %y) |
| ret half %result |
| } |
| |
| ; Same half-precision operation as the plain f16 case, but the call carries the |
| ; nnan fast-math flag. The expected output for gfx8 and newer is a single min |
| ; instruction with no self-max on either input, and both instruction selectors |
| ; share one check block per target. The gfx7 checks still go through f16<->f32 |
| ; conversions. |
| define half @v_minimumnum_f16_nnan(half %x, half %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_f16_nnan: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_f16_nnan: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v_minimumnum_f16_nnan: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_f16_nnan: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_f16_nnan: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-TRUE16-LABEL: v_minimumnum_f16_nnan: |
| ; GFX11-TRUE16: ; %bb.0: |
| ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v1.l |
| ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-FAKE16-LABEL: v_minimumnum_f16_nnan: |
| ; GFX11-FAKE16: ; %bb.0: |
| ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-TRUE16-LABEL: v_minimumnum_f16_nnan: |
| ; GFX12-TRUE16: ; %bb.0: |
| ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v1.l |
| ; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-FAKE16-LABEL: v_minimumnum_f16_nnan: |
| ; GFX12-FAKE16: ; %bb.0: |
| ; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 |
| ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| %result = call nnan half @llvm.minimumnum.f16(half %x, half %y) |
| ret half %result |
| } |
| |
| ; Half-precision case where the second operand is the constant 1.0. In the |
| ; expected output for gfx8 and newer only %x gets the self-max, and 1.0 appears |
| ; directly as an operand of the min. On gfx7 the two selectors differ: the |
| ; GlobalISel checks convert the constant with v_cvt_f32_f16 into a register, |
| ; the SelectionDAG checks use 1.0 directly in v_min_f32. |
| define half @v_minimumnum_f16_1.0(half %x) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_f16_1.0: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, 1.0, v0 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_f16_1.0: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, 1.0 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v_minimumnum_f16_1.0: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX8-NEXT: v_min_f16_e32 v0, 1.0, v0 |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_f16_1.0: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX9-NEXT: v_min_f16_e32 v0, 1.0, v0 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_f16_1.0: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX10-NEXT: v_min_f16_e32 v0, 1.0, v0 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-TRUE16-LABEL: v_minimumnum_f16_1.0: |
| ; GFX11-TRUE16: ; %bb.0: |
| ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l |
| ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, 1.0, v0.l |
| ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-FAKE16-LABEL: v_minimumnum_f16_1.0: |
| ; GFX11-FAKE16: ; %bb.0: |
| ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, 1.0, v0 |
| ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-TRUE16-LABEL: v_minimumnum_f16_1.0: |
| ; GFX12-TRUE16: ; %bb.0: |
| ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l |
| ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, 1.0, v0.l |
| ; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-FAKE16-LABEL: v_minimumnum_f16_1.0: |
| ; GFX12-FAKE16: ; %bb.0: |
| ; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0 |
| ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, 1.0, v0 |
| ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| %result = call half @llvm.minimumnum.f16(half %x, half 1.0) |
| ret half %result |
| } |
| |
| ; Single-precision case: llvm.minimumnum.f32 on two variable operands with no |
| ; fast-math flags. In the expected output each input is pre-processed before |
| ; the min: gfx7 and gfx8 multiply it by 1.0 (v_mul_f32), gfx9 and newer use a |
| ; self-max. On gfx11 and gfx12 the two self-max operations are expected as one |
| ; dual-issue (v_dual_*) instruction. |
| define float @v_minimumnum_f32(float %x, float %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_f32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_f32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_f32: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_f32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: v_minimumnum_f32: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 |
| ; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: v_minimumnum_f32: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 |
| ; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_f32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 |
| ; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_f32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 |
| ; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: v_minimumnum_f32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0 |
| ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: v_minimumnum_f32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 |
| ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_f32: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_f32: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call float @llvm.minimumnum.f32(float %x, float %y) |
| ret float %result |
| } |
| |
| ; Single-precision case with the nnan fast-math flag on the call. The expected |
| ; output on every checked target is a single min instruction with no v_mul or |
| ; self-max on the inputs, identical for both instruction selectors. |
| define float @v_minimumnum_f32_nnan(float %x, float %y) { |
| ; GFX7-LABEL: v_minimumnum_f32_nnan: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v_minimumnum_f32_nnan: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_f32_nnan: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_f32_nnan: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_f32_nnan: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_minimumnum_f32_nnan: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = call nnan float @llvm.minimumnum.f32(float %x, float %y) |
| ret float %result |
| } |
| |
| ; Double-precision case: llvm.minimumnum.f64 on two variable operands with no |
| ; fast-math flags. On every checked target the expected output applies a |
| ; self-max to each 64-bit register pair and then a min of the two results. |
| ; gfx12 is checked with the v_max_num/v_min_num mnemonics. |
| define double @v_minimumnum_f64(double %x, double %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_f64: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_f64: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_f64: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_f64: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: v_minimumnum_f64: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX9-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: v_minimumnum_f64: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX9-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX9-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_f64: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX10-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_f64: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX10-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: v_minimumnum_f64: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: v_minimumnum_f64: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_f64: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_f64: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call double @llvm.minimumnum.f64(double %x, double %y) |
| ret double %result |
| } |
| |
| ; Double-precision case with the nnan fast-math flag on the call. The expected |
| ; output on every checked target is a single 64-bit min with no self-max on the |
| ; inputs, identical for both instruction selectors. |
| define double @v_minimumnum_f64_nnan(double %x, double %y) { |
| ; GFX7-LABEL: v_minimumnum_f64_nnan: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v_minimumnum_f64_nnan: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_f64_nnan: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_f64_nnan: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_f64_nnan: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_minimumnum_f64_nnan: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = call nnan double @llvm.minimumnum.f64(double %x, double %y) |
| ret double %result |
| } |
| |
| ; Single-precision case where the second operand is the constant 1.0. In the |
| ; expected output only %x is pre-processed (v_mul_f32 by 1.0 on gfx7 and gfx8, |
| ; a self-max on gfx9 and newer), and 1.0 appears directly as an operand of the |
| ; min. Both instruction selectors share one check block per target. |
| define float @v_minimumnum_f32_1.0(float %x) { |
| ; GFX7-LABEL: v_minimumnum_f32_1.0: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX7-NEXT: v_min_f32_e32 v0, 1.0, v0 |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v_minimumnum_f32_1.0: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX8-NEXT: v_min_f32_e32 v0, 1.0, v0 |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_f32_1.0: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX9-NEXT: v_min_f32_e32 v0, 1.0, v0 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_f32_1.0: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_f32_1.0: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_min_f32_e32 v0, 1.0, v0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_minimumnum_f32_1.0: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0 |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-NEXT: v_min_num_f32_e32 v0, 1.0, v0 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = call float @llvm.minimumnum.f32(float %x, float 1.0) |
| ret float %result |
| } |
| |
| ; Single-precision case where %y is passed through llvm.canonicalize.f32 before |
| ; being used as the second operand of llvm.minimumnum.f32. The expected output |
| ; has exactly one v_mul-by-1.0 (gfx7, gfx8) or self-max (gfx9 and newer) per |
| ; input register followed by the min, i.e. the same instruction count as the |
| ; plain two-operand f32 case. |
| ; NOTE(review): presumably the point is that the already-canonicalized operand |
| ; gets no second quieting instruction - confirm against the lowering code. |
| define float @v_minimumnum_f32_rhs_not_snan(float %x, float %y) { |
| ; GFX7-LABEL: v_minimumnum_f32_rhs_not_snan: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v_minimumnum_f32_rhs_not_snan: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_f32_rhs_not_snan: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 |
| ; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_f32_rhs_not_snan: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_max_f32_e32 v1, v1, v1 |
| ; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX10-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_f32_rhs_not_snan: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_minimumnum_f32_rhs_not_snan: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %canon.y = call float @llvm.canonicalize.f32(float %y) |
| %result = call float @llvm.minimumnum.f32(float %x, float %canon.y) |
| ret float %result |
| } |
| |
| ; minimumnum on f32 where, as the test name indicates, only the left-hand |
| ; operand is known not to be a signaling NaN: %x is passed through |
| ; llvm.canonicalize.f32 before the call, while %y is used unmodified. |
| ; The expected code for every checked target is two per-operand instructions |
| ; (v_mul by 1.0, or a max of a register with itself) followed by one min. |
| ; Presumably one of the per-operand instructions is the canonicalize itself |
| ; and the other quiets %y; confirm against the lowering code. |
| define float @v_minimumnum_f32_lhs_not_snan(float %x, float %y) { |
| ; GFX7-LABEL: v_minimumnum_f32_lhs_not_snan: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v_minimumnum_f32_lhs_not_snan: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_f32_lhs_not_snan: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 |
| ; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_f32_lhs_not_snan: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX10-NEXT: v_max_f32_e32 v1, v1, v1 |
| ; GFX10-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_f32_lhs_not_snan: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_minimumnum_f32_lhs_not_snan: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %canon.x = call float @llvm.canonicalize.f32(float %x) |
| %result = call float @llvm.minimumnum.f32(float %canon.x, float %y) |
| ret float %result |
| } |
| |
| ; minimumnum on f32 where, as the test name indicates, both operands are |
| ; known not to be signaling NaNs: %x and %y each go through |
| ; llvm.canonicalize.f32 before being passed to the intrinsic. |
| ; The expected code for every checked target is two per-operand instructions |
| ; (v_mul by 1.0, or a max of a register with itself) followed by one min, |
| ; i.e. no further per-operand instruction beyond one for each canonicalize. |
| define float @v_minimumnum_f32_both_operands_not_snan(float %x, float %y) { |
| ; GFX7-LABEL: v_minimumnum_f32_both_operands_not_snan: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v_minimumnum_f32_both_operands_not_snan: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX8-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_f32_both_operands_not_snan: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 |
| ; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_f32_both_operands_not_snan: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX10-NEXT: v_max_f32_e32 v1, v1, v1 |
| ; GFX10-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_f32_both_operands_not_snan: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_minimumnum_f32_both_operands_not_snan: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %canon.x = call float @llvm.canonicalize.f32(float %x) |
| %canon.y = call float @llvm.canonicalize.f32(float %y) |
| %result = call float @llvm.minimumnum.f32(float %canon.x, float %canon.y) |
| ret float %result |
| } |
| |
| ; minimumnum on f64 with a variable left operand %x and the constant 1.0 as |
| ; the right operand. The expected code takes a max of the v[0:1] pair with |
| ; itself and then a min against the inline constant 1.0; no separate |
| ; instruction is expected for the constant operand. The gfx12 checks use the |
| ; v_max_num/v_min_num mnemonics and place the constant as the first source. |
| define double @v_minimumnum_f64_1.0(double %x) { |
| ; GFX7-LABEL: v_minimumnum_f64_1.0: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], 1.0 |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v_minimumnum_f64_1.0: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], 1.0 |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_f64_1.0: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], 1.0 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_f64_1.0: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], 1.0 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_f64_1.0: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], 1.0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_minimumnum_f64_1.0: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-NEXT: v_min_num_f64_e32 v[0:1], 1.0, v[0:1] |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = call double @llvm.minimumnum.f64(double %x, double 1.0) |
| ret double %result |
| } |
| |
| ; minimumnum on f16 with mixed operand placement: %x is an ordinary argument |
| ; (read from v0 in the checks) and %y is marked inreg (read from a scalar |
| ; register, s16 or s0 depending on the target, in the checks). |
| ; On gfx7 the expected code performs the min in f32 around f16/f32 |
| ; conversions; the other targets use f16 max/min instructions directly, with |
| ; the e64 encoding for the instruction that reads the scalar operand. |
| ; SelectionDAG and GlobalISel have separate check prefixes here because they |
| ; emit the two per-operand instructions in opposite order. |
| define half @v_minimumnum_f16_v_s(half %x, half inreg %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_f16_v_s: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, s16 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_f16_v_s: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, s16 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_f16_v_s: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f16_e64 v1, s16, s16 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_f16_v_s: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX8-GISEL-NEXT: v_max_f16_e64 v1, s16, s16 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-SDAG-LABEL: v_minimumnum_f16_v_s: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_max_f16_e64 v1, s16, s16 |
| ; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX900-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_minimumnum_f16_v_s: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX900-GISEL-NEXT: v_max_f16_e64 v1, s16, s16 |
| ; GFX900-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-SDAG-LABEL: v_minimumnum_f16_v_s: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-SDAG-NEXT: v_max_f16_e64 v1, s0, s0 |
| ; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX950-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-GISEL-LABEL: v_minimumnum_f16_v_s: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX950-GISEL-NEXT: v_max_f16_e64 v1, s0, s0 |
| ; GFX950-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_f16_v_s: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_max_f16_e64 v1, s16, s16 |
| ; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX10-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_f16_v_s: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX10-GISEL-NEXT: v_max_f16_e64 v1, s16, s16 |
| ; GFX10-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-TRUE16-SDAG-LABEL: v_minimumnum_f16_v_s: |
| ; GFX11-TRUE16-SDAG: ; %bb.0: |
| ; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, s0, s0 |
| ; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l |
| ; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-TRUE16-SDAG-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h |
| ; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-TRUE16-GISEL-LABEL: v_minimumnum_f16_v_s: |
| ; GFX11-TRUE16-GISEL: ; %bb.0: |
| ; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l |
| ; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, s0, s0 |
| ; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-TRUE16-GISEL-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h |
| ; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-FAKE16-SDAG-LABEL: v_minimumnum_f16_v_s: |
| ; GFX11-FAKE16-SDAG: ; %bb.0: |
| ; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, s0, s0 |
| ; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-FAKE16-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-FAKE16-GISEL-LABEL: v_minimumnum_f16_v_s: |
| ; GFX11-FAKE16-GISEL: ; %bb.0: |
| ; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, s0, s0 |
| ; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_v_s: |
| ; GFX12-TRUE16-SDAG: ; %bb.0: |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, s0, s0 |
| ; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l |
| ; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h |
| ; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-TRUE16-GISEL-LABEL: v_minimumnum_f16_v_s: |
| ; GFX12-TRUE16-GISEL: ; %bb.0: |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l |
| ; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, s0, s0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h |
| ; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-FAKE16-SDAG-LABEL: v_minimumnum_f16_v_s: |
| ; GFX12-FAKE16-SDAG: ; %bb.0: |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, s0, s0 |
| ; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1 |
| ; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-FAKE16-GISEL-LABEL: v_minimumnum_f16_v_s: |
| ; GFX12-FAKE16-GISEL: ; %bb.0: |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 |
| ; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, s0, s0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1 |
| ; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call half @llvm.minimumnum.f16(half %x, half %y) |
| ret half %result |
| } |
| |
| ; minimumnum on f16 where both operands are marked inreg; the checks read |
| ; them from a pair of scalar registers (s16/s17 or s0/s1 depending on the |
| ; target) and produce the result in v0. |
| ; On gfx7 the expected code performs the min in f32 around f16/f32 |
| ; conversions; the other targets use two e64-encoded f16 max instructions, |
| ; each reading one scalar register twice, followed by an f16 min. |
| ; SelectionDAG and GlobalISel have separate check prefixes here because they |
| ; process the operands in opposite order, which also swaps the min's sources. |
| define half @v_minimumnum_f16_s_s(half inreg %x, half inreg %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_f16_s_s: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, s16 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, s17 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_f16_s_s: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, s16 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, s17 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_f16_s_s: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f16_e64 v0, s17, s17 |
| ; GFX8-SDAG-NEXT: v_max_f16_e64 v1, s16, s16 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v1, v0 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_f16_s_s: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_max_f16_e64 v0, s16, s16 |
| ; GFX8-GISEL-NEXT: v_max_f16_e64 v1, s17, s17 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-SDAG-LABEL: v_minimumnum_f16_s_s: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_max_f16_e64 v0, s17, s17 |
| ; GFX900-SDAG-NEXT: v_max_f16_e64 v1, s16, s16 |
| ; GFX900-SDAG-NEXT: v_min_f16_e32 v0, v1, v0 |
| ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_minimumnum_f16_s_s: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_max_f16_e64 v0, s16, s16 |
| ; GFX900-GISEL-NEXT: v_max_f16_e64 v1, s17, s17 |
| ; GFX900-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-SDAG-LABEL: v_minimumnum_f16_s_s: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-SDAG-NEXT: v_max_f16_e64 v0, s1, s1 |
| ; GFX950-SDAG-NEXT: v_max_f16_e64 v1, s0, s0 |
| ; GFX950-SDAG-NEXT: v_min_f16_e32 v0, v1, v0 |
| ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-GISEL-LABEL: v_minimumnum_f16_s_s: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-GISEL-NEXT: v_max_f16_e64 v0, s0, s0 |
| ; GFX950-GISEL-NEXT: v_max_f16_e64 v1, s1, s1 |
| ; GFX950-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_f16_s_s: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_max_f16_e64 v0, s17, s17 |
| ; GFX10-SDAG-NEXT: v_max_f16_e64 v1, s16, s16 |
| ; GFX10-SDAG-NEXT: v_min_f16_e32 v0, v1, v0 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_f16_s_s: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_max_f16_e64 v0, s16, s16 |
| ; GFX10-GISEL-NEXT: v_max_f16_e64 v1, s17, s17 |
| ; GFX10-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-TRUE16-SDAG-LABEL: v_minimumnum_f16_s_s: |
| ; GFX11-TRUE16-SDAG: ; %bb.0: |
| ; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.l, s1, s1 |
| ; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, s0, s0 |
| ; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-TRUE16-SDAG-NEXT: v_min_f16_e32 v0.l, v0.h, v0.l |
| ; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-TRUE16-GISEL-LABEL: v_minimumnum_f16_s_s: |
| ; GFX11-TRUE16-GISEL: ; %bb.0: |
| ; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.l, s0, s0 |
| ; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, s1, s1 |
| ; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-TRUE16-GISEL-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h |
| ; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-FAKE16-SDAG-LABEL: v_minimumnum_f16_s_s: |
| ; GFX11-FAKE16-SDAG: ; %bb.0: |
| ; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v0, s1, s1 |
| ; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, s0, s0 |
| ; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-FAKE16-SDAG-NEXT: v_min_f16_e32 v0, v1, v0 |
| ; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-FAKE16-GISEL-LABEL: v_minimumnum_f16_s_s: |
| ; GFX11-FAKE16-GISEL: ; %bb.0: |
| ; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v0, s0, s0 |
| ; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, s1, s1 |
| ; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_s_s: |
| ; GFX12-TRUE16-SDAG: ; %bb.0: |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, s1, s1 |
| ; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, s0, s0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.h, v0.l |
| ; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-TRUE16-GISEL-LABEL: v_minimumnum_f16_s_s: |
| ; GFX12-TRUE16-GISEL: ; %bb.0: |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, s0, s0 |
| ; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, s1, s1 |
| ; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h |
| ; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-FAKE16-SDAG-LABEL: v_minimumnum_f16_s_s: |
| ; GFX12-FAKE16-SDAG: ; %bb.0: |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, s1, s1 |
| ; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, s0, s0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v1, v0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-FAKE16-GISEL-LABEL: v_minimumnum_f16_s_s: |
| ; GFX12-FAKE16-GISEL: ; %bb.0: |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, s0, s0 |
| ; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, s1, s1 |
| ; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1 |
| ; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call half @llvm.minimumnum.f16(half %x, half %y) |
| ret half %result |
| } |
| |
| ; minimumnum on f32 with mixed operand placement: %x is marked inreg (read |
| ; from a scalar register, s16 or s0 depending on the target, in the checks) |
| ; and %y is an ordinary argument (read from v0 in the checks). |
| ; The expected code applies one per-operand instruction to each input |
| ; (v_mul by 1.0 on gfx7/gfx8, a max of the register with itself elsewhere), |
| ; using the e64 encoding for the one that reads the scalar register, and then |
| ; a single min whose first source is the value derived from %x. |
| ; SelectionDAG and GlobalISel have separate check prefixes here because they |
| ; emit the two per-operand instructions in opposite order. |
| define float @v_minimumnum_f32_s_v(float inreg %x, float %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_f32_s_v: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v1, v0 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_f32_s_v: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s16 |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v1, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_f32_s_v: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16 |
| ; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v1, v0 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_f32_s_v: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s16 |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v1, v0 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-SDAG-LABEL: v_minimumnum_f32_s_v: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX900-SDAG-NEXT: v_max_f32_e64 v1, s16, s16 |
| ; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v1, v0 |
| ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_minimumnum_f32_s_v: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_max_f32_e64 v1, s16, s16 |
| ; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v1, v0 |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-SDAG-LABEL: v_minimumnum_f32_s_v: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX950-SDAG-NEXT: v_max_f32_e64 v1, s0, s0 |
| ; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v1, v0 |
| ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-GISEL-LABEL: v_minimumnum_f32_s_v: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-GISEL-NEXT: v_max_f32_e64 v1, s0, s0 |
| ; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v1, v0 |
| ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_f32_s_v: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX10-SDAG-NEXT: v_max_f32_e64 v1, s16, s16 |
| ; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v1, v0 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_f32_s_v: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_max_f32_e64 v1, s16, s16 |
| ; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v1, v0 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: v_minimumnum_f32_s_v: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX11-SDAG-NEXT: v_max_f32_e64 v1, s0, s0 |
| ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v1, v0 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: v_minimumnum_f32_s_v: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_max_f32_e64 v1, s0, s0 |
| ; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v1, v0 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_f32_s_v: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0 |
| ; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v1, v0 |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_f32_s_v: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, s0, s0 |
| ; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0 |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v1, v0 |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call float @llvm.minimumnum.f32(float %x, float %y) |
| ret float %result |
| } |
| |
| define float @v_minimumnum_f32_v_s(float %x, float inreg %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_f32_v_s: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16 |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_f32_v_s: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s16 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_f32_v_s: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16 |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_f32_v_s: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s16 |
| ; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-SDAG-LABEL: v_minimumnum_f32_v_s: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_max_f32_e64 v1, s16, s16 |
| ; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_minimumnum_f32_v_s: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX900-GISEL-NEXT: v_max_f32_e64 v1, s16, s16 |
| ; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-SDAG-LABEL: v_minimumnum_f32_v_s: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-SDAG-NEXT: v_max_f32_e64 v1, s0, s0 |
| ; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-GISEL-LABEL: v_minimumnum_f32_v_s: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX950-GISEL-NEXT: v_max_f32_e64 v1, s0, s0 |
| ; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_f32_v_s: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_max_f32_e64 v1, s16, s16 |
| ; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_f32_v_s: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX10-GISEL-NEXT: v_max_f32_e64 v1, s16, s16 |
| ; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: v_minimumnum_f32_v_s: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: v_max_f32_e64 v1, s0, s0 |
| ; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: v_minimumnum_f32_v_s: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX11-GISEL-NEXT: v_max_f32_e64 v1, s0, s0 |
| ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_f32_v_s: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0 |
| ; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_f32_v_s: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0 |
| ; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, s0, s0 |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call float @llvm.minimumnum.f32(float %x, float %y) |
| ret float %result |
| } |
| |
| ; f32 minimumnum with both operands marked inreg. The checks expect each operand |
| ; to be read from an s-register and passed through v_mul_f32 by 1.0 (gfx7, gfx8) |
| ; or a self v_max/v_max_num (gfx9 and newer) before a single v_min/v_min_num. |
| ; NOTE(review): the mul-by-1.0 / self-max is presumably input canonicalization. |
| define float @v_minimumnum_f32_s_s(float inreg %x, float inreg %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_f32_s_s: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_mul_f32_e64 v0, 1.0, s17 |
| ; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v1, v0 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_f32_s_s: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, s16 |
| ; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s17 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_f32_s_s: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_mul_f32_e64 v0, 1.0, s17 |
| ; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, s16 |
| ; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v1, v0 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_f32_s_s: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, s16 |
| ; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, s17 |
| ; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-SDAG-LABEL: v_minimumnum_f32_s_s: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_max_f32_e64 v0, s17, s17 |
| ; GFX900-SDAG-NEXT: v_max_f32_e64 v1, s16, s16 |
| ; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v1, v0 |
| ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_minimumnum_f32_s_s: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_max_f32_e64 v0, s16, s16 |
| ; GFX900-GISEL-NEXT: v_max_f32_e64 v1, s17, s17 |
| ; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-SDAG-LABEL: v_minimumnum_f32_s_s: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-SDAG-NEXT: v_max_f32_e64 v0, s1, s1 |
| ; GFX950-SDAG-NEXT: v_max_f32_e64 v1, s0, s0 |
| ; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v1, v0 |
| ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-GISEL-LABEL: v_minimumnum_f32_s_s: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-GISEL-NEXT: v_max_f32_e64 v0, s0, s0 |
| ; GFX950-GISEL-NEXT: v_max_f32_e64 v1, s1, s1 |
| ; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_f32_s_s: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_max_f32_e64 v0, s17, s17 |
| ; GFX10-SDAG-NEXT: v_max_f32_e64 v1, s16, s16 |
| ; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v1, v0 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_f32_s_s: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_max_f32_e64 v0, s16, s16 |
| ; GFX10-GISEL-NEXT: v_max_f32_e64 v1, s17, s17 |
| ; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: v_minimumnum_f32_s_s: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: v_max_f32_e64 v0, s1, s1 |
| ; GFX11-SDAG-NEXT: v_max_f32_e64 v1, s0, s0 |
| ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v1, v0 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: v_minimumnum_f32_s_s: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_max_f32_e64 v0, s0, s0 |
| ; GFX11-GISEL-NEXT: v_max_f32_e64 v1, s1, s1 |
| ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_f32_s_s: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_max_num_f32_e64 v0, s1, s1 |
| ; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v1, v0 |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_f32_s_s: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_max_num_f32_e64 v0, s0, s0 |
| ; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, s1, s1 |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call float @llvm.minimumnum.f32(float %x, float %y) |
| ret float %result |
| } |
| |
| ; f64 minimumnum with %x marked inreg and %y a plain argument. The checks expect |
| ; a self v_max_f64/v_max_num_f64 on the s-register pair and on v[0:1], followed |
| ; by one min whose first source is the value derived from the s-register pair. |
| ; From gfx10 onward a single check prefix covers both instruction selectors. |
| define double @v_minimumnum_f64_s_v(double inreg %x, double %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_f64_s_v: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX7-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] |
| ; GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_f64_s_v: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] |
| ; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_f64_s_v: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX8-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] |
| ; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_f64_s_v: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] |
| ; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-SDAG-LABEL: v_minimumnum_f64_s_v: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX900-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] |
| ; GFX900-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] |
| ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_minimumnum_f64_s_v: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] |
| ; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX900-GISEL-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-SDAG-LABEL: v_minimumnum_f64_s_v: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX950-SDAG-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] |
| ; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] |
| ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-GISEL-LABEL: v_minimumnum_f64_s_v: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-GISEL-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] |
| ; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] |
| ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_f64_s_v: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] |
| ; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX10-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_f64_s_v: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] |
| ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_minimumnum_f64_s_v: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_max_num_f64_e64 v[2:3], s[0:1], s[0:1] |
| ; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[2:3], v[0:1] |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = call double @llvm.minimumnum.f64(double %x, double %y) |
| ret double %result |
| } |
| |
| ; f64 minimumnum with %x a plain argument and %y marked inreg. The checks expect |
| ; a self v_max_f64/v_max_num_f64 on v[0:1] and on the s-register pair, followed |
| ; by one min whose first source is v[0:1] (operand order mirrors the IR call). |
| ; From gfx10 onward a single check prefix covers both instruction selectors. |
| define double @v_minimumnum_f64_v_s(double %x, double inreg %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_f64_v_s: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] |
| ; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_f64_v_s: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX7-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] |
| ; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_f64_v_s: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] |
| ; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_f64_v_s: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX8-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] |
| ; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-SDAG-LABEL: v_minimumnum_f64_v_s: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] |
| ; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX900-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_minimumnum_f64_v_s: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX900-GISEL-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] |
| ; GFX900-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-SDAG-LABEL: v_minimumnum_f64_v_s: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-SDAG-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] |
| ; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-GISEL-LABEL: v_minimumnum_f64_v_s: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX950-GISEL-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] |
| ; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_f64_v_s: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] |
| ; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_f64_v_s: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] |
| ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_minimumnum_f64_v_s: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_max_num_f64_e64 v[2:3], s[0:1], s[0:1] |
| ; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = call double @llvm.minimumnum.f64(double %x, double %y) |
| ret double %result |
| } |
| |
| ; f64 minimumnum with both operands marked inreg. The checks expect each operand |
| ; to be read from an s-register pair (s[16:17]/s[18:19] or s[0:1]/s[2:3]) and |
| ; passed through a self v_max_f64/v_max_num_f64 into VGPRs before a single min. |
| ; The two selectors differ only in which operand is processed first. |
| define double @v_minimumnum_f64_s_s(double inreg %x, double inreg %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_f64_s_s: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] |
| ; GFX7-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] |
| ; GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_f64_s_s: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_max_f64 v[0:1], s[16:17], s[16:17] |
| ; GFX7-GISEL-NEXT: v_max_f64 v[2:3], s[18:19], s[18:19] |
| ; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_f64_s_s: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] |
| ; GFX8-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] |
| ; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_f64_s_s: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_max_f64 v[0:1], s[16:17], s[16:17] |
| ; GFX8-GISEL-NEXT: v_max_f64 v[2:3], s[18:19], s[18:19] |
| ; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-SDAG-LABEL: v_minimumnum_f64_s_s: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] |
| ; GFX900-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] |
| ; GFX900-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] |
| ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_minimumnum_f64_s_s: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_max_f64 v[0:1], s[16:17], s[16:17] |
| ; GFX900-GISEL-NEXT: v_max_f64 v[2:3], s[18:19], s[18:19] |
| ; GFX900-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-SDAG-LABEL: v_minimumnum_f64_s_s: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-SDAG-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3] |
| ; GFX950-SDAG-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] |
| ; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] |
| ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-GISEL-LABEL: v_minimumnum_f64_s_s: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-GISEL-NEXT: v_max_f64 v[0:1], s[0:1], s[0:1] |
| ; GFX950-GISEL-NEXT: v_max_f64 v[2:3], s[2:3], s[2:3] |
| ; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_f64_s_s: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_max_f64 v[0:1], s[18:19], s[18:19] |
| ; GFX10-SDAG-NEXT: v_max_f64 v[2:3], s[16:17], s[16:17] |
| ; GFX10-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_f64_s_s: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_max_f64 v[0:1], s[16:17], s[16:17] |
| ; GFX10-GISEL-NEXT: v_max_f64 v[2:3], s[18:19], s[18:19] |
| ; GFX10-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: v_minimumnum_f64_s_s: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3] |
| ; GFX11-SDAG-NEXT: v_max_f64 v[2:3], s[0:1], s[0:1] |
| ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-SDAG-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: v_minimumnum_f64_s_s: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_max_f64 v[0:1], s[0:1], s[0:1] |
| ; GFX11-GISEL-NEXT: v_max_f64 v[2:3], s[2:3], s[2:3] |
| ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_f64_s_s: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e64 v[0:1], s[2:3], s[2:3] |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e64 v[2:3], s[0:1], s[0:1] |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[0:1], v[2:3], v[0:1] |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_f64_s_s: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e64 v[0:1], s[0:1], s[0:1] |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e64 v[2:3], s[2:3], s[2:3] |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call double @llvm.minimumnum.f64(double %x, double %y) |
| ret double %result |
| } |
| |
| ; f32 minimumnum whose second operand is llvm.fabs(%y). The checks expect no |
| ; standalone fabs instruction; the absolute value appears as a |v1| source |
| ; modifier on the v_mul_f32 by 1.0 (gfx7, gfx8) or on both sources of the self |
| ; v_max/v_max_num (gfx9 and newer) applied to v1 ahead of the min. |
| define float @v_minimumnum_f32_fabs_rhs(float %x, float %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_f32_fabs_rhs: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, |v1| |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_f32_fabs_rhs: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, |v1| |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_f32_fabs_rhs: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, |v1| |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_f32_fabs_rhs: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, |v1| |
| ; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: v_minimumnum_f32_fabs_rhs: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| |
| ; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: v_minimumnum_f32_fabs_rhs: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX9-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| |
| ; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_f32_fabs_rhs: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| |
| ; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_f32_fabs_rhs: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX10-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| |
| ; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: v_minimumnum_f32_fabs_rhs: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| |
| ; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: v_minimumnum_f32_fabs_rhs: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX11-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| |
| ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_f32_fabs_rhs: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| |
| ; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_f32_fabs_rhs: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0 |
| ; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %fabs.y = call float @llvm.fabs.f32(float %y) |
| %result = call float @llvm.minimumnum.f32(float %x, float %fabs.y) |
| ret float %result |
| } |
| |
| ; f32 minimumnum whose second operand is fneg(llvm.fabs(%y)). The checks expect |
| ; neither a standalone fabs nor a standalone fneg instruction. On gfx7 and gfx8 |
| ; the pair shows up as v_mul_f32 of -1.0 with |v1|; on gfx9 and newer it shows |
| ; up as -|v1| source modifiers on both sources of the self v_max/v_max_num. |
| define float @v_minimumnum_f32_fneg_fabs_rhs(float %x, float %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, -1.0, |v1| |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, -1.0, |v1| |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, -1.0, |v1| |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, -1.0, |v1| |
| ; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| |
| ; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| |
| ; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| |
| ; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX10-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| |
| ; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| |
| ; GFX11-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX11-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| |
| ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1| |
| ; GFX12-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0 |
| ; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1| |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %fabs.y = call float @llvm.fabs.f32(float %y) |
| %fneg.fabs.y = fneg float %fabs.y |
| %result = call float @llvm.minimumnum.f32(float %x, float %fneg.fabs.y) |
| ret float %result |
| } |
| |
| ; minimumnum of two f32 values with llvm.fabs applied to both operands. |
| ; In the checks below each fabs appears as a |vN| source modifier on the |
| ; per-operand instruction that precedes v_min (v_mul_f32 by 1.0 on GFX7 and |
| ; GFX8, a self v_max on GFX9 and later) rather than as a separate instruction. |
| define float @v_minimumnum_f32_fabs(float %x, float %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_f32_fabs: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, |v1| |
| ; GFX7-SDAG-NEXT: v_mul_f32_e64 v0, 1.0, |v0| |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_f32_fabs: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, |v0| |
| ; GFX7-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, |v1| |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_f32_fabs: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_mul_f32_e64 v1, 1.0, |v1| |
| ; GFX8-SDAG-NEXT: v_mul_f32_e64 v0, 1.0, |v0| |
| ; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_f32_fabs: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, |v0| |
| ; GFX8-GISEL-NEXT: v_mul_f32_e64 v1, 1.0, |v1| |
| ; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: v_minimumnum_f32_fabs: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| |
| ; GFX9-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0| |
| ; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: v_minimumnum_f32_fabs: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0| |
| ; GFX9-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| |
| ; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_f32_fabs: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| |
| ; GFX10-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0| |
| ; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_f32_fabs: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0| |
| ; GFX10-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| |
| ; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: v_minimumnum_f32_fabs: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| |
| ; GFX11-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0| |
| ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: v_minimumnum_f32_fabs: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0| |
| ; GFX11-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| |
| ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_f32_fabs: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| |
| ; GFX12-SDAG-NEXT: v_max_num_f32_e64 v0, |v0|, |v0| |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_f32_fabs: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_max_num_f32_e64 v0, |v0|, |v0| |
| ; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, |v1|, |v1| |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %fabs.x = call float @llvm.fabs.f32(float %x) |
| %fabs.y = call float @llvm.fabs.f32(float %y) |
| %result = call float @llvm.minimumnum.f32(float %fabs.x, float %fabs.y) |
| ret float %result |
| } |
| |
| ; minimumnum of two f32 values with fneg applied to both operands. |
| ; In the checks below the negation is absorbed into the per-operand |
| ; instruction that precedes v_min. On GFX7 and GFX8 it becomes a v_mul_f32 by |
| ; -1.0, and on GFX9 and later a self v_max with -vN source modifiers. |
| define float @v_minimumnum_f32_fneg(float %x, float %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_f32_fneg: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, -1.0, v1 |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, -1.0, v0 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_f32_fneg: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, -1.0, v0 |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, -1.0, v1 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_f32_fneg: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, -1.0, v1 |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, -1.0, v0 |
| ; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_f32_fneg: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, -1.0, v0 |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, -1.0, v1 |
| ; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: v_minimumnum_f32_fneg: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1 |
| ; GFX9-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0 |
| ; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: v_minimumnum_f32_fneg: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 |
| ; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 |
| ; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_f32_fneg: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1 |
| ; GFX10-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0 |
| ; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_f32_fneg: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 |
| ; GFX10-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 |
| ; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: v_minimumnum_f32_fneg: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1 |
| ; GFX11-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0 |
| ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: v_minimumnum_f32_fneg: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 |
| ; GFX11-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 |
| ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_f32_fneg: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_max_num_f32_e64 v1, -v1, -v1 |
| ; GFX12-SDAG-NEXT: v_max_num_f32_e64 v0, -v0, -v0 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_f32_fneg: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_max_num_f32_e64 v0, -v0, -v0 |
| ; GFX12-GISEL-NEXT: v_max_num_f32_e64 v1, -v1, -v1 |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %fneg.x = fneg float %x |
| %fneg.y = fneg float %y |
| %result = call float @llvm.minimumnum.f32(float %fneg.x, float %fneg.y) |
| ret float %result |
| } |
| |
| ; minimumnum of two f16 values with llvm.fabs applied to the second operand only. |
| ; The GFX7 checks do the min in f32 and carry the fabs as a |v1| modifier on |
| ; the v_cvt_f32_f16 of %y. The GFX8 and later checks carry it as a modifier on |
| ; the self v_max_f16 of %y, and the true16 variants use the v0.l, v0.h and v1.l |
| ; register halves. |
| define half @v_minimumnum_f16_fabs_rhs(half %x, half %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_f16_fabs_rhs: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1| |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_f16_fabs_rhs: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, |v1| |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_f16_fabs_rhs: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_f16_fabs_rhs: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX8-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: v_minimumnum_f16_fabs_rhs: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| |
| ; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: v_minimumnum_f16_fabs_rhs: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX9-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| |
| ; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_f16_fabs_rhs: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| |
| ; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX10-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_f16_fabs_rhs: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX10-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| |
| ; GFX10-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-TRUE16-SDAG-LABEL: v_minimumnum_f16_fabs_rhs: |
| ; GFX11-TRUE16-SDAG: ; %bb.0: |
| ; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l| |
| ; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l |
| ; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-TRUE16-SDAG-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h |
| ; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-TRUE16-GISEL-LABEL: v_minimumnum_f16_fabs_rhs: |
| ; GFX11-TRUE16-GISEL: ; %bb.0: |
| ; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l |
| ; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l| |
| ; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-TRUE16-GISEL-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h |
| ; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-FAKE16-SDAG-LABEL: v_minimumnum_f16_fabs_rhs: |
| ; GFX11-FAKE16-SDAG: ; %bb.0: |
| ; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| |
| ; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-FAKE16-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-FAKE16-GISEL-LABEL: v_minimumnum_f16_fabs_rhs: |
| ; GFX11-FAKE16-GISEL: ; %bb.0: |
| ; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| |
| ; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_fabs_rhs: |
| ; GFX12-TRUE16-SDAG: ; %bb.0: |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l| |
| ; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l |
| ; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h |
| ; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-TRUE16-GISEL-LABEL: v_minimumnum_f16_fabs_rhs: |
| ; GFX12-TRUE16-GISEL: ; %bb.0: |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l |
| ; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l| |
| ; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h |
| ; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-FAKE16-SDAG-LABEL: v_minimumnum_f16_fabs_rhs: |
| ; GFX12-FAKE16-SDAG: ; %bb.0: |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| |
| ; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1 |
| ; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-FAKE16-GISEL-LABEL: v_minimumnum_f16_fabs_rhs: |
| ; GFX12-FAKE16-GISEL: ; %bb.0: |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 |
| ; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| |
| ; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1 |
| ; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %fabs.y = call half @llvm.fabs.f16(half %y) |
| %result = call half @llvm.minimumnum.f16(half %x, half %fabs.y) |
| ret half %result |
| } |
| |
| ; minimumnum of two f16 values where the second operand is fneg(fabs(%y)). |
| ; In the checks below the combined negate-of-absolute-value appears as a |
| ; single -|v1| source modifier. On GFX7 it sits on the v_cvt_f32_f16 of %y, |
| ; and on GFX8 and later on the self v_max_f16 of %y. |
| define half @v_minimumnum_f16_fneg_fabs_rhs(half %x, half %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v1, -|v1| |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, -|v1| |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX8-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| |
| ; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX9-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| |
| ; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| |
| ; GFX10-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX10-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX10-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| |
| ; GFX10-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-TRUE16-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs: |
| ; GFX11-TRUE16-SDAG: ; %bb.0: |
| ; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, -|v1.l|, -|v1.l| |
| ; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l |
| ; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-TRUE16-SDAG-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h |
| ; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-TRUE16-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs: |
| ; GFX11-TRUE16-GISEL: ; %bb.0: |
| ; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l |
| ; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, -|v1.l|, -|v1.l| |
| ; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-TRUE16-GISEL-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h |
| ; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-FAKE16-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs: |
| ; GFX11-FAKE16-SDAG: ; %bb.0: |
| ; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| |
| ; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-FAKE16-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-FAKE16-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs: |
| ; GFX11-FAKE16-GISEL: ; %bb.0: |
| ; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| |
| ; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs: |
| ; GFX12-TRUE16-SDAG: ; %bb.0: |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, -|v1.l|, -|v1.l| |
| ; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l |
| ; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h |
| ; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-TRUE16-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs: |
| ; GFX12-TRUE16-GISEL: ; %bb.0: |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l |
| ; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, -|v1.l|, -|v1.l| |
| ; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h |
| ; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-FAKE16-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs: |
| ; GFX12-FAKE16-SDAG: ; %bb.0: |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1| |
| ; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1 |
| ; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-FAKE16-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs: |
| ; GFX12-FAKE16-GISEL: ; %bb.0: |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 |
| ; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1| |
| ; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1 |
| ; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %fabs.y = call half @llvm.fabs.f16(half %y) |
| %fneg.fabs.y = fneg half %fabs.y |
| %result = call half @llvm.minimumnum.f16(half %x, half %fneg.fabs.y) |
| ret half %result |
| } |
| |
| ; minimumnum of two f16 values with llvm.fabs applied to both operands. |
| ; The GFX7 checks do the min in f32 with the fabs as a |vN| modifier on each |
| ; v_cvt_f32_f16. The GFX8 and later checks use a self v_max_f16 with |vN| |
| ; modifiers on each operand before v_min. |
| define half @v_minimumnum_f16_fabs(half %x, half %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_f16_fabs: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1| |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_f16_fabs: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, |v0| |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, |v1| |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_f16_fabs: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| |
| ; GFX8-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0| |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_f16_fabs: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0| |
| ; GFX8-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: v_minimumnum_f16_fabs: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| |
| ; GFX9-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0| |
| ; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: v_minimumnum_f16_fabs: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0| |
| ; GFX9-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| |
| ; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_f16_fabs: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| |
| ; GFX10-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0| |
| ; GFX10-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_f16_fabs: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0| |
| ; GFX10-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| |
| ; GFX10-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-TRUE16-SDAG-LABEL: v_minimumnum_f16_fabs: |
| ; GFX11-TRUE16-SDAG: ; %bb.0: |
| ; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l| |
| ; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.l, |v0.l|, |v0.l| |
| ; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-TRUE16-SDAG-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h |
| ; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-TRUE16-GISEL-LABEL: v_minimumnum_f16_fabs: |
| ; GFX11-TRUE16-GISEL: ; %bb.0: |
| ; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.l, |v0.l|, |v0.l| |
| ; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, |v1.l|, |v1.l| |
| ; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-TRUE16-GISEL-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h |
| ; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-FAKE16-SDAG-LABEL: v_minimumnum_f16_fabs: |
| ; GFX11-FAKE16-SDAG: ; %bb.0: |
| ; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| |
| ; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0| |
| ; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-FAKE16-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-FAKE16-GISEL-LABEL: v_minimumnum_f16_fabs: |
| ; GFX11-FAKE16-GISEL: ; %bb.0: |
| ; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0| |
| ; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| |
| ; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_fabs: |
| ; GFX12-TRUE16-SDAG: ; %bb.0: |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l| |
| ; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, |v0.l|, |v0.l| |
| ; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h |
| ; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-TRUE16-GISEL-LABEL: v_minimumnum_f16_fabs: |
| ; GFX12-TRUE16-GISEL: ; %bb.0: |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, |v0.l|, |v0.l| |
| ; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l| |
| ; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h |
| ; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-FAKE16-SDAG-LABEL: v_minimumnum_f16_fabs: |
| ; GFX12-FAKE16-SDAG: ; %bb.0: |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| |
| ; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, |v0|, |v0| |
| ; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1 |
| ; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-FAKE16-GISEL-LABEL: v_minimumnum_f16_fabs: |
| ; GFX12-FAKE16-GISEL: ; %bb.0: |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, |v0|, |v0| |
| ; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, |v1|, |v1| |
| ; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1 |
| ; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %fabs.x = call half @llvm.fabs.f16(half %x) |
| %fabs.y = call half @llvm.fabs.f16(half %y) |
| %result = call half @llvm.minimumnum.f16(half %fabs.x, half %fabs.y) |
| ret half %result |
| } |
| |
| ; minimumnum of two negated half operands. In the checked output below the |
| ; negation appears as a "-" source modifier on the v_max (GFX8 and later) or |
| ; v_cvt (GFX7) instructions; no separate negate instruction is expected. |
| define half @v_minimumnum_f16_fneg(half %x, half %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_f16_fneg: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e64 v1, -v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_f16_fneg: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, -v1 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_f16_fneg: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1 |
| ; GFX8-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_f16_fneg: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 |
| ; GFX8-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: v_minimumnum_f16_fneg: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1 |
| ; GFX9-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0 |
| ; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: v_minimumnum_f16_fneg: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 |
| ; GFX9-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1 |
| ; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_f16_fneg: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1 |
| ; GFX10-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0 |
| ; GFX10-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_f16_fneg: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 |
| ; GFX10-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1 |
| ; GFX10-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-TRUE16-SDAG-LABEL: v_minimumnum_f16_fneg: |
| ; GFX11-TRUE16-SDAG: ; %bb.0: |
| ; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.h, -v1.l, -v1.l |
| ; GFX11-TRUE16-SDAG-NEXT: v_max_f16_e64 v0.l, -v0.l, -v0.l |
| ; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-TRUE16-SDAG-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h |
| ; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-TRUE16-GISEL-LABEL: v_minimumnum_f16_fneg: |
| ; GFX11-TRUE16-GISEL: ; %bb.0: |
| ; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.l, -v0.l, -v0.l |
| ; GFX11-TRUE16-GISEL-NEXT: v_max_f16_e64 v0.h, -v1.l, -v1.l |
| ; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-TRUE16-GISEL-NEXT: v_min_f16_e32 v0.l, v0.l, v0.h |
| ; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-FAKE16-SDAG-LABEL: v_minimumnum_f16_fneg: |
| ; GFX11-FAKE16-SDAG: ; %bb.0: |
| ; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1 |
| ; GFX11-FAKE16-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0 |
| ; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-FAKE16-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-FAKE16-GISEL-LABEL: v_minimumnum_f16_fneg: |
| ; GFX11-FAKE16-GISEL: ; %bb.0: |
| ; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 |
| ; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1 |
| ; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_fneg: |
| ; GFX12-TRUE16-SDAG: ; %bb.0: |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, -v1.l, -v1.l |
| ; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, -v0.l, -v0.l |
| ; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h |
| ; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-TRUE16-GISEL-LABEL: v_minimumnum_f16_fneg: |
| ; GFX12-TRUE16-GISEL: ; %bb.0: |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, -v0.l, -v0.l |
| ; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, -v1.l, -v1.l |
| ; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h |
| ; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-FAKE16-SDAG-LABEL: v_minimumnum_f16_fneg: |
| ; GFX12-FAKE16-SDAG: ; %bb.0: |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, -v1, -v1 |
| ; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, -v0, -v0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1 |
| ; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-FAKE16-GISEL-LABEL: v_minimumnum_f16_fneg: |
| ; GFX12-FAKE16-GISEL: ; %bb.0: |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, -v0, -v0 |
| ; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, -v1, -v1 |
| ; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1 |
| ; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %fneg.x = fneg half %x |
| %fneg.y = fneg half %y |
| %result = call half @llvm.minimumnum.f16(half %fneg.x, half %fneg.y) |
| ret half %result |
| } |
| |
| ; f64 variant of the fneg test. The checked output puts the "-" source modifier |
| ; on both operands of v_max_f64 (v_max_num_f64 for GFX12) ahead of the final min. |
| define double @v_minimumnum_f64_fneg(double %x, double %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_f64_fneg: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] |
| ; GFX7-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] |
| ; GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_f64_fneg: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] |
| ; GFX7-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] |
| ; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_f64_fneg: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] |
| ; GFX8-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] |
| ; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_f64_fneg: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] |
| ; GFX8-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] |
| ; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: v_minimumnum_f64_fneg: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] |
| ; GFX9-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] |
| ; GFX9-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: v_minimumnum_f64_fneg: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] |
| ; GFX9-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] |
| ; GFX9-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_f64_fneg: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] |
| ; GFX10-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] |
| ; GFX10-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_f64_fneg: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] |
| ; GFX10-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] |
| ; GFX10-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: v_minimumnum_f64_fneg: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] |
| ; GFX11-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] |
| ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: v_minimumnum_f64_fneg: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] |
| ; GFX11-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] |
| ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_f64_fneg: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e64 v[2:3], -v[2:3], -v[2:3] |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e64 v[0:1], -v[0:1], -v[0:1] |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_f64_fneg: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e64 v[0:1], -v[0:1], -v[0:1] |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e64 v[2:3], -v[2:3], -v[2:3] |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %fneg.x = fneg double %x |
| %fneg.y = fneg double %y |
| %result = call double @llvm.minimumnum.f64(double %fneg.x, double %fneg.y) |
| ret double %result |
| } |
| |
| ; minimumnum on <2 x half> with no fast-math flags. Checked output uses packed |
| ; v_pk_max / v_pk_min on GFX9 and later, SDWA halves joined by v_or_b32 on GFX8, |
| ; and v_min_f32 around f16/f32 conversions on GFX7. |
| define <2 x half> @v_minimumnum_v2f16(<2 x half> %x, <2 x half> %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_v2f16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v2 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v3 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_v2f16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_v2f16: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v2, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_v2f16: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v0, v0 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v3, v1, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v2, v2, v3 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-SDAG-LABEL: v_minimumnum_v2f16: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v1 |
| ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_minimumnum_v2f16: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v0, v0, v1 |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-SDAG-LABEL: v_minimumnum_v2f16: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX950-SDAG-NEXT: s_nop 0 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v1 |
| ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-GISEL-LABEL: v_minimumnum_v2f16: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX950-GISEL-NEXT: s_nop 0 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v0, v0, v1 |
| ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_v2f16: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v0, v0, v1 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_v2f16: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v0, v0, v1 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: v_minimumnum_v2f16: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v0, v1 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: v_minimumnum_v2f16: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v1 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_v2f16: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v1 |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_v2f16: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v1 |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> %x, <2 x half> %y) |
| ret <2 x half> %result |
| } |
| |
| ; nnan-flagged <2 x half> minimumnum. From GFX8 on, the checked output contains |
| ; only the min instruction(s) (plus v_or_b32 on GFX8), with no v_max of an |
| ; operand against itself beforehand. |
| define <2 x half> @v_minimumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_v2f16_nnan: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v2 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v3 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_v2f16_nnan: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_v2f16_nnan: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_v2f16_nnan: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v2, v0, v1 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_v2f16_nnan: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_pk_min_f16 v0, v0, v1 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_v2f16_nnan: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_pk_min_f16 v0, v0, v1 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_v2f16_nnan: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_pk_min_f16 v0, v0, v1 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_minimumnum_v2f16_nnan: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v1 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = call nnan <2 x half> @llvm.minimumnum.v2f16(<2 x half> %x, <2 x half> %y) |
| ret <2 x half> %result |
| } |
| |
| ; minimumnum on <3 x half> with no fast-math flags. In the GFX8 and later output |
| ; the inputs are read from v0-v3 (min pairs v0 with v2 and v1 with v3); the GFX7 |
| ; checks pair v0-v2 with v3-v5, one register per element. |
| define <3 x half> @v_minimumnum_v3f16(<3 x half> %x, <3 x half> %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_v3f16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v3 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v4 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v5 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_v3f16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v3 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v4 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v5 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v4 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_v3f16: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v4, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v5, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v2 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v3 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v4 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_v3f16: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v0, v0 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v5, v2, v2 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v4, v4, v5 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v3, v3 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v1, v1, v2 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: v_minimumnum_v3f16: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX9-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX9-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX9-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX9-SDAG-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX9-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_minimumnum_v3f16: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v2, v3, v3 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v1, v1, v2 |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-GISEL-LABEL: v_minimumnum_v3f16: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v3, v3 |
| ; GFX950-GISEL-NEXT: s_nop 0 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v1, v1, v2 |
| ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_v3f16: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_v3f16: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: v_minimumnum_v3f16: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: v_minimumnum_v3f16: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_v3f16: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v2 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v3 |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_v3f16: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v2 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v3 |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call <3 x half> @llvm.minimumnum.v3f16(<3 x half> %x, <3 x half> %y) |
| ret <3 x half> %result |
| } |
| |
| ; <3 x half> llvm.minimumnum with the nnan flag on the call. |
| ; The GFX8 and newer expectations below consist of the min instructions alone |
| ; (plus a v_or_b32 on GFX8 to merge the two halves of v0); the GFX7 expectations |
| ; do the min as v_min_f32 with f16/f32 conversions. |
| ; The assertions are autogenerated (see the NOTE on the first line of the file); |
| ; regenerate them with utils/update_llc_test_checks.py instead of hand-editing. |
| define <3 x half> @v_minimumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_v3f16_nnan: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v3 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v4 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v5 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_v3f16_nnan: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v3 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v4 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v5 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v4 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_v3f16_nnan: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v2 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v3 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v4 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_v3f16_nnan: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v4, v0, v2 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v1, v1, v3 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: v_minimumnum_v3f16_nnan: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX9-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: v_minimumnum_v3f16_nnan: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX9-GISEL-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_v3f16_nnan: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX10-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_v3f16_nnan: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX11-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_minimumnum_v3f16_nnan: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v2 |
| ; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v3 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = call nnan <3 x half> @llvm.minimumnum.v3f16(<3 x half> %x, <3 x half> %y) |
| ret <3 x half> %result |
| } |
| |
| ; <4 x half> llvm.minimumnum with no fast-math flags on the call. |
| ; The GFX8 and newer expectations pass every operand through a self-max |
| ; (v_max_f16 / v_pk_max_f16 / v_pk_max_num_f16 with identical sources) before |
| ; the min -- presumably to canonicalize NaN inputs; confirm against the lowering. |
| ; The GFX7 expectations do the min as v_min_f32 with f16/f32 conversions. |
| ; The GFX950 expectations also contain an s_nop 0 ahead of the final v_pk_min_f16. |
| ; The assertions are autogenerated (see the NOTE on the first line of the file); |
| ; regenerate them with utils/update_llc_test_checks.py instead of hand-editing. |
| define <4 x half> @v_minimumnum_v4f16(<4 x half> %x, <4 x half> %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_v4f16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v4 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v5 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v6 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v3, v3, v7 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_v4f16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v4 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v6 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v5 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v7 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v4 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v3, v3, v5 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_v4f16: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v4, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v5, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v5, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v6, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v3 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v2 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v5 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v4 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_v4f16: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v0, v0 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v5, v2, v2 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v4, v4, v5 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v2, v1, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v5, v3, v3 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v2, v2, v5 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-SDAG-LABEL: v_minimumnum_v4f16: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v3, v3 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v2 |
| ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_minimumnum_v4f16: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v2, v3, v3 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v1, v1, v2 |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-SDAG-LABEL: v_minimumnum_v4f16: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v3, v3 |
| ; GFX950-SDAG-NEXT: s_nop 0 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v2 |
| ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-GISEL-LABEL: v_minimumnum_v4f16: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v3, v3 |
| ; GFX950-GISEL-NEXT: s_nop 0 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v1, v1, v2 |
| ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_v4f16: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_v4f16: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: v_minimumnum_v4f16: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: v_minimumnum_v4f16: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_v4f16: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v2 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v3 |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_v4f16: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v2 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v3 |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call <4 x half> @llvm.minimumnum.v4f16(<4 x half> %x, <4 x half> %y) |
| ret <4 x half> %result |
| } |
| |
| ; <4 x half> llvm.minimumnum with the nnan flag on the call. |
| ; In contrast to v_minimumnum_v4f16, the GFX8 and newer expectations contain no |
| ; self-max instructions: only the min instructions remain (plus v_or_b32 on GFX8 |
| ; to merge the low and high halves of each result register). |
| ; The GFX7 expectations do the min as v_min_f32 with f16/f32 conversions. |
| ; The assertions are autogenerated (see the NOTE on the first line of the file); |
| ; regenerate them with utils/update_llc_test_checks.py instead of hand-editing. |
| define <4 x half> @v_minimumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_v4f16_nnan: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v4 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v5 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v6 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v3, v3, v7 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_v4f16_nnan: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v4 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v6 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v5 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v7 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v4 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v3, v3, v5 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_v4f16_nnan: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v3 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v2 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v5 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v4 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_v4f16_nnan: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v4, v0, v2 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v2, v1, v3 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_v4f16_nnan: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX9-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_v4f16_nnan: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX10-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_v4f16_nnan: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX11-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_minimumnum_v4f16_nnan: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v2 |
| ; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v3 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = call nnan <4 x half> @llvm.minimumnum.v4f16(<4 x half> %x, <4 x half> %y) |
| ret <4 x half> %result |
| } |
| |
| ; <6 x half> llvm.minimumnum with no fast-math flags on the call. |
| ; In the GFX8 and newer expectations the operands occupy v0-v2 and v3-v5, and |
| ; every operand goes through a self-max (identical sources) before the min -- |
| ; presumably to canonicalize NaN inputs; confirm against the lowering. |
| ; The GFX7 expectations use v0-v5 and v6-v11 and do the min as v_min_f32 with |
| ; f16/f32 conversions. |
| ; The GFX950 expectations also contain an s_nop 0 ahead of the final v_pk_min_f16. |
| ; The assertions are autogenerated (see the NOTE on the first line of the file); |
| ; regenerate them with utils/update_llc_test_checks.py instead of hand-editing. |
| define <6 x half> @v_minimumnum_v6f16(<6 x half> %x, <6 x half> %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_v6f16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v11, v11 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v10, v10 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v9, v9 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v8, v8 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v11, v11 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v10, v10 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v8, v8 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v9, v9 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v6 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v7 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v8 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v3, v3, v9 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v4, v4, v10 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v5, v5, v11 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_v6f16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v6 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v7 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v6 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v8 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v7 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v9 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v6 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v10 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v3, v3, v7 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v11 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v4, v4, v6 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v5, v5, v7 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v4 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v5, v5 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_v6f16: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v6, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v7, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v6, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v7, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v8, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v7, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v8, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v9, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v5, v5, v5 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v4, v4, v4 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v2, v2, v5 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v4 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v3 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v8 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v7 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v2, v2, v6 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_v6f16: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v6, v0, v0 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v7, v3, v3 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v6, v6, v7 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v3, v1, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v7, v4, v4 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v3, v7 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v2, v2 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v7, v5, v5 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v4, v4, v7 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v6, v0 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v3, v1 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v2, v4, v2 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-SDAG-LABEL: v_minimumnum_v6f16: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v3 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v4, v4 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v5, v5 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v2, v2, v3 |
| ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_minimumnum_v6f16: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v0, v0, v3 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v3, v4, v4 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v3, v5, v5 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v2, v2, v3 |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-SDAG-LABEL: v_minimumnum_v6f16: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v3 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v4, v4 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v5, v5 |
| ; GFX950-SDAG-NEXT: s_nop 0 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v2, v2, v3 |
| ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-GISEL-LABEL: v_minimumnum_v6f16: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v0, v0, v3 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v3, v4, v4 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v3, v5, v5 |
| ; GFX950-GISEL-NEXT: s_nop 0 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v2, v2, v3 |
| ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_v6f16: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v0, v0, v3 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v1, v1, v4 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v2, v2, v5 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_v6f16: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v0, v0, v3 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v1, v1, v4 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v2, v2, v5 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: v_minimumnum_v6f16: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v0, v3 |
| ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v1, v1, v4 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v2, v2, v5 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: v_minimumnum_v6f16: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v3 |
| ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v1, v1, v4 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v2, v2, v5 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_v6f16: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v3 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v4 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v2, v2, v5 |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_v6f16: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v3 |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v4 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v2, v2, v5 |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call <6 x half> @llvm.minimumnum.v6f16(<6 x half> %x, <6 x half> %y) |
| ret <6 x half> %result |
| } |
| |
| ; Exercises llvm.minimumnum on <8 x half> operands and returns the call result unchanged. |
| ; What the expected output below shows, per target family: |
| ;   - gfx7 performs the min in f32 (v_min_f32) around f16/f32 conversions, one VGPR per element. |
| ;   - gfx8 handles each 16-bit half with v_max_f16 / v_min_f16 (SDWA forms for the high halves) |
| ;     and merges the two halves with v_or_b32. |
| ;   - gfx9, gfx10 and gfx11 apply v_pk_max_f16 to each operand with itself, then v_pk_min_f16. |
| ;   - gfx12 does the same with the v_pk_max_num_f16 / v_pk_min_num_f16 mnemonics. |
| ; NOTE(review): the max of a register with itself is presumably input canonicalization |
| ; (NaN quieting) ahead of the min; confirm against the lowering code. |
| ; The check lines are autogenerated (see the NOTE at the top of the file); regenerate them |
| ; with utils/update_llc_test_checks.py rather than editing by hand. |
| define <8 x half> @v_minimumnum_v8f16(<8 x half> %x, <8 x half> %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_v8f16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v15, v15 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v14, v14 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v13, v13 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v12, v12 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v11, v11 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v10, v10 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v8, v8 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v9, v9 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v15, v15 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v14, v14 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v13, v13 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v12, v12 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v8, v8 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v9, v9 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v10, v10 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v11, v11 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v8 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v9 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v10 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v3, v3, v11 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v4, v4, v12 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v5, v5, v13 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v6, v6, v14 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v7, v7, v15 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_v8f16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v8 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v9 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v8 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v10 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v9 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v11 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v8 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v12 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v3, v3, v9 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v13 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v4, v4, v8 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v6 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v14 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v5, v5, v9 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v7 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v15 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v6, v6, v8 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v7, v7, v9 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v4 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v5, v5 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v6, v6 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v7, v7 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_v8f16: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v8, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v9, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v8, v9, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v9, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v10, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v10, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v11, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v10, v11, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v11, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v12, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v7, v7, v7 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v6, v6, v6 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v5, v5, v5 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v4, v4, v4 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v11, v12, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v3, v3, v7 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v2, v2, v6 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v5 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v4 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v11 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v10 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v2, v2, v9 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v3, v3, v8 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_v8f16: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v8, v0, v0 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v9, v4, v4 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v8, v8, v9 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v4, v1, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v9, v5, v5 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v4, v4, v9 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v5, v2, v2 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v9, v6, v6 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v6, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v5, v5, v9 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v2, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v6, v3, v3 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v9, v7, v7 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v6, v6, v9 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v3, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v8, v0 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v4, v1 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v2, v5, v2 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v3, v6, v3 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-SDAG-LABEL: v_minimumnum_v8f16: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v4 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v5, v5 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v4 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v6, v6 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v2, v2, v4 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v7, v7 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v3, v3, v4 |
| ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_minimumnum_v8f16: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v0, v0, v4 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v4, v5, v5 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v1, v1, v4 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v4, v6, v6 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v2, v2, v4 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v4, v7, v7 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v3, v3, v4 |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-SDAG-LABEL: v_minimumnum_v8f16: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v4 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v5, v5 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v4 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v6, v6 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v2, v2, v4 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v7, v7 |
| ; GFX950-SDAG-NEXT: s_nop 0 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v3, v3, v4 |
| ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-GISEL-LABEL: v_minimumnum_v8f16: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v0, v0, v4 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v5, v5 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v1, v1, v4 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v6, v6 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v2, v2, v4 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v7, v7 |
| ; GFX950-GISEL-NEXT: s_nop 0 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v3, v3, v4 |
| ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_v8f16: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v0, v0, v4 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v1, v1, v5 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v2, v2, v6 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v3, v3, v7 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_v8f16: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v0, v0, v4 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v1, v1, v5 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v2, v2, v6 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v3, v3, v7 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: v_minimumnum_v8f16: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v0, v4 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v1, v1, v5 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v2, v2, v6 |
| ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v3, v3, v7 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: v_minimumnum_v8f16: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v4 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v1, v1, v5 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v2, v2, v6 |
| ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v3, v3, v7 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_v8f16: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v4 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v5 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v2, v2, v6 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v3, v3, v7 |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_v8f16: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v4 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v5 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v2, v2, v6 |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v3, v3, v7 |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> %x, <8 x half> %y) |
| ret <8 x half> %result |
| } |
| |
| define <16 x half> @v_minimumnum_v16f16(<16 x half> %x, <16 x half> %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_v16f16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v16 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v16 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v17 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v17, v20 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v16 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v18 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v18, v21 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v17, v17 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v18, v18 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v16 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v19 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v19, v22 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v20, v23 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v4, v4, v17 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v19, v19 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v5, v5, v18 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v8, v8 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v3, v3, v16 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v16, off, s[0:3], s32 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v6, v6, v19 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v17, v24 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v9, v9 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v18, v25 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v10, v10 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v19, v26 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v20, v20 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v8, v8 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v17, v17 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v9, v9 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v18, v18 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v10, v10 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v19, v19 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v7, v7, v20 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v11, v11 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v20, v27 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v8, v8, v17 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v9, v9, v18 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v10, v10, v19 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v12, v12 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v17, v28 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v13, v13 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v18, v29 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v14, v14 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v19, v30 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v15, v15 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v11, v11 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v20, v20 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v12, v12 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v17, v17 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v13, v13 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v18, v18 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v14, v14 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v19, v19 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v15, v15 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v11, v11, v20 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v12, v12, v17 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v13, v13, v18 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v14, v14, v19 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v16 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v15, v15, v16 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_v16f16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v16 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v16 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v17 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v17, v20 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v16 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v18 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v18, v21 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v6 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v4, v4, v17 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v16 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v19 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v19, v22 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v5, v5, v18 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v7 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v3, v3, v16 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v16, off, s[0:3], s32 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v6, v6, v19 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v17, v23 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v8 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v18, v24 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v9 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v19, v25 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v10, v10 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v20, v26 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v7, v7, v17 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v8, v8, v18 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v9, v9, v19 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v10, v10, v20 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v11, v11 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v17, v27 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v12, v12 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v18, v28 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v13, v13 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v19, v29 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v14, v14 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v20, v30 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v15, v15 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v11, v11, v17 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v12, v12, v18 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v13, v13, v19 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v14, v14, v20 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v4 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v5, v5 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v6, v6 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v7, v7 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v8, v8 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v9, v9 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v10, v10 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v11, v11 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v12, v12 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v13, v13 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v14, v14 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v16 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v15, v15, v16 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v15, v15 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_v16f16: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v16, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v17, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v16, v17, v16 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v17, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v18, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v17, v18, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v18, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v19, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v18, v19, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v19, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v20, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v19, v20, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v20, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v21, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v20, v21, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v21, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v22, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v21, v22, v21 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v22, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v23, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v22, v23, v22 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v23, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v24, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v15, v15, v15 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v7, v7, v7 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v14, v14, v14 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v6, v6, v6 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v13, v13, v13 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v5, v5, v5 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v12, v12, v12 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v4, v4, v4 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v11, v11, v11 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v10, v10, v10 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v9, v9, v9 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v8, v8, v8 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v23, v24, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v7, v7, v15 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v6, v6, v14 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v5, v5, v13 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v4, v4, v12 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v3, v3, v11 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v2, v2, v10 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v9 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v8 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v23 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v22 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v2, v2, v21 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v3, v3, v20 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v4, v4, v19 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v5, v5, v18 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v6, v6, v17 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v7, v7, v16 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_v16f16: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v16, v0, v0 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v8, v8 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v8, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v16, v16, v17 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v8, v1, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v9, v9 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v9, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v8, v8, v17 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v1, v1, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v9, v2, v2 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v10, v10 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v10, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v9, v9, v17 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v2, v2, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v10, v3, v3 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v11, v11 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v11, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v10, v10, v17 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v3, v3, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v11, v4, v4 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v12, v12 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v12, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v11, v11, v17 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v4, v4, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v12, v5, v5 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v13, v13 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v13, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v12, v12, v17 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v5, v5, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v13, v6, v6 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v14, v14 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v6, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v14, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v13, v13, v17 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v6, v6, v14 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v14, v7, v7 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v15, v15 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v15, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v14, v14, v17 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v7, v7, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v16, v0 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v8, v1 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v2, v9, v2 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v3, v10, v3 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v4, v11, v4 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v5, v12, v5 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v6, v13, v6 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v7, v14, v7 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-SDAG-LABEL: v_minimumnum_v16f16: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v8, v8 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v8 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v9, v9 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v8 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v10, v10 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v2, v2, v8 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v11, v11 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v3, v3, v8 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v12, v12 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v4, v4, v8 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v13, v13 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v5, v5, v8 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v14, v14 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v6, v6, v8 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v15, v15 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v7, v7, v8 |
| ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_minimumnum_v16f16: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v8, v8, v8 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v0, v0, v8 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v8, v9, v9 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v1, v1, v8 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v8, v10, v10 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v2, v2, v8 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v8, v11, v11 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v3, v3, v8 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v8, v12, v12 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v4, v4, v8 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v8, v13, v13 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v5, v5, v8 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v8, v14, v14 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v6, v6, v8 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v8, v15, v15 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v7, v7, v8 |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-SDAG-LABEL: v_minimumnum_v16f16: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v8, v8 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v8 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v9, v9 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v8 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v10, v10 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v2, v2, v8 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v11, v11 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v3, v3, v8 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v12, v12 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v4, v4, v8 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v13, v13 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v5, v5, v8 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v14, v14 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v6, v6, v8 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v15, v15 |
| ; GFX950-SDAG-NEXT: s_nop 0 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v7, v7, v8 |
| ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-GISEL-LABEL: v_minimumnum_v16f16: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v8, v8, v8 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v0, v0, v8 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v8, v9, v9 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v1, v1, v8 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v8, v10, v10 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v2, v2, v8 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v8, v11, v11 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v3, v3, v8 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v8, v12, v12 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v4, v4, v8 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v8, v13, v13 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v5, v5, v8 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v8, v14, v14 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v6, v6, v8 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v8, v15, v15 |
| ; GFX950-GISEL-NEXT: s_nop 0 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v7, v7, v8 |
| ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_v16f16: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v8, v8, v8 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v9, v9, v9 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v10, v10, v10 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v0, v0, v8 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v8, v11, v11 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v1, v1, v9 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v2, v2, v10 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v9, v12, v12 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v10, v13, v13 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v11, v14, v14 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v12, v15, v15 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v3, v3, v8 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v4, v4, v9 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v5, v5, v10 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v6, v6, v11 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v7, v7, v12 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_v16f16: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v8, v8, v8 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v9, v9, v9 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v10, v10, v10 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v0, v0, v8 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v1, v1, v9 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v8, v11, v11 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v2, v2, v10 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v9, v12, v12 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v10, v13, v13 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v11, v14, v14 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v12, v15, v15 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v3, v3, v8 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v4, v4, v9 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v5, v5, v10 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v6, v6, v11 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v7, v7, v12 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: v_minimumnum_v16f16: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v8, v8, v8 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v9, v9, v9 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v10, v10, v10 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v0, v8 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v8, v11, v11 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v1, v1, v9 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v2, v2, v10 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v9, v12, v12 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v10, v13, v13 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v11, v14, v14 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v12, v15, v15 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v3, v3, v8 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v4, v4, v9 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v5, v5, v10 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v6, v6, v11 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v7, v7, v12 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: v_minimumnum_v16f16: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v8, v8, v8 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v9, v9, v9 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v10, v10, v10 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v8 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v1, v1, v9 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v8, v11, v11 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v2, v2, v10 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v9, v12, v12 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v10, v13, v13 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v11, v14, v14 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v12, v15, v15 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v3, v3, v8 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v4, v4, v9 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v5, v5, v10 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v6, v6, v11 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v7, v7, v12 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_v16f16: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v8, v8, v8 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v9, v9, v9 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v10, v10, v10 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v8 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v8, v11, v11 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v9 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v2, v2, v10 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v9, v12, v12 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v10, v13, v13 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v11, v14, v14 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v12, v15, v15 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v3, v3, v8 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v4, v4, v9 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v5, v5, v10 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v6, v6, v11 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v7, v7, v12 |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_v16f16: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v8, v8, v8 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v9, v9, v9 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v10, v10, v10 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v8 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v9 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v8, v11, v11 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v2, v2, v10 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v9, v12, v12 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v10, v13, v13 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v11, v14, v14 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v12, v15, v15 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v3, v3, v8 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v4, v4, v9 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v5, v5, v10 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v6, v6, v11 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v7, v7, v12 |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> %x, <16 x half> %y) |
| ret <16 x half> %result |
| } |
| |
| define <32 x half> @v_minimumnum_v32f16(<32 x half> %x, <32 x half> %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_v32f16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v8, v8 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v9, v9 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v10, v10 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v11, v11 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v8, v8 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v9, v9 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v10, v10 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v11, v11 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v12, v12 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v13, v13 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v14, v14 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v15, v15 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v12, v12 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v13, v13 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v14, v14 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v15, v15 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v16, v16 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v17, v17 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v18, v18 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v19, v19 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v16, v16 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v17, v17 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v18, v18 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v19, v19 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v20, v20 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v21, v21 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v22, v22 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v23, v23 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v20, v20 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v21, v21 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v22, v22 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v23, v23 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v24, v24 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v25, v25 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v26, v26 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v27, v27 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v24, v24 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v25, v25 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v26, v26 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v27, v27 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v28, v28 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v29, v29 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v30, v30 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:128 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v28, v28 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v29, v29 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v30, v30 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:8 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v32, v32 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v32, v32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v3, v3, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v4, v4, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v5, v5, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v6, v6, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v7, v7, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v8, v8, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:40 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v9, v9, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v10, v10, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:48 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v11, v11, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v12, v12, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:56 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v13, v13, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v14, v14, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v15, v15, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v16, v16, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:72 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v17, v17, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v18, v18, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:80 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v19, v19, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v20, v20, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:88 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v21, v21, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v22, v22, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:96 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v23, v23, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v24, v24, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:104 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v25, v25, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v26, v26, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:112 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v27, v27, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v28, v28, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:120 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v29, v29, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:124 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v30, v30, v31 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v31, v31, v32 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_v32f16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v6, v6 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v7, v7 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v8, v8 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v9, v9 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v10, v10 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v11, v11 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v12, v12 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v13, v13 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v14, v14 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v15, v15 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v16, v16 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v17, v17 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v18, v18 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v19, v19 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v20, v20 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v21, v21 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v22, v22 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v23, v23 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v24, v24 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v25, v25 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v26, v26 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v27, v27 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v28, v28 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v29, v29 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v30, v30 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:128 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:8 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v32, v32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:16 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v3, v3, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v4, v4, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v4 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v5, v5, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v5, v5 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v6, v6, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:32 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v6, v6 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v7, v7, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v7, v7 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v8, v8, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:40 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v8, v8 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v9, v9, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v9, v9 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v10, v10, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:48 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v10, v10 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v11, v11, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v11, v11 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v12, v12, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:56 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v12, v12 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v13, v13, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v13, v13 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v14, v14, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:64 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v14, v14 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v15, v15, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v15, v15 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v16, v16, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:72 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v16, v16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v17, v17, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v17, v17 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v18, v18, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:80 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v18, v18 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v19, v19, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v19, v19 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v20, v20, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:88 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v20, v20 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v21, v21, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v21, v21 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v22, v22, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:96 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v22, v22 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v23, v23, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v23, v23 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v24, v24, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:104 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v24, v24 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v25, v25, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v25, v25 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v26, v26, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:112 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v26, v26 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v27, v27, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v27, v27 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v28, v28, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:120 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v28, v28 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v29, v29, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:124 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v29, v29 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v30, v30, v31 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v30, v30 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v31, v31, v32 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v31, v31 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_v32f16: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill |
| ; GFX8-SDAG-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill |
| ; GFX8-SDAG-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill |
| ; GFX8-SDAG-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill |
| ; GFX8-SDAG-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill |
| ; GFX8-SDAG-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill |
| ; GFX8-SDAG-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill |
| ; GFX8-SDAG-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill |
| ; GFX8-SDAG-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill |
| ; GFX8-SDAG-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill |
| ; GFX8-SDAG-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill |
| ; GFX8-SDAG-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v38, v27, v27 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v39, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v48, v26, v26 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v49, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v50, v25, v25 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v51, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v40, v22, v22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v41, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v58, v17, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v59, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v17, v17, v17 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v52, v24, v24 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v53, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v54, v23, v23 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v55, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v42, v21, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v43, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v44, v20, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v45, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v46, v19, v19 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v47, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v56, v18, v18 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v57, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v38, v39, v38 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v39, v49, v48 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v48, v51, v50 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v51, v41, v40 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v40, v59, v58 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v17 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v49, v53, v52 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v50, v55, v54 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v52, v43, v42 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v53, v45, v44 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v54, v47, v46 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v55, v57, v56 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v40 |
| ; GFX8-SDAG-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload |
| ; GFX8-SDAG-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload |
| ; GFX8-SDAG-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload |
| ; GFX8-SDAG-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload |
| ; GFX8-SDAG-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload |
| ; GFX8-SDAG-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload |
| ; GFX8-SDAG-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload |
| ; GFX8-SDAG-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload |
| ; GFX8-SDAG-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload |
| ; GFX8-SDAG-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload |
| ; GFX8-SDAG-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload |
| ; GFX8-SDAG-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload |
| ; GFX8-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v32, v30, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v33, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v34, v29, v29 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v35, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v36, v28, v28 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v37, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v32, v33, v32 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v33, v16, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v34, v35, v34 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v35, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v36, v37, v36 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v37, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v15, v15, v15 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v33, v35, v33 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v30, v30, v30 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v14, v14, v14 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v29, v29, v29 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v13, v13, v13 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v28, v28, v28 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v12, v12, v12 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v27, v27, v27 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v11, v11, v11 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v26, v26, v26 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v10, v10, v10 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v25, v25, v25 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v9, v9, v9 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v24, v24, v24 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v8, v8, v8 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v23, v23, v23 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v7, v7, v7 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v22, v22, v22 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v6, v6, v6 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v21, v21, v21 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v5, v5, v5 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v20, v20, v20 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v4, v4, v4 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v19, v19, v19 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v3, v3 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v18, v18, v18 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v2, v2 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v16, v16, v16 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v14, v14, v30 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v13, v13, v29 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v12, v12, v28 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v11, v11, v27 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v10, v10, v26 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v9, v9, v25 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v8, v8, v24 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v7, v7, v23 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v6, v6, v22 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v5, v5, v21 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v4, v4, v20 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v3, v3, v19 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v2, v2, v18 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v16 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v33 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v2, v2, v55 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v3, v3, v54 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v4, v4, v53 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v5, v5, v52 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v6, v6, v51 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v7, v7, v50 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v8, v8, v49 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v9, v9, v48 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v10, v10, v39 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v11, v11, v38 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v12, v12, v36 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v13, v13, v34 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v14, v14, v32 |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f16_sdwa v35, v31, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_max_f16_e32 v31, v31, v31 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v35, v37, v35 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v15, v15, v31 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v15, v15, v35 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_v32f16: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v31, v0, v0 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v16, v16 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v16, v16, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v31, v31, v32 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v16 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v16, v1, v1 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v17, v17 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v17, v17, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v16, v16, v32 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v1, v1, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v17, v2, v2 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v18, v18 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v18, v18, v18 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v17, v17, v32 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v2, v2, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v18, v3, v3 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v19, v19 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v19, v19, v19 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v18, v18, v32 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v3, v3, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v19, v4, v4 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v20, v20 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v20, v20, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v19, v19, v32 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v4, v4, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v20, v5, v5 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v21, v21 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v21, v21, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v20, v20, v32 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v5, v5, v21 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v21, v6, v6 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v22, v22 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v6, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v22, v22, v22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v21, v21, v32 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v6, v6, v22 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v22, v7, v7 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v23, v23 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v23, v23, v23 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v22, v22, v32 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v7, v7, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v23, v8, v8 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v24, v24 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v8, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v24, v24, v24 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v23, v23, v32 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v8, v8, v24 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v24, v9, v9 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v25, v25 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v9, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v25, v25, v25 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v24, v24, v32 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v9, v9, v25 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v25, v10, v10 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v26, v26 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v10, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v26, v26, v26 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v25, v25, v32 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v10, v10, v26 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v26, v11, v11 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v27, v27 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v11, v11, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v27, v27, v27 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v26, v26, v32 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v11, v11, v27 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v27, v12, v12 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v28, v28 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v12, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v28, v28, v28 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v27, v27, v32 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v12, v12, v28 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v28, v13, v13 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v29, v29 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v13, v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v29, v29, v29 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v28, v28, v32 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v13, v13, v29 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v29, v14, v14 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v30, v30 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v14, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v30, v30, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v14, v14, v30 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: buffer_load_dword v30, off, s[0:3], s32 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v29, v29, v32 |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v32, v15, v15 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v15, v15, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v31, v0 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v16, v1 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v2, v17, v2 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v3, v18, v3 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v4, v19, v4 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v5, v20, v5 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v6, v21, v6 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v7, v22, v7 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v8, v23, v8 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v9, v24, v9 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v10, v25, v10 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v11, v26, v11 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v12, v27, v12 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v13, v28, v13 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v14, v29, v14 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: v_max_f16_e32 v33, v30, v30 |
| ; GFX8-GISEL-NEXT: v_max_f16_sdwa v30, v30, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v32, v32, v33 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v15, v15, v30 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v15, v32, v15 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-SDAG-LABEL: v_minimumnum_v32f16: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v16, v16 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v16 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v17, v17 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v16 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v18, v18 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v2, v2, v16 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v19, v19 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v3, v3, v16 |
| ; GFX900-SDAG-NEXT: buffer_load_dword v16, off, s[0:3], s32 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v17, v20, v20 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v18, v21, v21 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v19, v22, v22 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v20, v23, v23 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v21, v24, v24 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v8, v8, v8 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v22, v25, v25 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v9, v9, v9 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v23, v26, v26 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v10, v10, v10 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v24, v27, v27 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v11, v11, v11 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v25, v28, v28 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v12, v12, v12 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v26, v29, v29 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v13, v13, v13 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v27, v30, v30 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v14, v14, v14 |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v15, v15, v15 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v4, v4, v17 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v5, v5, v18 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v6, v6, v19 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v7, v7, v20 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v8, v8, v21 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v9, v9, v22 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v10, v10, v23 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v11, v11, v24 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v12, v12, v25 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v13, v13, v26 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v14, v14, v27 |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX900-SDAG-NEXT: v_pk_max_f16 v16, v16, v16 |
| ; GFX900-SDAG-NEXT: v_pk_min_f16 v15, v15, v16 |
| ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_minimumnum_v32f16: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v16, v16, v16 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v0, v0, v16 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v16, v17, v17 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v1, v1, v16 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v16, v18, v18 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v2, v2, v16 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v16, v19, v19 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v3, v3, v16 |
| ; GFX900-GISEL-NEXT: buffer_load_dword v16, off, s[0:3], s32 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v17, v20, v20 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v18, v21, v21 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v19, v22, v22 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v20, v23, v23 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v8, v8, v8 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v21, v24, v24 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v9, v9, v9 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v22, v25, v25 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v10, v10, v10 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v23, v26, v26 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v11, v11, v11 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v24, v27, v27 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v12, v12, v12 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v25, v28, v28 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v13, v13, v13 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v26, v29, v29 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v14, v14, v14 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v27, v30, v30 |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v15, v15, v15 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v4, v4, v17 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v5, v5, v18 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v6, v6, v19 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v7, v7, v20 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v8, v8, v21 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v9, v9, v22 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v10, v10, v23 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v11, v11, v24 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v12, v12, v25 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v13, v13, v26 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v14, v14, v27 |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX900-GISEL-NEXT: v_pk_max_f16 v16, v16, v16 |
| ; GFX900-GISEL-NEXT: v_pk_min_f16 v15, v15, v16 |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-SDAG-LABEL: v_minimumnum_v32f16: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-SDAG-NEXT: scratch_load_dword v31, off, s32 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v16, v16, v16 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v17, v17, v17 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v18, v18, v18 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v19, v19, v19 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v20, v20, v20 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v21, v21, v21 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v22, v22, v22 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v23, v23, v23 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v24, v24, v24 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v8, v8, v8 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v25, v25, v25 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v9, v9, v9 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v26, v26, v26 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v10, v10, v10 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v27, v27, v27 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v11, v11, v11 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v28, v28, v28 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v12, v12, v12 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v29, v29, v29 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v13, v13, v13 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v30, v30, v30 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v14, v14, v14 |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v15, v15, v15 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v16 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v17 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v2, v2, v18 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v3, v3, v19 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v4, v4, v20 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v5, v5, v21 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v6, v6, v22 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v7, v7, v23 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v8, v8, v24 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v9, v9, v25 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v10, v10, v26 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v11, v11, v27 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v12, v12, v28 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v13, v13, v29 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v14, v14, v30 |
| ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX950-SDAG-NEXT: v_pk_max_f16 v16, v31, v31 |
| ; GFX950-SDAG-NEXT: s_nop 0 |
| ; GFX950-SDAG-NEXT: v_pk_min_f16 v15, v15, v16 |
| ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-GISEL-LABEL: v_minimumnum_v32f16: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-GISEL-NEXT: scratch_load_dword v31, off, s32 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v16, v16, v16 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v17, v17, v17 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v18, v18, v18 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v19, v19, v19 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v20, v20, v20 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v21, v21, v21 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v22, v22, v22 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v23, v23, v23 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v8, v8, v8 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v24, v24, v24 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v9, v9, v9 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v25, v25, v25 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v10, v10, v10 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v26, v26, v26 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v11, v11, v11 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v27, v27, v27 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v12, v12, v12 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v28, v28, v28 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v13, v13, v13 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v29, v29, v29 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v14, v14, v14 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v30, v30, v30 |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v15, v15, v15 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v0, v0, v16 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v1, v1, v17 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v2, v2, v18 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v3, v3, v19 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v4, v4, v20 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v5, v5, v21 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v6, v6, v22 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v7, v7, v23 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v8, v8, v24 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v9, v9, v25 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v10, v10, v26 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v11, v11, v27 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v12, v12, v28 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v13, v13, v29 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v14, v14, v30 |
| ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX950-GISEL-NEXT: v_pk_max_f16 v16, v31, v31 |
| ; GFX950-GISEL-NEXT: s_nop 0 |
| ; GFX950-GISEL-NEXT: v_pk_min_f16 v15, v15, v16 |
| ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_v32f16: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v16, v16, v16 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v17, v17, v17 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v18, v18, v18 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v19, v19, v19 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v20, v20, v20 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v21, v21, v21 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v22, v22, v22 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v23, v23, v23 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v24, v24, v24 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v8, v8, v8 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v25, v25, v25 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v9, v9, v9 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v26, v26, v26 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v10, v10, v10 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v27, v27, v27 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v11, v11, v11 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v28, v28, v28 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v12, v12, v12 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v29, v29, v29 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v13, v13, v13 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v30, v30, v30 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v14, v14, v14 |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v15, v15, v15 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v0, v0, v16 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v1, v1, v17 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v2, v2, v18 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v3, v3, v19 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v4, v4, v20 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v5, v5, v21 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v6, v6, v22 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v7, v7, v23 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v8, v8, v24 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v9, v9, v25 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v10, v10, v26 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v11, v11, v27 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v12, v12, v28 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v13, v13, v29 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v14, v14, v30 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: v_pk_max_f16 v16, v31, v31 |
| ; GFX10-SDAG-NEXT: v_pk_min_f16 v15, v15, v16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_v32f16: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v16, v16, v16 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v17, v17, v17 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v18, v18, v18 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v19, v19, v19 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v20, v20, v20 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v21, v21, v21 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v22, v22, v22 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v23, v23, v23 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v8, v8, v8 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v24, v24, v24 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v9, v9, v9 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v25, v25, v25 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v10, v10, v10 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v26, v26, v26 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v11, v11, v11 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v27, v27, v27 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v12, v12, v12 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v28, v28, v28 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v13, v13, v13 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v29, v29, v29 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v14, v14, v14 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v30, v30, v30 |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v15, v15, v15 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v0, v0, v16 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v1, v1, v17 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v2, v2, v18 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v3, v3, v19 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v4, v4, v20 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v5, v5, v21 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v6, v6, v22 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v7, v7, v23 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v8, v8, v24 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v9, v9, v25 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v10, v10, v26 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v11, v11, v27 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v12, v12, v28 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v13, v13, v29 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v14, v14, v30 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: v_pk_max_f16 v16, v31, v31 |
| ; GFX10-GISEL-NEXT: v_pk_min_f16 v15, v15, v16 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: v_minimumnum_v32f16: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: scratch_load_b32 v31, off, s32 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v16, v16, v16 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v17, v17, v17 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v18, v18, v18 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v19, v19, v19 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v20, v20, v20 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v21, v21, v21 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v5, v5, v5 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v22, v22, v22 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v6, v6, v6 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v23, v23, v23 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v7, v7, v7 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v24, v24, v24 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v8, v8, v8 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v25, v25, v25 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v9, v9, v9 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v26, v26, v26 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v10, v10, v10 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v27, v27, v27 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v11, v11, v11 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v28, v28, v28 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v12, v12, v12 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v29, v29, v29 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v13, v13, v13 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v30, v30, v30 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v14, v14, v14 |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v15, v15, v15 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v0, v16 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v1, v1, v17 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v2, v2, v18 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v3, v3, v19 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v4, v4, v20 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v5, v5, v21 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v6, v6, v22 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v7, v7, v23 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v8, v8, v24 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v9, v9, v25 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v10, v10, v26 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v11, v11, v27 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v12, v12, v28 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v13, v13, v29 |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v14, v14, v30 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: v_pk_max_f16 v16, v31, v31 |
| ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-SDAG-NEXT: v_pk_min_f16 v15, v15, v16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: v_minimumnum_v32f16: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: scratch_load_b32 v31, off, s32 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v0 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v16, v16, v16 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v1 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v17, v17, v17 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v2 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v18, v18, v18 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v3 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v19, v19, v19 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v4, v4, v4 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v20, v20, v20 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v5, v5, v5 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v21, v21, v21 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v6, v6, v6 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v22, v22, v22 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v7, v7, v7 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v23, v23, v23 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v8, v8, v8 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v24, v24, v24 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v9, v9, v9 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v25, v25, v25 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v10, v10, v10 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v26, v26, v26 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v11, v11, v11 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v27, v27, v27 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v12, v12, v12 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v28, v28, v28 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v13, v13, v13 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v29, v29, v29 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v14, v14, v14 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v30, v30, v30 |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v15, v15, v15 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v16 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v1, v1, v17 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v2, v2, v18 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v3, v3, v19 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v4, v4, v20 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v5, v5, v21 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v6, v6, v22 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v7, v7, v23 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v8, v8, v24 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v9, v9, v25 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v10, v10, v26 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v11, v11, v27 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v12, v12, v28 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v13, v13, v29 |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v14, v14, v30 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: v_pk_max_f16 v16, v31, v31 |
| ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-GISEL-NEXT: v_pk_min_f16 v15, v15, v16 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_v32f16: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: scratch_load_b32 v31, off, s32 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v16, v16, v16 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v17, v17, v17 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v18, v18, v18 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v19, v19, v19 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v20, v20, v20 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v21, v21, v21 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v22, v22, v22 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v23, v23, v23 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v24, v24, v24 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v8, v8, v8 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v25, v25, v25 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v9, v9, v9 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v26, v26, v26 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v10, v10, v10 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v27, v27, v27 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v11, v11, v11 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v28, v28, v28 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v12, v12, v12 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v29, v29, v29 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v13, v13, v13 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v30, v30, v30 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v14, v14, v14 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v15, v15, v15 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v16 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v17 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v2, v2, v18 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v3, v3, v19 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v4, v4, v20 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v5, v5, v21 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v6, v6, v22 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v7, v7, v23 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v8, v8, v24 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v9, v9, v25 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v10, v10, v26 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v11, v11, v27 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v12, v12, v28 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v13, v13, v29 |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v14, v14, v30 |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v16, v31, v31 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v15, v15, v16 |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_v32f16: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: scratch_load_b32 v31, off, s32 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v16, v16, v16 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v17, v17, v17 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v18, v18, v18 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v19, v19, v19 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v20, v20, v20 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v21, v21, v21 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v22, v22, v22 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v23, v23, v23 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v8, v8, v8 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v24, v24, v24 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v9, v9, v9 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v25, v25, v25 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v10, v10, v10 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v26, v26, v26 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v11, v11, v11 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v27, v27, v27 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v12, v12, v12 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v28, v28, v28 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v13, v13, v13 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v29, v29, v29 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v14, v14, v14 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v30, v30, v30 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v15, v15, v15 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v16 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v17 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v2, v2, v18 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v3, v3, v19 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v4, v4, v20 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v5, v5, v21 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v6, v6, v22 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v7, v7, v23 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v8, v8, v24 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v9, v9, v25 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v10, v10, v26 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v11, v11, v27 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v12, v12, v28 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v13, v13, v29 |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v14, v14, v30 |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v16, v31, v31 |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v15, v15, v16 |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call <32 x half> @llvm.minimumnum.v32f16(<32 x half> %x, <32 x half> %y) |
| ret <32 x half> %result |
| } |
| |
| ; <2 x float> case of llvm.minimumnum with no fast-math flags on the call. |
| ; The expected code applies one extra instruction to every operand lane before |
| ; the min itself, namely a v_mul_f32 by 1.0 on GFX7/GFX8, a v_max_f32 of the |
| ; register with itself on GFX9/GFX10/GFX11, and v_max_num_f32 on GFX12 |
| ; (presumably to quiet/canonicalize NaN inputs -- NOTE(review) confirm against |
| ; the lowering). The SDAG and GISEL expectations contain the same instructions |
| ; and differ only in the order in which they are emitted. |
| ; The check lines below are autogenerated; regenerate them with |
| ; utils/update_llc_test_checks.py instead of editing them by hand. |
| define <2 x float> @v_minimumnum_v2f32(<2 x float> %x, <2 x float> %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_v2f32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2 |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v2 |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v3 |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v2 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_v2f32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v2 |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v3 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v2 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_v2f32: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2 |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v2 |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v3 |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; GFX8-SDAG-NEXT: v_min_f32_e32 v1, v1, v2 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_v2f32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2 |
| ; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v2 |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v3 |
| ; GFX8-GISEL-NEXT: v_min_f32_e32 v1, v1, v2 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: v_minimumnum_v2f32: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 |
| ; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v2 |
| ; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v3, v3 |
| ; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 |
| ; GFX9-SDAG-NEXT: v_min_f32_e32 v1, v1, v2 |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: v_minimumnum_v2f32: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 |
| ; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v2 |
| ; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 |
| ; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v3, v3 |
| ; GFX9-GISEL-NEXT: v_min_f32_e32 v1, v1, v2 |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_v2f32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 |
| ; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX10-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 |
| ; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 |
| ; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v2 |
| ; GFX10-SDAG-NEXT: v_min_f32_e32 v1, v1, v3 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_v2f32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX10-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 |
| ; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 |
| ; GFX10-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 |
| ; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v2 |
| ; GFX10-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: v_minimumnum_v2f32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3 |
| ; GFX11-SDAG-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 |
| ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-SDAG-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: v_minimumnum_v2f32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 |
| ; GFX11-GISEL-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3 |
| ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-GISEL-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_v2f32: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 |
| ; GFX12-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-SDAG-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3 |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_v2f32: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 |
| ; GFX12-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3 |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
|   %result = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> %x, <2 x float> %y) |
|   ret <2 x float> %result |
| } |
| |
| ; <2 x float> llvm.minimumnum where the call carries the nnan flag. The expected |
| ; code is only the min itself (two v_min_f32 up to GFX10, one v_dual_min_f32 on |
| ; GFX11, one v_dual_min_num_f32 on GFX12) with no extra instruction applied to |
| ; the operands beforehand. The checks use the plain per-target prefixes, without |
| ; an -SDAG or -GISEL suffix. |
| ; The check lines below are autogenerated; regenerate them with |
| ; utils/update_llc_test_checks.py instead of editing them by hand. |
| define <2 x float> @v_minimumnum_v2f32_nnan(<2 x float> %x, <2 x float> %y) { |
| ; GFX7-LABEL: v_minimumnum_v2f32_nnan: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_min_f32_e32 v0, v0, v2 |
| ; GFX7-NEXT: v_min_f32_e32 v1, v1, v3 |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v_minimumnum_v2f32_nnan: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_min_f32_e32 v0, v0, v2 |
| ; GFX8-NEXT: v_min_f32_e32 v1, v1, v3 |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_v2f32_nnan: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_min_f32_e32 v0, v0, v2 |
| ; GFX9-NEXT: v_min_f32_e32 v1, v1, v3 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_v2f32_nnan: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_min_f32_e32 v0, v0, v2 |
| ; GFX10-NEXT: v_min_f32_e32 v1, v1, v3 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_v2f32_nnan: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_minimumnum_v2f32_nnan: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
|   %result = call nnan <2 x float> @llvm.minimumnum.v2f32(<2 x float> %x, <2 x float> %y) |
|   ret <2 x float> %result |
| } |
| |
; Codegen test for llvm.minimumnum on <3 x float> with no fast-math flags on
; the call. The expected code handles the three lanes independently
; (v0..v2 combined with v3..v5). Both operands of a lane first go through an
; identity operation: v_mul_f32 by 1.0 on gfx7/gfx8, v_max_f32 of a register
; with itself on gfx9/gfx10/gfx11, and v_max_num_f32 on gfx12. The two
; results are then combined with v_min_f32 (v_min_num_f32 on gfx12).
; SelectionDAG and GlobalISel have separate check blocks that differ only in
; the order (or v_dual_* pairing) of those per-operand instructions.
; NOTE(review): the identity operations presumably quiet signaling NaNs
; before the min - confirm against the minimumnum lowering.
| define <3 x float> @v_minimumnum_v3f32(<3 x float> %x, <3 x float> %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_v3f32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3 |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v3 |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v4 |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v3 |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v5 |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v3 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_v3f32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v3 |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v4 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2 |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v5 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v3 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_v3f32: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3 |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v3 |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v4 |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; GFX8-SDAG-NEXT: v_min_f32_e32 v1, v1, v3 |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v5 |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2 |
| ; GFX8-SDAG-NEXT: v_min_f32_e32 v2, v2, v3 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_v3f32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3 |
| ; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v3 |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v4 |
| ; GFX8-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2 |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v5 |
| ; GFX8-GISEL-NEXT: v_min_f32_e32 v2, v2, v3 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: v_minimumnum_v3f32: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 |
| ; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v3 |
| ; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v4, v4 |
| ; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 |
| ; GFX9-SDAG-NEXT: v_min_f32_e32 v1, v1, v3 |
| ; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v5, v5 |
| ; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 |
| ; GFX9-SDAG-NEXT: v_min_f32_e32 v2, v2, v3 |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: v_minimumnum_v3f32: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 |
| ; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v3 |
| ; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 |
| ; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v4, v4 |
| ; GFX9-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 |
| ; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 |
| ; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v5, v5 |
| ; GFX9-GISEL-NEXT: v_min_f32_e32 v2, v2, v3 |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_v3f32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 |
| ; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX10-SDAG-NEXT: v_max_f32_e32 v4, v4, v4 |
| ; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 |
| ; GFX10-SDAG-NEXT: v_max_f32_e32 v5, v5, v5 |
| ; GFX10-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 |
| ; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v3 |
| ; GFX10-SDAG-NEXT: v_min_f32_e32 v1, v1, v4 |
| ; GFX10-SDAG-NEXT: v_min_f32_e32 v2, v2, v5 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_v3f32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX10-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 |
| ; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 |
| ; GFX10-GISEL-NEXT: v_max_f32_e32 v4, v4, v4 |
| ; GFX10-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 |
| ; GFX10-GISEL-NEXT: v_max_f32_e32 v5, v5, v5 |
| ; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v3 |
| ; GFX10-GISEL-NEXT: v_min_f32_e32 v1, v1, v4 |
| ; GFX10-GISEL-NEXT: v_min_f32_e32 v2, v2, v5 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: v_minimumnum_v3f32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: v_dual_max_f32 v3, v3, v3 :: v_dual_max_f32 v0, v0, v0 |
| ; GFX11-SDAG-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v1, v1, v1 |
| ; GFX11-SDAG-NEXT: v_dual_max_f32 v5, v5, v5 :: v_dual_max_f32 v2, v2, v2 |
| ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX11-SDAG-NEXT: v_dual_min_f32 v0, v0, v3 :: v_dual_min_f32 v1, v1, v4 |
| ; GFX11-SDAG-NEXT: v_min_f32_e32 v2, v2, v5 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: v_minimumnum_v3f32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v3, v3, v3 |
| ; GFX11-GISEL-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v4, v4, v4 |
| ; GFX11-GISEL-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v5, v5, v5 |
| ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX11-GISEL-NEXT: v_dual_min_f32 v0, v0, v3 :: v_dual_min_f32 v1, v1, v4 |
| ; GFX11-GISEL-NEXT: v_min_f32_e32 v2, v2, v5 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_v3f32: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_dual_max_num_f32 v3, v3, v3 :: v_dual_max_num_f32 v0, v0, v0 |
| ; GFX12-SDAG-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v1, v1, v1 |
| ; GFX12-SDAG-NEXT: v_dual_max_num_f32 v5, v5, v5 :: v_dual_max_num_f32 v2, v2, v2 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX12-SDAG-NEXT: v_dual_min_num_f32 v0, v0, v3 :: v_dual_min_num_f32 v1, v1, v4 |
| ; GFX12-SDAG-NEXT: v_min_num_f32_e32 v2, v2, v5 |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_v3f32: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v3, v3, v3 |
| ; GFX12-GISEL-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v4, v4, v4 |
| ; GFX12-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v5, v5, v5 |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX12-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v3 :: v_dual_min_num_f32 v1, v1, v4 |
| ; GFX12-GISEL-NEXT: v_min_num_f32_e32 v2, v2, v5 |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call <3 x float> @llvm.minimumnum.v3f32(<3 x float> %x, <3 x float> %y) |
| ret <3 x float> %result |
| } |
| |
; Codegen test for llvm.minimumnum on <3 x float> with the nnan flag on the
; call. For every target the expected code is only one min per lane
; (v_min_f32, or v_min_num_f32 on gfx12) applied directly to the incoming
; registers v0..v2 and v3..v5, with no per-operand instruction in front of
; it. The checks use the common per-target prefixes rather than separate
; SDAG/GISEL ones. On gfx11 and gfx12 lanes 0 and 1 are fused into a single
; v_dual_* instruction and lane 2 stays a plain e32 min.
| define <3 x float> @v_minimumnum_v3f32_nnan(<3 x float> %x, <3 x float> %y) { |
| ; GFX7-LABEL: v_minimumnum_v3f32_nnan: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_min_f32_e32 v0, v0, v3 |
| ; GFX7-NEXT: v_min_f32_e32 v1, v1, v4 |
| ; GFX7-NEXT: v_min_f32_e32 v2, v2, v5 |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v_minimumnum_v3f32_nnan: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_min_f32_e32 v0, v0, v3 |
| ; GFX8-NEXT: v_min_f32_e32 v1, v1, v4 |
| ; GFX8-NEXT: v_min_f32_e32 v2, v2, v5 |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_v3f32_nnan: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_min_f32_e32 v0, v0, v3 |
| ; GFX9-NEXT: v_min_f32_e32 v1, v1, v4 |
| ; GFX9-NEXT: v_min_f32_e32 v2, v2, v5 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_v3f32_nnan: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_min_f32_e32 v0, v0, v3 |
| ; GFX10-NEXT: v_min_f32_e32 v1, v1, v4 |
| ; GFX10-NEXT: v_min_f32_e32 v2, v2, v5 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_v3f32_nnan: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_min_f32 v0, v0, v3 :: v_dual_min_f32 v1, v1, v4 |
| ; GFX11-NEXT: v_min_f32_e32 v2, v2, v5 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_minimumnum_v3f32_nnan: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v3 :: v_dual_min_num_f32 v1, v1, v4 |
| ; GFX12-NEXT: v_min_num_f32_e32 v2, v2, v5 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = call nnan <3 x float> @llvm.minimumnum.v3f32(<3 x float> %x, <3 x float> %y) |
| ret <3 x float> %result |
| } |
| |
; Codegen test for llvm.minimumnum on <4 x float> with no fast-math flags on
; the call. The expected code handles the four lanes independently
; (v0..v3 combined with v4..v7). Both operands of a lane first go through an
; identity operation: v_mul_f32 by 1.0 on gfx7/gfx8, v_max_f32 of a register
; with itself on gfx9/gfx10/gfx11, and v_max_num_f32 on gfx12. The two
; results are then combined with v_min_f32 (v_min_num_f32 on gfx12).
; gfx7/gfx8/gfx9 reuse v4 as the scratch register for every second operand,
; while gfx10 and later rewrite each input register in place and emit all
; mins at the end. SelectionDAG and GlobalISel have separate check blocks
; that differ only in the order (or v_dual_* pairing) of the per-operand
; instructions.
; NOTE(review): the identity operations presumably quiet signaling NaNs
; before the min - confirm against the minimumnum lowering.
| define <4 x float> @v_minimumnum_v4f32(<4 x float> %x, <4 x float> %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_v4f32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v4 |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v4 |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v5 |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v4 |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v6 |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v4 |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v7 |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v3, v3, v4 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_v4f32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v4 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v4 |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v5 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v4 |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2 |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v6 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v4 |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3 |
| ; GFX7-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v7 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v3, v3, v4 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_v4f32: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v4 |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX8-SDAG-NEXT: v_min_f32_e32 v0, v0, v4 |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v5 |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; GFX8-SDAG-NEXT: v_min_f32_e32 v1, v1, v4 |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v6 |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2 |
| ; GFX8-SDAG-NEXT: v_min_f32_e32 v2, v2, v4 |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v7 |
| ; GFX8-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3 |
| ; GFX8-SDAG-NEXT: v_min_f32_e32 v3, v3, v4 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_v4f32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v4 |
| ; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v4 |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v5 |
| ; GFX8-GISEL-NEXT: v_min_f32_e32 v1, v1, v4 |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2 |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v6 |
| ; GFX8-GISEL-NEXT: v_min_f32_e32 v2, v2, v4 |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3 |
| ; GFX8-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v7 |
| ; GFX8-GISEL-NEXT: v_min_f32_e32 v3, v3, v4 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: v_minimumnum_v4f32: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v4, v4 |
| ; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v4 |
| ; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v5, v5 |
| ; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 |
| ; GFX9-SDAG-NEXT: v_min_f32_e32 v1, v1, v4 |
| ; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v6, v6 |
| ; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 |
| ; GFX9-SDAG-NEXT: v_min_f32_e32 v2, v2, v4 |
| ; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v7, v7 |
| ; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 |
| ; GFX9-SDAG-NEXT: v_min_f32_e32 v3, v3, v4 |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: v_minimumnum_v4f32: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v4, v4 |
| ; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v4 |
| ; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 |
| ; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v5, v5 |
| ; GFX9-GISEL-NEXT: v_min_f32_e32 v1, v1, v4 |
| ; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 |
| ; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v6, v6 |
| ; GFX9-GISEL-NEXT: v_min_f32_e32 v2, v2, v4 |
| ; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 |
| ; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v7, v7 |
| ; GFX9-GISEL-NEXT: v_min_f32_e32 v3, v3, v4 |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_v4f32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_max_f32_e32 v4, v4, v4 |
| ; GFX10-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX10-SDAG-NEXT: v_max_f32_e32 v5, v5, v5 |
| ; GFX10-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 |
| ; GFX10-SDAG-NEXT: v_max_f32_e32 v6, v6, v6 |
| ; GFX10-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 |
| ; GFX10-SDAG-NEXT: v_max_f32_e32 v7, v7, v7 |
| ; GFX10-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 |
| ; GFX10-SDAG-NEXT: v_min_f32_e32 v0, v0, v4 |
| ; GFX10-SDAG-NEXT: v_min_f32_e32 v1, v1, v5 |
| ; GFX10-SDAG-NEXT: v_min_f32_e32 v2, v2, v6 |
| ; GFX10-SDAG-NEXT: v_min_f32_e32 v3, v3, v7 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_v4f32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 |
| ; GFX10-GISEL-NEXT: v_max_f32_e32 v4, v4, v4 |
| ; GFX10-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 |
| ; GFX10-GISEL-NEXT: v_max_f32_e32 v5, v5, v5 |
| ; GFX10-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 |
| ; GFX10-GISEL-NEXT: v_max_f32_e32 v6, v6, v6 |
| ; GFX10-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 |
| ; GFX10-GISEL-NEXT: v_max_f32_e32 v7, v7, v7 |
| ; GFX10-GISEL-NEXT: v_min_f32_e32 v0, v0, v4 |
| ; GFX10-GISEL-NEXT: v_min_f32_e32 v1, v1, v5 |
| ; GFX10-GISEL-NEXT: v_min_f32_e32 v2, v2, v6 |
| ; GFX10-GISEL-NEXT: v_min_f32_e32 v3, v3, v7 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: v_minimumnum_v4f32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5 |
| ; GFX11-SDAG-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 |
| ; GFX11-SDAG-NEXT: v_dual_max_f32 v6, v6, v6 :: v_dual_max_f32 v7, v7, v7 |
| ; GFX11-SDAG-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3 |
| ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX11-SDAG-NEXT: v_dual_min_f32 v0, v0, v4 :: v_dual_min_f32 v1, v1, v5 |
| ; GFX11-SDAG-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: v_minimumnum_v4f32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1 |
| ; GFX11-GISEL-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5 |
| ; GFX11-GISEL-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3 |
| ; GFX11-GISEL-NEXT: v_dual_max_f32 v6, v6, v6 :: v_dual_max_f32 v7, v7, v7 |
| ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX11-GISEL-NEXT: v_dual_min_f32 v0, v0, v4 :: v_dual_min_f32 v1, v1, v5 |
| ; GFX11-GISEL-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_v4f32: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5 |
| ; GFX12-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 |
| ; GFX12-SDAG-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7 |
| ; GFX12-SDAG-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX12-SDAG-NEXT: v_dual_min_num_f32 v0, v0, v4 :: v_dual_min_num_f32 v1, v1, v5 |
| ; GFX12-SDAG-NEXT: v_dual_min_num_f32 v2, v2, v6 :: v_dual_min_num_f32 v3, v3, v7 |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_v4f32: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 |
| ; GFX12-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5 |
| ; GFX12-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3 |
| ; GFX12-GISEL-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7 |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX12-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v4 :: v_dual_min_num_f32 v1, v1, v5 |
| ; GFX12-GISEL-NEXT: v_dual_min_num_f32 v2, v2, v6 :: v_dual_min_num_f32 v3, v3, v7 |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> %x, <4 x float> %y) |
| ret <4 x float> %result |
| } |
| |
; Codegen test for llvm.minimumnum on <4 x float> with the nnan flag on the
; call. For every target the expected code is only one min per lane
; (v_min_f32, or v_min_num_f32 on gfx12) applied directly to the incoming
; registers v0..v3 and v4..v7, with no per-operand instruction in front of
; it. The checks use the common per-target prefixes rather than separate
; SDAG/GISEL ones. On gfx11 and gfx12 the four mins are emitted as two
; v_dual_* instructions (lanes 0+1 and lanes 2+3).
| define <4 x float> @v_minimumnum_v4f32_nnan(<4 x float> %x, <4 x float> %y) { |
| ; GFX7-LABEL: v_minimumnum_v4f32_nnan: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_min_f32_e32 v0, v0, v4 |
| ; GFX7-NEXT: v_min_f32_e32 v1, v1, v5 |
| ; GFX7-NEXT: v_min_f32_e32 v2, v2, v6 |
| ; GFX7-NEXT: v_min_f32_e32 v3, v3, v7 |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v_minimumnum_v4f32_nnan: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_min_f32_e32 v0, v0, v4 |
| ; GFX8-NEXT: v_min_f32_e32 v1, v1, v5 |
| ; GFX8-NEXT: v_min_f32_e32 v2, v2, v6 |
| ; GFX8-NEXT: v_min_f32_e32 v3, v3, v7 |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_v4f32_nnan: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_min_f32_e32 v0, v0, v4 |
| ; GFX9-NEXT: v_min_f32_e32 v1, v1, v5 |
| ; GFX9-NEXT: v_min_f32_e32 v2, v2, v6 |
| ; GFX9-NEXT: v_min_f32_e32 v3, v3, v7 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_v4f32_nnan: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_min_f32_e32 v0, v0, v4 |
| ; GFX10-NEXT: v_min_f32_e32 v1, v1, v5 |
| ; GFX10-NEXT: v_min_f32_e32 v2, v2, v6 |
| ; GFX10-NEXT: v_min_f32_e32 v3, v3, v7 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_v4f32_nnan: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_min_f32 v0, v0, v4 :: v_dual_min_f32 v1, v1, v5 |
| ; GFX11-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_minimumnum_v4f32_nnan: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v4 :: v_dual_min_num_f32 v1, v1, v5 |
| ; GFX12-NEXT: v_dual_min_num_f32 v2, v2, v6 :: v_dual_min_num_f32 v3, v3, v7 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = call nnan <4 x float> @llvm.minimumnum.v4f32(<4 x float> %x, <4 x float> %y) |
| ret <4 x float> %result |
| } |
| |
; Codegen test for llvm.minimumnum on <2 x double> with no fast-math flags
; on the call. The expected code handles the two elements independently
; (v[0:1], v[2:3] combined with v[4:5], v[6:7]). Both operands of an element
; first go through v_max_f64 of the register pair with itself
; (v_max_num_f64 on gfx12) and are then combined with v_min_f64
; (v_min_num_f64 on gfx12). Unlike the f32 tests in this file, gfx7 and gfx8
; also use the self-max form here. gfx900 and gfx950 have separate check
; blocks: gfx950 finishes element 0 before starting element 1 and reuses
; v[4:5] as scratch for the second operand of element 1, while gfx900
; performs all four self-max operations first. SelectionDAG and GlobalISel
; differ only in the order of the per-operand instructions.
; NOTE(review): the self-max operations presumably quiet signaling NaNs
; before the min - confirm against the minimumnum lowering.
| define <2 x double> @v_minimumnum_v2f64(<2 x double> %x, <2 x double> %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_v2f64: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX7-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] |
| ; GFX7-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_v2f64: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX7-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX7-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] |
| ; GFX7-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_v2f64: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX8-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] |
| ; GFX8-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_v2f64: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX8-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX8-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] |
| ; GFX8-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-SDAG-LABEL: v_minimumnum_v2f64: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX900-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX900-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] |
| ; GFX900-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] |
| ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_minimumnum_v2f64: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX900-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX900-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX900-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] |
| ; GFX900-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-SDAG-LABEL: v_minimumnum_v2f64: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] |
| ; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7] |
| ; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5] |
| ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-GISEL-LABEL: v_minimumnum_v2f64: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] |
| ; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7] |
| ; GFX950-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5] |
| ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_v2f64: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX10-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX10-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] |
| ; GFX10-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_v2f64: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX10-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX10-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX10-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] |
| ; GFX10-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: v_minimumnum_v2f64: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX11-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX11-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] |
| ; GFX11-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: v_minimumnum_v2f64: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX11-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX11-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] |
| ; GFX11-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_v2f64: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[4:5] |
| ; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[6:7] |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_v2f64: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[4:5] |
| ; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[6:7] |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> %x, <2 x double> %y) |
| ret <2 x double> %result |
| } |
| |
; Codegen test for llvm.minimumnum on <2 x double> with the nnan flag on the
; call. For every target the expected code is only one min per element
; (v_min_f64, or v_min_num_f64 on gfx12) applied directly to the incoming
; register pairs v[0:1]/v[4:5] and v[2:3]/v[6:7], with no per-operand
; instruction in front of it. The checks use the common per-target prefixes
; rather than separate SDAG/GISEL ones, and a single gfx9 prefix covers
; both gfx900 and gfx950.
| define <2 x double> @v_minimumnum_v2f64_nnan(<2 x double> %x, <2 x double> %y) { |
| ; GFX7-LABEL: v_minimumnum_v2f64_nnan: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] |
| ; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v_minimumnum_v2f64_nnan: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] |
| ; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_v2f64_nnan: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] |
| ; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_v2f64_nnan: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] |
| ; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_v2f64_nnan: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] |
| ; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_minimumnum_v2f64_nnan: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[4:5] |
| ; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[6:7] |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = call nnan <2 x double> @llvm.minimumnum.v2f64(<2 x double> %x, <2 x double> %y) |
| ret <2 x double> %result |
| } |
| |
| ; <3 x double> llvm.minimumnum with no fast-math flags on the call. |
| ; Every check block below expects each source register pair to go through a |
| ; self-max (v_max_f64 x, x; v_max_num_f64_e32 in the gfx12 blocks) before the |
| ; per-element min. Presumably that is NaN quieting - confirm against the lowering. |
| ; SelectionDAG output self-maxes v[6:7] before v[0:1]; GlobalISel does the reverse. |
| ; The gfx950 blocks interleave max/min per element and reuse v[6:7] as the temporary. |
| define <3 x double> @v_minimumnum_v3f64(<3 x double> %x, <3 x double> %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_v3f64: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX7-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] |
| ; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX7-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] |
| ; GFX7-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] |
| ; GFX7-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] |
| ; GFX7-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_v3f64: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX7-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX7-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] |
| ; GFX7-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX7-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] |
| ; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] |
| ; GFX7-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] |
| ; GFX7-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_v3f64: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX8-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] |
| ; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX8-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] |
| ; GFX8-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] |
| ; GFX8-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] |
| ; GFX8-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_v3f64: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX8-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX8-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] |
| ; GFX8-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX8-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] |
| ; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] |
| ; GFX8-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] |
| ; GFX8-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-SDAG-LABEL: v_minimumnum_v3f64: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX900-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] |
| ; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX900-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] |
| ; GFX900-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX900-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] |
| ; GFX900-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] |
| ; GFX900-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] |
| ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_minimumnum_v3f64: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX900-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX900-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] |
| ; GFX900-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX900-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] |
| ; GFX900-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] |
| ; GFX900-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] |
| ; GFX900-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-SDAG-LABEL: v_minimumnum_v3f64: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] |
| ; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[8:9], v[8:9] |
| ; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] |
| ; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[10:11], v[10:11] |
| ; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX950-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[6:7] |
| ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-GISEL-LABEL: v_minimumnum_v3f64: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX950-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] |
| ; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX950-GISEL-NEXT: v_max_f64 v[6:7], v[8:9], v[8:9] |
| ; GFX950-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] |
| ; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX950-GISEL-NEXT: v_max_f64 v[6:7], v[10:11], v[10:11] |
| ; GFX950-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[6:7] |
| ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_v3f64: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX10-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] |
| ; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX10-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] |
| ; GFX10-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX10-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] |
| ; GFX10-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] |
| ; GFX10-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_v3f64: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX10-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX10-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] |
| ; GFX10-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX10-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] |
| ; GFX10-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] |
| ; GFX10-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] |
| ; GFX10-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: v_minimumnum_v3f64: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX11-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] |
| ; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX11-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] |
| ; GFX11-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX11-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] |
| ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) |
| ; GFX11-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] |
| ; GFX11-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: v_minimumnum_v3f64: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX11-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX11-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] |
| ; GFX11-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX11-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] |
| ; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] |
| ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) |
| ; GFX11-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] |
| ; GFX11-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_v3f64: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9] |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11] |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] |
| ; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[6:7] |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) |
| ; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[8:9] |
| ; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[10:11] |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_v3f64: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9] |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11] |
| ; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[6:7] |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) |
| ; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[8:9] |
| ; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[10:11] |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call <3 x double> @llvm.minimumnum.v3f64(<3 x double> %x, <3 x double> %y) |
| ret <3 x double> %result |
| } |
| |
| ; <3 x double> llvm.minimumnum with the nnan flag on the call. |
| ; The expected code is one min per element (v_min_f64, or v_min_num_f64_e32 in |
| ; the gfx12 block) with no preceding self-max, and one check prefix per target |
| ; is used here rather than separate SDAG/GISEL blocks. |
| define <3 x double> @v_minimumnum_v3f64_nnan(<3 x double> %x, <3 x double> %y) { |
| ; GFX7-LABEL: v_minimumnum_v3f64_nnan: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] |
| ; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] |
| ; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v_minimumnum_v3f64_nnan: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] |
| ; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] |
| ; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_v3f64_nnan: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] |
| ; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] |
| ; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_v3f64_nnan: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] |
| ; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] |
| ; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_v3f64_nnan: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] |
| ; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] |
| ; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_minimumnum_v3f64_nnan: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[6:7] |
| ; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[8:9] |
| ; GFX12-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[10:11] |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = call nnan <3 x double> @llvm.minimumnum.v3f64(<3 x double> %x, <3 x double> %y) |
| ret <3 x double> %result |
| } |
| |
| ; <4 x double> llvm.minimumnum with no fast-math flags on the call. |
| ; Every check block below expects each source register pair to go through a |
| ; self-max (v_max_f64 x, x; v_max_num_f64_e32 in the gfx12 blocks) before the |
| ; per-element min. Presumably that is NaN quieting - confirm against the lowering. |
| ; SelectionDAG output self-maxes v[8:9] before v[0:1]; GlobalISel does the reverse. |
| ; The gfx950 blocks interleave max/min per element and reuse v[8:9] as the temporary. |
| define <4 x double> @v_minimumnum_v4f64(<4 x double> %x, <4 x double> %y) { |
| ; GFX7-SDAG-LABEL: v_minimumnum_v4f64: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] |
| ; GFX7-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX7-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] |
| ; GFX7-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX7-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] |
| ; GFX7-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX7-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] |
| ; GFX7-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX7-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] |
| ; GFX7-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] |
| ; GFX7-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] |
| ; GFX7-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_v4f64: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX7-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] |
| ; GFX7-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX7-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] |
| ; GFX7-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX7-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] |
| ; GFX7-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX7-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] |
| ; GFX7-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] |
| ; GFX7-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] |
| ; GFX7-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] |
| ; GFX7-GISEL-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_v4f64: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] |
| ; GFX8-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX8-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] |
| ; GFX8-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX8-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] |
| ; GFX8-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX8-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] |
| ; GFX8-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX8-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] |
| ; GFX8-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] |
| ; GFX8-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] |
| ; GFX8-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_v4f64: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX8-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] |
| ; GFX8-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX8-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] |
| ; GFX8-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX8-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] |
| ; GFX8-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX8-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] |
| ; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] |
| ; GFX8-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] |
| ; GFX8-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] |
| ; GFX8-GISEL-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-SDAG-LABEL: v_minimumnum_v4f64: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] |
| ; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX900-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] |
| ; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX900-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] |
| ; GFX900-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX900-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] |
| ; GFX900-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX900-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] |
| ; GFX900-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] |
| ; GFX900-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] |
| ; GFX900-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] |
| ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_minimumnum_v4f64: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX900-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] |
| ; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX900-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] |
| ; GFX900-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX900-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] |
| ; GFX900-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX900-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] |
| ; GFX900-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] |
| ; GFX900-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] |
| ; GFX900-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] |
| ; GFX900-GISEL-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-SDAG-LABEL: v_minimumnum_v4f64: |
| ; GFX950-SDAG: ; %bb.0: |
| ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] |
| ; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] |
| ; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[10:11], v[10:11] |
| ; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] |
| ; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[12:13], v[12:13] |
| ; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX950-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[8:9] |
| ; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[14:15], v[14:15] |
| ; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX950-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[8:9] |
| ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX950-GISEL-LABEL: v_minimumnum_v4f64: |
| ; GFX950-GISEL: ; %bb.0: |
| ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX950-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] |
| ; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] |
| ; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX950-GISEL-NEXT: v_max_f64 v[8:9], v[10:11], v[10:11] |
| ; GFX950-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] |
| ; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX950-GISEL-NEXT: v_max_f64 v[8:9], v[12:13], v[12:13] |
| ; GFX950-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[8:9] |
| ; GFX950-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX950-GISEL-NEXT: v_max_f64 v[8:9], v[14:15], v[14:15] |
| ; GFX950-GISEL-NEXT: v_min_f64 v[6:7], v[6:7], v[8:9] |
| ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: v_minimumnum_v4f64: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] |
| ; GFX10-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX10-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] |
| ; GFX10-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX10-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] |
| ; GFX10-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX10-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] |
| ; GFX10-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX10-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] |
| ; GFX10-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] |
| ; GFX10-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] |
| ; GFX10-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: v_minimumnum_v4f64: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX10-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] |
| ; GFX10-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX10-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] |
| ; GFX10-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX10-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] |
| ; GFX10-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX10-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] |
| ; GFX10-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] |
| ; GFX10-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] |
| ; GFX10-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] |
| ; GFX10-GISEL-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: v_minimumnum_v4f64: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] |
| ; GFX11-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX11-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] |
| ; GFX11-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX11-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] |
| ; GFX11-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX11-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] |
| ; GFX11-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX11-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] |
| ; GFX11-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] |
| ; GFX11-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] |
| ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) |
| ; GFX11-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: v_minimumnum_v4f64: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] |
| ; GFX11-GISEL-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] |
| ; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] |
| ; GFX11-GISEL-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] |
| ; GFX11-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] |
| ; GFX11-GISEL-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] |
| ; GFX11-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] |
| ; GFX11-GISEL-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] |
| ; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] |
| ; GFX11-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] |
| ; GFX11-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] |
| ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) |
| ; GFX11-GISEL-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_v4f64: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9] |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11] |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[12:13], v[12:13], v[12:13] |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[14:15], v[14:15], v[14:15] |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] |
| ; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[8:9] |
| ; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[10:11] |
| ; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[12:13] |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) |
| ; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[6:7], v[6:7], v[14:15] |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_v4f64: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[8:9], v[8:9], v[8:9] |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[10:11], v[10:11], v[10:11] |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5] |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[12:13], v[12:13], v[12:13] |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7] |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[14:15], v[14:15], v[14:15] |
| ; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[8:9] |
| ; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[10:11] |
| ; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[12:13] |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) |
| ; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[6:7], v[6:7], v[14:15] |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call <4 x double> @llvm.minimumnum.v4f64(<4 x double> %x, <4 x double> %y) |
| ret <4 x double> %result |
| } |
| |
| ; <4 x double> llvm.minimumnum with the nnan flag on the call. |
| ; The expected code is one min per element (v_min_f64, or v_min_num_f64_e32 in |
| ; the gfx12 block) with no preceding self-max, and one check prefix per target |
| ; is used here rather than separate SDAG/GISEL blocks. |
| define <4 x double> @v_minimumnum_v4f64_nnan(<4 x double> %x, <4 x double> %y) { |
| ; GFX7-LABEL: v_minimumnum_v4f64_nnan: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] |
| ; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] |
| ; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] |
| ; GFX7-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v_minimumnum_v4f64_nnan: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] |
| ; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] |
| ; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] |
| ; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_v4f64_nnan: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] |
| ; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] |
| ; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] |
| ; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_v4f64_nnan: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] |
| ; GFX10-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] |
| ; GFX10-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] |
| ; GFX10-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_v4f64_nnan: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] |
| ; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] |
| ; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] |
| ; GFX11-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_minimumnum_v4f64_nnan: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[8:9] |
| ; GFX12-NEXT: v_min_num_f64_e32 v[2:3], v[2:3], v[10:11] |
| ; GFX12-NEXT: v_min_num_f64_e32 v[4:5], v[4:5], v[12:13] |
| ; GFX12-NEXT: v_min_num_f64_e32 v[6:7], v[6:7], v[14:15] |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = call nnan <4 x double> @llvm.minimumnum.v4f64(<4 x double> %x, <4 x double> %y) |
| ret <4 x double> %result |
| } |
| |
| ; Scalar half llvm.minimumnum (no fast-math flags) in a function carrying |
| ; attribute group #0. #0 is defined outside this chunk; going by the test name |
| ; it presumably turns IEEE mode off - confirm against the attribute definition. |
| ; Expected code per target, as recorded below. The gfx7 blocks do the min in f32 |
| ; around v_cvt conversions. The gfx8, gfx9, gfx10 and gfx11 blocks are a bare |
| ; v_min_f16 with no self-max. The gfx12 blocks still self-max both inputs |
| ; (v_max_num_f16_e32) before v_min_num_f16_e32. |
| define half @v_minimumnum_f16_no_ieee(half %x, half %y) #0 { |
| ; GFX7-SDAG-LABEL: v_minimumnum_f16_no_ieee: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_f16_no_ieee: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v_minimumnum_f16_no_ieee: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_f16_no_ieee: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_f16_no_ieee: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-TRUE16-LABEL: v_minimumnum_f16_no_ieee: |
| ; GFX11-TRUE16: ; %bb.0: |
| ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v1.l |
| ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-FAKE16-LABEL: v_minimumnum_f16_no_ieee: |
| ; GFX11-FAKE16: ; %bb.0: |
| ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_no_ieee: |
| ; GFX12-TRUE16-SDAG: ; %bb.0: |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l |
| ; GFX12-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l |
| ; GFX12-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h |
| ; GFX12-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-TRUE16-GISEL-LABEL: v_minimumnum_f16_no_ieee: |
| ; GFX12-TRUE16-GISEL: ; %bb.0: |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l |
| ; GFX12-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l |
| ; GFX12-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h |
| ; GFX12-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-FAKE16-SDAG-LABEL: v_minimumnum_f16_no_ieee: |
| ; GFX12-FAKE16-SDAG: ; %bb.0: |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v1, v1, v1 |
| ; GFX12-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0 |
| ; GFX12-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1 |
| ; GFX12-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-FAKE16-GISEL-LABEL: v_minimumnum_f16_no_ieee: |
| ; GFX12-FAKE16-GISEL: ; %bb.0: |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0 |
| ; GFX12-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v1, v1, v1 |
| ; GFX12-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1 |
| ; GFX12-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call half @llvm.minimumnum.f16(half %x, half %y) |
| ret half %result |
| } |
| |
| ; f16 llvm.minimumnum with the nnan fast-math flag, compiled under attribute |
| ; group #0. Every target's checks expect a single min instruction and no v_max |
| ; of an operand with itself (gfx7 converts through f32 around v_min_f32). |
| ; NOTE(review): the name says "_nan_" while the sibling tests say "_nnan_" and |
| ; the call carries nnan - likely a typo; renaming needs the labels regenerated. |
| define half @v_minimumnum_f16_nan_no_ieee(half %x, half %y) #0 { |
| ; GFX7-SDAG-LABEL: v_minimumnum_f16_nan_no_ieee: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_f16_nan_no_ieee: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v_minimumnum_f16_nan_no_ieee: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_f16_nan_no_ieee: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_f16_nan_no_ieee: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-TRUE16-LABEL: v_minimumnum_f16_nan_no_ieee: |
| ; GFX11-TRUE16: ; %bb.0: |
| ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-TRUE16-NEXT: v_min_f16_e32 v0.l, v0.l, v1.l |
| ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-FAKE16-LABEL: v_minimumnum_f16_nan_no_ieee: |
| ; GFX11-FAKE16: ; %bb.0: |
| ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-TRUE16-LABEL: v_minimumnum_f16_nan_no_ieee: |
| ; GFX12-TRUE16: ; %bb.0: |
| ; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v1.l |
| ; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-FAKE16-LABEL: v_minimumnum_f16_nan_no_ieee: |
| ; GFX12-FAKE16: ; %bb.0: |
| ; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 |
| ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| %result = call nnan half @llvm.minimumnum.f16(half %x, half %y) |
| ret half %result |
| } |
| |
| ; f32 llvm.minimumnum without fast-math flags, compiled under attribute group #0. |
| ; The gfx7 through gfx11 checks expect a bare v_min_f32. The gfx12 checks expect |
| ; each operand to go through a max with itself (fused into one v_dual_max_num_f32) |
| ; before v_min_num_f32; the two instruction selectors differ only in which operand |
| ; comes first in the dual instruction. |
| ; NOTE(review): the self-max presumably quiets signaling NaNs - confirm. |
| define float @v_minimumnum_f32_no_ieee(float %x, float %y) #0 { |
| ; GFX7-LABEL: v_minimumnum_f32_no_ieee: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v_minimumnum_f32_no_ieee: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_f32_no_ieee: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_f32_no_ieee: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_f32_no_ieee: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_f32_no_ieee: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1 |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_f32_no_ieee: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1 |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1 |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call float @llvm.minimumnum.f32(float %x, float %y) |
| ret float %result |
| } |
| |
| ; f32 llvm.minimumnum with the nnan fast-math flag, compiled under attribute |
| ; group #0. Every target's checks expect a single min instruction; the gfx12 |
| ; checks contain only v_min_num_f32 with no self-max on the operands, and both |
| ; instruction selectors share one check prefix per target. |
| define float @v_minimumnum_f32_nnan_no_ieee(float %x, float %y) #0 { |
| ; GFX7-LABEL: v_minimumnum_f32_nnan_no_ieee: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v_minimumnum_f32_nnan_no_ieee: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_f32_nnan_no_ieee: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_f32_nnan_no_ieee: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_f32_nnan_no_ieee: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_min_f32_e32 v0, v0, v1 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_minimumnum_f32_nnan_no_ieee: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = call nnan float @llvm.minimumnum.f32(float %x, float %y) |
| ret float %result |
| } |
| |
| ; f64 llvm.minimumnum without fast-math flags, compiled under attribute group #0. |
| ; The gfx7 through gfx11 checks expect a bare v_min_f64 on the register pairs |
| ; v[0:1] and v[2:3]. The gfx12 checks expect v_max_num_f64 of each operand with |
| ; itself before v_min_num_f64; the two instruction selectors emit those two |
| ; self-max instructions in opposite order. |
| define double @v_minimumnum_f64_no_ieee(double %x, double %y) #0 { |
| ; GFX7-LABEL: v_minimumnum_f64_no_ieee: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v_minimumnum_f64_no_ieee: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_f64_no_ieee: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_f64_no_ieee: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_f64_no_ieee: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_f64_no_ieee: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] |
| ; GFX12-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-SDAG-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_f64_no_ieee: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1] |
| ; GFX12-GISEL-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3] |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-GISEL-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call double @llvm.minimumnum.f64(double %x, double %y) |
| ret double %result |
| } |
| |
| ; f64 llvm.minimumnum with the nnan fast-math flag, compiled under attribute |
| ; group #0. Every target's checks expect a single min instruction on v[0:1] and |
| ; v[2:3]; the gfx12 checks contain only v_min_num_f64 with no self-max on the |
| ; operands, and both instruction selectors share one check prefix per target. |
| define double @v_minimumnum_f64_nnan_no_ieee(double %x, double %y) #0 { |
| ; GFX7-LABEL: v_minimumnum_f64_nnan_no_ieee: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v_minimumnum_f64_nnan_no_ieee: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_f64_nnan_no_ieee: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_f64_nnan_no_ieee: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_f64_nnan_no_ieee: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_minimumnum_f64_nnan_no_ieee: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_min_num_f64_e32 v[0:1], v[0:1], v[2:3] |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = call nnan double @llvm.minimumnum.f64(double %x, double %y) |
| ret double %result |
| } |
| |
| ; <2 x half> llvm.minimumnum without fast-math flags, under attribute group #0. |
| ; Expected lowering by target: gfx7 handles each element with f16/f32 conversions |
| ; and v_min_f32; gfx8 uses a plain v_min_f16 plus an SDWA v_min_f16 selecting |
| ; WORD_1 and ORs the two results; gfx9 through gfx11 use one v_pk_min_f16; gfx12 |
| ; applies v_pk_max_num_f16 to each operand with itself before v_pk_min_num_f16. |
| define <2 x half> @v_minimumnum_v2f16_no_ieee(<2 x half> %x, <2 x half> %y) #0 { |
| ; GFX7-SDAG-LABEL: v_minimumnum_v2f16_no_ieee: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v2 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v3 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_v2f16_no_ieee: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_v2f16_no_ieee: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_v2f16_no_ieee: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v2, v0, v1 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_v2f16_no_ieee: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_pk_min_f16 v0, v0, v1 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_v2f16_no_ieee: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_pk_min_f16 v0, v0, v1 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_v2f16_no_ieee: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_pk_min_f16 v0, v0, v1 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-SDAG-LABEL: v_minimumnum_v2f16_no_ieee: |
| ; GFX12-SDAG: ; %bb.0: |
| ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1 |
| ; GFX12-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0 |
| ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v1 |
| ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-GISEL-LABEL: v_minimumnum_v2f16_no_ieee: |
| ; GFX12-GISEL: ; %bb.0: |
| ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0 |
| ; GFX12-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1 |
| ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v1 |
| ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %result = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> %x, <2 x half> %y) |
| ret <2 x half> %result |
| } |
| |
| ; <2 x half> llvm.minimumnum with the nnan fast-math flag, under attribute |
| ; group #0. The gfx7 through gfx11 checks match the flag-free v2f16 test |
| ; (per-element f32 min on gfx7, plain + SDWA v_min_f16 on gfx8, v_pk_min_f16 on |
| ; gfx9 through gfx11). The gfx12 checks expect a lone v_pk_min_num_f16 with no |
| ; self-max on the operands. |
| define <2 x half> @v_minimumnum_v2f16_nnan_no_ieee(<2 x half> %x, <2 x half> %y) #0 { |
| ; GFX7-SDAG-LABEL: v_minimumnum_v2f16_nnan_no_ieee: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v2 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v3 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_v2f16_nnan_no_ieee: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_v2f16_nnan_no_ieee: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_v2f16_nnan_no_ieee: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v2, v0, v1 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_v2f16_nnan_no_ieee: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_pk_min_f16 v0, v0, v1 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_v2f16_nnan_no_ieee: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_pk_min_f16 v0, v0, v1 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_v2f16_nnan_no_ieee: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_pk_min_f16 v0, v0, v1 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_minimumnum_v2f16_nnan_no_ieee: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v1 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = call nnan <2 x half> @llvm.minimumnum.v2f16(<2 x half> %x, <2 x half> %y) |
| ret <2 x half> %result |
| } |
| |
| ; <3 x half> llvm.minimumnum with the nnan fast-math flag, under attribute |
| ; group #0. On gfx8 and later the checks pair v0 with v2 and v1 with v3: gfx9 and |
| ; later expect two packed min instructions, while gfx8 expects a plain and an SDWA |
| ; v_min_f16 on v0/v2 (ORed together) and only a plain v_min_f16 on v1/v3. gfx7 |
| ; expects three v_min_f32, one per element, with f16/f32 conversions around them. |
| define <3 x half> @v_minimumnum_v3f16_nnan_no_ieee(<3 x half> %x, <3 x half> %y) #0 { |
| ; GFX7-SDAG-LABEL: v_minimumnum_v3f16_nnan_no_ieee: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v3 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v4 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v5 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_v3f16_nnan_no_ieee: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v3 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v4 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v5 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v4 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_v3f16_nnan_no_ieee: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v2 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v3 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v4 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_v3f16_nnan_no_ieee: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v4, v0, v2 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v1, v1, v3 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-SDAG-LABEL: v_minimumnum_v3f16_nnan_no_ieee: |
| ; GFX9-SDAG: ; %bb.0: |
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-SDAG-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX9-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: v_minimumnum_v3f16_nnan_no_ieee: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX9-GISEL-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_v3f16_nnan_no_ieee: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX10-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_v3f16_nnan_no_ieee: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX11-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_minimumnum_v3f16_nnan_no_ieee: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v2 |
| ; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v3 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = call nnan <3 x half> @llvm.minimumnum.v3f16(<3 x half> %x, <3 x half> %y) |
| ret <3 x half> %result |
| } |
| |
| ; <4 x half> llvm.minimumnum with the nnan fast-math flag, under attribute |
| ; group #0. On gfx8 and later the checks pair v0 with v2 and v1 with v3: gfx9 and |
| ; later expect two packed min instructions, while gfx8 expects a plain and an SDWA |
| ; v_min_f16 per register pair, each pair ORed together. gfx7 expects four |
| ; v_min_f32, one per element, with f16/f32 conversions around them. |
| define <4 x half> @v_minimumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y) #0 { |
| ; GFX7-SDAG-LABEL: v_minimumnum_v4f16_nnan_no_ieee: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v7, v7 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v6, v6 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v7, v7 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v6, v6 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v0, v0, v4 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v1, v1, v5 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v2, v2, v6 |
| ; GFX7-SDAG-NEXT: v_min_f32_e32 v3, v3, v7 |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: v_minimumnum_v4f16_nnan_no_ieee: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v5 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v0, v0, v4 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v6 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v1, v1, v5 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v7 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v2, v2, v4 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; GFX7-GISEL-NEXT: v_min_f32_e32 v3, v3, v5 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-SDAG-LABEL: v_minimumnum_v4f16_nnan_no_ieee: |
| ; GFX8-SDAG: ; %bb.0: |
| ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v1, v1, v3 |
| ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v2 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v5 |
| ; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v1, v4 |
| ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: v_minimumnum_v4f16_nnan_no_ieee: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v4, v0, v2 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_min_f16_e32 v2, v1, v3 |
| ; GFX8-GISEL-NEXT: v_min_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1 |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_minimumnum_v4f16_nnan_no_ieee: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX9-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_minimumnum_v4f16_nnan_no_ieee: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX10-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_minimumnum_v4f16_nnan_no_ieee: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_pk_min_f16 v0, v0, v2 |
| ; GFX11-NEXT: v_pk_min_f16 v1, v1, v3 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: v_minimumnum_v4f16_nnan_no_ieee: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v2 |
| ; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v3 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %result = call nnan <4 x half> @llvm.minimumnum.v4f16(<4 x half> %x, <4 x half> %y) |
| ret <4 x half> %result |
| } |
| |
| ; Attribute group #0 is attached to the *_no_ieee functions above; it sets the |
| ; "amdgpu-ieee" function attribute to "false". |
| attributes #0 = { "amdgpu-ieee"="false" } |
| ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| ; GFX900: {{.*}} |
| ; GFX950: {{.*}} |