; blob: 8489193b1f58087440303a42d8dfe584ca6dd995 [file] [log] [blame] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,TRUE16,GFX1170-SDAG,GFX1170-SDAG-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,FAKE16,GFX1170-SDAG,GFX1170-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,TRUE16,GFX1170-GISEL,GFX1170-GISEL-TRUE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,FAKE16,GFX1170-GISEL,GFX1170-GISEL-FAKE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12-GISEL,GFX12-GISEL-FAKE16 %s
; minimum(maximum(%a, %b), %c) on float arguments. Every run line is expected
; to select a single fused v_maximumminimum_f32.
define amdgpu_ps float @test_minmax_f32(float %a, float %b, float %c) {
; GCN-LABEL: test_minmax_f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_maximumminimum_f32 v0, v0, v1, v2
; GCN-NEXT:    ; return to shader part epilog
  %max_ab = call float @llvm.maximum.f32(float %a, float %b)
  %result = call float @llvm.minimum.f32(float %max_ab, float %c)
  ret float %result
}
; minimum(maximum(%a, %b), %c) with all inputs passed inreg (they show up as
; s0..s4 in the expected output) and the result stored through %out.
; The expected code differs per configuration:
;   - gfx1170, SelectionDAG: separate v_maximum_f32 and v_minimum_f32
;   - gfx1170, GlobalISel  : one fused v_maximumminimum_f32
;   - gfx1200, both        : scalar s_maximum_f32 followed by s_minimum_f32
define amdgpu_ps void @s_test_minmax_f32(float inreg %a, float inreg %b, float inreg %c, ptr addrspace(1) inreg %out) {
; GFX1170-SDAG-LABEL: s_test_minmax_f32:
; GFX1170-SDAG:       ; %bb.0:
; GFX1170-SDAG-NEXT:    v_maximum_f32 v0, s0, s1
; GFX1170-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; GFX1170-SDAG-NEXT:    s_mov_b32 s5, s4
; GFX1170-SDAG-NEXT:    s_mov_b32 s4, s3
; GFX1170-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX1170-SDAG-NEXT:    v_minimum_f32 v0, v0, s2
; GFX1170-SDAG-NEXT:    global_store_b32 v1, v0, s[4:5]
; GFX1170-SDAG-NEXT:    s_endpgm
;
; GFX1170-GISEL-LABEL: s_test_minmax_f32:
; GFX1170-GISEL:       ; %bb.0:
; GFX1170-GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1170-GISEL-NEXT:    s_mov_b32 s6, s3
; GFX1170-GISEL-NEXT:    s_mov_b32 s7, s4
; GFX1170-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-NEXT:    v_maximumminimum_f32 v0, s0, s1, v0
; GFX1170-GISEL-NEXT:    global_store_b32 v1, v0, s[6:7]
; GFX1170-GISEL-NEXT:    s_endpgm
;
; GFX12-SDAG-LABEL: s_test_minmax_f32:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_maximum_f32 s0, s0, s1
; GFX12-SDAG-NEXT:    s_mov_b32 s5, s4
; GFX12-SDAG-NEXT:    s_mov_b32 s4, s3
; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
; GFX12-SDAG-NEXT:    s_minimum_f32 s0, s0, s2
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; GFX12-SDAG-NEXT:    global_store_b32 v0, v1, s[4:5]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: s_test_minmax_f32:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_maximum_f32 s0, s0, s1
; GFX12-GISEL-NEXT:    s_mov_b32 s6, s3
; GFX12-GISEL-NEXT:    s_mov_b32 s7, s4
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-GISEL-NEXT:    s_minimum_f32 s0, s0, s2
; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[6:7]
; GFX12-GISEL-NEXT:    s_endpgm
  %max_ab = call float @llvm.maximum.f32(float %a, float %b)
  %minmax_abc = call float @llvm.minimum.f32(float %max_ab, float %c)
  store float %minmax_abc, ptr addrspace(1) %out
  ret void
}
; Commuted form: the maximum(%a, %b) result is the second operand of the
; minimum. The expected output is the same fused v_maximumminimum_f32 as for
; the non-commuted operand order.
define amdgpu_ps float @test_minmax_commuted_f32(float %a, float %b, float %c) {
; GCN-LABEL: test_minmax_commuted_f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_maximumminimum_f32 v0, v0, v1, v2
; GCN-NEXT:    ; return to shader part epilog
  %max_ab = call float @llvm.maximum.f32(float %a, float %b)
  %result = call float @llvm.minimum.f32(float %c, float %max_ab)
  ret float %result
}
; maximum(minimum(%a, %b), %c) on float arguments. Every run line is expected
; to select a single fused v_minimummaximum_f32.
define amdgpu_ps float @test_maxmin_f32(float %a, float %b, float %c) {
; GCN-LABEL: test_maxmin_f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_minimummaximum_f32 v0, v0, v1, v2
; GCN-NEXT:    ; return to shader part epilog
  %min_ab = call float @llvm.minimum.f32(float %a, float %b)
  %result = call float @llvm.maximum.f32(float %min_ab, float %c)
  ret float %result
}
; Commuted form: the minimum(%a, %b) result is the second operand of the
; maximum. The expected output is the same fused v_minimummaximum_f32 as for
; the non-commuted operand order.
define amdgpu_ps float @test_maxmin_commuted_f32(float %a, float %b, float %c) {
; GCN-LABEL: test_maxmin_commuted_f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_minimummaximum_f32 v0, v0, v1, v2
; GCN-NEXT:    ; return to shader part epilog
  %min_ab = call float @llvm.minimum.f32(float %a, float %b)
  %result = call float @llvm.maximum.f32(float %c, float %min_ab)
  ret float %result
}
; Half-precision minimum(maximum(%a, %b), %c). All configurations are expected
; to select one fused v_maximumminimum_f16; runs with +real-true16 print the
; operands as 16-bit register halves (v0.l), runs with -real-true16 print v0.
define amdgpu_ps half @test_minmax_f16(half %a, half %b, half %c) {
; TRUE16-LABEL: test_minmax_f16:
; TRUE16:       ; %bb.0:
; TRUE16-NEXT:    v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
; TRUE16-NEXT:    ; return to shader part epilog
;
; FAKE16-LABEL: test_minmax_f16:
; FAKE16:       ; %bb.0:
; FAKE16-NEXT:    v_maximumminimum_f16 v0, v0, v1, v2
; FAKE16-NEXT:    ; return to shader part epilog
;
; GFX12-SDAG-TRUE16-LABEL: test_minmax_f16:
; GFX12-SDAG-TRUE16:       ; %bb.0:
; GFX12-SDAG-TRUE16-NEXT:    v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
; GFX12-SDAG-TRUE16-NEXT:    ; return to shader part epilog
;
; GFX12-SDAG-FAKE16-LABEL: test_minmax_f16:
; GFX12-SDAG-FAKE16:       ; %bb.0:
; GFX12-SDAG-FAKE16-NEXT:    v_maximumminimum_f16 v0, v0, v1, v2
; GFX12-SDAG-FAKE16-NEXT:    ; return to shader part epilog
;
; GFX12-GISEL-TRUE16-LABEL: test_minmax_f16:
; GFX12-GISEL-TRUE16:       ; %bb.0:
; GFX12-GISEL-TRUE16-NEXT:    v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
; GFX12-GISEL-TRUE16-NEXT:    ; return to shader part epilog
;
; GFX12-GISEL-FAKE16-LABEL: test_minmax_f16:
; GFX12-GISEL-FAKE16:       ; %bb.0:
; GFX12-GISEL-FAKE16-NEXT:    v_maximumminimum_f16 v0, v0, v1, v2
; GFX12-GISEL-FAKE16-NEXT:    ; return to shader part epilog
  %max_ab = call half @llvm.maximum.f16(half %a, half %b)
  %result = call half @llvm.minimum.f16(half %max_ab, half %c)
  ret half %result
}
; Half-precision commuted form: maximum(%a, %b) feeds the second operand of the
; minimum. The expected output is still one fused v_maximumminimum_f16, with
; v0.l-style operands under +real-true16 and v0-style operands otherwise.
define amdgpu_ps half @test_minmax_commuted_f16(half %a, half %b, half %c) {
; TRUE16-LABEL: test_minmax_commuted_f16:
; TRUE16:       ; %bb.0:
; TRUE16-NEXT:    v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
; TRUE16-NEXT:    ; return to shader part epilog
;
; FAKE16-LABEL: test_minmax_commuted_f16:
; FAKE16:       ; %bb.0:
; FAKE16-NEXT:    v_maximumminimum_f16 v0, v0, v1, v2
; FAKE16-NEXT:    ; return to shader part epilog
;
; GFX12-SDAG-TRUE16-LABEL: test_minmax_commuted_f16:
; GFX12-SDAG-TRUE16:       ; %bb.0:
; GFX12-SDAG-TRUE16-NEXT:    v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
; GFX12-SDAG-TRUE16-NEXT:    ; return to shader part epilog
;
; GFX12-SDAG-FAKE16-LABEL: test_minmax_commuted_f16:
; GFX12-SDAG-FAKE16:       ; %bb.0:
; GFX12-SDAG-FAKE16-NEXT:    v_maximumminimum_f16 v0, v0, v1, v2
; GFX12-SDAG-FAKE16-NEXT:    ; return to shader part epilog
;
; GFX12-GISEL-TRUE16-LABEL: test_minmax_commuted_f16:
; GFX12-GISEL-TRUE16:       ; %bb.0:
; GFX12-GISEL-TRUE16-NEXT:    v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
; GFX12-GISEL-TRUE16-NEXT:    ; return to shader part epilog
;
; GFX12-GISEL-FAKE16-LABEL: test_minmax_commuted_f16:
; GFX12-GISEL-FAKE16:       ; %bb.0:
; GFX12-GISEL-FAKE16-NEXT:    v_maximumminimum_f16 v0, v0, v1, v2
; GFX12-GISEL-FAKE16-NEXT:    ; return to shader part epilog
  %max_ab = call half @llvm.maximum.f16(half %a, half %b)
  %result = call half @llvm.minimum.f16(half %c, half %max_ab)
  ret half %result
}
; Half-precision commuted form: minimum(%a, %b) feeds the second operand of the
; maximum. All configurations are expected to select one fused
; v_minimummaximum_f16, with v0.l-style operands under +real-true16 and
; v0-style operands otherwise.
define amdgpu_ps half @test_maxmin_commuted_f16(half %a, half %b, half %c) {
; TRUE16-LABEL: test_maxmin_commuted_f16:
; TRUE16:       ; %bb.0:
; TRUE16-NEXT:    v_minimummaximum_f16 v0.l, v0.l, v1.l, v2.l
; TRUE16-NEXT:    ; return to shader part epilog
;
; FAKE16-LABEL: test_maxmin_commuted_f16:
; FAKE16:       ; %bb.0:
; FAKE16-NEXT:    v_minimummaximum_f16 v0, v0, v1, v2
; FAKE16-NEXT:    ; return to shader part epilog
;
; GFX12-SDAG-TRUE16-LABEL: test_maxmin_commuted_f16:
; GFX12-SDAG-TRUE16:       ; %bb.0:
; GFX12-SDAG-TRUE16-NEXT:    v_minimummaximum_f16 v0.l, v0.l, v1.l, v2.l
; GFX12-SDAG-TRUE16-NEXT:    ; return to shader part epilog
;
; GFX12-SDAG-FAKE16-LABEL: test_maxmin_commuted_f16:
; GFX12-SDAG-FAKE16:       ; %bb.0:
; GFX12-SDAG-FAKE16-NEXT:    v_minimummaximum_f16 v0, v0, v1, v2
; GFX12-SDAG-FAKE16-NEXT:    ; return to shader part epilog
;
; GFX12-GISEL-TRUE16-LABEL: test_maxmin_commuted_f16:
; GFX12-GISEL-TRUE16:       ; %bb.0:
; GFX12-GISEL-TRUE16-NEXT:    v_minimummaximum_f16 v0.l, v0.l, v1.l, v2.l
; GFX12-GISEL-TRUE16-NEXT:    ; return to shader part epilog
;
; GFX12-GISEL-FAKE16-LABEL: test_maxmin_commuted_f16:
; GFX12-GISEL-FAKE16:       ; %bb.0:
; GFX12-GISEL-FAKE16-NEXT:    v_minimummaximum_f16 v0, v0, v1, v2
; GFX12-GISEL-FAKE16-NEXT:    ; return to shader part epilog
  %min_ab = call half @llvm.minimum.f16(half %a, half %b)
  %result = call half @llvm.maximum.f16(half %c, half %min_ab)
  ret half %result
}
; Half-precision minimum(maximum(%a, %b), %c) with all inputs passed inreg
; (they show up as s0..s4 in the expected output) and the result stored
; through %out with a 16-bit store.
; The expected code differs per configuration:
;   - gfx1170, SelectionDAG: separate v_maximum_f16 and v_minimum_f16
;   - gfx1170, GlobalISel  : one fused v_maximumminimum_f16
;   - gfx1200, both        : scalar s_maximum_f16 followed by s_minimum_f16
; Runs with +real-true16 additionally use 16-bit register halves (v0.l).
define amdgpu_ps void @s_test_minmax_f16(half inreg %a, half inreg %b, half inreg %c, ptr addrspace(1) inreg %out) {
; GFX1170-SDAG-TRUE16-LABEL: s_test_minmax_f16:
; GFX1170-SDAG-TRUE16:       ; %bb.0:
; GFX1170-SDAG-TRUE16-NEXT:    v_maximum_f16 v0.l, s0, s1
; GFX1170-SDAG-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
; GFX1170-SDAG-TRUE16-NEXT:    s_mov_b32 s5, s4
; GFX1170-SDAG-TRUE16-NEXT:    s_mov_b32 s4, s3
; GFX1170-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX1170-SDAG-TRUE16-NEXT:    v_minimum_f16 v0.l, v0.l, s2
; GFX1170-SDAG-TRUE16-NEXT:    global_store_b16 v1, v0, s[4:5]
; GFX1170-SDAG-TRUE16-NEXT:    s_endpgm
;
; GFX1170-SDAG-FAKE16-LABEL: s_test_minmax_f16:
; GFX1170-SDAG-FAKE16:       ; %bb.0:
; GFX1170-SDAG-FAKE16-NEXT:    v_maximum_f16 v0, s0, s1
; GFX1170-SDAG-FAKE16-NEXT:    v_mov_b32_e32 v1, 0
; GFX1170-SDAG-FAKE16-NEXT:    s_mov_b32 s5, s4
; GFX1170-SDAG-FAKE16-NEXT:    s_mov_b32 s4, s3
; GFX1170-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX1170-SDAG-FAKE16-NEXT:    v_minimum_f16 v0, v0, s2
; GFX1170-SDAG-FAKE16-NEXT:    global_store_b16 v1, v0, s[4:5]
; GFX1170-SDAG-FAKE16-NEXT:    s_endpgm
;
; GFX1170-GISEL-TRUE16-LABEL: s_test_minmax_f16:
; GFX1170-GISEL-TRUE16:       ; %bb.0:
; GFX1170-GISEL-TRUE16-NEXT:    v_mov_b16_e32 v0.l, s2
; GFX1170-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
; GFX1170-GISEL-TRUE16-NEXT:    s_mov_b32 s6, s3
; GFX1170-GISEL-TRUE16-NEXT:    s_mov_b32 s7, s4
; GFX1170-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX1170-GISEL-TRUE16-NEXT:    v_maximumminimum_f16 v0.l, s0, s1, v0.l
; GFX1170-GISEL-TRUE16-NEXT:    global_store_b16 v1, v0, s[6:7]
; GFX1170-GISEL-TRUE16-NEXT:    s_endpgm
;
; GFX1170-GISEL-FAKE16-LABEL: s_test_minmax_f16:
; GFX1170-GISEL-FAKE16:       ; %bb.0:
; GFX1170-GISEL-FAKE16-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1170-GISEL-FAKE16-NEXT:    s_mov_b32 s6, s3
; GFX1170-GISEL-FAKE16-NEXT:    s_mov_b32 s7, s4
; GFX1170-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1170-GISEL-FAKE16-NEXT:    v_maximumminimum_f16 v0, s0, s1, v0
; GFX1170-GISEL-FAKE16-NEXT:    global_store_b16 v1, v0, s[6:7]
; GFX1170-GISEL-FAKE16-NEXT:    s_endpgm
;
; GFX12-SDAG-TRUE16-LABEL: s_test_minmax_f16:
; GFX12-SDAG-TRUE16:       ; %bb.0:
; GFX12-SDAG-TRUE16-NEXT:    s_maximum_f16 s0, s0, s1
; GFX12-SDAG-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-SDAG-TRUE16-NEXT:    s_mov_b32 s5, s4
; GFX12-SDAG-TRUE16-NEXT:    s_mov_b32 s4, s3
; GFX12-SDAG-TRUE16-NEXT:    s_minimum_f16 s0, s0, s2
; GFX12-SDAG-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
; GFX12-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v0.l, s0
; GFX12-SDAG-TRUE16-NEXT:    global_store_b16 v1, v0, s[4:5]
; GFX12-SDAG-TRUE16-NEXT:    s_endpgm
;
; GFX12-SDAG-FAKE16-LABEL: s_test_minmax_f16:
; GFX12-SDAG-FAKE16:       ; %bb.0:
; GFX12-SDAG-FAKE16-NEXT:    s_maximum_f16 s0, s0, s1
; GFX12-SDAG-FAKE16-NEXT:    s_mov_b32 s5, s4
; GFX12-SDAG-FAKE16-NEXT:    s_mov_b32 s4, s3
; GFX12-SDAG-FAKE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
; GFX12-SDAG-FAKE16-NEXT:    s_minimum_f16 s0, s0, s2
; GFX12-SDAG-FAKE16-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; GFX12-SDAG-FAKE16-NEXT:    global_store_b16 v0, v1, s[4:5]
; GFX12-SDAG-FAKE16-NEXT:    s_endpgm
;
; GFX12-GISEL-TRUE16-LABEL: s_test_minmax_f16:
; GFX12-GISEL-TRUE16:       ; %bb.0:
; GFX12-GISEL-TRUE16-NEXT:    s_maximum_f16 s0, s0, s1
; GFX12-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-GISEL-TRUE16-NEXT:    s_mov_b32 s6, s3
; GFX12-GISEL-TRUE16-NEXT:    s_mov_b32 s7, s4
; GFX12-GISEL-TRUE16-NEXT:    s_minimum_f16 s0, s0, s2
; GFX12-GISEL-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
; GFX12-GISEL-TRUE16-NEXT:    v_mov_b16_e32 v0.l, s0
; GFX12-GISEL-TRUE16-NEXT:    global_store_b16 v1, v0, s[6:7]
; GFX12-GISEL-TRUE16-NEXT:    s_endpgm
;
; GFX12-GISEL-FAKE16-LABEL: s_test_minmax_f16:
; GFX12-GISEL-FAKE16:       ; %bb.0:
; GFX12-GISEL-FAKE16-NEXT:    s_maximum_f16 s0, s0, s1
; GFX12-GISEL-FAKE16-NEXT:    s_mov_b32 s6, s3
; GFX12-GISEL-FAKE16-NEXT:    s_mov_b32 s7, s4
; GFX12-GISEL-FAKE16-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-GISEL-FAKE16-NEXT:    s_minimum_f16 s0, s0, s2
; GFX12-GISEL-FAKE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
; GFX12-GISEL-FAKE16-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-GISEL-FAKE16-NEXT:    global_store_b16 v1, v0, s[6:7]
; GFX12-GISEL-FAKE16-NEXT:    s_endpgm
  %max_ab = call half @llvm.maximum.f16(half %a, half %b)
  %minmax_abc = call half @llvm.minimum.f16(half %max_ab, half %c)
  store half %minmax_abc, ptr addrspace(1) %out
  ret void
}
declare half @llvm.minimum.f16(half, half)
declare half @llvm.maximum.f16(half, half)
declare float @llvm.minimum.f32(float, float)
declare float @llvm.maximum.f32(float, float)