| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,SDAG %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,GISEL %s |
| |
| ; i32 add feeding an unsigned max, with all three operands passed as ordinary |
| ; (non-inreg) arguments. Both selectors are expected to emit a single fused |
| ; v_add_max_u32. |
| define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) { |
| ; GCN-LABEL: add_max_u32_vvv: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_add_max_u32_e64 v0, v0, v1, v2 |
| ; GCN-NEXT: ; return to shader part epilog |
| %sum = add i32 %a, %b |
| %umax = call i32 @llvm.umax.i32(i32 %sum, i32 %c) |
| %bits = bitcast i32 %umax to float |
| ret float %bits |
| } |
| |
| ; Same add + umax pattern, but the first addend is an inreg argument (shown as |
| ; s0 in the expected output). The fused instruction is still expected from both |
| ; selectors. |
| define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) { |
| ; GCN-LABEL: add_max_u32_svv: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_add_max_u32_e64 v0, s0, v0, v1 |
| ; GCN-NEXT: ; return to shader part epilog |
| %sum = add i32 %a, %b |
| %umax = call i32 @llvm.umax.i32(i32 %sum, i32 %c) |
| %bits = bitcast i32 %umax to float |
| ret float %bits |
| } |
| |
| ; Both addends are inreg arguments; only the max operand is a regular argument. |
| ; The two selectors diverge here: SelectionDAG is expected to fuse into |
| ; v_add_max_u32 reading s0 and s1, while GlobalISel is expected to do the add |
| ; with a scalar instruction and then a separate v_max_u32. |
| define amdgpu_ps float @add_max_u32_ssv(i32 inreg %a, i32 inreg %b, i32 %c) { |
| ; SDAG-LABEL: add_max_u32_ssv: |
| ; SDAG: ; %bb.0: |
| ; SDAG-NEXT: v_add_max_u32_e64 v0, s0, s1, v0 |
| ; SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GISEL-LABEL: add_max_u32_ssv: |
| ; GISEL: ; %bb.0: |
| ; GISEL-NEXT: s_add_co_i32 s0, s0, s1 |
| ; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GISEL-NEXT: v_max_u32_e32 v0, s0, v0 |
| ; GISEL-NEXT: ; return to shader part epilog |
| %sum = add i32 %a, %b |
| %umax = call i32 @llvm.umax.i32(i32 %sum, i32 %c) |
| %bits = bitcast i32 %umax to float |
| ret float %bits |
| } |
| |
| ; All three operands are inreg arguments. No fused instruction is expected: the |
| ; add and max are both expected as scalar instructions, and the result is then |
| ; copied into v0 for the float return. |
| define amdgpu_ps float @add_max_u32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c) { |
| ; GCN-LABEL: add_max_u32_sss: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_add_co_i32 s0, s0, s1 |
| ; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| ; GCN-NEXT: s_max_u32 s0, s0, s2 |
| ; GCN-NEXT: v_mov_b32_e32 v0, s0 |
| ; GCN-NEXT: ; return to shader part epilog |
| %sum = add i32 %a, %b |
| %umax = call i32 @llvm.umax.i32(i32 %sum, i32 %c) |
| %bits = bitcast i32 %umax to float |
| ret float %bits |
| } |
| |
| ; Regular first addend, inreg second addend, and the constant 4 as the max |
| ; operand. The expected fused instruction carries the constant directly as its |
| ; last operand. |
| define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) { |
| ; GCN-LABEL: add_max_u32_vsi: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_add_max_u32_e64 v0, v0, s0, 4 |
| ; GCN-NEXT: ; return to shader part epilog |
| %sum = add i32 %a, %b |
| %umax = call i32 @llvm.umax.i32(i32 %sum, i32 4) |
| %bits = bitcast i32 %umax to float |
| ret float %bits |
| } |
| |
| ; Inreg first addend, regular second addend, and the constant 100 as the max |
| ; operand. The expected fused instruction encodes it as the literal 0x64. |
| define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) { |
| ; GCN-LABEL: add_max_u32_svl: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_add_max_u32_e64 v0, s0, v0, 0x64 |
| ; GCN-NEXT: ; return to shader part epilog |
| %sum = add i32 %a, %b |
| %umax = call i32 @llvm.umax.i32(i32 %sum, i32 100) |
| %bits = bitcast i32 %umax to float |
| ret float %bits |
| } |
| |
| ; The constant 100 is an addend here (added to an inreg argument), and the max |
| ; operand is a regular argument. SelectionDAG is expected to fuse with the |
| ; literal as the first source; GlobalISel is expected to do a scalar add of the |
| ; constant followed by a separate v_max_u32. |
| define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b) { |
| ; SDAG-LABEL: add_max_u32_slv: |
| ; SDAG: ; %bb.0: |
| ; SDAG-NEXT: v_add_max_u32_e64 v0, 0x64, s0, v0 |
| ; SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GISEL-LABEL: add_max_u32_slv: |
| ; GISEL: ; %bb.0: |
| ; GISEL-NEXT: s_addk_co_i32 s0, 0x64 |
| ; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GISEL-NEXT: v_max_u32_e32 v0, s0, v0 |
| ; GISEL-NEXT: ; return to shader part epilog |
| %sum = add i32 %a, 100 |
| %umax = call i32 @llvm.umax.i32(i32 %sum, i32 %b) |
| %bits = bitcast i32 %umax to float |
| ret float %bits |
| } |
| |
| ; Signed variant: i32 add feeding smax, all operands as regular arguments. |
| ; Expected to select the fused v_add_max_i32. |
| define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) { |
| ; GCN-LABEL: add_max_i32_vvv: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_add_max_i32_e64 v0, v0, v1, v2 |
| ; GCN-NEXT: ; return to shader part epilog |
| %sum = add i32 %a, %b |
| %smax = call i32 @llvm.smax.i32(i32 %sum, i32 %c) |
| %bits = bitcast i32 %smax to float |
| ret float %bits |
| } |
| |
| ; Unsigned min variant: i32 add feeding umin, all operands as regular |
| ; arguments. Expected to select the fused v_add_min_u32. |
| define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) { |
| ; GCN-LABEL: add_min_u32_vvv: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_add_min_u32_e64 v0, v0, v1, v2 |
| ; GCN-NEXT: ; return to shader part epilog |
| %sum = add i32 %a, %b |
| %umin = call i32 @llvm.umin.i32(i32 %sum, i32 %c) |
| %bits = bitcast i32 %umin to float |
| ret float %bits |
| } |
| |
| ; Signed min variant: i32 add feeding smin, all operands as regular arguments. |
| ; Expected to select the fused v_add_min_i32. |
| define amdgpu_ps float @add_min_i32_vvv(i32 %a, i32 %b, i32 %c) { |
| ; GCN-LABEL: add_min_i32_vvv: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_add_min_i32_e64 v0, v0, v1, v2 |
| ; GCN-NEXT: ; return to shader part epilog |
| %sum = add i32 %a, %b |
| %smin = call i32 @llvm.smin.i32(i32 %sum, i32 %c) |
| %bits = bitcast i32 %smin to float |
| ret float %bits |
| } |
| |
| ; Packed variant: <2 x i16> add feeding an unsigned max, all operands as |
| ; regular arguments. Both selectors are expected to emit a single fused |
| ; v_pk_add_max_u16. |
| define amdgpu_ps float @add_max_v2u16_vvv(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c) { |
| ; GCN-LABEL: add_max_v2u16_vvv: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_pk_add_max_u16 v0, v0, v1, v2 |
| ; GCN-NEXT: ; return to shader part epilog |
| %sum = add <2 x i16> %a, %b |
| %umax = call <2 x i16> @llvm.umax.v2i16(<2 x i16> %sum, <2 x i16> %c) |
| %bits = bitcast <2 x i16> %umax to float |
| ret float %bits |
| } |
| |
| ; Packed add + umax with the first addend as an inreg argument (s0 in the |
| ; expected output). The fused v_pk_add_max_u16 is still expected from both |
| ; selectors. |
| define amdgpu_ps float @add_max_v2u16_svv(<2 x i16> inreg %a, <2 x i16> %b, <2 x i16> %c) { |
| ; GCN-LABEL: add_max_v2u16_svv: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_pk_add_max_u16 v0, s0, v0, v1 |
| ; GCN-NEXT: ; return to shader part epilog |
| %sum = add <2 x i16> %a, %b |
| %umax = call <2 x i16> @llvm.umax.v2i16(<2 x i16> %sum, <2 x i16> %c) |
| %bits = bitcast <2 x i16> %umax to float |
| ret float %bits |
| } |
| |
| ; Packed add + umax with both addends as inreg arguments. SelectionDAG is |
| ; expected to fuse into v_pk_add_max_u16; GlobalISel is expected to split the |
| ; halves, add them with scalar instructions, repack, and then issue a separate |
| ; v_pk_max_u16. |
| define amdgpu_ps float @add_max_v2u16_ssv(<2 x i16> inreg %a, <2 x i16> inreg %b, <2 x i16> %c) { |
| ; SDAG-LABEL: add_max_v2u16_ssv: |
| ; SDAG: ; %bb.0: |
| ; SDAG-NEXT: v_pk_add_max_u16 v0, s0, s1, v0 |
| ; SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GISEL-LABEL: add_max_v2u16_ssv: |
| ; GISEL: ; %bb.0: |
| ; GISEL-NEXT: s_lshr_b32 s2, s0, 16 |
| ; GISEL-NEXT: s_lshr_b32 s3, s1, 16 |
| ; GISEL-NEXT: s_add_co_i32 s0, s0, s1 |
| ; GISEL-NEXT: s_add_co_i32 s2, s2, s3 |
| ; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| ; GISEL-NEXT: s_pack_ll_b32_b16 s0, s0, s2 |
| ; GISEL-NEXT: v_pk_max_u16 v0, s0, v0 |
| ; GISEL-NEXT: ; return to shader part epilog |
| %sum = add <2 x i16> %a, %b |
| %umax = call <2 x i16> @llvm.umax.v2i16(<2 x i16> %sum, <2 x i16> %c) |
| %bits = bitcast <2 x i16> %umax to float |
| ret float %bits |
| } |
| |
| ; Packed add + umax with all three operands as inreg arguments. Neither |
| ; selector is expected to fuse: SelectionDAG is expected to emit separate |
| ; v_pk_add_u16 and v_pk_max_u16, while GlobalISel is expected to expand both |
| ; operations into per-half scalar instructions and copy the packed result to v0. |
| define amdgpu_ps float @add_max_v2u16_sss(<2 x i16> inreg %a, <2 x i16> inreg %b, <2 x i16> inreg %c) { |
| ; SDAG-LABEL: add_max_v2u16_sss: |
| ; SDAG: ; %bb.0: |
| ; SDAG-NEXT: v_pk_add_u16 v0, s0, s1 |
| ; SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; SDAG-NEXT: v_pk_max_u16 v0, v0, s2 |
| ; SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GISEL-LABEL: add_max_v2u16_sss: |
| ; GISEL: ; %bb.0: |
| ; GISEL-NEXT: s_lshr_b32 s3, s0, 16 |
| ; GISEL-NEXT: s_lshr_b32 s4, s1, 16 |
| ; GISEL-NEXT: s_add_co_i32 s0, s0, s1 |
| ; GISEL-NEXT: s_add_co_i32 s3, s3, s4 |
| ; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GISEL-NEXT: s_pack_ll_b32_b16 s0, s0, s3 |
| ; GISEL-NEXT: s_and_b32 s3, s2, 0xffff |
| ; GISEL-NEXT: s_lshr_b32 s1, s0, 16 |
| ; GISEL-NEXT: s_and_b32 s0, s0, 0xffff |
| ; GISEL-NEXT: s_lshr_b32 s2, s2, 16 |
| ; GISEL-NEXT: s_max_u32 s0, s0, s3 |
| ; GISEL-NEXT: s_max_u32 s1, s1, s2 |
| ; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| ; GISEL-NEXT: s_pack_ll_b32_b16 s0, s0, s1 |
| ; GISEL-NEXT: v_mov_b32_e32 v0, s0 |
| ; GISEL-NEXT: ; return to shader part epilog |
| %sum = add <2 x i16> %a, %b |
| %umax = call <2 x i16> @llvm.umax.v2i16(<2 x i16> %sum, <2 x i16> %c) |
| %bits = bitcast <2 x i16> %umax to float |
| ret float %bits |
| } |
| |
| ; Packed add + umax where the max operand is the constant vector <4, 0>. The |
| ; expected fused instruction carries it as the operand 4. |
| define amdgpu_ps float @add_max_v2u16_vsi(<2 x i16> %a, <2 x i16> inreg %b) { |
| ; GCN-LABEL: add_max_v2u16_vsi: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_pk_add_max_u16 v0, v0, s0, 4 |
| ; GCN-NEXT: ; return to shader part epilog |
| %sum = add <2 x i16> %a, %b |
| %umax = call <2 x i16> @llvm.umax.v2i16(<2 x i16> %sum, <2 x i16> <i16 4, i16 0>) |
| %bits = bitcast <2 x i16> %umax to float |
| ret float %bits |
| } |
| |
| ; Packed add + umax where the max operand is the constant vector <100, 101>. |
| ; The expected fused instruction encodes it as the 32-bit literal 0x650064 |
| ; (element 0 in the low half, element 1 in the high half). |
| define amdgpu_ps float @add_max_v2u16_svl(<2 x i16> inreg %a, <2 x i16> %b) { |
| ; GCN-LABEL: add_max_v2u16_svl: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_pk_add_max_u16 v0, s0, v0, 0x650064 |
| ; GCN-NEXT: ; return to shader part epilog |
| %sum = add <2 x i16> %a, %b |
| %umax = call <2 x i16> @llvm.umax.v2i16(<2 x i16> %sum, <2 x i16> <i16 100, i16 101>) |
| %bits = bitcast <2 x i16> %umax to float |
| ret float %bits |
| } |
| |
| ; Packed add + umax where the constant vector <100, 100> is an addend (added |
| ; to an inreg argument) and the max operand is a regular argument. |
| ; SelectionDAG is expected to fuse with the literal 0x640064 as the first |
| ; source; GlobalISel is expected to add per half with scalar instructions, |
| ; repack, and then issue a separate v_pk_max_u16. |
| define amdgpu_ps float @add_max_v2u16_slv(<2 x i16> inreg %a, <2 x i16> %b) { |
| ; SDAG-LABEL: add_max_v2u16_slv: |
| ; SDAG: ; %bb.0: |
| ; SDAG-NEXT: v_pk_add_max_u16 v0, 0x640064, s0, v0 |
| ; SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GISEL-LABEL: add_max_v2u16_slv: |
| ; GISEL: ; %bb.0: |
| ; GISEL-NEXT: s_lshr_b32 s1, s0, 16 |
| ; GISEL-NEXT: s_add_co_i32 s0, s0, 0x640064 |
| ; GISEL-NEXT: s_addk_co_i32 s1, 0x64 |
| ; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| ; GISEL-NEXT: s_pack_ll_b32_b16 s0, s0, s1 |
| ; GISEL-NEXT: v_pk_max_u16 v0, s0, v0 |
| ; GISEL-NEXT: ; return to shader part epilog |
| %sum = add <2 x i16> %a, <i16 100, i16 100> |
| %umax = call <2 x i16> @llvm.umax.v2i16(<2 x i16> %sum, <2 x i16> %b) |
| %bits = bitcast <2 x i16> %umax to float |
| ret float %bits |
| } |
| |
| ; Signed packed variant: <2 x i16> add feeding smax, all operands as regular |
| ; arguments. Expected to select the fused v_pk_add_max_i16. |
| define amdgpu_ps float @add_max_v2s16_vvv(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c) { |
| ; GCN-LABEL: add_max_v2s16_vvv: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_pk_add_max_i16 v0, v0, v1, v2 |
| ; GCN-NEXT: ; return to shader part epilog |
| %sum = add <2 x i16> %a, %b |
| %smax = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %sum, <2 x i16> %c) |
| %bits = bitcast <2 x i16> %smax to float |
| ret float %bits |
| } |
| |
| ; Unsigned packed min variant: <2 x i16> add feeding umin, all operands as |
| ; regular arguments. Expected to select the fused v_pk_add_min_u16. |
| define amdgpu_ps float @add_min_v2u16_vvv(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c) { |
| ; GCN-LABEL: add_min_v2u16_vvv: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_pk_add_min_u16 v0, v0, v1, v2 |
| ; GCN-NEXT: ; return to shader part epilog |
| %sum = add <2 x i16> %a, %b |
| %umin = call <2 x i16> @llvm.umin.v2i16(<2 x i16> %sum, <2 x i16> %c) |
| %bits = bitcast <2 x i16> %umin to float |
| ret float %bits |
| } |
| |
| ; Signed packed min variant: <2 x i16> add feeding smin, all operands as |
| ; regular arguments. Expected to select the fused v_pk_add_min_i16. |
| define amdgpu_ps float @add_min_v2s16_vvv(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c) { |
| ; GCN-LABEL: add_min_v2s16_vvv: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: v_pk_add_min_i16 v0, v0, v1, v2 |
| ; GCN-NEXT: ; return to shader part epilog |
| %sum = add <2 x i16> %a, %b |
| %smin = call <2 x i16> @llvm.smin.v2i16(<2 x i16> %sum, <2 x i16> %c) |
| %bits = bitcast <2 x i16> %smin to float |
| ret float %bits |
| } |
| |
| ; Integer min/max intrinsics used by the tests above: the <2 x i16> overloads |
| ; (suffix v2i16) followed by the i32 overloads. |
| declare <2 x i16> @llvm.smin.v2i16(<2 x i16>, <2 x i16>) |
| declare <2 x i16> @llvm.smax.v2i16(<2 x i16>, <2 x i16>) |
| declare <2 x i16> @llvm.umin.v2i16(<2 x i16>, <2 x i16>) |
| declare <2 x i16> @llvm.umax.v2i16(<2 x i16>, <2 x i16>) |
| declare i32 @llvm.smin.i32(i32, i32) |
| declare i32 @llvm.smax.i32(i32, i32) |
| declare i32 @llvm.umin.i32(i32, i32) |
| declare i32 @llvm.umax.i32(i32, i32) |