; blob: 9272b532f347e604b62d62f33d5168396f93a0c7 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A,GFX90A-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A,GFX90A-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s
; fneg of a <2 x float> argument passed without the inreg attribute.
; On all three checked targets, and with both instruction selectors, the
; expected code is one v_xor_b32 with 0x80000000 per lane, applied in place
; to v0 and v1. Only the instruction order differs: v1 then v0 on gfx90a and
; gfx950, v0 then v1 on gfx1250.
define <2 x float> @fneg_v2f32_v(<2 x float> %first) {
; GFX90A-LABEL: fneg_v2f32_v:
; GFX90A: ; %bb.0: ; %bb
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX90A-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: fneg_v2f32_v:
; GFX950: ; %bb.0: ; %bb
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX950-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: fneg_v2f32_v:
; GFX1250: ; %bb.0: ; %bb
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GFX1250-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
bb:
%neg = fneg <2 x float> %first
ret <2 x float> %neg
}
; llvm.fabs.v2f32 of a <2 x float> argument passed without the inreg
; attribute. The expected code is one v_and_b32 with 0x7fffffff per lane,
; applied in place to v0 and v1. On gfx90a and gfx950 the two selectors
; differ only in instruction order (SelectionDAG emits v0 then v1, GlobalISel
; emits v1 then v0); on gfx1250 both selectors produce the same sequence.
define <2 x float> @fabs_v2f32_v(<2 x float> %first) {
; GFX90A-SDAG-LABEL: fabs_v2f32_v:
; GFX90A-SDAG: ; %bb.0: ; %bb
; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-SDAG-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GFX90A-SDAG-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-GISEL-LABEL: fabs_v2f32_v:
; GFX90A-GISEL: ; %bb.0: ; %bb
; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; GFX90A-GISEL-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-SDAG-LABEL: fabs_v2f32_v:
; GFX950-SDAG: ; %bb.0: ; %bb
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-SDAG-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GFX950-SDAG-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-GISEL-LABEL: fabs_v2f32_v:
; GFX950-GISEL: ; %bb.0: ; %bb
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: fabs_v2f32_v:
; GFX1250: ; %bb.0: ; %bb
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GFX1250-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
bb:
%abs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %first)
ret <2 x float> %abs
}
; fneg(fabs(x)) on a <2 x float> argument passed without the inreg attribute.
; The expected code keeps the two operations as separate instructions: a
; v_and_b32 with 0x7fffffff per lane followed by a v_xor_b32 with 0x80000000
; per lane, four VALU instructions in total on every checked target. Both
; selectors produce the same output per target; gfx1250 additionally expects
; an s_delay_alu between the and pair and the xor pair.
define <2 x float> @fneg_fabs_v2f32_v(<2 x float> %first) {
; GFX90A-LABEL: fneg_fabs_v2f32_v:
; GFX90A: ; %bb.0: ; %bb
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; GFX90A-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GFX90A-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX90A-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: fneg_fabs_v2f32_v:
; GFX950: ; %bb.0: ; %bb
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; GFX950-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GFX950-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX950-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: fneg_fabs_v2f32_v:
; GFX1250: ; %bb.0: ; %bb
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GFX1250-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GFX1250-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
bb:
; Take the absolute value first, then negate that result.
%abs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %first)
%neg = fneg <2 x float> %abs
ret <2 x float> %neg
}
; fneg of a <2 x float> argument marked inreg. In the expected output the
; argument arrives in scalar registers (s[16:17] on gfx90a, s[0:1] on gfx950
; and gfx1250) and the result is returned in v0/v1.
; The two selectors take different routes:
;  - SelectionDAG negates on the scalar side with s_xor_b32 0x80000000 and
;    then copies the results into v0/v1 (v_dual_mov_b32 on gfx1250).
;  - GlobalISel first copies the scalar pair into v[0:1] (v_pk_mov_b32 on
;    gfx90a, v_mov_b64 on gfx950 and gfx1250) and then applies v_xor_b32
;    0x80000000 to each lane.
define <2 x float> @fneg_v2f32_s(<2 x float> inreg %first) {
; GFX90A-SDAG-LABEL: fneg_v2f32_s:
; GFX90A-SDAG: ; %bb.0: ; %bb
; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-SDAG-NEXT: s_xor_b32 s4, s17, 0x80000000
; GFX90A-SDAG-NEXT: s_xor_b32 s5, s16, 0x80000000
; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-SDAG-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-GISEL-LABEL: fneg_v2f32_s:
; GFX90A-GISEL: ; %bb.0: ; %bb
; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-GISEL-NEXT: v_pk_mov_b32 v[0:1], s[16:17], s[16:17] op_sel:[0,1]
; GFX90A-GISEL-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX90A-GISEL-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-SDAG-LABEL: fneg_v2f32_s:
; GFX950-SDAG: ; %bb.0: ; %bb
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-SDAG-NEXT: s_xor_b32 s1, s1, 0x80000000
; GFX950-SDAG-NEXT: s_xor_b32 s0, s0, 0x80000000
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s0
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, s1
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-GISEL-LABEL: fneg_v2f32_s:
; GFX950-GISEL: ; %bb.0: ; %bb
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX950-GISEL-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX950-GISEL-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: fneg_v2f32_s:
; GFX1250-SDAG: ; %bb.0: ; %bb
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_xor_b32 s0, s0, 0x80000000
; GFX1250-SDAG-NEXT: s_xor_b32 s1, s1, 0x80000000
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-GISEL-LABEL: fneg_v2f32_s:
; GFX1250-GISEL: ; %bb.0: ; %bb
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-GISEL-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GFX1250-GISEL-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
bb:
%neg = fneg <2 x float> %first
ret <2 x float> %neg
}
; llvm.fabs.v2f32 of a <2 x float> argument marked inreg. In the expected
; output the argument arrives in scalar registers (s[16:17] on gfx90a, s[0:1]
; on gfx950 and gfx1250) and the result is returned in v0/v1.
; The two selectors take different routes:
;  - SelectionDAG clears bit 31 of each scalar register in place with
;    s_bitset0_b32 and then copies the results into v0/v1 (v_dual_mov_b32 on
;    gfx1250).
;  - GlobalISel first copies the scalar pair into v[0:1] (v_pk_mov_b32 on
;    gfx90a, v_mov_b64 on gfx950 and gfx1250) and then applies v_and_b32
;    0x7fffffff to each lane.
define <2 x float> @fabs_v2f32_s(<2 x float> inreg %first) {
; GFX90A-SDAG-LABEL: fabs_v2f32_s:
; GFX90A-SDAG: ; %bb.0: ; %bb
; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-SDAG-NEXT: s_bitset0_b32 s16, 31
; GFX90A-SDAG-NEXT: s_bitset0_b32 s17, 31
; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, s16
; GFX90A-SDAG-NEXT: v_mov_b32_e32 v1, s17
; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-GISEL-LABEL: fabs_v2f32_s:
; GFX90A-GISEL: ; %bb.0: ; %bb
; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-GISEL-NEXT: v_pk_mov_b32 v[0:1], s[16:17], s[16:17] op_sel:[0,1]
; GFX90A-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; GFX90A-GISEL-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-SDAG-LABEL: fabs_v2f32_s:
; GFX950-SDAG: ; %bb.0: ; %bb
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-SDAG-NEXT: s_bitset0_b32 s0, 31
; GFX950-SDAG-NEXT: s_bitset0_b32 s1, 31
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s0
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, s1
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-GISEL-LABEL: fabs_v2f32_s:
; GFX950-GISEL: ; %bb.0: ; %bb
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX950-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: fabs_v2f32_s:
; GFX1250-SDAG: ; %bb.0: ; %bb
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_bitset0_b32 s0, 31
; GFX1250-SDAG-NEXT: s_bitset0_b32 s1, 31
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-GISEL-LABEL: fabs_v2f32_s:
; GFX1250-GISEL: ; %bb.0: ; %bb
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GFX1250-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
bb:
%abs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %first)
ret <2 x float> %abs
}
; fneg(fabs(x)) on a <2 x float> argument marked inreg. In the expected
; output the argument arrives in scalar registers (s[16:17] on gfx90a, s[0:1]
; on gfx950 and gfx1250) and the result is returned in v0/v1.
; The two operations stay separate on both selectors:
;  - SelectionDAG works on the scalar side: s_bitset0_b32 on bit 31 of each
;    register, then s_xor_b32 with 0x80000000, then a copy into v0/v1
;    (v_dual_mov_b32 on gfx1250).
;  - GlobalISel first copies the scalar pair into v[0:1] (v_pk_mov_b32 on
;    gfx90a, v_mov_b64 on gfx950 and gfx1250), then applies v_and_b32
;    0x7fffffff followed by v_xor_b32 0x80000000 to each lane.
define <2 x float> @fneg_fabs_v2f32_s(<2 x float> inreg %first) {
; GFX90A-SDAG-LABEL: fneg_fabs_v2f32_s:
; GFX90A-SDAG: ; %bb.0: ; %bb
; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-SDAG-NEXT: s_bitset0_b32 s17, 31
; GFX90A-SDAG-NEXT: s_bitset0_b32 s16, 31
; GFX90A-SDAG-NEXT: s_xor_b32 s4, s17, 0x80000000
; GFX90A-SDAG-NEXT: s_xor_b32 s5, s16, 0x80000000
; GFX90A-SDAG-NEXT: v_mov_b32_e32 v0, s5
; GFX90A-SDAG-NEXT: v_mov_b32_e32 v1, s4
; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-GISEL-LABEL: fneg_fabs_v2f32_s:
; GFX90A-GISEL: ; %bb.0: ; %bb
; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-GISEL-NEXT: v_pk_mov_b32 v[0:1], s[16:17], s[16:17] op_sel:[0,1]
; GFX90A-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; GFX90A-GISEL-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GFX90A-GISEL-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX90A-GISEL-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-SDAG-LABEL: fneg_fabs_v2f32_s:
; GFX950-SDAG: ; %bb.0: ; %bb
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-SDAG-NEXT: s_bitset0_b32 s1, 31
; GFX950-SDAG-NEXT: s_bitset0_b32 s0, 31
; GFX950-SDAG-NEXT: s_xor_b32 s1, s1, 0x80000000
; GFX950-SDAG-NEXT: s_xor_b32 s0, s0, 0x80000000
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s0
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, s1
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-GISEL-LABEL: fneg_fabs_v2f32_s:
; GFX950-GISEL: ; %bb.0: ; %bb
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX950-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GFX950-GISEL-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX950-GISEL-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-SDAG-LABEL: fneg_fabs_v2f32_s:
; GFX1250-SDAG: ; %bb.0: ; %bb
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX1250-SDAG-NEXT: s_bitset0_b32 s0, 31
; GFX1250-SDAG-NEXT: s_bitset0_b32 s1, 31
; GFX1250-SDAG-NEXT: s_xor_b32 s0, s0, 0x80000000
; GFX1250-SDAG-NEXT: s_xor_b32 s1, s1, 0x80000000
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-GISEL-LABEL: fneg_fabs_v2f32_s:
; GFX1250-GISEL: ; %bb.0: ; %bb
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GFX1250-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-GISEL-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GFX1250-GISEL-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
bb:
; Take the absolute value first, then negate that result.
%abs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %first)
%neg = fneg <2 x float> %abs
ret <2 x float> %neg
}