; llvm/test/CodeGen/AMDGPU/fneg-combines.legal.f16.ll - llvm-project - Git at Google

 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SAFE,VI,VI-SAFE %s
 ; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,VI,VI-NSZ %s

 ; --------------------------------------------------------------------------------
 ; rcp tests
 ; --------------------------------------------------------------------------------

 ; fneg applied to the result of llvm.amdgcn.rcp.f16. The expected output is a
 ; single v_rcp_f16_e64 whose source operand carries the negation (-v0); no
 ; separate negate instruction appears between the waitcnt and the return.
 define half @v_fneg_rcp_f16(half %a) #0 {
 ; GCN-LABEL: v_fneg_rcp_f16:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_rcp_f16_e64 v0, -v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %rcp = call half @llvm.amdgcn.rcp.f16(half %a)
   %fneg = fneg half %rcp
   ret half %fneg
 }

 ; fneg on both the input and the result of llvm.amdgcn.rcp.f16. The expected
 ; output is a plain v_rcp_f16_e32 v0, v0 with no source modifier, i.e. neither
 ; negation survives into the generated code.
 define half @v_fneg_rcp_fneg_f16(half %a) #0 {
 ; GCN-LABEL: v_fneg_rcp_fneg_f16:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_rcp_f16_e32 v0, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %fneg.a = fneg half %a
   %rcp = call half @llvm.amdgcn.rcp.f16(half %fneg.a)
   %fneg = fneg half %rcp
   ret half %fneg
 }

 ; Same fneg/rcp/fneg chain as v_fneg_rcp_fneg_f16, but the negated input
 ; (%fneg.a) is also returned as the second struct member. The expected output
 ; keeps the rcp free of source modifiers and produces the returned negated
 ; input with a v_xor_b32 against 0x8000 (the sign bit of a 16-bit half).
 define { half, half } @v_fneg_rcp_store_use_fneg_f16(half %a) #0 {
 ; GCN-LABEL: v_fneg_rcp_store_use_fneg_f16:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_rcp_f16_e32 v2, v0
 ; GCN-NEXT:    v_xor_b32_e32 v1, 0x8000, v0
 ; GCN-NEXT:    v_mov_b32_e32 v0, v2
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %fneg.a = fneg half %a
   %rcp = call half @llvm.amdgcn.rcp.f16(half %fneg.a)
   %fneg = fneg half %rcp
   %insert.0 = insertvalue { half, half } poison, half %fneg, 0
   %insert.1 = insertvalue { half, half } %insert.0, half %fneg.a, 1
   ret { half, half } %insert.1
 }

 ; fneg/rcp/fneg chain where the negated input (%fneg.a) has a second user, an
 ; fmul with %c. The expected output runs the rcp on the unmodified v0 and lets
 ; the multiply consume the negation as a source modifier
 ; (v_mul_f16_e64 v1, -v0, v1), so no standalone negate is emitted.
 define { half, half } @v_fneg_rcp_multi_use_fneg_f16(half %a, half %c) #0 {
 ; GCN-LABEL: v_fneg_rcp_multi_use_fneg_f16:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_rcp_f16_e32 v2, v0
 ; GCN-NEXT:    v_mul_f16_e64 v1, -v0, v1
 ; GCN-NEXT:    v_mov_b32_e32 v0, v2
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %fneg.a = fneg half %a
   %rcp = call half @llvm.amdgcn.rcp.f16(half %fneg.a)
   %fneg = fneg half %rcp
   %use1 = fmul half %fneg.a, %c
   %insert.0 = insertvalue { half, half } poison, half %fneg, 0
   %insert.1 = insertvalue { half, half } %insert.0, half %use1, 1
   ret { half, half } %insert.1
 }

 ; --------------------------------------------------------------------------------
 ; sin tests
 ; --------------------------------------------------------------------------------

 ; fneg applied to the result of llvm.amdgcn.sin.f16. The expected output is a
 ; single v_sin_f16_e64 with the negation placed on its source operand (-v0).
 define half @v_fneg_amdgcn_sin_f16(half %a) #0 {
 ; GCN-LABEL: v_fneg_amdgcn_sin_f16:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_sin_f16_e64 v0, -v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %sin = call half @llvm.amdgcn.sin.f16(half %a)
   %fneg = fneg half %sin
   ret half %fneg
 }

 ; --------------------------------------------------------------------------------
 ; vintrp tests
 ; --------------------------------------------------------------------------------

 ; A negated f32 product feeds two llvm.amdgcn.interp.p1.f16 calls that differ
 ; only in their first i32 argument (0 and 1); the expected output shows them
 ; as attr0.x and attr0.y. The negation is expected on the multiply's second
 ; source (v_mul_f32_e64 v1, v0, -v1), and both v_interp_p1ll_f16 instructions
 ; read that product register without any modifier.
 define { float, float } @v_fneg_interp_p1_f16(float %a, float %b) #0 {
 ; GCN-LABEL: v_fneg_interp_p1_f16:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_mul_f32_e64 v1, v0, -v1
 ; GCN-NEXT:    s_mov_b32 m0, 0
 ; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
 ; GCN-NEXT:    v_interp_p1ll_f16 v0, v1, attr0.x
 ; GCN-NEXT:    v_interp_p1ll_f16 v1, v1, attr0.y
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %mul = fmul float %a, %b
   %fneg = fneg float %mul
   %intrp0 = call float @llvm.amdgcn.interp.p1.f16(float %fneg, i32 0, i32 0, i1 false, i32 0)
   %intrp1 = call float @llvm.amdgcn.interp.p1.f16(float %fneg, i32 1, i32 0, i1 false, i32 0)
   %insert.0 = insertvalue { float, float } poison, float %intrp0, 0
   %insert.1 = insertvalue { float, float } %insert.0, float %intrp1, 1
   ret { float, float } %insert.1
 }

 ; A negated f32 product is the second operand of two llvm.amdgcn.interp.p2.f16
 ; calls whose first operand is the constant 4.0 and whose first i32 argument
 ; is 0 and 1 (attr0.x and attr0.y in the expected output). The negation is
 ; expected on the multiply (v_mul_f32_e64 v1, v0, -v1), the constant is
 ; materialized once into v2, and both v_interp_p2_f16 instructions use v1 and
 ; v2 without modifiers.
 define { half, half } @v_fneg_interp_p2_f16(float %a, float %b) #0 {
 ; GCN-LABEL: v_fneg_interp_p2_f16:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_mul_f32_e64 v1, v0, -v1
 ; GCN-NEXT:    v_mov_b32_e32 v2, 4.0
 ; GCN-NEXT:    s_mov_b32 m0, 0
 ; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
 ; GCN-NEXT:    v_interp_p2_f16 v0, v1, attr0.x, v2
 ; GCN-NEXT:    v_interp_p2_f16 v1, v1, attr0.y, v2
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %mul = fmul float %a, %b
   %fneg = fneg float %mul
   %intrp0 = call half @llvm.amdgcn.interp.p2.f16(float 4.0, float %fneg, i32 0, i32 0, i1 false, i32 0)
   %intrp1 = call half @llvm.amdgcn.interp.p2.f16(float 4.0, float %fneg, i32 1, i32 0, i1 false, i32 0)
   %insert.0 = insertvalue { half, half } poison, half %intrp0, 0
   %insert.1 = insertvalue { half, half } %insert.0, half %intrp1, 1
   ret { half, half } %insert.1
 }

 ; --------------------------------------------------------------------------------
 ; arithmetic.fence tests
 ; --------------------------------------------------------------------------------

 ; FIXME: Legalization/promote is broken
 ; fneg applied directly to the result of llvm.arithmetic.fence.f16 on an
 ; argument. The expected output has the ;ARITH_FENCE marker ahead of the
 ; s_waitcnt and performs the negation as an explicit v_xor_b32 with 0x8000
 ; (the sign bit of a 16-bit half).
 define half @v_fneg_arithmetic_fence_f16(half %a) #0 {
 ; GCN-LABEL: v_fneg_arithmetic_fence_f16:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    ;ARITH_FENCE
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %fence = call half @llvm.arithmetic.fence.f16(half %a)
   %fneg = fneg half %fence
   ret half %fneg
 }

 ; fmul -> llvm.arithmetic.fence.f16 -> fneg. The expected output keeps the
 ; multiply free of source modifiers, then the ;ARITH_FENCE marker, then a
 ; separate v_xor_b32 with 0x8000: the negation stays after the fence instead
 ; of being placed on the multiply's operands.
 define half @v_fneg_arithmetic_fence_fmul_f16(half %a, half %b) #0 {
 ; GCN-LABEL: v_fneg_arithmetic_fence_fmul_f16:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_mul_f16_e32 v0, v0, v1
 ; GCN-NEXT:    ;ARITH_FENCE
 ; GCN-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %mul = fmul half %a, %b
   %fence = call half @llvm.arithmetic.fence.f16(half %mul)
   %fneg = fneg half %fence
   ret half %fneg
 }

 ; Declarations of the intrinsics called by the test functions in this file.
 ; The rcp, sin and arithmetic.fence declarations carry attribute group #1; the
 ; two interp declarations carry group #0.
 declare half @llvm.amdgcn.rcp.f16(half) #1
 declare half @llvm.amdgcn.sin.f16(half) #1
 declare half @llvm.arithmetic.fence.f16(half) #1
 declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32) #0
 declare half @llvm.amdgcn.interp.p2.f16(float, float, i32, i32, i1, i32) #0

 ; Attribute groups. #0 is attached to every function definition and to the
 ; interp declarations; #1 is attached to the rcp, sin and arithmetic.fence
 ; declarations. NOTE(review): #2, #3 and #4 are not referenced by any
 ; definition or declaration visible in this file - confirm before removing.
 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind "unsafe-fp-math"="true" }
 attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }
 attributes #4 = { nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GCN-NSZ: {{.*}}
 ; GCN-SAFE: {{.*}}
 ; VI: {{.*}}
 ; VI-NSZ: {{.*}}
 ; VI-SAFE: {{.*}}
	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SAFE,VI,VI-SAFE %s
	; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,VI,VI-NSZ %s

	; --------------------------------------------------------------------------------
	; rcp tests
	; --------------------------------------------------------------------------------

	; fneg applied to the result of llvm.amdgcn.rcp.f16. The expected output is a
	; single v_rcp_f16_e64 whose source operand carries the negation (-v0); no
	; separate negate instruction appears between the waitcnt and the return.
	define half @v_fneg_rcp_f16(half %a) #0 {
	; GCN-LABEL: v_fneg_rcp_f16:
	; GCN: ; %bb.0:
	; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
	; GCN-NEXT: v_rcp_f16_e64 v0, -v0
	; GCN-NEXT: s_setpc_b64 s[30:31]
	%rcp = call half @llvm.amdgcn.rcp.f16(half %a)
	%fneg = fneg half %rcp
	ret half %fneg
	}

	; fneg on both the input and the result of llvm.amdgcn.rcp.f16. The expected
	; output is a plain v_rcp_f16_e32 v0, v0 with no source modifier, i.e. neither
	; negation survives into the generated code.
	define half @v_fneg_rcp_fneg_f16(half %a) #0 {
	; GCN-LABEL: v_fneg_rcp_fneg_f16:
	; GCN: ; %bb.0:
	; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
	; GCN-NEXT: v_rcp_f16_e32 v0, v0
	; GCN-NEXT: s_setpc_b64 s[30:31]
	%fneg.a = fneg half %a
	%rcp = call half @llvm.amdgcn.rcp.f16(half %fneg.a)
	%fneg = fneg half %rcp
	ret half %fneg
	}

	; Same fneg/rcp/fneg chain as v_fneg_rcp_fneg_f16, but the negated input
	; (%fneg.a) is also returned as the second struct member. The expected output
	; keeps the rcp free of source modifiers and produces the returned negated
	; input with a v_xor_b32 against 0x8000 (the sign bit of a 16-bit half).
	define { half, half } @v_fneg_rcp_store_use_fneg_f16(half %a) #0 {
	; GCN-LABEL: v_fneg_rcp_store_use_fneg_f16:
	; GCN: ; %bb.0:
	; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
	; GCN-NEXT: v_rcp_f16_e32 v2, v0
	; GCN-NEXT: v_xor_b32_e32 v1, 0x8000, v0
	; GCN-NEXT: v_mov_b32_e32 v0, v2
	; GCN-NEXT: s_setpc_b64 s[30:31]
	%fneg.a = fneg half %a
	%rcp = call half @llvm.amdgcn.rcp.f16(half %fneg.a)
	%fneg = fneg half %rcp
	%insert.0 = insertvalue { half, half } poison, half %fneg, 0
	%insert.1 = insertvalue { half, half } %insert.0, half %fneg.a, 1
	ret { half, half } %insert.1
	}

	; fneg/rcp/fneg chain where the negated input (%fneg.a) has a second user, an
	; fmul with %c. The expected output runs the rcp on the unmodified v0 and lets
	; the multiply consume the negation as a source modifier
	; (v_mul_f16_e64 v1, -v0, v1), so no standalone negate is emitted.
	define { half, half } @v_fneg_rcp_multi_use_fneg_f16(half %a, half %c) #0 {
	; GCN-LABEL: v_fneg_rcp_multi_use_fneg_f16:
	; GCN: ; %bb.0:
	; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
	; GCN-NEXT: v_rcp_f16_e32 v2, v0
	; GCN-NEXT: v_mul_f16_e64 v1, -v0, v1
	; GCN-NEXT: v_mov_b32_e32 v0, v2
	; GCN-NEXT: s_setpc_b64 s[30:31]
	%fneg.a = fneg half %a
	%rcp = call half @llvm.amdgcn.rcp.f16(half %fneg.a)
	%fneg = fneg half %rcp
	%use1 = fmul half %fneg.a, %c
	%insert.0 = insertvalue { half, half } poison, half %fneg, 0
	%insert.1 = insertvalue { half, half } %insert.0, half %use1, 1
	ret { half, half } %insert.1
	}

	; --------------------------------------------------------------------------------
	; sin tests
	; --------------------------------------------------------------------------------

	; fneg applied to the result of llvm.amdgcn.sin.f16. The expected output is a
	; single v_sin_f16_e64 with the negation placed on its source operand (-v0).
	define half @v_fneg_amdgcn_sin_f16(half %a) #0 {
	; GCN-LABEL: v_fneg_amdgcn_sin_f16:
	; GCN: ; %bb.0:
	; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
	; GCN-NEXT: v_sin_f16_e64 v0, -v0
	; GCN-NEXT: s_setpc_b64 s[30:31]
	%sin = call half @llvm.amdgcn.sin.f16(half %a)
	%fneg = fneg half %sin
	ret half %fneg
	}

	; --------------------------------------------------------------------------------
	; vintrp tests
	; --------------------------------------------------------------------------------

	; A negated f32 product feeds two llvm.amdgcn.interp.p1.f16 calls that differ
	; only in their first i32 argument (0 and 1); the expected output shows them
	; as attr0.x and attr0.y. The negation is expected on the multiply's second
	; source (v_mul_f32_e64 v1, v0, -v1), and both v_interp_p1ll_f16 instructions
	; read that product register without any modifier.
	define { float, float } @v_fneg_interp_p1_f16(float %a, float %b) #0 {
	; GCN-LABEL: v_fneg_interp_p1_f16:
	; GCN: ; %bb.0:
	; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
	; GCN-NEXT: v_mul_f32_e64 v1, v0, -v1
	; GCN-NEXT: s_mov_b32 m0, 0
	; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
	; GCN-NEXT: v_interp_p1ll_f16 v0, v1, attr0.x
	; GCN-NEXT: v_interp_p1ll_f16 v1, v1, attr0.y
	; GCN-NEXT: s_setpc_b64 s[30:31]
	%mul = fmul float %a, %b
	%fneg = fneg float %mul
	%intrp0 = call float @llvm.amdgcn.interp.p1.f16(float %fneg, i32 0, i32 0, i1 false, i32 0)
	%intrp1 = call float @llvm.amdgcn.interp.p1.f16(float %fneg, i32 1, i32 0, i1 false, i32 0)
	%insert.0 = insertvalue { float, float } poison, float %intrp0, 0
	%insert.1 = insertvalue { float, float } %insert.0, float %intrp1, 1
	ret { float, float } %insert.1
	}

	; A negated f32 product is the second operand of two llvm.amdgcn.interp.p2.f16
	; calls whose first operand is the constant 4.0 and whose first i32 argument
	; is 0 and 1 (attr0.x and attr0.y in the expected output). The negation is
	; expected on the multiply (v_mul_f32_e64 v1, v0, -v1), the constant is
	; materialized once into v2, and both v_interp_p2_f16 instructions use v1 and
	; v2 without modifiers.
	define { half, half } @v_fneg_interp_p2_f16(float %a, float %b) #0 {
	; GCN-LABEL: v_fneg_interp_p2_f16:
	; GCN: ; %bb.0:
	; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
	; GCN-NEXT: v_mul_f32_e64 v1, v0, -v1
	; GCN-NEXT: v_mov_b32_e32 v2, 4.0
	; GCN-NEXT: s_mov_b32 m0, 0
	; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
	; GCN-NEXT: v_interp_p2_f16 v0, v1, attr0.x, v2
	; GCN-NEXT: v_interp_p2_f16 v1, v1, attr0.y, v2
	; GCN-NEXT: s_setpc_b64 s[30:31]
	%mul = fmul float %a, %b
	%fneg = fneg float %mul
	%intrp0 = call half @llvm.amdgcn.interp.p2.f16(float 4.0, float %fneg, i32 0, i32 0, i1 false, i32 0)
	%intrp1 = call half @llvm.amdgcn.interp.p2.f16(float 4.0, float %fneg, i32 1, i32 0, i1 false, i32 0)
	%insert.0 = insertvalue { half, half } poison, half %intrp0, 0
	%insert.1 = insertvalue { half, half } %insert.0, half %intrp1, 1
	ret { half, half } %insert.1
	}

	; --------------------------------------------------------------------------------
	; arithmetic.fence tests
	; --------------------------------------------------------------------------------

	; FIXME: Legalization/promote is broken
	; fneg applied directly to the result of llvm.arithmetic.fence.f16 on an
	; argument. The expected output has the ;ARITH_FENCE marker ahead of the
	; s_waitcnt and performs the negation as an explicit v_xor_b32 with 0x8000
	; (the sign bit of a 16-bit half).
	define half @v_fneg_arithmetic_fence_f16(half %a) #0 {
	; GCN-LABEL: v_fneg_arithmetic_fence_f16:
	; GCN: ; %bb.0:
	; GCN-NEXT: ;ARITH_FENCE
	; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
	; GCN-NEXT: v_xor_b32_e32 v0, 0x8000, v0
	; GCN-NEXT: s_setpc_b64 s[30:31]
	%fence = call half @llvm.arithmetic.fence.f16(half %a)
	%fneg = fneg half %fence
	ret half %fneg
	}

	; fmul -> llvm.arithmetic.fence.f16 -> fneg. The expected output keeps the
	; multiply free of source modifiers, then the ;ARITH_FENCE marker, then a
	; separate v_xor_b32 with 0x8000: the negation stays after the fence instead
	; of being placed on the multiply's operands.
	define half @v_fneg_arithmetic_fence_fmul_f16(half %a, half %b) #0 {
	; GCN-LABEL: v_fneg_arithmetic_fence_fmul_f16:
	; GCN: ; %bb.0:
	; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
	; GCN-NEXT: v_mul_f16_e32 v0, v0, v1
	; GCN-NEXT: ;ARITH_FENCE
	; GCN-NEXT: v_xor_b32_e32 v0, 0x8000, v0
	; GCN-NEXT: s_setpc_b64 s[30:31]
	%mul = fmul half %a, %b
	%fence = call half @llvm.arithmetic.fence.f16(half %mul)
	%fneg = fneg half %fence
	ret half %fneg
	}

	; Declarations of the intrinsics called by the test functions in this file.
	; The rcp, sin and arithmetic.fence declarations carry attribute group #1; the
	; two interp declarations carry group #0.
	declare half @llvm.amdgcn.rcp.f16(half) #1
	declare half @llvm.amdgcn.sin.f16(half) #1
	declare half @llvm.arithmetic.fence.f16(half) #1
	declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32) #0
	declare half @llvm.amdgcn.interp.p2.f16(float, float, i32, i32, i1, i32) #0

	; Attribute groups. #0 is attached to every function definition and to the
	; interp declarations; #1 is attached to the rcp, sin and arithmetic.fence
	; declarations. NOTE(review): #2, #3 and #4 are not referenced by any
	; definition or declaration visible in this file - confirm before removing.
	attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
	attributes #1 = { nounwind readnone }
	attributes #2 = { nounwind "unsafe-fp-math"="true" }
	attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }
	attributes #4 = { nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
	;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
	; GCN-NSZ: {{.*}}
	; GCN-SAFE: {{.*}}
	; VI: {{.*}}
	; VI-NSZ: {{.*}}
	; VI-SAFE: {{.*}}