; llvm/test/CodeGen/X86/combine-fabs.ll - llvm-project - Git at Google

 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX

 ;
 ; NOTE: this is generated by utils/update_llc_test_checks.py but we can't check NAN types (PR30443),
 ; so we need to edit it to remove the NAN constant comments
 ;

 ; fabs(c1) -> c2
 ; Scalar form: fabs is applied to the literal -2.0. The expected output for
 ; both configurations is a single constant load of 2.0 followed by the return.
 define float @combine_fabs_constant() {
 ; SSE-LABEL: combine_fabs_constant:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movss {{.*#+}} xmm0 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_fabs_constant:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
 ; AVX-NEXT:    retq
   %folded = call float @llvm.fabs.f32(float -2.0)
   ret float %folded
 }

 ; Vector form of the constant fold: the lanes <0.0, -0.0, 2.0, -2.0> are
 ; expected to come back as the constant vector [0.0, 0.0, 2.0, 2.0], loaded
 ; with one aligned move.
 define <4 x float> @combine_vec_fabs_constant() {
 ; SSE-LABEL: combine_vec_fabs_constant:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0.0E+0,0.0E+0,2.0E+0,2.0E+0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_fabs_constant:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0.0E+0,0.0E+0,2.0E+0,2.0E+0]
 ; AVX-NEXT:    retq
   %folded = call <4 x float> @llvm.fabs.v4f32(<4 x float> <float 0.0, float -0.0, float 2.0, float -2.0>)
   ret <4 x float> %folded
 }

 ; fabs(fabs(x)) -> fabs(x)
 ; Scalar form: two back-to-back fabs calls on %a. The expected output holds a
 ; single mask operation (andps against a constant-pool operand, or a
 ; vbroadcastss of the mask followed by vandps in the AVX2 configuration).
 define float @combine_fabs_fabs(float %a) {
 ; SSE-LABEL: combine_fabs_fabs:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_fabs_fabs:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
 ; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %inner = call float @llvm.fabs.f32(float %a)
   %outer = call float @llvm.fabs.f32(float %inner)
   ret float %outer
 }

 ; Vector form of the nested-fabs case: fabs is called twice on a <4 x float>
 ; and the expected output still contains only one mask operation.
 define <4 x float> @combine_vec_fabs_fabs(<4 x float> %a) {
 ; SSE-LABEL: combine_vec_fabs_fabs:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_fabs_fabs:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
 ; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %inner = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
   %outer = call <4 x float> @llvm.fabs.v4f32(<4 x float> %inner)
   ret <4 x float> %outer
 }

 ; fabs(fneg(x)) -> fabs(x)
 ; Scalar form: the negation is written as (fsub -0.0, %a) and feeds fabs.
 ; The expected output has no separate negate step, only the mask operation.
 define float @combine_fabs_fneg(float %a) {
 ; SSE-LABEL: combine_fabs_fneg:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_fabs_fneg:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
 ; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %neg = fsub float -0.0, %a
   %abs = call float @llvm.fabs.f32(float %neg)
   ret float %abs
 }

 ; Vector form of fabs-of-negation: %a is subtracted from a splat of -0.0 and
 ; the difference feeds fabs. Only the mask operation is expected to remain.
 define <4 x float> @combine_vec_fabs_fneg(<4 x float> %a) {
 ; SSE-LABEL: combine_vec_fabs_fneg:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_fabs_fneg:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
 ; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %a
   %abs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %neg)
   ret <4 x float> %abs
 }

 ; fabs(fcopysign(x, y)) -> fabs(x)
 ; Scalar form: copysign(%a, %b) feeds fabs. The expected output only masks
 ; xmm0 (the first argument); the register holding %b is never read.
 define float @combine_fabs_fcopysign(float %a, float %b) {
 ; SSE-LABEL: combine_fabs_fcopysign:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_fabs_fcopysign:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
 ; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %resigned = call float @llvm.copysign.f32(float %a, float %b)
   %abs = call float @llvm.fabs.f32(float %resigned)
   ret float %abs
 }

 ; Vector form of fabs-of-copysign: the sign operand %b is expected to drop out
 ; entirely, leaving just the mask operation on %a.
 define <4 x float> @combine_vec_fabs_fcopysign(<4 x float> %a, <4 x float> %b) {
 ; SSE-LABEL: combine_vec_fabs_fcopysign:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_fabs_fcopysign:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
 ; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %resigned = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
   %abs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %resigned)
   ret <4 x float> %abs
 }

 ; store(fabs(load())) - convert scalar to integer
 ; Read-modify-write on a double: the value is loaded from %ptr, passed through
 ; fabs and stored back to %ptr. The expected code is one byte-wide
 ; `andb $127` on the byte at offset 7, with no vector-register traffic.
 define void @combine_fabs_int_rmw_f64(ptr %ptr) {
 ; SSE-LABEL: combine_fabs_int_rmw_f64:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    andb $127, 7(%rdi)
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_fabs_int_rmw_f64:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    andb $127, 7(%rdi)
 ; AVX-NEXT:    retq
   %val = load double, ptr %ptr
   %abs = call double @llvm.fabs.f64(double %val)
   store double %abs, ptr %ptr
   ret void
 }

 ; Load/fabs/store of a float with distinct source and destination pointers.
 ; The expected code stays in a general-purpose register: the 0x7FFFFFFF mask
 ; is materialized in eax, ANDed with the 32-bit word at %src, and stored to
 ; %dst.
 define void @combine_fabs_int_f32(ptr %src, ptr %dst) {
 ; SSE-LABEL: combine_fabs_int_f32:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
 ; SSE-NEXT:    andl (%rdi), %eax
 ; SSE-NEXT:    movl %eax, (%rsi)
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_fabs_int_f32:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
 ; AVX-NEXT:    andl (%rdi), %eax
 ; AVX-NEXT:    movl %eax, (%rsi)
 ; AVX-NEXT:    retq
   %val = load float, ptr %src
   %abs = call float @llvm.fabs.f32(float %val)
   store float %abs, ptr %dst
   ret void
 }

 ; Read-modify-write on a bfloat: load from %ptr, fabs, store back to %ptr.
 ; The expected code is one byte-wide `andb $127` on the byte at offset 1.
 define void @combine_fabs_int_rmw_bfloat(ptr %ptr) nounwind {
 ; SSE-LABEL: combine_fabs_int_rmw_bfloat:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    andb $127, 1(%rdi)
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_fabs_int_rmw_bfloat:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    andb $127, 1(%rdi)
 ; AVX-NEXT:    retq
   %val = load bfloat, ptr %ptr
   %abs = call bfloat @llvm.fabs.bf16(bfloat %val)
   store bfloat %abs, ptr %ptr
   ret void
 }

 ; Load/fabs/store of a half with distinct source and destination pointers.
 ; The expected code is all-integer: a zero-extending 16-bit load, an AND with
 ; 0x7FFF, and a 16-bit store to %dst.
 define void @combine_fabs_int_half(ptr %src, ptr %dst) nounwind {
 ; SSE-LABEL: combine_fabs_int_half:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movzwl (%rdi), %eax
 ; SSE-NEXT:    andl $32767, %eax # imm = 0x7FFF
 ; SSE-NEXT:    movw %ax, (%rsi)
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_fabs_int_half:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    movzwl (%rdi), %eax
 ; AVX-NEXT:    andl $32767, %eax # imm = 0x7FFF
 ; AVX-NEXT:    movw %ax, (%rsi)
 ; AVX-NEXT:    retq
   %val = load half, ptr %src
   %abs = call half @llvm.fabs.f16(half %val)
   store half %abs, ptr %dst
   ret void
 }

 ; don't convert vector to scalar
 ; Load/fabs/store of a <4 x float>. Unlike the scalar cases, the expected code
 ; keeps the value in xmm0: a vector load (or memory-operand vandps), the mask
 ; AND, and an aligned vector store to %dst.
 define void @combine_fabs_vec_int_v4f32(ptr %src, ptr %dst) {
 ; SSE-LABEL: combine_fabs_vec_int_v4f32:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movaps (%rdi), %xmm0
 ; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE-NEXT:    movaps %xmm0, (%rsi)
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_fabs_vec_int_v4f32:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vbroadcastss {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN]
 ; AVX-NEXT:    vandps (%rdi), %xmm0, %xmm0
 ; AVX-NEXT:    vmovaps %xmm0, (%rsi)
 ; AVX-NEXT:    retq
   %vec = load <4 x float>, ptr %src
   %abs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %vec)
   store <4 x float> %abs, ptr %dst
   ret void
 }

 ; Intrinsic declarations. Every fabs/copysign overload called by the tests in
 ; this file is listed explicitly (including the f64, f16 and bf16 forms used
 ; by the load/store tests) so the file does not depend on the IR parser
 ; declaring intrinsics implicitly.
 declare float @llvm.fabs.f32(float %p)
 declare double @llvm.fabs.f64(double %p)
 declare half @llvm.fabs.f16(half %p)
 declare bfloat @llvm.fabs.bf16(bfloat %p)
 declare float @llvm.copysign.f32(float %Mag, float %Sgn)

 declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
 declare <4 x float> @llvm.copysign.v4f32(<4 x float> %Mag, <4 x float> %Sgn)
	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX

	;
	; NOTE: this is generated by utils/update_llc_test_checks.py but we can't check NAN types (PR30443),
	; so we need to edit it to remove the NAN constant comments
	;

	; fabs(c1) -> c2
	; Scalar form: fabs is applied to the literal -2.0. The expected output for
	; both configurations is a single constant load of 2.0 followed by the return.
	define float @combine_fabs_constant() {
	; SSE-LABEL: combine_fabs_constant:
	; SSE: # %bb.0:
	; SSE-NEXT: movss {{.*#+}} xmm0 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
	; SSE-NEXT: retq
	;
	; AVX-LABEL: combine_fabs_constant:
	; AVX: # %bb.0:
	; AVX-NEXT: vmovss {{.*#+}} xmm0 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
	; AVX-NEXT: retq
	  %folded = call float @llvm.fabs.f32(float -2.0)
	  ret float %folded
	}

	; Vector form of the constant fold: the lanes <0.0, -0.0, 2.0, -2.0> are
	; expected to come back as the constant vector [0.0, 0.0, 2.0, 2.0], loaded
	; with one aligned move.
	define <4 x float> @combine_vec_fabs_constant() {
	; SSE-LABEL: combine_vec_fabs_constant:
	; SSE: # %bb.0:
	; SSE-NEXT: movaps {{.*#+}} xmm0 = [0.0E+0,0.0E+0,2.0E+0,2.0E+0]
	; SSE-NEXT: retq
	;
	; AVX-LABEL: combine_vec_fabs_constant:
	; AVX: # %bb.0:
	; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0.0E+0,0.0E+0,2.0E+0,2.0E+0]
	; AVX-NEXT: retq
	  %folded = call <4 x float> @llvm.fabs.v4f32(<4 x float> <float 0.0, float -0.0, float 2.0, float -2.0>)
	  ret <4 x float> %folded
	}

	; fabs(fabs(x)) -> fabs(x)
	; Scalar form: two back-to-back fabs calls on %a. The expected output holds a
	; single mask operation (andps against a constant-pool operand, or a
	; vbroadcastss of the mask followed by vandps in the AVX2 configuration).
	define float @combine_fabs_fabs(float %a) {
	; SSE-LABEL: combine_fabs_fabs:
	; SSE: # %bb.0:
	; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
	; SSE-NEXT: retq
	;
	; AVX-LABEL: combine_fabs_fabs:
	; AVX: # %bb.0:
	; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
	; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
	; AVX-NEXT: retq
	  %inner = call float @llvm.fabs.f32(float %a)
	  %outer = call float @llvm.fabs.f32(float %inner)
	  ret float %outer
	}

	; Vector form of the nested-fabs case: fabs is called twice on a <4 x float>
	; and the expected output still contains only one mask operation.
	define <4 x float> @combine_vec_fabs_fabs(<4 x float> %a) {
	; SSE-LABEL: combine_vec_fabs_fabs:
	; SSE: # %bb.0:
	; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
	; SSE-NEXT: retq
	;
	; AVX-LABEL: combine_vec_fabs_fabs:
	; AVX: # %bb.0:
	; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
	; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
	; AVX-NEXT: retq
	  %inner = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
	  %outer = call <4 x float> @llvm.fabs.v4f32(<4 x float> %inner)
	  ret <4 x float> %outer
	}

	; fabs(fneg(x)) -> fabs(x)
	; Scalar form: the negation is written as (fsub -0.0, %a) and feeds fabs.
	; The expected output has no separate negate step, only the mask operation.
	define float @combine_fabs_fneg(float %a) {
	; SSE-LABEL: combine_fabs_fneg:
	; SSE: # %bb.0:
	; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
	; SSE-NEXT: retq
	;
	; AVX-LABEL: combine_fabs_fneg:
	; AVX: # %bb.0:
	; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
	; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
	; AVX-NEXT: retq
	  %neg = fsub float -0.0, %a
	  %abs = call float @llvm.fabs.f32(float %neg)
	  ret float %abs
	}

	; Vector form of fabs-of-negation: %a is subtracted from a splat of -0.0 and
	; the difference feeds fabs. Only the mask operation is expected to remain.
	define <4 x float> @combine_vec_fabs_fneg(<4 x float> %a) {
	; SSE-LABEL: combine_vec_fabs_fneg:
	; SSE: # %bb.0:
	; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
	; SSE-NEXT: retq
	;
	; AVX-LABEL: combine_vec_fabs_fneg:
	; AVX: # %bb.0:
	; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
	; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
	; AVX-NEXT: retq
	  %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %a
	  %abs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %neg)
	  ret <4 x float> %abs
	}

	; fabs(fcopysign(x, y)) -> fabs(x)
	; Scalar form: copysign(%a, %b) feeds fabs. The expected output only masks
	; xmm0 (the first argument); the register holding %b is never read.
	define float @combine_fabs_fcopysign(float %a, float %b) {
	; SSE-LABEL: combine_fabs_fcopysign:
	; SSE: # %bb.0:
	; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
	; SSE-NEXT: retq
	;
	; AVX-LABEL: combine_fabs_fcopysign:
	; AVX: # %bb.0:
	; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
	; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
	; AVX-NEXT: retq
	  %resigned = call float @llvm.copysign.f32(float %a, float %b)
	  %abs = call float @llvm.fabs.f32(float %resigned)
	  ret float %abs
	}

	; Vector form of fabs-of-copysign: the sign operand %b is expected to drop out
	; entirely, leaving just the mask operation on %a.
	define <4 x float> @combine_vec_fabs_fcopysign(<4 x float> %a, <4 x float> %b) {
	; SSE-LABEL: combine_vec_fabs_fcopysign:
	; SSE: # %bb.0:
	; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
	; SSE-NEXT: retq
	;
	; AVX-LABEL: combine_vec_fabs_fcopysign:
	; AVX: # %bb.0:
	; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
	; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
	; AVX-NEXT: retq
	  %resigned = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
	  %abs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %resigned)
	  ret <4 x float> %abs
	}

	; store(fabs(load())) - convert scalar to integer
	; Read-modify-write on a double: the value is loaded from %ptr, passed through
	; fabs and stored back to %ptr. The expected code is one byte-wide
	; `andb $127` on the byte at offset 7, with no vector-register traffic.
	define void @combine_fabs_int_rmw_f64(ptr %ptr) {
	; SSE-LABEL: combine_fabs_int_rmw_f64:
	; SSE: # %bb.0:
	; SSE-NEXT: andb $127, 7(%rdi)
	; SSE-NEXT: retq
	;
	; AVX-LABEL: combine_fabs_int_rmw_f64:
	; AVX: # %bb.0:
	; AVX-NEXT: andb $127, 7(%rdi)
	; AVX-NEXT: retq
	  %val = load double, ptr %ptr
	  %abs = call double @llvm.fabs.f64(double %val)
	  store double %abs, ptr %ptr
	  ret void
	}

	; Load/fabs/store of a float with distinct source and destination pointers.
	; The expected code stays in a general-purpose register: the 0x7FFFFFFF mask
	; is materialized in eax, ANDed with the 32-bit word at %src, and stored to
	; %dst.
	define void @combine_fabs_int_f32(ptr %src, ptr %dst) {
	; SSE-LABEL: combine_fabs_int_f32:
	; SSE: # %bb.0:
	; SSE-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
	; SSE-NEXT: andl (%rdi), %eax
	; SSE-NEXT: movl %eax, (%rsi)
	; SSE-NEXT: retq
	;
	; AVX-LABEL: combine_fabs_int_f32:
	; AVX: # %bb.0:
	; AVX-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
	; AVX-NEXT: andl (%rdi), %eax
	; AVX-NEXT: movl %eax, (%rsi)
	; AVX-NEXT: retq
	  %val = load float, ptr %src
	  %abs = call float @llvm.fabs.f32(float %val)
	  store float %abs, ptr %dst
	  ret void
	}

	; Read-modify-write on a bfloat: load from %ptr, fabs, store back to %ptr.
	; The expected code is one byte-wide `andb $127` on the byte at offset 1.
	define void @combine_fabs_int_rmw_bfloat(ptr %ptr) nounwind {
	; SSE-LABEL: combine_fabs_int_rmw_bfloat:
	; SSE: # %bb.0:
	; SSE-NEXT: andb $127, 1(%rdi)
	; SSE-NEXT: retq
	;
	; AVX-LABEL: combine_fabs_int_rmw_bfloat:
	; AVX: # %bb.0:
	; AVX-NEXT: andb $127, 1(%rdi)
	; AVX-NEXT: retq
	  %val = load bfloat, ptr %ptr
	  %abs = call bfloat @llvm.fabs.bf16(bfloat %val)
	  store bfloat %abs, ptr %ptr
	  ret void
	}

	; Load/fabs/store of a half with distinct source and destination pointers.
	; The expected code is all-integer: a zero-extending 16-bit load, an AND with
	; 0x7FFF, and a 16-bit store to %dst.
	define void @combine_fabs_int_half(ptr %src, ptr %dst) nounwind {
	; SSE-LABEL: combine_fabs_int_half:
	; SSE: # %bb.0:
	; SSE-NEXT: movzwl (%rdi), %eax
	; SSE-NEXT: andl $32767, %eax # imm = 0x7FFF
	; SSE-NEXT: movw %ax, (%rsi)
	; SSE-NEXT: retq
	;
	; AVX-LABEL: combine_fabs_int_half:
	; AVX: # %bb.0:
	; AVX-NEXT: movzwl (%rdi), %eax
	; AVX-NEXT: andl $32767, %eax # imm = 0x7FFF
	; AVX-NEXT: movw %ax, (%rsi)
	; AVX-NEXT: retq
	  %val = load half, ptr %src
	  %abs = call half @llvm.fabs.f16(half %val)
	  store half %abs, ptr %dst
	  ret void
	}

	; don't convert vector to scalar
	; Load/fabs/store of a <4 x float>. Unlike the scalar cases, the expected code
	; keeps the value in xmm0: a vector load (or memory-operand vandps), the mask
	; AND, and an aligned vector store to %dst.
	define void @combine_fabs_vec_int_v4f32(ptr %src, ptr %dst) {
	; SSE-LABEL: combine_fabs_vec_int_v4f32:
	; SSE: # %bb.0:
	; SSE-NEXT: movaps (%rdi), %xmm0
	; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
	; SSE-NEXT: movaps %xmm0, (%rsi)
	; SSE-NEXT: retq
	;
	; AVX-LABEL: combine_fabs_vec_int_v4f32:
	; AVX: # %bb.0:
	; AVX-NEXT: vbroadcastss {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN]
	; AVX-NEXT: vandps (%rdi), %xmm0, %xmm0
	; AVX-NEXT: vmovaps %xmm0, (%rsi)
	; AVX-NEXT: retq
	  %vec = load <4 x float>, ptr %src
	  %abs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %vec)
	  store <4 x float> %abs, ptr %dst
	  ret void
	}

	; Intrinsic declarations. Every fabs/copysign overload called by the tests in
	; this file is listed explicitly (including the f64, f16 and bf16 forms used
	; by the load/store tests) so the file does not depend on the IR parser
	; declaring intrinsics implicitly.
	declare float @llvm.fabs.f32(float %p)
	declare double @llvm.fabs.f64(double %p)
	declare half @llvm.fabs.f16(half %p)
	declare bfloat @llvm.fabs.bf16(bfloat %p)
	declare float @llvm.copysign.f32(float %Mag, float %Sgn)

	declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
	declare <4 x float> @llvm.copysign.v4f32(<4 x float> %Mag, <4 x float> %Sgn)