| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt -S -aarch64-sve-intrinsic-opts < %s | FileCheck %s |
| |
| ; Idempotent fmuls -- should compile to just a ret. |
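; A merging fmul takes its inactive lanes from the first data operand, so
; fmul(pg, a, splat(1.0)) produces %a in every lane and the dup/fmul pair
; can be erased.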
| define <vscale x 8 x half> @idempotent_fmul_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) { |
| ; CHECK-LABEL: @idempotent_fmul_f16( |
| ; CHECK-NEXT: ret <vscale x 8 x half> [[A:%.*]] |
| ; |
| %1 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 1.0) |
| %2 = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %1) |
| ret <vscale x 8 x half> %2 |
| } |
| |
| define <vscale x 4 x float> @idempotent_fmul_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) { |
| ; CHECK-LABEL: @idempotent_fmul_f32( |
| ; CHECK-NEXT: ret <vscale x 4 x float> [[A:%.*]] |
| ; |
| %1 = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 1.0) |
| %2 = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %1) |
| ret <vscale x 4 x float> %2 |
| } |
| |
| define <vscale x 2 x double> @idempotent_fmul_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) { |
| ; CHECK-LABEL: @idempotent_fmul_f64( |
| ; CHECK-NEXT: ret <vscale x 2 x double> [[A:%.*]] |
| ; |
| %1 = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 1.0) |
| %2 = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %1) |
| ret <vscale x 2 x double> %2 |
| } |
| |
| define <vscale x 2 x double> @idempotent_fmul_different_argument_order(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) { |
| ; CHECK-LABEL: @idempotent_fmul_different_argument_order( |
| ; CHECK-NEXT: ret <vscale x 2 x double> [[A:%.*]] |
| ; |
| %1 = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 1.0) |
  ; Same multiply-by-one fold as above, but with the fmul operands commuted
  ; so that the splat is the first data operand.
| %2 = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %1, <vscale x 2 x double> %a) |
| ret <vscale x 2 x double> %2 |
| } |
| |
| define <vscale x 8 x half> @idempotent_fmul_with_predicated_dup(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) { |
| ; CHECK-LABEL: @idempotent_fmul_with_predicated_dup( |
| ; CHECK-NEXT: ret <vscale x 8 x half> [[A:%.*]] |
| ; |
| %1 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, half 1.0) |
| %2 = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %1) |
| ret <vscale x 8 x half> %2 |
| } |
| |
| define <vscale x 8 x half> @idempotent_fmul_two_dups(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) { |
; Edge case -- make sure the fold stays sane when we're multiplying two
; dups together.
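; The result should be a single dup of 1.0; 0xH3C00 is the IEEE 754 binary16
; encoding of 1.0.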
| ; CHECK-LABEL: @idempotent_fmul_two_dups( |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 0xH3C00) |
| ; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] |
| ; |
| %1 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 1.0) |
| %2 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 1.0) |
| %3 = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %1, <vscale x 8 x half> %2) |
| ret <vscale x 8 x half> %3 |
| } |
| |
| ; Non-idempotent fmuls -- we don't expect these to be optimised out. |
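; Multiplying by 2.0 is not an identity operation, so the dup (0xH4000 is the
; binary16 encoding of 2.0) and the fmul must both survive.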
| define <vscale x 8 x half> @non_idempotent_fmul_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) { |
| ; CHECK-LABEL: @non_idempotent_fmul_f16( |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 0xH4000) |
| ; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[TMP1]]) |
| ; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]] |
| ; |
| %1 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 2.0) |
| %2 = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %1) |
| ret <vscale x 8 x half> %2 |
| } |
| |
| define <vscale x 4 x float> @non_idempotent_fmul_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) { |
| ; CHECK-LABEL: @non_idempotent_fmul_f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 2.000000e+00) |
| ; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[TMP1]]) |
| ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]] |
| ; |
| %1 = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 2.0) |
| %2 = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %1) |
| ret <vscale x 4 x float> %2 |
| } |
| |
| define <vscale x 2 x double> @non_idempotent_fmul_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) { |
| ; CHECK-LABEL: @non_idempotent_fmul_f64( |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 2.000000e+00) |
| ; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[TMP1]]) |
| ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]] |
| ; |
| %1 = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 2.0) |
| %2 = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %1) |
| ret <vscale x 2 x double> %2 |
| } |
| |
| define <vscale x 2 x double> @non_idempotent_fmul_with_predicated_dup(<vscale x 2 x i1> %pg1, <vscale x 2 x i1> %pg2, <vscale x 2 x double> %a) { |
; The dup is governed by %pg1 while the fmul is governed by %pg2; lanes
; active in %pg2 but not in %pg1 would multiply %a by undef, so the fold
; must not fire.
| ; CHECK-LABEL: @non_idempotent_fmul_with_predicated_dup( |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG1:%.*]], double 1.000000e+00) |
| ; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[PG2:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[TMP1]]) |
| ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]] |
| ; |
| %1 = call <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg1, double 1.0) |
| %2 = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg2, <vscale x 2 x double> %a, <vscale x 2 x double> %1) |
| ret <vscale x 2 x double> %2 |
| } |
| |
| declare <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half) |
| declare <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float) |
| declare <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double) |
| |
| declare <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double) |
| declare <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, half) |
| |
| declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>) |
| declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>) |
| declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>) |