; llvm/test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll - llvm-project.git - Git at Google

 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s

 ; fmul of scalar %a with lane 0 of a <2 x float> vector.
 ; The expected output is the plain scalar form on s1, with no indexed operand.
 define float @test_fmul_lane_ss2S_0(float %a, <2 x float> %v) {
 ; CHECK-LABEL: test_fmul_lane_ss2S_0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul s0, s0, s1
 ; CHECK-NEXT:    ret
   %elt = extractelement <2 x float> %v, i32 0
   %prod = fmul float %a, %elt
   ret float %prod
 }

 ; fmul of scalar %a with lane 1 of a <2 x float> vector.
 ; The lane extract is expected to fold into the indexed operand v1.s[1].
 define float @test_fmul_lane_ss2S_1(float %a, <2 x float> %v) {
 ; CHECK-LABEL: test_fmul_lane_ss2S_1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul s0, s0, v1.s[1]
 ; CHECK-NEXT:    ret
   %elt = extractelement <2 x float> %v, i32 1
   %prod = fmul float %a, %elt
   ret float %prod
 }

 ; Lane 1 of a <2 x float> vector used as the FIRST fmul operand.
 ; The expected output still carries the lane as the indexed (last) operand.
 define float @test_fmul_lane_ss2S_1_swap(float %a, <2 x float> %v) {
 ; CHECK-LABEL: test_fmul_lane_ss2S_1_swap:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul s0, s0, v1.s[1]
 ; CHECK-NEXT:    ret
   %elt = extractelement <2 x float> %v, i32 1
   %prod = fmul float %elt, %a
   ret float %prod
 }

 ; fmul of scalar %a with lane 0 of a <4 x float> vector.
 ; The expected output is the plain scalar form on s1, with no indexed operand.
 define float @test_fmul_lane_ss4S_0(float %a, <4 x float> %v) {
 ; CHECK-LABEL: test_fmul_lane_ss4S_0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmul s0, s0, s1
 ; CHECK-NEXT:    ret
   %elt = extractelement <4 x float> %v, i32 0
   %prod = fmul float %a, %elt
   ret float %prod
 }

 ; fmul of scalar %a with lane 3 of a <4 x float> vector.
 ; The lane extract is expected to fold into the indexed operand v1.s[3].
 define float @test_fmul_lane_ss4S_3(float %a, <4 x float> %v) {
 ; CHECK-LABEL: test_fmul_lane_ss4S_3:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmul s0, s0, v1.s[3]
 ; CHECK-NEXT:    ret
   %elt = extractelement <4 x float> %v, i32 3
   %prod = fmul float %a, %elt
   ret float %prod
 }

 ; Lane 3 of a <4 x float> vector used as the FIRST fmul operand.
 ; The expected output still carries the lane as the indexed (last) operand.
 define float @test_fmul_lane_ss4S_3_swap(float %a, <4 x float> %v) {
 ; CHECK-LABEL: test_fmul_lane_ss4S_3_swap:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmul s0, s0, v1.s[3]
 ; CHECK-NEXT:    ret
   %elt = extractelement <4 x float> %v, i32 3
   %prod = fmul float %elt, %a
   ret float %prod
 }


 ; fmul of scalar %a with the only lane of a <1 x double> vector.
 ; The expected output is the plain scalar form on d1.
 define double @test_fmul_lane_ddD(double %a, <1 x double> %v) {
 ; CHECK-LABEL: test_fmul_lane_ddD:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmul d0, d0, d1
 ; CHECK-NEXT:    ret
   %elt = extractelement <1 x double> %v, i32 0
   %prod = fmul double %a, %elt
   ret double %prod
 }


 ; fmul of scalar %a with lane 0 of a <2 x double> vector.
 ; The expected output is the plain scalar form on d1, with no indexed operand.
 define double @test_fmul_lane_dd2D_0(double %a, <2 x double> %v) {
 ; CHECK-LABEL: test_fmul_lane_dd2D_0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmul d0, d0, d1
 ; CHECK-NEXT:    ret
   %elt = extractelement <2 x double> %v, i32 0
   %prod = fmul double %a, %elt
   ret double %prod
 }

 ; fmul of scalar %a with lane 1 of a <2 x double> vector.
 ; The lane extract is expected to fold into the indexed operand v1.d[1].
 define double @test_fmul_lane_dd2D_1(double %a, <2 x double> %v) {
 ; CHECK-LABEL: test_fmul_lane_dd2D_1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmul d0, d0, v1.d[1]
 ; CHECK-NEXT:    ret
   %elt = extractelement <2 x double> %v, i32 1
   %prod = fmul double %a, %elt
   ret double %prod
 }


 ; Lane 1 of a <2 x double> vector used as the FIRST fmul operand.
 ; The expected output still carries the lane as the indexed (last) operand.
 define double @test_fmul_lane_dd2D_1_swap(double %a, <2 x double> %v) {
 ; CHECK-LABEL: test_fmul_lane_dd2D_1_swap:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmul d0, d0, v1.d[1]
 ; CHECK-NEXT:    ret
   %elt = extractelement <2 x double> %v, i32 1
   %prod = fmul double %elt, %a
   ret double %prod
 }

 declare float @llvm.aarch64.neon.fmulx.f32(float, float)

 ; fmulx intrinsic on scalar %a and lane 0 of a <2 x float> vector.
 ; The expected output is the plain scalar fmulx on s1, with no indexed operand.
 define float @test_fmulx_lane_f32_0(float %a, <2 x float> %v) {
 ; CHECK-LABEL: test_fmulx_lane_f32_0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx s0, s0, s1
 ; CHECK-NEXT:    ret
   %elt = extractelement <2 x float> %v, i32 0
   %mulx = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %elt)
   ret float %mulx
 }

 ; fmulx intrinsic on scalar %a and lane 1 of a <2 x float> vector.
 ; The lane extract is expected to fold into the indexed operand v1.s[1].
 define float @test_fmulx_lane_f32_1(float %a, <2 x float> %v) {
 ; CHECK-LABEL: test_fmulx_lane_f32_1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx s0, s0, v1.s[1]
 ; CHECK-NEXT:    ret
   %elt = extractelement <2 x float> %v, i32 1
   %mulx = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %elt)
   ret float %mulx
 }

 ; fmulx intrinsic on scalar %a and lane 0 of a <4 x float> vector.
 ; The expected output is the plain scalar fmulx on s1, with no indexed operand.
 define float @test_fmulx_laneq_f32_0(float %a, <4 x float> %v) {
 ; CHECK-LABEL: test_fmulx_laneq_f32_0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmulx s0, s0, s1
 ; CHECK-NEXT:    ret
   %elt = extractelement <4 x float> %v, i32 0
   %mulx = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %elt)
   ret float %mulx
 }

 ; fmulx intrinsic on scalar %a and lane 3 of a <4 x float> vector.
 ; The lane extract is expected to fold into the indexed operand v1.s[3].
 define float @test_fmulx_laneq_f32_3(float %a, <4 x float> %v) {
 ; CHECK-LABEL: test_fmulx_laneq_f32_3:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmulx s0, s0, v1.s[3]
 ; CHECK-NEXT:    ret
   %elt = extractelement <4 x float> %v, i32 3
   %mulx = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %elt)
   ret float %mulx
 }

 ; Lane 3 of a <4 x float> vector passed as the FIRST fmulx argument.
 ; The expected output still carries the lane as the indexed (last) operand.
 define float @test_fmulx_laneq_f32_3_swap(float %a, <4 x float> %v) {
 ; CHECK-LABEL: test_fmulx_laneq_f32_3_swap:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmulx s0, s0, v1.s[3]
 ; CHECK-NEXT:    ret
   %elt = extractelement <4 x float> %v, i32 3
   %mulx = call float @llvm.aarch64.neon.fmulx.f32(float %elt, float %a)
   ret float %mulx
 }

 declare double @llvm.aarch64.neon.fmulx.f64(double, double)

 ; fmulx intrinsic on scalar %a and the only lane of a <1 x double> vector.
 ; The expected output is the plain scalar fmulx on d1.
 define double @test_fmulx_lane_f64(double %a, <1 x double> %v) {
 ; CHECK-LABEL: test_fmulx_lane_f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmulx d0, d0, d1
 ; CHECK-NEXT:    ret
   %elt = extractelement <1 x double> %v, i32 0
   %mulx = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %elt)
   ret double %mulx
 }

 ; fmulx intrinsic on scalar %a and lane 0 of a <2 x double> vector.
 ; The expected output is the plain scalar fmulx on d1, with no indexed operand.
 define double @test_fmulx_laneq_f64_0(double %a, <2 x double> %v) {
 ; CHECK-LABEL: test_fmulx_laneq_f64_0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmulx d0, d0, d1
 ; CHECK-NEXT:    ret
   %elt = extractelement <2 x double> %v, i32 0
   %mulx = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %elt)
   ret double %mulx
 }


 ; fmulx intrinsic on scalar %a and lane 1 of a <2 x double> vector.
 ; The lane extract is expected to fold into the indexed operand v1.d[1].
 define double @test_fmulx_laneq_f64_1(double %a, <2 x double> %v) {
 ; CHECK-LABEL: test_fmulx_laneq_f64_1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmulx d0, d0, v1.d[1]
 ; CHECK-NEXT:    ret
   %elt = extractelement <2 x double> %v, i32 1
   %mulx = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %elt)
   ret double %mulx
 }

 ; Lane 1 of a <2 x double> vector passed as the FIRST fmulx argument.
 ; The expected output still carries the lane as the indexed (last) operand.
 define double @test_fmulx_laneq_f64_1_swap(double %a, <2 x double> %v) {
 ; CHECK-LABEL: test_fmulx_laneq_f64_1_swap:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmulx d0, d0, v1.d[1]
 ; CHECK-NEXT:    ret
   %elt = extractelement <2 x double> %v, i32 1
   %mulx = call double @llvm.aarch64.neon.fmulx.f64(double %elt, double %a)
   ret double %mulx
 }

 ; fmulx intrinsic applied to lanes 0 and 1 of the SAME <2 x float> vector.
 ; Expected output: lane 0 as the scalar s0 and lane 1 as indexed v0.s[1].
 ; The block label must stay "entry": it appears in the checked output.
 define float @test_fmulx_horizontal_f32(<2 x float> %v) {
 ; CHECK-LABEL: test_fmulx_horizontal_f32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmulx s0, s0, v0.s[1]
 ; CHECK-NEXT:    ret
 entry:
   %lo = extractelement <2 x float> %v, i32 0
   %hi = extractelement <2 x float> %v, i32 1
   %mulx = call float @llvm.aarch64.neon.fmulx.f32(float %lo, float %hi)
   ret float %mulx
 }

 ; fmulx intrinsic applied to lanes 0 and 1 of the SAME <2 x double> vector.
 ; Expected output: lane 0 as the scalar d0 and lane 1 as indexed v0.d[1].
 ; The block label must stay "entry": it appears in the checked output.
 define double @test_fmulx_horizontal_f64(<2 x double> %v) {
 ; CHECK-LABEL: test_fmulx_horizontal_f64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fmulx d0, d0, v0.d[1]
 ; CHECK-NEXT:    ret
 entry:
   %lo = extractelement <2 x double> %v, i32 0
   %hi = extractelement <2 x double> %v, i32 1
   %mulx = call double @llvm.aarch64.neon.fmulx.f64(double %lo, double %hi)
   ret double %mulx
 }
	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
	; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s

	; fmul of scalar %a with lane 0 of a <2 x float> vector.
	; The expected output is the plain scalar form on s1, with no indexed operand.
	define float @test_fmul_lane_ss2S_0(float %a, <2 x float> %v) {
	; CHECK-LABEL: test_fmul_lane_ss2S_0:
	; CHECK: // %bb.0:
	; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
	; CHECK-NEXT: fmul s0, s0, s1
	; CHECK-NEXT: ret
	  %elt = extractelement <2 x float> %v, i32 0
	  %prod = fmul float %a, %elt
	  ret float %prod
	}

	; fmul of scalar %a with lane 1 of a <2 x float> vector.
	; The lane extract is expected to fold into the indexed operand v1.s[1].
	define float @test_fmul_lane_ss2S_1(float %a, <2 x float> %v) {
	; CHECK-LABEL: test_fmul_lane_ss2S_1:
	; CHECK: // %bb.0:
	; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
	; CHECK-NEXT: fmul s0, s0, v1.s[1]
	; CHECK-NEXT: ret
	  %elt = extractelement <2 x float> %v, i32 1
	  %prod = fmul float %a, %elt
	  ret float %prod
	}

	; Lane 1 of a <2 x float> vector used as the FIRST fmul operand.
	; The expected output still carries the lane as the indexed (last) operand.
	define float @test_fmul_lane_ss2S_1_swap(float %a, <2 x float> %v) {
	; CHECK-LABEL: test_fmul_lane_ss2S_1_swap:
	; CHECK: // %bb.0:
	; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
	; CHECK-NEXT: fmul s0, s0, v1.s[1]
	; CHECK-NEXT: ret
	  %elt = extractelement <2 x float> %v, i32 1
	  %prod = fmul float %elt, %a
	  ret float %prod
	}

	; fmul of scalar %a with lane 0 of a <4 x float> vector.
	; The expected output is the plain scalar form on s1, with no indexed operand.
	define float @test_fmul_lane_ss4S_0(float %a, <4 x float> %v) {
	; CHECK-LABEL: test_fmul_lane_ss4S_0:
	; CHECK: // %bb.0:
	; CHECK-NEXT: fmul s0, s0, s1
	; CHECK-NEXT: ret
	  %elt = extractelement <4 x float> %v, i32 0
	  %prod = fmul float %a, %elt
	  ret float %prod
	}

	; fmul of scalar %a with lane 3 of a <4 x float> vector.
	; The lane extract is expected to fold into the indexed operand v1.s[3].
	define float @test_fmul_lane_ss4S_3(float %a, <4 x float> %v) {
	; CHECK-LABEL: test_fmul_lane_ss4S_3:
	; CHECK: // %bb.0:
	; CHECK-NEXT: fmul s0, s0, v1.s[3]
	; CHECK-NEXT: ret
	  %elt = extractelement <4 x float> %v, i32 3
	  %prod = fmul float %a, %elt
	  ret float %prod
	}

	; Lane 3 of a <4 x float> vector used as the FIRST fmul operand.
	; The expected output still carries the lane as the indexed (last) operand.
	define float @test_fmul_lane_ss4S_3_swap(float %a, <4 x float> %v) {
	; CHECK-LABEL: test_fmul_lane_ss4S_3_swap:
	; CHECK: // %bb.0:
	; CHECK-NEXT: fmul s0, s0, v1.s[3]
	; CHECK-NEXT: ret
	  %elt = extractelement <4 x float> %v, i32 3
	  %prod = fmul float %elt, %a
	  ret float %prod
	}


	; fmul of scalar %a with the only lane of a <1 x double> vector.
	; The expected output is the plain scalar form on d1.
	define double @test_fmul_lane_ddD(double %a, <1 x double> %v) {
	; CHECK-LABEL: test_fmul_lane_ddD:
	; CHECK: // %bb.0:
	; CHECK-NEXT: fmul d0, d0, d1
	; CHECK-NEXT: ret
	  %elt = extractelement <1 x double> %v, i32 0
	  %prod = fmul double %a, %elt
	  ret double %prod
	}


	; fmul of scalar %a with lane 0 of a <2 x double> vector.
	; The expected output is the plain scalar form on d1, with no indexed operand.
	define double @test_fmul_lane_dd2D_0(double %a, <2 x double> %v) {
	; CHECK-LABEL: test_fmul_lane_dd2D_0:
	; CHECK: // %bb.0:
	; CHECK-NEXT: fmul d0, d0, d1
	; CHECK-NEXT: ret
	  %elt = extractelement <2 x double> %v, i32 0
	  %prod = fmul double %a, %elt
	  ret double %prod
	}

	; fmul of scalar %a with lane 1 of a <2 x double> vector.
	; The lane extract is expected to fold into the indexed operand v1.d[1].
	define double @test_fmul_lane_dd2D_1(double %a, <2 x double> %v) {
	; CHECK-LABEL: test_fmul_lane_dd2D_1:
	; CHECK: // %bb.0:
	; CHECK-NEXT: fmul d0, d0, v1.d[1]
	; CHECK-NEXT: ret
	  %elt = extractelement <2 x double> %v, i32 1
	  %prod = fmul double %a, %elt
	  ret double %prod
	}


	; Lane 1 of a <2 x double> vector used as the FIRST fmul operand.
	; The expected output still carries the lane as the indexed (last) operand.
	define double @test_fmul_lane_dd2D_1_swap(double %a, <2 x double> %v) {
	; CHECK-LABEL: test_fmul_lane_dd2D_1_swap:
	; CHECK: // %bb.0:
	; CHECK-NEXT: fmul d0, d0, v1.d[1]
	; CHECK-NEXT: ret
	  %elt = extractelement <2 x double> %v, i32 1
	  %prod = fmul double %elt, %a
	  ret double %prod
	}

	declare float @llvm.aarch64.neon.fmulx.f32(float, float)

	; fmulx intrinsic on scalar %a and lane 0 of a <2 x float> vector.
	; The expected output is the plain scalar fmulx on s1, with no indexed operand.
	define float @test_fmulx_lane_f32_0(float %a, <2 x float> %v) {
	; CHECK-LABEL: test_fmulx_lane_f32_0:
	; CHECK: // %bb.0:
	; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
	; CHECK-NEXT: fmulx s0, s0, s1
	; CHECK-NEXT: ret
	  %elt = extractelement <2 x float> %v, i32 0
	  %mulx = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %elt)
	  ret float %mulx
	}

	; fmulx intrinsic on scalar %a and lane 1 of a <2 x float> vector.
	; The lane extract is expected to fold into the indexed operand v1.s[1].
	define float @test_fmulx_lane_f32_1(float %a, <2 x float> %v) {
	; CHECK-LABEL: test_fmulx_lane_f32_1:
	; CHECK: // %bb.0:
	; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
	; CHECK-NEXT: fmulx s0, s0, v1.s[1]
	; CHECK-NEXT: ret
	  %elt = extractelement <2 x float> %v, i32 1
	  %mulx = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %elt)
	  ret float %mulx
	}

	; fmulx intrinsic on scalar %a and lane 0 of a <4 x float> vector.
	; The expected output is the plain scalar fmulx on s1, with no indexed operand.
	define float @test_fmulx_laneq_f32_0(float %a, <4 x float> %v) {
	; CHECK-LABEL: test_fmulx_laneq_f32_0:
	; CHECK: // %bb.0:
	; CHECK-NEXT: fmulx s0, s0, s1
	; CHECK-NEXT: ret
	  %elt = extractelement <4 x float> %v, i32 0
	  %mulx = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %elt)
	  ret float %mulx
	}

	; fmulx intrinsic on scalar %a and lane 3 of a <4 x float> vector.
	; The lane extract is expected to fold into the indexed operand v1.s[3].
	define float @test_fmulx_laneq_f32_3(float %a, <4 x float> %v) {
	; CHECK-LABEL: test_fmulx_laneq_f32_3:
	; CHECK: // %bb.0:
	; CHECK-NEXT: fmulx s0, s0, v1.s[3]
	; CHECK-NEXT: ret
	  %elt = extractelement <4 x float> %v, i32 3
	  %mulx = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %elt)
	  ret float %mulx
	}

	; Lane 3 of a <4 x float> vector passed as the FIRST fmulx argument.
	; The expected output still carries the lane as the indexed (last) operand.
	define float @test_fmulx_laneq_f32_3_swap(float %a, <4 x float> %v) {
	; CHECK-LABEL: test_fmulx_laneq_f32_3_swap:
	; CHECK: // %bb.0:
	; CHECK-NEXT: fmulx s0, s0, v1.s[3]
	; CHECK-NEXT: ret
	  %elt = extractelement <4 x float> %v, i32 3
	  %mulx = call float @llvm.aarch64.neon.fmulx.f32(float %elt, float %a)
	  ret float %mulx
	}

	declare double @llvm.aarch64.neon.fmulx.f64(double, double)

	; fmulx intrinsic on scalar %a and the only lane of a <1 x double> vector.
	; The expected output is the plain scalar fmulx on d1.
	define double @test_fmulx_lane_f64(double %a, <1 x double> %v) {
	; CHECK-LABEL: test_fmulx_lane_f64:
	; CHECK: // %bb.0:
	; CHECK-NEXT: fmulx d0, d0, d1
	; CHECK-NEXT: ret
	  %elt = extractelement <1 x double> %v, i32 0
	  %mulx = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %elt)
	  ret double %mulx
	}

	; fmulx intrinsic on scalar %a and lane 0 of a <2 x double> vector.
	; The expected output is the plain scalar fmulx on d1, with no indexed operand.
	define double @test_fmulx_laneq_f64_0(double %a, <2 x double> %v) {
	; CHECK-LABEL: test_fmulx_laneq_f64_0:
	; CHECK: // %bb.0:
	; CHECK-NEXT: fmulx d0, d0, d1
	; CHECK-NEXT: ret
	  %elt = extractelement <2 x double> %v, i32 0
	  %mulx = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %elt)
	  ret double %mulx
	}


	; fmulx intrinsic on scalar %a and lane 1 of a <2 x double> vector.
	; The lane extract is expected to fold into the indexed operand v1.d[1].
	define double @test_fmulx_laneq_f64_1(double %a, <2 x double> %v) {
	; CHECK-LABEL: test_fmulx_laneq_f64_1:
	; CHECK: // %bb.0:
	; CHECK-NEXT: fmulx d0, d0, v1.d[1]
	; CHECK-NEXT: ret
	  %elt = extractelement <2 x double> %v, i32 1
	  %mulx = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %elt)
	  ret double %mulx
	}

	; Lane 1 of a <2 x double> vector passed as the FIRST fmulx argument.
	; The expected output still carries the lane as the indexed (last) operand.
	define double @test_fmulx_laneq_f64_1_swap(double %a, <2 x double> %v) {
	; CHECK-LABEL: test_fmulx_laneq_f64_1_swap:
	; CHECK: // %bb.0:
	; CHECK-NEXT: fmulx d0, d0, v1.d[1]
	; CHECK-NEXT: ret
	  %elt = extractelement <2 x double> %v, i32 1
	  %mulx = call double @llvm.aarch64.neon.fmulx.f64(double %elt, double %a)
	  ret double %mulx
	}

	; fmulx intrinsic applied to lanes 0 and 1 of the SAME <2 x float> vector.
	; Expected output: lane 0 as the scalar s0 and lane 1 as indexed v0.s[1].
	; The block label must stay "entry": it appears in the checked output.
	define float @test_fmulx_horizontal_f32(<2 x float> %v) {
	; CHECK-LABEL: test_fmulx_horizontal_f32:
	; CHECK: // %bb.0: // %entry
	; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
	; CHECK-NEXT: fmulx s0, s0, v0.s[1]
	; CHECK-NEXT: ret
	entry:
	  %lo = extractelement <2 x float> %v, i32 0
	  %hi = extractelement <2 x float> %v, i32 1
	  %mulx = call float @llvm.aarch64.neon.fmulx.f32(float %lo, float %hi)
	  ret float %mulx
	}

	; fmulx intrinsic applied to lanes 0 and 1 of the SAME <2 x double> vector.
	; Expected output: lane 0 as the scalar d0 and lane 1 as indexed v0.d[1].
	; The block label must stay "entry": it appears in the checked output.
	define double @test_fmulx_horizontal_f64(<2 x double> %v) {
	; CHECK-LABEL: test_fmulx_horizontal_f64:
	; CHECK: // %bb.0: // %entry
	; CHECK-NEXT: fmulx d0, d0, v0.d[1]
	; CHECK-NEXT: ret
	entry:
	  %lo = extractelement <2 x double> %v, i32 0
	  %hi = extractelement <2 x double> %v, i32 1
	  %mulx = call double @llvm.aarch64.neon.fmulx.f64(double %lo, double %hi)
	  ret double %mulx
	}