| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s --mattr=+complxnum,+sve,+fullfp16 -o - | FileCheck %s |
| |
| target triple = "aarch64" |
| |
; simple_symmetric_muladd2: two-way deinterleaved multiply-add on <4 x double>.
; %a and %b are each split into their even lanes (0, 2) and odd lanes (1, 3).
; Each half of %a is multiplied by the splat constant 3.2 and added to the
; matching half of %b; the two partial results are then re-interleaved, so
; lane i of the returned vector is %b[i] + 3.2 * %a[i].
; The CHECK lines below expect no shuffle instructions at all: the mov/movk
; sequence builds 0x400999999999999A (the double encoding of 3.2) in x8, dup
; broadcasts it, and two fmla instructions accumulate straight into the
; registers that hold %b before the results are moved into v0/v1.
| define <4 x double> @simple_symmetric_muladd2(<4 x double> %a, <4 x double> %b) { |
| ; CHECK-LABEL: simple_symmetric_muladd2: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov x8, #-7378697629483820647 // =0x9999999999999999 |
| ; CHECK-NEXT: movk x8, #39322 |
| ; CHECK-NEXT: movk x8, #16393, lsl #48 |
| ; CHECK-NEXT: dup v4.2d, x8 |
| ; CHECK-NEXT: fmla v2.2d, v4.2d, v0.2d |
| ; CHECK-NEXT: fmla v3.2d, v4.2d, v1.2d |
| ; CHECK-NEXT: mov v0.16b, v2.16b |
| ; CHECK-NEXT: mov v1.16b, v3.16b |
| ; CHECK-NEXT: ret |
| entry: |
; Deinterleave %a: %ext00 takes lanes 0 and 2, %ext01 takes lanes 1 and 3.
| %ext00 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 0, i32 2> |
| %ext01 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 1, i32 3> |
| %fmul0 = fmul fast <2 x double> %ext00, splat (double 3.200000e+00) |
; Deinterleave %b with the same even/odd masks used for %a.
| %ext10 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 0, i32 2> |
| %ext11 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 1, i32 3> |
| %fadd0 = fadd fast <2 x double> %ext10, %fmul0 |
| %fmul1 = fmul fast <2 x double> %ext01, splat (double 3.200000e+00) |
| %fadd1 = fadd fast <2 x double> %ext11, %fmul1 |
; Mask <0, 2, 1, 3> over the concatenation of %fadd0 and %fadd1 re-interleaves
; them: the result is fadd0[0], fadd1[0], fadd0[1], fadd1[1].
| %interleaved.vec = shufflevector <2 x double> %fadd0, <2 x double> %fadd1, <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| ret <4 x double> %interleaved.vec |
| } |
| |
; simple_symmetric_muladd4: four-way variant of the multiply-add on <8 x double>.
; %a and %b are each split into four 2-element vectors holding lanes
; (0, 4), (1, 5), (2, 6) and (3, 7). Each piece of %a is multiplied by the
; splat constant 3.2 and added to the matching piece of %b; the four results
; are concatenated pairwise and then permuted back into original lane order,
; so lane i of the returned vector is %b[i] + 3.2 * %a[i].
; Unlike a shuffle-free lowering, the CHECK lines below keep the shuffles:
; zip1/zip2 gather the lane pairs before the four fmla instructions and
; zip1/zip2 scatter the results afterwards.
; NOTE(review): presumably this records that the four-way pattern is not
; currently folded into plain fmla on the input registers - confirm.
| define <8 x double> @simple_symmetric_muladd4(<8 x double> %a, <8 x double> %b) { |
| ; CHECK-LABEL: simple_symmetric_muladd4: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov x8, #-7378697629483820647 // =0x9999999999999999 |
| ; CHECK-NEXT: zip1 v16.2d, v0.2d, v2.2d |
| ; CHECK-NEXT: zip2 v0.2d, v0.2d, v2.2d |
| ; CHECK-NEXT: movk x8, #39322 |
| ; CHECK-NEXT: zip1 v2.2d, v1.2d, v3.2d |
| ; CHECK-NEXT: zip2 v1.2d, v1.2d, v3.2d |
| ; CHECK-NEXT: movk x8, #16393, lsl #48 |
| ; CHECK-NEXT: zip1 v3.2d, v4.2d, v6.2d |
| ; CHECK-NEXT: zip2 v4.2d, v4.2d, v6.2d |
| ; CHECK-NEXT: zip1 v17.2d, v5.2d, v7.2d |
| ; CHECK-NEXT: zip2 v5.2d, v5.2d, v7.2d |
| ; CHECK-NEXT: dup v6.2d, x8 |
| ; CHECK-NEXT: fmla v3.2d, v6.2d, v16.2d |
| ; CHECK-NEXT: fmla v4.2d, v6.2d, v0.2d |
| ; CHECK-NEXT: fmla v17.2d, v6.2d, v2.2d |
| ; CHECK-NEXT: fmla v5.2d, v6.2d, v1.2d |
| ; CHECK-NEXT: zip1 v0.2d, v3.2d, v4.2d |
| ; CHECK-NEXT: zip2 v2.2d, v3.2d, v4.2d |
| ; CHECK-NEXT: zip1 v1.2d, v17.2d, v5.2d |
| ; CHECK-NEXT: zip2 v3.2d, v17.2d, v5.2d |
| ; CHECK-NEXT: ret |
| entry: |
; Split %a into four pieces; piece k holds lanes k and k + 4.
| %ext00 = shufflevector <8 x double> %a, <8 x double> poison, <2 x i32> <i32 0, i32 4> |
| %ext01 = shufflevector <8 x double> %a, <8 x double> poison, <2 x i32> <i32 1, i32 5> |
| %ext02 = shufflevector <8 x double> %a, <8 x double> poison, <2 x i32> <i32 2, i32 6> |
| %ext03 = shufflevector <8 x double> %a, <8 x double> poison, <2 x i32> <i32 3, i32 7> |
| %fmul0 = fmul fast <2 x double> %ext00, splat (double 3.200000e+00) |
; Split %b with the same four masks used for %a.
| %ext10 = shufflevector <8 x double> %b, <8 x double> poison, <2 x i32> <i32 0, i32 4> |
| %ext11 = shufflevector <8 x double> %b, <8 x double> poison, <2 x i32> <i32 1, i32 5> |
| %ext12 = shufflevector <8 x double> %b, <8 x double> poison, <2 x i32> <i32 2, i32 6> |
| %ext13 = shufflevector <8 x double> %b, <8 x double> poison, <2 x i32> <i32 3, i32 7> |
| %fadd0 = fadd fast <2 x double> %ext10, %fmul0 |
| %fmul1 = fmul fast <2 x double> %ext01, splat (double 3.200000e+00) |
| %fadd1 = fadd fast <2 x double> %ext11, %fmul1 |
| %fmul2 = fmul fast <2 x double> %ext02, splat (double 3.200000e+00) |
| %fadd2 = fadd fast <2 x double> %ext12, %fmul2 |
| %fmul3 = fmul fast <2 x double> %ext03, splat (double 3.200000e+00) |
| %fadd3 = fadd fast <2 x double> %ext13, %fmul3 |
; The identity masks <0, 1, 2, 3> simply concatenate two 2-element results
; into one 4-element vector each.
| %interleave.pt1 = shufflevector <2 x double> %fadd0, <2 x double> %fadd1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| %interleave.pt2 = shufflevector <2 x double> %fadd2, <2 x double> %fadd3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
; Mask <0, 2, 4, 6, 1, 3, 5, 7> picks element 0 of every piece first and then
; element 1 of every piece, restoring the original lane order 0..7.
| %interleaved.vec = shufflevector <4 x double> %interleave.pt1, <4 x double> %interleave.pt2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7> |
| ret <8 x double> %interleaved.vec |
| } |