| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=aarch64 < %s | FileCheck %s |
| |
| |
| ; A single fneg of %x defined in the entry block is used as a multiplicand by |
| ; an llvm.fma call in the entry block and by a second llvm.fma call in use_bb. |
| ; The expected output uses fmls in both blocks and contains no standalone |
| ; fneg instruction. |
| define void @shared_fneg_across_bbs(<4 x float> %x, <4 x float> %y, <4 x float> %z, <4 x float> %a, <4 x float> %b, i1 %cond, ptr %out1, ptr %out2) { |
| ; CHECK-LABEL: shared_fneg_across_bbs: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: fmls v2.4s, v1.4s, v0.4s |
| ; CHECK-NEXT: str q2, [x1] |
| ; CHECK-NEXT: tbz w0, #0, .LBB0_2 |
| ; CHECK-NEXT: // %bb.1: // %use_bb |
| ; CHECK-NEXT: fmls v4.4s, v3.4s, v0.4s |
| ; CHECK-NEXT: str q4, [x2] |
| ; CHECK-NEXT: .LBB0_2: // %exit |
| ; CHECK-NEXT: ret |
| entry: |
| ; %neg is defined once here and consumed in two different basic blocks. |
| %neg = fneg <4 x float> %x |
| %r1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %y, <4 x float> %neg, <4 x float> %z) |
| store <4 x float> %r1, ptr %out1 |
| br i1 %cond, label %use_bb, label %exit |
| |
| use_bb: |
| ; Second user of %neg, outside the block that defines it. |
| %r2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %neg, <4 x float> %b) |
| store <4 x float> %r2, ptr %out2 |
| br label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Both multiplicands of each llvm.fma call are negated values (%nega, %negx), |
| ; and both negations are shared between the entry block and use_bb. |
| ; The expected output uses fmla on the un-negated inputs (v0, v3) in both |
| ; blocks and contains no fneg instruction. |
| ; Note: the %y parameter is not referenced by this function's body. |
| define void @shared_fnegs_across_bbs(<4 x float> %x, <4 x float> %y, <4 x float> %z, <4 x float> %a, <4 x float> %b, i1 %cond, ptr %out1, ptr %out2) { |
| ; CHECK-LABEL: shared_fnegs_across_bbs: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: fmla v2.4s, v0.4s, v3.4s |
| ; CHECK-NEXT: str q2, [x1] |
| ; CHECK-NEXT: tbz w0, #0, .LBB1_2 |
| ; CHECK-NEXT: // %bb.1: // %use_bb |
| ; CHECK-NEXT: fmla v4.4s, v0.4s, v3.4s |
| ; CHECK-NEXT: str q4, [x2] |
| ; CHECK-NEXT: .LBB1_2: // %exit |
| ; CHECK-NEXT: ret |
| entry: |
| ; Two independent negations, each consumed in both basic blocks. |
| %negx = fneg <4 x float> %x |
| %nega = fneg <4 x float> %a |
| %r1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %nega, <4 x float> %negx, <4 x float> %z) |
| store <4 x float> %r1, ptr %out1 |
| br i1 %cond, label %use_bb, label %exit |
| |
| use_bb: |
| ; Same pair of negated multiplicands as in entry; only the addend differs (%b). |
| %r2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %nega, <4 x float> %negx, <4 x float> %b) |
| store <4 x float> %r2, ptr %out2 |
| br label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; The shared fneg of %x feeds llvm.fma calls in entry and use_bb, but it also |
| ; has users that are not FMAs: it is passed to @foo in other_use and is the |
| ; function's return value in exit. |
| ; The expected output still uses fmls for both llvm.fma calls, and additionally |
| ; keeps one fneg (into v1) whose result is moved to v0 for the call to foo and |
| ; for the return. |
| define <4 x float> @shared_fneg_with_other_users(<4 x float> %x, <4 x float> %y, <4 x float> %z, <4 x float> %a, <4 x float> %b, i1 %cond, ptr %out1, ptr %out2) { |
| ; CHECK-LABEL: shared_fneg_with_other_users: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: fmls v2.4s, v1.4s, v0.4s |
| ; CHECK-NEXT: fneg v1.4s, v0.4s |
| ; CHECK-NEXT: str q2, [x1] |
| ; CHECK-NEXT: tbz w0, #0, .LBB2_2 |
| ; CHECK-NEXT: // %bb.1: // %use_bb |
| ; CHECK-NEXT: fmls v4.4s, v3.4s, v0.4s |
| ; CHECK-NEXT: mov v0.16b, v1.16b |
| ; CHECK-NEXT: str q4, [x2] |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB2_2: // %other_use |
| ; CHECK-NEXT: sub sp, sp, #32 |
| ; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-NEXT: .cfi_offset w30, -16 |
| ; CHECK-NEXT: mov v0.16b, v1.16b |
| ; CHECK-NEXT: str q1, [sp] // 16-byte Spill |
| ; CHECK-NEXT: bl foo |
| ; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload |
| ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload |
| ; CHECK-NEXT: add sp, sp, #32 |
| ; CHECK-NEXT: ret |
| entry: |
| ; %neg has four users in total: two llvm.fma calls, the call to @foo, and the ret. |
| %neg = fneg <4 x float> %x |
| %r1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %y, <4 x float> %neg, <4 x float> %z) |
| store <4 x float> %r1, ptr %out1 |
| br i1 %cond, label %use_bb, label %other_use |
| |
| use_bb: |
| ; FMA user of %neg in a different block from its definition. |
| %r2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %neg, <4 x float> %b) |
| store <4 x float> %r2, ptr %out2 |
| br label %exit |
| |
| other_use: |
| ; Non-FMA user: the negated vector itself is passed as a call argument. |
| call void @foo(<4 x float> %neg) |
| br label %exit |
| |
| exit: |
| ; Non-FMA user: the negated vector itself is returned on every path. |
| ret <4 x float> %neg |
| } |
| |
| ; Same shape as a shared fneg feeding a fused multiply-add in two blocks, but |
| ; the multiply-add is expressed with the llvm.fmuladd intrinsic instead of |
| ; llvm.fma. The expected output uses fmls in both blocks with no fneg. |
| define void @shared_fneg_across_bbs_fmuladd(<4 x float> %x, <4 x float> %y, <4 x float> %z, <4 x float> %a, <4 x float> %b, i1 %cond, ptr %out1, ptr %out2) { |
| ; CHECK-LABEL: shared_fneg_across_bbs_fmuladd: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: fmls v2.4s, v1.4s, v0.4s |
| ; CHECK-NEXT: str q2, [x1] |
| ; CHECK-NEXT: tbz w0, #0, .LBB3_2 |
| ; CHECK-NEXT: // %bb.1: // %use_bb |
| ; CHECK-NEXT: fmls v4.4s, v3.4s, v0.4s |
| ; CHECK-NEXT: str q4, [x2] |
| ; CHECK-NEXT: .LBB3_2: // %exit |
| ; CHECK-NEXT: ret |
| entry: |
| ; %neg is defined once here and consumed by llvm.fmuladd in two basic blocks. |
| %neg = fneg <4 x float> %x |
| %r1 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %y, <4 x float> %neg, <4 x float> %z) |
| store <4 x float> %r1, ptr %out1 |
| br i1 %cond, label %use_bb, label %exit |
| |
| use_bb: |
| ; Second llvm.fmuladd user of %neg, outside the defining block. |
| %r2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %neg, <4 x float> %b) |
| store <4 x float> %r2, ptr %out2 |
| br label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; The multiply-add is written as separate fmul and fadd instructions carrying |
| ; the 'fast' flag rather than as an intrinsic call. The fneg of %x is a |
| ; multiplicand of the fmul in entry and of the fmul in use_bb. |
| ; The expected output uses fmls in both blocks with no fneg. |
| define void @shared_fneg_across_bbs_fmul_fast(<4 x float> %x, <4 x float> %y, <4 x float> %z, <4 x float> %a, <4 x float> %b, i1 %cond, ptr %out1, ptr %out2) { |
| ; CHECK-LABEL: shared_fneg_across_bbs_fmul_fast: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: fmls v2.4s, v0.4s, v1.4s |
| ; CHECK-NEXT: str q2, [x1] |
| ; CHECK-NEXT: tbz w0, #0, .LBB4_2 |
| ; CHECK-NEXT: // %bb.1: // %use_bb |
| ; CHECK-NEXT: fmls v4.4s, v0.4s, v3.4s |
| ; CHECK-NEXT: str q4, [x2] |
| ; CHECK-NEXT: .LBB4_2: // %exit |
| ; CHECK-NEXT: ret |
| entry: |
| ; %neg is defined once here and multiplied in two basic blocks. |
| %neg = fneg <4 x float> %x |
| %mul1 = fmul fast <4 x float> %y, %neg |
| %r1 = fadd fast <4 x float> %mul1, %z |
| store <4 x float> %r1, ptr %out1 |
| br i1 %cond, label %use_bb, label %exit |
| |
| use_bb: |
| ; Second fmul/fadd pair using %neg, outside the defining block. |
| %mul2 = fmul fast <4 x float> %a, %neg |
| %r2 = fadd fast <4 x float> %mul2, %b |
| store <4 x float> %r2, ptr %out2 |
| br label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Separate fmul and fadd instructions carrying only the 'contract' flag (no |
| ; other fast-math flags). The fneg of %x is a multiplicand of the fmul in |
| ; entry and of the fmul in use_bb. |
| ; The expected output uses fmls in both blocks with no fneg. |
| define void @shared_fneg_across_bbs_fmul_contract(<4 x float> %x, <4 x float> %y, <4 x float> %z, <4 x float> %a, <4 x float> %b, i1 %cond, ptr %out1, ptr %out2) { |
| ; CHECK-LABEL: shared_fneg_across_bbs_fmul_contract: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: fmls v2.4s, v0.4s, v1.4s |
| ; CHECK-NEXT: str q2, [x1] |
| ; CHECK-NEXT: tbz w0, #0, .LBB5_2 |
| ; CHECK-NEXT: // %bb.1: // %use_bb |
| ; CHECK-NEXT: fmls v4.4s, v0.4s, v3.4s |
| ; CHECK-NEXT: str q4, [x2] |
| ; CHECK-NEXT: .LBB5_2: // %exit |
| ; CHECK-NEXT: ret |
| entry: |
| ; %neg is defined once here and multiplied in two basic blocks. |
| %neg = fneg <4 x float> %x |
| %mul1 = fmul contract <4 x float> %y, %neg |
| %r1 = fadd contract <4 x float> %mul1, %z |
| store <4 x float> %r1, ptr %out1 |
| br i1 %cond, label %use_bb, label %exit |
| |
| use_bb: |
| ; Second contractable fmul/fadd pair using %neg, outside the defining block. |
| %mul2 = fmul contract <4 x float> %a, %neg |
| %r2 = fadd contract <4 x float> %mul2, %b |
| store <4 x float> %r2, ptr %out2 |
| br label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Scalar (float) version of the shared-fneg pattern, written as fmul fast + |
| ; fadd fast. The fneg of %x is a multiplicand of the fmul in entry and of the |
| ; fmul in use_bb. |
| ; The expected output uses the scalar fmsub instruction in both blocks with |
| ; no fneg. |
| define void @shared_fneg_across_bbs_fmul_scalar(float %x, float %y, float %z, float %a, float %b, i1 %cond, ptr %out1, ptr %out2) { |
| ; CHECK-LABEL: shared_fneg_across_bbs_fmul_scalar: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: fmsub s1, s1, s0, s2 |
| ; CHECK-NEXT: str s1, [x1] |
| ; CHECK-NEXT: tbz w0, #0, .LBB6_2 |
| ; CHECK-NEXT: // %bb.1: // %use_bb |
| ; CHECK-NEXT: fmsub s0, s3, s0, s4 |
| ; CHECK-NEXT: str s0, [x2] |
| ; CHECK-NEXT: .LBB6_2: // %exit |
| ; CHECK-NEXT: ret |
| entry: |
| ; %neg is defined once here and multiplied in two basic blocks. |
| %neg = fneg float %x |
| %mul1 = fmul fast float %y, %neg |
| %r1 = fadd fast float %mul1, %z |
| store float %r1, ptr %out1 |
| br i1 %cond, label %use_bb, label %exit |
| |
| use_bb: |
| ; Second fmul/fadd pair using %neg, outside the defining block. |
| %mul2 = fmul fast float %a, %neg |
| %r2 = fadd fast float %mul2, %b |
| store float %r2, ptr %out2 |
| br label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; The non-negated multiplicand is a splat of lane 0 of %y (shufflevector with |
| ; an all-zero mask). Both the splat and the fneg of %x are defined in entry |
| ; and reused by the fmul in use_bb. |
| ; The expected output uses the by-element form of fmls (v1.s[0]) in both |
| ; blocks with no fneg. |
| ; Note: the %a parameter is not referenced by this function's body; use_bb |
| ; multiplies %splat, not %a. |
| define void @shared_fneg_splat_across_bbs_fmul(<4 x float> %x, <4 x float> %y, <4 x float> %z, <4 x float> %a, <4 x float> %b, i1 %cond, ptr %out1, ptr %out2) { |
| ; CHECK-LABEL: shared_fneg_splat_across_bbs_fmul: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: fmls v2.4s, v0.4s, v1.s[0] |
| ; CHECK-NEXT: str q2, [x1] |
| ; CHECK-NEXT: tbz w0, #0, .LBB7_2 |
| ; CHECK-NEXT: // %bb.1: // %use_bb |
| ; CHECK-NEXT: fmls v4.4s, v0.4s, v1.s[0] |
| ; CHECK-NEXT: str q4, [x2] |
| ; CHECK-NEXT: .LBB7_2: // %exit |
| ; CHECK-NEXT: ret |
| entry: |
| %neg = fneg <4 x float> %x |
| ; Broadcast element 0 of %y to all four lanes. |
| %splat = shufflevector <4 x float> %y, <4 x float> poison, <4 x i32> zeroinitializer |
| %mul1 = fmul fast <4 x float> %splat, %neg |
| %r1 = fadd fast <4 x float> %mul1, %z |
| store <4 x float> %r1, ptr %out1 |
| br i1 %cond, label %use_bb, label %exit |
| |
| use_bb: |
| ; Reuses both %splat and %neg from entry; only the addend differs (%b). |
| %mul2 = fmul fast <4 x float> %splat, %neg |
| %r2 = fadd fast <4 x float> %mul2, %b |
| store <4 x float> %r2, ptr %out2 |
| br label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; External function with no body in this file; called from |
| ; shared_fneg_with_other_users with the negated vector as its argument. |
| declare void @foo(<4 x float>) |