| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc < %s | FileCheck %s --check-prefixes=CHECK-EXPAND |
| ; RUN: llc -mattr=+fp16fml < %s | FileCheck %s --check-prefixes=CHECK-FMLAL |
| ; RUN: llc -mattr=+f16f32dot < %s | FileCheck %s --check-prefixes=CHECK-FDOT |
| ; RUN: llc -mattr=+f16f32dot,+fp16fml < %s | FileCheck %s --check-prefixes=CHECK-FDOT |
| ; RUN: llc -mattr=+f16f32dot,+sve2 < %s | FileCheck %s --check-prefixes=CHECK-FDOT |
| ; RUN: llc -mattr=+f16f32dot,+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK-SVE-FMLALBT |
| |
| target triple = "aarch64-linux-gnu" |
| |
| ; 128-bit case: two <8 x half> inputs are extended to float, multiplied, and the |
| ; <8 x float> product is folded into a <4 x float> accumulator with |
| ; llvm.vector.partial.reduce.fadd. |
| ; |
| ; What the assertions below expect for each llc configuration at the top of the file: |
| ;   no extra features            - fcvtl/fcvtl2 of both inputs, two fmul, two fadd |
| ;   +fp16fml                     - one fmlal followed by one fmlal2 |
| ;   +f16f32dot (alone, or with   - a single fdot v0.4s, v1.8h, v2.8h |
| ;     +fp16fml or +sve2) |
| ;   +f16f32dot,+sme, streaming   - fmlalb followed by fmlalt on z registers |
| define <4 x float> @fixed_fdot_wide(<4 x float> %acc, <8 x half> %a, <8 x half> %b) { |
| ; CHECK-EXPAND-LABEL: fixed_fdot_wide: |
| ; CHECK-EXPAND: // %bb.0: // %entry |
| ; CHECK-EXPAND-NEXT: fcvtl v3.4s, v1.4h |
| ; CHECK-EXPAND-NEXT: fcvtl v4.4s, v2.4h |
| ; CHECK-EXPAND-NEXT: fcvtl2 v1.4s, v1.8h |
| ; CHECK-EXPAND-NEXT: fcvtl2 v2.4s, v2.8h |
| ; CHECK-EXPAND-NEXT: fmul v3.4s, v3.4s, v4.4s |
| ; CHECK-EXPAND-NEXT: fmul v1.4s, v1.4s, v2.4s |
| ; CHECK-EXPAND-NEXT: fadd v0.4s, v0.4s, v3.4s |
| ; CHECK-EXPAND-NEXT: fadd v0.4s, v0.4s, v1.4s |
| ; CHECK-EXPAND-NEXT: ret |
| ; |
| ; CHECK-FMLAL-LABEL: fixed_fdot_wide: |
| ; CHECK-FMLAL: // %bb.0: // %entry |
| ; CHECK-FMLAL-NEXT: fmlal v0.4s, v1.4h, v2.4h |
| ; CHECK-FMLAL-NEXT: fmlal2 v0.4s, v1.4h, v2.4h |
| ; CHECK-FMLAL-NEXT: ret |
| ; |
| ; CHECK-FDOT-LABEL: fixed_fdot_wide: |
| ; CHECK-FDOT: // %bb.0: // %entry |
| ; CHECK-FDOT-NEXT: fdot v0.4s, v1.8h, v2.8h |
| ; CHECK-FDOT-NEXT: ret |
| ; |
| ; CHECK-SVE-FMLALBT-LABEL: fixed_fdot_wide: |
| ; CHECK-SVE-FMLALBT: // %bb.0: // %entry |
| ; CHECK-SVE-FMLALBT-NEXT: fmlalb z0.s, z1.h, z2.h |
| ; CHECK-SVE-FMLALBT-NEXT: fmlalt z0.s, z1.h, z2.h |
| ; CHECK-SVE-FMLALBT-NEXT: ret |
| entry: |
| ; Widen both operands before multiplying, so the product is computed in float. |
| %a.wide = fpext <8 x half> %a to <8 x float> |
| %b.wide = fpext <8 x half> %b to <8 x float> |
| %mult = fmul <8 x float> %a.wide, %b.wide |
| ; The product has twice as many lanes as the accumulator (8 vs 4). |
| %partial.reduce = call <4 x float> @llvm.vector.partial.reduce.fadd(<4 x float> %acc, <8 x float> %mult) |
| ret <4 x float> %partial.reduce |
| } |
| |
| ; 64-bit case: two <4 x half> inputs are extended to float, multiplied, and the |
| ; <4 x float> product is folded into a <2 x float> accumulator with |
| ; llvm.vector.partial.reduce.fadd. |
| ; |
| ; What the assertions below expect for each llc configuration at the top of the file: |
| ;   no extra features            - fcvtl of both inputs, one fmul, then two .2s fadd |
| ;                                  with an ext #8 of the product between them |
| ;   +fp16fml                     - one fmlal followed by one fmlal2 (.2s / .2h) |
| ;   +f16f32dot (alone, or with   - a single fdot v0.2s, v1.4h, v2.4h |
| ;     +fp16fml or +sve2) |
| ;   +f16f32dot,+sme, streaming   - fmlalb followed by fmlalt on z registers |
| define <2 x float> @fixed_fdot(<2 x float> %acc, <4 x half> %a, <4 x half> %b) { |
| ; CHECK-EXPAND-LABEL: fixed_fdot: |
| ; CHECK-EXPAND: // %bb.0: // %entry |
| ; CHECK-EXPAND-NEXT: fcvtl v1.4s, v1.4h |
| ; CHECK-EXPAND-NEXT: fcvtl v2.4s, v2.4h |
| ; CHECK-EXPAND-NEXT: fmul v1.4s, v1.4s, v2.4s |
| ; CHECK-EXPAND-NEXT: fadd v0.2s, v0.2s, v1.2s |
| ; CHECK-EXPAND-NEXT: ext v1.16b, v1.16b, v1.16b, #8 |
| ; CHECK-EXPAND-NEXT: fadd v0.2s, v1.2s, v0.2s |
| ; CHECK-EXPAND-NEXT: ret |
| ; |
| ; CHECK-FMLAL-LABEL: fixed_fdot: |
| ; CHECK-FMLAL: // %bb.0: // %entry |
| ; CHECK-FMLAL-NEXT: fmlal v0.2s, v1.2h, v2.2h |
| ; CHECK-FMLAL-NEXT: fmlal2 v0.2s, v1.2h, v2.2h |
| ; CHECK-FMLAL-NEXT: ret |
| ; |
| ; CHECK-FDOT-LABEL: fixed_fdot: |
| ; CHECK-FDOT: // %bb.0: // %entry |
| ; CHECK-FDOT-NEXT: fdot v0.2s, v1.4h, v2.4h |
| ; CHECK-FDOT-NEXT: ret |
| ; |
| ; CHECK-SVE-FMLALBT-LABEL: fixed_fdot: |
| ; CHECK-SVE-FMLALBT: // %bb.0: // %entry |
| ; CHECK-SVE-FMLALBT-NEXT: fmlalb z0.s, z1.h, z2.h |
| ; CHECK-SVE-FMLALBT-NEXT: fmlalt z0.s, z1.h, z2.h |
| ; CHECK-SVE-FMLALBT-NEXT: ret |
| entry: |
| ; Widen both operands before multiplying, so the product is computed in float. |
| %a.wide = fpext <4 x half> %a to <4 x float> |
| %b.wide = fpext <4 x half> %b to <4 x float> |
| %mult = fmul <4 x float> %a.wide, %b.wide |
| ; The product has twice as many lanes as the accumulator (4 vs 2). |
| %partial.reduce = call <2 x float> @llvm.vector.partial.reduce.fadd(<2 x float> %acc, <4 x float> %mult) |
| ret <2 x float> %partial.reduce |
| } |