; blob: ab72173755a61b2866cacdb794065ae118cc6773 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s | FileCheck %s --check-prefixes=CHECK-EXPAND
; RUN: llc -mattr=+fp16fml < %s | FileCheck %s --check-prefixes=CHECK-FMLAL
; RUN: llc -mattr=+f16f32dot < %s | FileCheck %s --check-prefixes=CHECK-FDOT
; RUN: llc -mattr=+f16f32dot,+fp16fml < %s | FileCheck %s --check-prefixes=CHECK-FDOT
; RUN: llc -mattr=+f16f32dot,+sve2 < %s | FileCheck %s --check-prefixes=CHECK-FDOT
; RUN: llc -mattr=+f16f32dot,+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK-SVE-FMLALBT
; The llc invocations at the top of this file pass no -mtriple option, so the
; triple below is the only place the target is specified.
target triple = "aarch64-linux-gnu"
; Extends two <8 x half> operands to float, multiplies them, and folds the
; <8 x float> product into a <4 x float> accumulator through
; llvm.vector.partial.reduce.fadd.
; The assertions record the expected code for every llc configuration used by
; this file. With no extra features the sequence is fcvtl/fcvtl2, fmul and
; fadd. With +fp16fml it is fmlal followed by fmlal2. With +f16f32dot it is a
; single fdot. With streaming mode forced it is fmlalb followed by fmlalt.
define <4 x float> @fixed_fdot_wide(<4 x float> %acc, <8 x half> %a, <8 x half> %b) {
; CHECK-EXPAND-LABEL: fixed_fdot_wide:
; CHECK-EXPAND: // %bb.0: // %entry
; CHECK-EXPAND-NEXT: fcvtl v3.4s, v1.4h
; CHECK-EXPAND-NEXT: fcvtl v4.4s, v2.4h
; CHECK-EXPAND-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-EXPAND-NEXT: fcvtl2 v2.4s, v2.8h
; CHECK-EXPAND-NEXT: fmul v3.4s, v3.4s, v4.4s
; CHECK-EXPAND-NEXT: fmul v1.4s, v1.4s, v2.4s
; CHECK-EXPAND-NEXT: fadd v0.4s, v0.4s, v3.4s
; CHECK-EXPAND-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-EXPAND-NEXT: ret
;
; CHECK-FMLAL-LABEL: fixed_fdot_wide:
; CHECK-FMLAL: // %bb.0: // %entry
; CHECK-FMLAL-NEXT: fmlal v0.4s, v1.4h, v2.4h
; CHECK-FMLAL-NEXT: fmlal2 v0.4s, v1.4h, v2.4h
; CHECK-FMLAL-NEXT: ret
;
; CHECK-FDOT-LABEL: fixed_fdot_wide:
; CHECK-FDOT: // %bb.0: // %entry
; CHECK-FDOT-NEXT: fdot v0.4s, v1.8h, v2.8h
; CHECK-FDOT-NEXT: ret
;
; CHECK-SVE-FMLALBT-LABEL: fixed_fdot_wide:
; CHECK-SVE-FMLALBT: // %bb.0: // %entry
; CHECK-SVE-FMLALBT-NEXT: fmlalb z0.s, z1.h, z2.h
; CHECK-SVE-FMLALBT-NEXT: fmlalt z0.s, z1.h, z2.h
; CHECK-SVE-FMLALBT-NEXT: ret
entry:
  %lhs.f32 = fpext <8 x half> %a to <8 x float>
  %rhs.f32 = fpext <8 x half> %b to <8 x float>
  %prod = fmul <8 x float> %lhs.f32, %rhs.f32
  %sum = call <4 x float> @llvm.vector.partial.reduce.fadd(<4 x float> %acc, <8 x float> %prod)
  ret <4 x float> %sum
}
; 64-bit variant of the partial reduction. Two <4 x half> operands are
; extended to float, multiplied, and folded into a <2 x float> accumulator
; through llvm.vector.partial.reduce.fadd.
; The assertions record the expected code for every llc configuration used by
; this file. With no extra features the sequence is fcvtl and fmul, then two
; fadd with an ext between them. With +fp16fml it is fmlal followed by fmlal2.
; With +f16f32dot it is a single fdot. With streaming mode forced it is fmlalb
; followed by fmlalt.
define <2 x float> @fixed_fdot(<2 x float> %acc, <4 x half> %a, <4 x half> %b) {
; CHECK-EXPAND-LABEL: fixed_fdot:
; CHECK-EXPAND: // %bb.0: // %entry
; CHECK-EXPAND-NEXT: fcvtl v1.4s, v1.4h
; CHECK-EXPAND-NEXT: fcvtl v2.4s, v2.4h
; CHECK-EXPAND-NEXT: fmul v1.4s, v1.4s, v2.4s
; CHECK-EXPAND-NEXT: fadd v0.2s, v0.2s, v1.2s
; CHECK-EXPAND-NEXT: ext v1.16b, v1.16b, v1.16b, #8
; CHECK-EXPAND-NEXT: fadd v0.2s, v1.2s, v0.2s
; CHECK-EXPAND-NEXT: ret
;
; CHECK-FMLAL-LABEL: fixed_fdot:
; CHECK-FMLAL: // %bb.0: // %entry
; CHECK-FMLAL-NEXT: fmlal v0.2s, v1.2h, v2.2h
; CHECK-FMLAL-NEXT: fmlal2 v0.2s, v1.2h, v2.2h
; CHECK-FMLAL-NEXT: ret
;
; CHECK-FDOT-LABEL: fixed_fdot:
; CHECK-FDOT: // %bb.0: // %entry
; CHECK-FDOT-NEXT: fdot v0.2s, v1.4h, v2.4h
; CHECK-FDOT-NEXT: ret
;
; CHECK-SVE-FMLALBT-LABEL: fixed_fdot:
; CHECK-SVE-FMLALBT: // %bb.0: // %entry
; CHECK-SVE-FMLALBT-NEXT: fmlalb z0.s, z1.h, z2.h
; CHECK-SVE-FMLALBT-NEXT: fmlalt z0.s, z1.h, z2.h
; CHECK-SVE-FMLALBT-NEXT: ret
entry:
  %lhs.f32 = fpext <4 x half> %a to <4 x float>
  %rhs.f32 = fpext <4 x half> %b to <4 x float>
  %prod = fmul <4 x float> %lhs.f32, %rhs.f32
  %sum = call <2 x float> @llvm.vector.partial.reduce.fadd(<2 x float> %acc, <4 x float> %prod)
  ret <2 x float> %sum
}