| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc -mattr=+sve2 < %s | FileCheck %s --check-prefixes=CHECK,SVE2 |
| ; RUN: llc -global-isel -global-isel-abort=2 -mattr=+sve2 < %s | FileCheck %s --check-prefixes=CHECK,SVE2 |
| ; RUN: llc -mattr=+sve2p1 < %s | FileCheck %s --check-prefixes=CHECK,SVE2P1 |
| ; RUN: llc -global-isel -global-isel-abort=2 -mattr=+sve2p1 < %s | FileCheck %s --check-prefixes=CHECK,SVE2P1 |
| |
| ; Codegen test for llvm.vector.partial.reduce.fadd on scalable vectors, run |
| ; with +sve2 and with +sve2p1. For each feature set the GlobalISel invocation |
| ; reuses the check prefixes of the default invocation, so both are expected to |
| ; produce identical assembly. |
| |
| target triple = "aarch64-linux-gnu" |
| |
| ; Widening dot-product pattern: both nxv8f16 operands are extended to float, |
| ; multiplied, and the nxv8f32 product is partially reduced into the nxv4f32 |
| ; accumulator %acc. |
| ; Per the assertions below, +sve2 expands the chain into unpack (uunpklo / |
| ; uunpkhi), fcvt, fmul and two fadd instructions, while +sve2p1 emits a single |
| ; fdot. |
| define <vscale x 4 x float> @fdot_wide_nxv4f32(<vscale x 4 x float> %acc, <vscale x 8 x half> %a, <vscale x 8 x half> %b) { |
| ; SVE2-LABEL: fdot_wide_nxv4f32: |
| ; SVE2: // %bb.0: // %entry |
| ; SVE2-NEXT: uunpklo z3.s, z1.h |
| ; SVE2-NEXT: uunpklo z4.s, z2.h |
| ; SVE2-NEXT: ptrue p0.s |
| ; SVE2-NEXT: uunpkhi z1.s, z1.h |
| ; SVE2-NEXT: uunpkhi z2.s, z2.h |
| ; SVE2-NEXT: fcvt z3.s, p0/m, z3.h |
| ; SVE2-NEXT: fcvt z4.s, p0/m, z4.h |
| ; SVE2-NEXT: fcvt z1.s, p0/m, z1.h |
| ; SVE2-NEXT: fcvt z2.s, p0/m, z2.h |
| ; SVE2-NEXT: fmul z3.s, z3.s, z4.s |
| ; SVE2-NEXT: fmul z1.s, z1.s, z2.s |
| ; SVE2-NEXT: fadd z0.s, z0.s, z3.s |
| ; SVE2-NEXT: fadd z0.s, z0.s, z1.s |
| ; SVE2-NEXT: ret |
| ; |
| ; SVE2P1-LABEL: fdot_wide_nxv4f32: |
| ; SVE2P1: // %bb.0: // %entry |
| ; SVE2P1-NEXT: fdot z0.s, z1.h, z2.h |
| ; SVE2P1-NEXT: ret |
| entry: |
| %a.wide = fpext <vscale x 8 x half> %a to <vscale x 8 x float> |
| %b.wide = fpext <vscale x 8 x half> %b to <vscale x 8 x float> |
| %mult = fmul <vscale x 8 x float> %a.wide, %b.wide |
| %partial.reduce = call <vscale x 4 x float> @llvm.vector.partial.reduce.fadd(<vscale x 4 x float> %acc, <vscale x 8 x float> %mult) |
| ret <vscale x 4 x float> %partial.reduce |
| } |
| |
| ; Extend-only variant: the nxv8f16 input is extended to float and partially |
| ; reduced into %acc with no multiply in the IR. |
| ; Per the assertions below, +sve2p1 still uses fdot, with the second operand |
| ; materialised by an fmov of 1.0 into z2.h. +sve2 unpacks and converts both |
| ; halves and accumulates them with two fadd instructions. |
| define <vscale x 4 x float> @fdot_splat_nxv4f32(<vscale x 4 x float> %acc, <vscale x 8 x half> %a) { |
| ; SVE2-LABEL: fdot_splat_nxv4f32: |
| ; SVE2: // %bb.0: // %entry |
| ; SVE2-NEXT: uunpklo z2.s, z1.h |
| ; SVE2-NEXT: ptrue p0.s |
| ; SVE2-NEXT: uunpkhi z1.s, z1.h |
| ; SVE2-NEXT: fcvt z2.s, p0/m, z2.h |
| ; SVE2-NEXT: fcvt z1.s, p0/m, z1.h |
| ; SVE2-NEXT: fadd z0.s, z0.s, z2.s |
| ; SVE2-NEXT: fadd z0.s, z0.s, z1.s |
| ; SVE2-NEXT: ret |
| ; |
| ; SVE2P1-LABEL: fdot_splat_nxv4f32: |
| ; SVE2P1: // %bb.0: // %entry |
| ; SVE2P1-NEXT: fmov z2.h, #1.00000000 |
| ; SVE2P1-NEXT: fdot z0.s, z1.h, z2.h |
| ; SVE2P1-NEXT: ret |
| entry: |
| %a.wide = fpext <vscale x 8 x half> %a to <vscale x 8 x float> |
| %partial.reduce = call <vscale x 4 x float> @llvm.vector.partial.reduce.fadd(<vscale x 4 x float> %acc, <vscale x 8 x float> %a.wide) |
| ret <vscale x 4 x float> %partial.reduce |
| } |
| |
| ; Same-element-type case (half): an nxv16f16 input is partially reduced into |
| ; an nxv8f16 accumulator with no extension involved. |
| ; The assertions use the prefix shared by every invocation, so +sve2 and |
| ; +sve2p1 are both expected to emit two fadd instructions that add z1 and then |
| ; z2 into z0 (presumably the two register-sized halves of %a). |
| define <vscale x 8 x half> @partial_reduce_nxv8f16(<vscale x 8 x half> %acc, <vscale x 16 x half> %a) { |
| ; CHECK-LABEL: partial_reduce_nxv8f16: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: fadd z0.h, z0.h, z1.h |
| ; CHECK-NEXT: fadd z0.h, z0.h, z2.h |
| ; CHECK-NEXT: ret |
| entry: |
| %partial.reduce = call <vscale x 8 x half> @llvm.vector.partial.reduce.fadd(<vscale x 8 x half> %acc, <vscale x 16 x half> %a) |
| ret <vscale x 8 x half> %partial.reduce |
| } |
| |
| ; Same-element-type case (float): an nxv8f32 input is partially reduced into |
| ; an nxv4f32 accumulator with no extension involved. |
| ; The assertions use the prefix shared by every invocation, so +sve2 and |
| ; +sve2p1 are both expected to emit two fadd instructions that add z1 and then |
| ; z2 into z0 (presumably the two register-sized halves of %a). |
| define <vscale x 4 x float> @partial_reduce_nxv4f32(<vscale x 4 x float> %acc, <vscale x 8 x float> %a) { |
| ; CHECK-LABEL: partial_reduce_nxv4f32: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: fadd z0.s, z0.s, z1.s |
| ; CHECK-NEXT: fadd z0.s, z0.s, z2.s |
| ; CHECK-NEXT: ret |
| entry: |
| %partial.reduce = call <vscale x 4 x float> @llvm.vector.partial.reduce.fadd(<vscale x 4 x float> %acc, <vscale x 8 x float> %a) |
| ret <vscale x 4 x float> %partial.reduce |
| } |
| |
| ; Same-element-type case (double): an nxv4f64 input is partially reduced into |
| ; an nxv2f64 accumulator with no extension involved. |
| ; The assertions use the prefix shared by every invocation, so +sve2 and |
| ; +sve2p1 are both expected to emit two fadd instructions that add z1 and then |
| ; z2 into z0 (presumably the two register-sized halves of %a). |
| define <vscale x 2 x double> @partial_reduce_nxv2f64(<vscale x 2 x double> %acc, <vscale x 4 x double> %a) { |
| ; CHECK-LABEL: partial_reduce_nxv2f64: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: fadd z0.d, z0.d, z1.d |
| ; CHECK-NEXT: fadd z0.d, z0.d, z2.d |
| ; CHECK-NEXT: ret |
| entry: |
| %partial.reduce = call <vscale x 2 x double> @llvm.vector.partial.reduce.fadd(<vscale x 2 x double> %acc, <vscale x 4 x double> %a) |
| ret <vscale x 2 x double> %partial.reduce |
| } |