; blob: 20bfcfcccd3997f54f939d0e087af4dd9fe7a0de [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mattr=-paired-vector-memops -mcpu=pwr10 -mtriple=powerpc64le < %s | \
; RUN: FileCheck %s --check-prefix=PWR10LE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mattr=-paired-vector-memops -mcpu=pwr10 -mtriple=powerpc64 < %s | \
; RUN: FileCheck %s --check-prefix=PWR10BE
;;
;; Vectors of f32
;;
;; fadd reduction of <2 x float> with a -0.0 start value and no fast-math
;; flags on the call. Every configuration is expected to convert each lane
;; to scalar (xscvspdpn) and combine the two with a single scalar xsaddsp;
;; no separate add of the start value appears in the expected output.
;; The little- and big-endian expectations differ only in the shuffles
;; (xxsldwi/xxswapd) used to reach each lane.
define dso_local float @v2f32(<2 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
%0 = call float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> %a)
ret float %0
}
;; fadd reduction of <2 x float> whose start value is the argument %b, with
;; no fast-math flags on the call. The expected code is a scalar xsaddsp
;; chain with one add per lane; the first add takes f1 as an operand
;; (presumably the incoming %b -- confirm against the calling convention).
define dso_local float @v2f32_b(<2 x float> %a, float %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f32_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xsaddsp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f32_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xsaddsp f0, f1, f0
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f32_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xsaddsp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f32_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xsaddsp f0, f1, f0
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
%0 = call float @llvm.vector.reduce.fadd.v2f32(float %b, <2 x float> %a)
ret float %0
}
;; fadd reduction of <2 x float> with a -0.0 start value and the `fast`
;; flag on the call. Instead of a scalar add chain, the expected code
;; splats one lane (xxspltw), performs a single vector xvaddsp, and converts
;; one lane of the result to scalar. The little-endian expectations contain
;; an extra xxsldwi before the final xscvspdpn.
define dso_local float @v2f32_fast(<2 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f32_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxspltw vs0, v2, 2
; PWR9LE-NEXT: xvaddsp vs0, v2, vs0
; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT: xscvspdpn f1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f32_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxspltw vs0, v2, 1
; PWR9BE-NEXT: xvaddsp vs0, v2, vs0
; PWR9BE-NEXT: xscvspdpn f1, vs0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f32_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxspltw vs0, v2, 2
; PWR10LE-NEXT: xvaddsp vs0, v2, vs0
; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT: xscvspdpn f1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f32_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxspltw vs0, v2, 1
; PWR10BE-NEXT: xvaddsp vs0, v2, vs0
; PWR10BE-NEXT: xscvspdpn f1, vs0
; PWR10BE-NEXT: blr
entry:
%0 = call fast float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> %a)
ret float %0
}
;; fadd reduction of <4 x float> with a -0.0 start value and no fast-math
;; flags on the call. The expected code extracts the four lanes of v2 one
;; at a time (xxsldwi/xxswapd + xscvspdpn) and accumulates them through a
;; chain of scalar xsaddsp instructions; no separate add of the start value
;; appears in the expected output.
define dso_local float @v4f32(<4 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v2
; PWR9LE-NEXT: xsaddsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4f32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4f32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v2
; PWR10LE-NEXT: xsaddsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4f32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
%0 = call float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> %a)
ret float %0
}
;; fadd reduction of <4 x float> whose start value is the argument %b, with
;; no fast-math flags on the call. The expected code is a scalar xsaddsp
;; chain with one add per lane of v2; the first add takes f1 as an operand
;; (presumably the incoming %b -- confirm against the calling convention).
define dso_local float @v4f32_b(<4 x float> %a, float %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f32_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xsaddsp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v2
; PWR9LE-NEXT: xsaddsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4f32_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xsaddsp f0, f1, f0
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4f32_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xsaddsp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v2
; PWR10LE-NEXT: xsaddsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4f32_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xsaddsp f0, f1, f0
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
%0 = call float @llvm.vector.reduce.fadd.v4f32(float %b, <4 x float> %a)
ret float %0
}
;; fadd reduction of <4 x float> with a -0.0 start value and the `fast`
;; flag on the call. The expected code uses two vector adds: xxswapd +
;; xvaddsp, then xxspltw + xvaddsp, followed by a single xscvspdpn. The
;; little-endian expectations contain an extra xxsldwi before the final
;; xscvspdpn.
define dso_local float @v4f32_fast(<4 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f32_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: xvaddsp vs0, v2, v3
; PWR9LE-NEXT: xxspltw vs1, vs0, 2
; PWR9LE-NEXT: xvaddsp vs0, vs0, vs1
; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT: xscvspdpn f1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4f32_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: xvaddsp vs0, v2, v3
; PWR9BE-NEXT: xxspltw vs1, vs0, 1
; PWR9BE-NEXT: xvaddsp vs0, vs0, vs1
; PWR9BE-NEXT: xscvspdpn f1, vs0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4f32_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: xvaddsp vs0, v2, v3
; PWR10LE-NEXT: xxspltw vs1, vs0, 2
; PWR10LE-NEXT: xvaddsp vs0, vs0, vs1
; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT: xscvspdpn f1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4f32_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: xvaddsp vs0, v2, v3
; PWR10BE-NEXT: xxspltw vs1, vs0, 1
; PWR10BE-NEXT: xvaddsp vs0, vs0, vs1
; PWR10BE-NEXT: xscvspdpn f1, vs0
; PWR10BE-NEXT: blr
entry:
%0 = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> %a)
ret float %0
}
;; fadd reduction of <8 x float> with a -0.0 start value and no fast-math
;; flags on the call. The expected code is one scalar xsaddsp chain that
;; first consumes the lanes of v2 and then the lanes of v3, extracting each
;; lane with xxsldwi/xxswapd + xscvspdpn; no separate add of the start value
;; appears in the expected output.
define dso_local float @v8f32(<8 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v2
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v3
; PWR9LE-NEXT: xsaddsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v3
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v2
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v3
; PWR10LE-NEXT: xsaddsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v3
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
%0 = call float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> %a)
ret float %0
}
;; fadd reduction of <8 x float> whose start value is the argument %b, with
;; no fast-math flags on the call. The expected code is a scalar xsaddsp
;; chain with one add per lane, walking v2 and then v3; the first add takes
;; f1 as an operand (presumably the incoming %b -- confirm against the
;; calling convention).
define dso_local float @v8f32_b(<8 x float> %a, float %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f32_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xsaddsp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v2
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v3
; PWR9LE-NEXT: xsaddsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f32_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xsaddsp f0, f1, f0
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v3
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f32_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xsaddsp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v2
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v3
; PWR10LE-NEXT: xsaddsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f32_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xsaddsp f0, f1, f0
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v3
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
%0 = call float @llvm.vector.reduce.fadd.v8f32(float %b, <8 x float> %a)
ret float %0
}
;; fadd reduction of <8 x float> with a -0.0 start value and the `fast`
;; flag on the call. The expected code first adds v2 and v3 with a vector
;; xvaddsp, then applies an xxswapd + xvaddsp step and an xxspltw + xvaddsp
;; step before a single xscvspdpn. The little-endian expectations contain
;; an extra xxsldwi before the final xscvspdpn.
define dso_local float @v8f32_fast(<8 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f32_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xvaddsp vs0, v2, v3
; PWR9LE-NEXT: xxswapd v2, vs0
; PWR9LE-NEXT: xvaddsp vs0, vs0, v2
; PWR9LE-NEXT: xxspltw vs1, vs0, 2
; PWR9LE-NEXT: xvaddsp vs0, vs0, vs1
; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT: xscvspdpn f1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f32_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xvaddsp vs0, v2, v3
; PWR9BE-NEXT: xxswapd v2, vs0
; PWR9BE-NEXT: xvaddsp vs0, vs0, v2
; PWR9BE-NEXT: xxspltw vs1, vs0, 1
; PWR9BE-NEXT: xvaddsp vs0, vs0, vs1
; PWR9BE-NEXT: xscvspdpn f1, vs0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f32_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xvaddsp vs0, v2, v3
; PWR10LE-NEXT: xxswapd v2, vs0
; PWR10LE-NEXT: xvaddsp vs0, vs0, v2
; PWR10LE-NEXT: xxspltw vs1, vs0, 2
; PWR10LE-NEXT: xvaddsp vs0, vs0, vs1
; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT: xscvspdpn f1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f32_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xvaddsp vs0, v2, v3
; PWR10BE-NEXT: xxswapd v2, vs0
; PWR10BE-NEXT: xvaddsp vs0, vs0, v2
; PWR10BE-NEXT: xxspltw vs1, vs0, 1
; PWR10BE-NEXT: xvaddsp vs0, vs0, vs1
; PWR10BE-NEXT: xscvspdpn f1, vs0
; PWR10BE-NEXT: blr
entry:
%0 = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> %a)
ret float %0
}
;; fadd reduction of <16 x float> with a -0.0 start value and no fast-math
;; flags on the call. The expected code is one scalar xsaddsp chain that
;; consumes the lanes of v2, v3, v4 and v5 in that order, extracting each
;; lane with xxsldwi/xxswapd + xscvspdpn; no separate add of the start value
;; appears in the expected output.
define dso_local float @v16f32(<16 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v2
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v3
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v4
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v4
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v5
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v5
; PWR9LE-NEXT: xsaddsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v3
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v4
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v4
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v5
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v5
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v2
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v3
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v4
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v4
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v5
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v5
; PWR10LE-NEXT: xsaddsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v3
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v4
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v4
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v5
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v5
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
%0 = call float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> %a)
ret float %0
}
;; fadd reduction of <16 x float> whose start value is the argument %b,
;; with no fast-math flags on the call. The expected code is a scalar
;; xsaddsp chain with one add per lane, walking v2, v3, v4 and v5 in that
;; order; the first add takes f1 as an operand (presumably the incoming %b
;; -- confirm against the calling convention).
define dso_local float @v16f32_b(<16 x float> %a, float %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f32_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xsaddsp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v2
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v3
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v4
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v4
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v5
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsaddsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v5
; PWR9LE-NEXT: xsaddsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f32_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xsaddsp f0, f1, f0
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v3
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v4
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v4
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v5
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v5
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsaddsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f32_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xsaddsp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v2
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v3
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v4
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v4
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v5
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsaddsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v5
; PWR10LE-NEXT: xsaddsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f32_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xsaddsp f0, f1, f0
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v3
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v4
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v4
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v5
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v5
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsaddsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
%0 = call float @llvm.vector.reduce.fadd.v16f32(float %b, <16 x float> %a)
ret float %0
}
;; fadd reduction of <16 x float> with a -0.0 start value and the `fast`
;; flag on the call. The expected code adds v3 to v5 and v2 to v4 with
;; vector xvaddsp, adds those two results together, then applies an
;; xxswapd + xvaddsp step and an xxspltw + xvaddsp step before a single
;; xscvspdpn. The little-endian expectations contain an extra xxsldwi
;; before the final xscvspdpn.
define dso_local float @v16f32_fast(<16 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f32_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xvaddsp vs0, v3, v5
; PWR9LE-NEXT: xvaddsp vs1, v2, v4
; PWR9LE-NEXT: xvaddsp vs0, vs1, vs0
; PWR9LE-NEXT: xxswapd v2, vs0
; PWR9LE-NEXT: xvaddsp vs0, vs0, v2
; PWR9LE-NEXT: xxspltw vs1, vs0, 2
; PWR9LE-NEXT: xvaddsp vs0, vs0, vs1
; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT: xscvspdpn f1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f32_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xvaddsp vs0, v3, v5
; PWR9BE-NEXT: xvaddsp vs1, v2, v4
; PWR9BE-NEXT: xvaddsp vs0, vs1, vs0
; PWR9BE-NEXT: xxswapd v2, vs0
; PWR9BE-NEXT: xvaddsp vs0, vs0, v2
; PWR9BE-NEXT: xxspltw vs1, vs0, 1
; PWR9BE-NEXT: xvaddsp vs0, vs0, vs1
; PWR9BE-NEXT: xscvspdpn f1, vs0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f32_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xvaddsp vs0, v3, v5
; PWR10LE-NEXT: xvaddsp vs1, v2, v4
; PWR10LE-NEXT: xvaddsp vs0, vs1, vs0
; PWR10LE-NEXT: xxswapd v2, vs0
; PWR10LE-NEXT: xvaddsp vs0, vs0, v2
; PWR10LE-NEXT: xxspltw vs1, vs0, 2
; PWR10LE-NEXT: xvaddsp vs0, vs0, vs1
; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT: xscvspdpn f1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f32_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xvaddsp vs0, v3, v5
; PWR10BE-NEXT: xvaddsp vs1, v2, v4
; PWR10BE-NEXT: xvaddsp vs0, vs1, vs0
; PWR10BE-NEXT: xxswapd v2, vs0
; PWR10BE-NEXT: xvaddsp vs0, vs0, v2
; PWR10BE-NEXT: xxspltw vs1, vs0, 1
; PWR10BE-NEXT: xvaddsp vs0, vs0, vs1
; PWR10BE-NEXT: xscvspdpn f1, vs0
; PWR10BE-NEXT: blr
entry:
%0 = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> %a)
ret float %0
}
;; Declarations of the llvm.vector.reduce.fadd intrinsics for the f32 vector
;; widths (2, 4, 8 and 16 elements). Each takes a scalar float start value
;; followed by the vector to reduce and returns a scalar float.
declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>) #0
declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>) #0
declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>) #0
declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>) #0
;;
;; Vectors of f64
;;
;; Reduces a <2 x double> with llvm.vector.reduce.fadd, using -0.0 as the start
;; value and no fast-math flags on the call. Every configuration expects one
;; xxswapd followed by a single scalar xsadddp; the little- and big-endian
;; expectations differ only in the order of the xsadddp source operands.
define dso_local double @v2f64(<2 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xsadddp f1, f0, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd vs0, v2
; PWR9BE-NEXT: xsadddp f1, v2, f0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xsadddp f1, f0, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd vs0, v2
; PWR10BE-NEXT: xsadddp f1, v2, f0
; PWR10BE-NEXT: blr
entry:
%0 = call double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> %a)
ret double %0
}
;; Reduces a <2 x double> with llvm.vector.reduce.fadd, using the scalar
;; argument %b as the start value and no fast-math flags on the call. Every
;; configuration expects two chained scalar xsadddp, the first of which reads
;; f1 (presumably the register in which %b arrives - confirm against the ABI).
define dso_local double @v2f64_b(<2 x double> %a, double %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f64_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xsadddp f0, f1, f0
; PWR9LE-NEXT: xsadddp f1, f0, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f64_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xsadddp f0, f1, v2
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xsadddp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f64_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xsadddp f0, f1, f0
; PWR10LE-NEXT: xsadddp f1, f0, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f64_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xsadddp f0, f1, v2
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xsadddp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
%0 = call double @llvm.vector.reduce.fadd.v2f64(double %b, <2 x double> %a)
ret double %0
}
;; Reduces a <2 x double> with llvm.vector.reduce.fadd, using -0.0 as the start
;; value and the `fast` flag on the call. The expected code is a single vector
;; add (xvadddp) of the input and its xxswapd'ed copy. The little-endian
;; expectations have one additional xxswapd into vs1 after the add, while the
;; big-endian ones write the add result to vs1 directly.
define dso_local double @v2f64_fast(<2 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f64_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xvadddp vs0, v2, vs0
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f64_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd vs0, v2
; PWR9BE-NEXT: xvadddp vs1, v2, vs0
; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f64_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xvadddp vs0, v2, vs0
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f64_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd vs0, v2
; PWR10BE-NEXT: xvadddp vs1, v2, vs0
; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10BE-NEXT: blr
entry:
%0 = call fast double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> %a)
ret double %0
}
;; Reduces a <4 x double> with llvm.vector.reduce.fadd, using -0.0 as the start
;; value and no fast-math flags on the call. The expected code is a chain of
;; three scalar xsadddp over the two argument registers v2 and v3 and their
;; xxswapd'ed copies; no vector add appears in any configuration.
define dso_local double @v4f64(<4 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xsadddp f0, f0, v2
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xsadddp f1, f0, v3
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4f64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd vs0, v2
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xsadddp f0, v2, f0
; PWR9BE-NEXT: xsadddp f0, f0, v3
; PWR9BE-NEXT: xsadddp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4f64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xsadddp f0, f0, v2
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xsadddp f1, f0, v3
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4f64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd vs0, v2
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xsadddp f0, v2, f0
; PWR10BE-NEXT: xsadddp f0, f0, v3
; PWR10BE-NEXT: xsadddp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
%0 = call double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> %a)
ret double %0
}
;; Reduces a <4 x double> with llvm.vector.reduce.fadd, using the scalar
;; argument %b as the start value and no fast-math flags on the call. The
;; expected code is a chain of four scalar xsadddp; the first one reads f1
;; (presumably the register in which %b arrives - confirm against the ABI).
define dso_local double @v4f64_b(<4 x double> %a, double %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f64_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xsadddp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xsadddp f0, f0, v2
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xsadddp f1, f0, v3
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4f64_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xsadddp f0, f1, v2
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xsadddp f0, f0, v3
; PWR9BE-NEXT: xsadddp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4f64_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xsadddp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xsadddp f0, f0, v2
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xsadddp f1, f0, v3
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4f64_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xsadddp f0, f1, v2
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xsadddp f0, f0, v3
; PWR10BE-NEXT: xsadddp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
%0 = call double @llvm.vector.reduce.fadd.v4f64(double %b, <4 x double> %a)
ret double %0
}
;; Reduces a <4 x double> with llvm.vector.reduce.fadd, using -0.0 as the start
;; value and the `fast` flag on the call. The expected code combines v2 and v3
;; with one vector add (xvadddp), then applies an xxswapd/xvadddp pair to the
;; result. The little-endian expectations end with one additional xxswapd into
;; vs1; the big-endian ones write the last add to vs1 directly.
define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f64_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xvadddp vs0, v2, v3
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: xvadddp vs0, vs0, vs1
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4f64_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xvadddp vs0, v2, v3
; PWR9BE-NEXT: xxswapd vs1, vs0
; PWR9BE-NEXT: xvadddp vs1, vs0, vs1
; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4f64_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xvadddp vs0, v2, v3
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: xvadddp vs0, vs0, vs1
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4f64_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xvadddp vs0, v2, v3
; PWR10BE-NEXT: xxswapd vs1, vs0
; PWR10BE-NEXT: xvadddp vs1, vs0, vs1
; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10BE-NEXT: blr
entry:
%0 = call fast double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> %a)
ret double %0
}
;; Reduces an <8 x double> with llvm.vector.reduce.fadd, using -0.0 as the
;; start value and no fast-math flags on the call. The expected code is a chain
;; of seven scalar xsadddp over the argument registers v2-v5 and their
;; xxswapd'ed copies; no vector add appears in any configuration.
define dso_local double @v8f64(<8 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xsadddp f0, f0, v2
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v4
; PWR9LE-NEXT: xsadddp f0, f0, v3
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v5
; PWR9LE-NEXT: xsadddp f0, f0, v4
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xsadddp f1, f0, v5
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd vs0, v2
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xsadddp f0, v2, f0
; PWR9BE-NEXT: xsadddp f0, f0, v3
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v4
; PWR9BE-NEXT: xsadddp f0, f0, v4
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v5
; PWR9BE-NEXT: xsadddp f0, f0, v5
; PWR9BE-NEXT: xsadddp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xsadddp f0, f0, v2
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v4
; PWR10LE-NEXT: xsadddp f0, f0, v3
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v5
; PWR10LE-NEXT: xsadddp f0, f0, v4
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xsadddp f1, f0, v5
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd vs0, v2
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xsadddp f0, v2, f0
; PWR10BE-NEXT: xsadddp f0, f0, v3
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v4
; PWR10BE-NEXT: xsadddp f0, f0, v4
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v5
; PWR10BE-NEXT: xsadddp f0, f0, v5
; PWR10BE-NEXT: xsadddp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
%0 = call double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> %a)
ret double %0
}
;; Reduces an <8 x double> with llvm.vector.reduce.fadd, using the scalar
;; argument %b as the start value and no fast-math flags on the call. The
;; expected code is a chain of eight scalar xsadddp over v2-v5; the first one
;; reads f1 (presumably the register in which %b arrives - confirm against the
;; ABI).
define dso_local double @v8f64_b(<8 x double> %a, double %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f64_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xsadddp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xsadddp f0, f0, v2
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v4
; PWR9LE-NEXT: xsadddp f0, f0, v3
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v5
; PWR9LE-NEXT: xsadddp f0, f0, v4
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xsadddp f1, f0, v5
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f64_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xsadddp f0, f1, v2
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xsadddp f0, f0, v3
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v4
; PWR9BE-NEXT: xsadddp f0, f0, v4
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v5
; PWR9BE-NEXT: xsadddp f0, f0, v5
; PWR9BE-NEXT: xsadddp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f64_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xsadddp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xsadddp f0, f0, v2
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v4
; PWR10LE-NEXT: xsadddp f0, f0, v3
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v5
; PWR10LE-NEXT: xsadddp f0, f0, v4
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xsadddp f1, f0, v5
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f64_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xsadddp f0, f1, v2
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xsadddp f0, f0, v3
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v4
; PWR10BE-NEXT: xsadddp f0, f0, v4
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v5
; PWR10BE-NEXT: xsadddp f0, f0, v5
; PWR10BE-NEXT: xsadddp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
%0 = call double @llvm.vector.reduce.fadd.v8f64(double %b, <8 x double> %a)
ret double %0
}
;; Reduces an <8 x double> with llvm.vector.reduce.fadd, using -0.0 as the
;; start value and the `fast` flag on the call. The expected code combines the
;; argument registers v2-v5 with three vector adds (xvadddp), then applies an
;; xxswapd/xvadddp pair to the result. The little-endian expectations end with
;; one additional xxswapd into vs1; the big-endian ones write the last add to
;; vs1 directly.
define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f64_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xvadddp vs0, v3, v5
; PWR9LE-NEXT: xvadddp vs1, v2, v4
; PWR9LE-NEXT: xvadddp vs0, vs1, vs0
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: xvadddp vs0, vs0, vs1
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f64_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xvadddp vs0, v3, v5
; PWR9BE-NEXT: xvadddp vs1, v2, v4
; PWR9BE-NEXT: xvadddp vs0, vs1, vs0
; PWR9BE-NEXT: xxswapd vs1, vs0
; PWR9BE-NEXT: xvadddp vs1, vs0, vs1
; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f64_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xvadddp vs0, v3, v5
; PWR10LE-NEXT: xvadddp vs1, v2, v4
; PWR10LE-NEXT: xvadddp vs0, vs1, vs0
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: xvadddp vs0, vs0, vs1
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f64_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xvadddp vs0, v3, v5
; PWR10BE-NEXT: xvadddp vs1, v2, v4
; PWR10BE-NEXT: xvadddp vs0, vs1, vs0
; PWR10BE-NEXT: xxswapd vs1, vs0
; PWR10BE-NEXT: xvadddp vs1, vs0, vs1
; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10BE-NEXT: blr
entry:
%0 = call fast double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> %a)
ret double %0
}
;; Reduces a <16 x double> with llvm.vector.reduce.fadd, using -0.0 as the
;; start value and no fast-math flags on the call. The expected code is a chain
;; of fifteen scalar xsadddp over the argument registers v2-v9 and their
;; xxswapd'ed copies; no vector add appears in any configuration.
define dso_local double @v16f64(<16 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xsadddp f0, f0, v2
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v4
; PWR9LE-NEXT: xsadddp f0, f0, v3
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v5
; PWR9LE-NEXT: xsadddp f0, f0, v4
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v6
; PWR9LE-NEXT: xsadddp f0, f0, v5
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v7
; PWR9LE-NEXT: xsadddp f0, f0, v6
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v8
; PWR9LE-NEXT: xsadddp f0, f0, v7
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v9
; PWR9LE-NEXT: xsadddp f0, f0, v8
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xsadddp f1, f0, v9
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd vs0, v2
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xsadddp f0, v2, f0
; PWR9BE-NEXT: xsadddp f0, f0, v3
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v4
; PWR9BE-NEXT: xsadddp f0, f0, v4
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v5
; PWR9BE-NEXT: xsadddp f0, f0, v5
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v6
; PWR9BE-NEXT: xsadddp f0, f0, v6
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v7
; PWR9BE-NEXT: xsadddp f0, f0, v7
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v8
; PWR9BE-NEXT: xsadddp f0, f0, v8
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v9
; PWR9BE-NEXT: xsadddp f0, f0, v9
; PWR9BE-NEXT: xsadddp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xsadddp f0, f0, v2
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v4
; PWR10LE-NEXT: xsadddp f0, f0, v3
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v5
; PWR10LE-NEXT: xsadddp f0, f0, v4
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v6
; PWR10LE-NEXT: xsadddp f0, f0, v5
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v7
; PWR10LE-NEXT: xsadddp f0, f0, v6
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v8
; PWR10LE-NEXT: xsadddp f0, f0, v7
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v9
; PWR10LE-NEXT: xsadddp f0, f0, v8
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xsadddp f1, f0, v9
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd vs0, v2
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xsadddp f0, v2, f0
; PWR10BE-NEXT: xsadddp f0, f0, v3
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v4
; PWR10BE-NEXT: xsadddp f0, f0, v4
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v5
; PWR10BE-NEXT: xsadddp f0, f0, v5
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v6
; PWR10BE-NEXT: xsadddp f0, f0, v6
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v7
; PWR10BE-NEXT: xsadddp f0, f0, v7
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v8
; PWR10BE-NEXT: xsadddp f0, f0, v8
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v9
; PWR10BE-NEXT: xsadddp f0, f0, v9
; PWR10BE-NEXT: xsadddp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
%0 = call double @llvm.vector.reduce.fadd.v16f64(double -0.000000e+00, <16 x double> %a)
ret double %0
}
;; Reduces a <16 x double> with llvm.vector.reduce.fadd, using the scalar
;; argument %b as the start value and no fast-math flags on the call. The
;; expected code is a chain of sixteen scalar xsadddp over v2-v9; the first one
;; reads f1 (presumably the register in which %b arrives - confirm against the
;; ABI).
define dso_local double @v16f64_b(<16 x double> %a, double %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f64_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xsadddp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xsadddp f0, f0, v2
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v4
; PWR9LE-NEXT: xsadddp f0, f0, v3
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v5
; PWR9LE-NEXT: xsadddp f0, f0, v4
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v6
; PWR9LE-NEXT: xsadddp f0, f0, v5
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v7
; PWR9LE-NEXT: xsadddp f0, f0, v6
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v8
; PWR9LE-NEXT: xsadddp f0, f0, v7
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v9
; PWR9LE-NEXT: xsadddp f0, f0, v8
; PWR9LE-NEXT: xsadddp f0, f0, f1
; PWR9LE-NEXT: xsadddp f1, f0, v9
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f64_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xsadddp f0, f1, v2
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xsadddp f0, f0, v3
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v4
; PWR9BE-NEXT: xsadddp f0, f0, v4
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v5
; PWR9BE-NEXT: xsadddp f0, f0, v5
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v6
; PWR9BE-NEXT: xsadddp f0, f0, v6
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v7
; PWR9BE-NEXT: xsadddp f0, f0, v7
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v8
; PWR9BE-NEXT: xsadddp f0, f0, v8
; PWR9BE-NEXT: xsadddp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v9
; PWR9BE-NEXT: xsadddp f0, f0, v9
; PWR9BE-NEXT: xsadddp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f64_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xsadddp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xsadddp f0, f0, v2
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v4
; PWR10LE-NEXT: xsadddp f0, f0, v3
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v5
; PWR10LE-NEXT: xsadddp f0, f0, v4
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v6
; PWR10LE-NEXT: xsadddp f0, f0, v5
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v7
; PWR10LE-NEXT: xsadddp f0, f0, v6
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v8
; PWR10LE-NEXT: xsadddp f0, f0, v7
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v9
; PWR10LE-NEXT: xsadddp f0, f0, v8
; PWR10LE-NEXT: xsadddp f0, f0, f1
; PWR10LE-NEXT: xsadddp f1, f0, v9
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f64_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xsadddp f0, f1, v2
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xsadddp f0, f0, v3
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v4
; PWR10BE-NEXT: xsadddp f0, f0, v4
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v5
; PWR10BE-NEXT: xsadddp f0, f0, v5
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v6
; PWR10BE-NEXT: xsadddp f0, f0, v6
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v7
; PWR10BE-NEXT: xsadddp f0, f0, v7
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v8
; PWR10BE-NEXT: xsadddp f0, f0, v8
; PWR10BE-NEXT: xsadddp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v9
; PWR10BE-NEXT: xsadddp f0, f0, v9
; PWR10BE-NEXT: xsadddp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
%0 = call double @llvm.vector.reduce.fadd.v16f64(double %b, <16 x double> %a)
ret double %0
}
;; Reduces a <16 x double> with llvm.vector.reduce.fadd, using -0.0 as the
;; start value and the `fast` flag on the call. The expected code combines the
;; argument registers v2-v9 with seven vector adds (xvadddp), then applies an
;; xxswapd/xvadddp pair to the result. The little-endian expectations end with
;; one additional xxswapd into vs1; the big-endian ones write the last add to
;; vs1 directly.
define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f64_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xvadddp vs0, v4, v8
; PWR9LE-NEXT: xvadddp vs1, v2, v6
; PWR9LE-NEXT: xvadddp vs2, v5, v9
; PWR9LE-NEXT: xvadddp vs3, v3, v7
; PWR9LE-NEXT: xvadddp vs2, vs3, vs2
; PWR9LE-NEXT: xvadddp vs0, vs1, vs0
; PWR9LE-NEXT: xvadddp vs0, vs0, vs2
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: xvadddp vs0, vs0, vs1
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f64_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xvadddp vs0, v4, v8
; PWR9BE-NEXT: xvadddp vs1, v2, v6
; PWR9BE-NEXT: xvadddp vs2, v5, v9
; PWR9BE-NEXT: xvadddp vs3, v3, v7
; PWR9BE-NEXT: xvadddp vs2, vs3, vs2
; PWR9BE-NEXT: xvadddp vs0, vs1, vs0
; PWR9BE-NEXT: xvadddp vs0, vs0, vs2
; PWR9BE-NEXT: xxswapd vs1, vs0
; PWR9BE-NEXT: xvadddp vs1, vs0, vs1
; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f64_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xvadddp vs0, v4, v8
; PWR10LE-NEXT: xvadddp vs1, v2, v6
; PWR10LE-NEXT: xvadddp vs2, v5, v9
; PWR10LE-NEXT: xvadddp vs3, v3, v7
; PWR10LE-NEXT: xvadddp vs2, vs3, vs2
; PWR10LE-NEXT: xvadddp vs0, vs1, vs0
; PWR10LE-NEXT: xvadddp vs0, vs0, vs2
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: xvadddp vs0, vs0, vs1
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f64_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xvadddp vs0, v4, v8
; PWR10BE-NEXT: xvadddp vs1, v2, v6
; PWR10BE-NEXT: xvadddp vs2, v5, v9
; PWR10BE-NEXT: xvadddp vs3, v3, v7
; PWR10BE-NEXT: xvadddp vs2, vs3, vs2
; PWR10BE-NEXT: xvadddp vs0, vs1, vs0
; PWR10BE-NEXT: xvadddp vs0, vs0, vs2
; PWR10BE-NEXT: xxswapd vs1, vs0
; PWR10BE-NEXT: xvadddp vs1, vs0, vs1
; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10BE-NEXT: blr
entry:
%0 = call fast double @llvm.vector.reduce.fadd.v16f64(double -0.000000e+00, <16 x double> %a)
ret double %0
}
;; Reduces a <32 x double> with llvm.vector.reduce.fadd, using -0.0 as the
;; start value and no fast-math flags on the call. The expected code is a chain
;; of scalar xsadddp over the registers v2-v13 plus four values fetched with
;; lxv from r1-relative offsets (224-272 in the little-endian expectations,
;; 240-288 in the big-endian ones) - presumably the part of %a that is passed
;; in memory; confirm against the ABI. The pwr9 and pwr10 expectations differ
;; only in where the lxv loads are placed relative to the first xsadddp.
define dso_local double @v32f64(<32 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v32f64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs4, v2
; PWR9LE-NEXT: xxswapd vs5, v3
; PWR9LE-NEXT: lxv vs3, 224(r1)
; PWR9LE-NEXT: lxv vs2, 240(r1)
; PWR9LE-NEXT: lxv vs1, 256(r1)
; PWR9LE-NEXT: lxv vs0, 272(r1)
; PWR9LE-NEXT: xsadddp f4, f4, v2
; PWR9LE-NEXT: xsadddp f4, f4, f5
; PWR9LE-NEXT: xxswapd vs5, v4
; PWR9LE-NEXT: xsadddp f4, f4, v3
; PWR9LE-NEXT: xsadddp f4, f4, f5
; PWR9LE-NEXT: xxswapd vs5, v5
; PWR9LE-NEXT: xsadddp f4, f4, v4
; PWR9LE-NEXT: xsadddp f4, f4, f5
; PWR9LE-NEXT: xxswapd vs5, v6
; PWR9LE-NEXT: xsadddp f4, f4, v5
; PWR9LE-NEXT: xsadddp f4, f4, f5
; PWR9LE-NEXT: xxswapd vs5, v7
; PWR9LE-NEXT: xsadddp f4, f4, v6
; PWR9LE-NEXT: xsadddp f4, f4, f5
; PWR9LE-NEXT: xxswapd vs5, v8
; PWR9LE-NEXT: xsadddp f4, f4, v7
; PWR9LE-NEXT: xsadddp f4, f4, f5
; PWR9LE-NEXT: xxswapd vs5, v9
; PWR9LE-NEXT: xsadddp f4, f4, v8
; PWR9LE-NEXT: xsadddp f4, f4, f5
; PWR9LE-NEXT: xxswapd vs5, v10
; PWR9LE-NEXT: xsadddp f4, f4, v9
; PWR9LE-NEXT: xsadddp f4, f4, f5
; PWR9LE-NEXT: xxswapd vs5, v11
; PWR9LE-NEXT: xsadddp f4, f4, v10
; PWR9LE-NEXT: xsadddp f4, f4, f5
; PWR9LE-NEXT: xxswapd vs5, v12
; PWR9LE-NEXT: xsadddp f4, f4, v11
; PWR9LE-NEXT: xsadddp f4, f4, f5
; PWR9LE-NEXT: xxswapd vs5, v13
; PWR9LE-NEXT: xsadddp f4, f4, v12
; PWR9LE-NEXT: xsadddp f4, f4, f5
; PWR9LE-NEXT: xxswapd vs5, vs3
; PWR9LE-NEXT: xsadddp f4, f4, v13
; PWR9LE-NEXT: xsadddp f4, f4, f5
; PWR9LE-NEXT: xsadddp f3, f4, f3
; PWR9LE-NEXT: xxswapd vs4, vs2
; PWR9LE-NEXT: xsadddp f3, f3, f4
; PWR9LE-NEXT: xsadddp f2, f3, f2
; PWR9LE-NEXT: xxswapd vs3, vs1
; PWR9LE-NEXT: xsadddp f2, f2, f3
; PWR9LE-NEXT: xsadddp f1, f2, f1
; PWR9LE-NEXT: xxswapd vs2, vs0
; PWR9LE-NEXT: xsadddp f1, f1, f2
; PWR9LE-NEXT: xsadddp f1, f1, f0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v32f64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd vs4, v2
; PWR9BE-NEXT: xxswapd vs5, v3
; PWR9BE-NEXT: lxv vs3, 240(r1)
; PWR9BE-NEXT: lxv vs2, 256(r1)
; PWR9BE-NEXT: lxv vs1, 272(r1)
; PWR9BE-NEXT: lxv vs0, 288(r1)
; PWR9BE-NEXT: xsadddp f4, v2, f4
; PWR9BE-NEXT: xsadddp f4, f4, v3
; PWR9BE-NEXT: xsadddp f4, f4, f5
; PWR9BE-NEXT: xxswapd vs5, v4
; PWR9BE-NEXT: xsadddp f4, f4, v4
; PWR9BE-NEXT: xsadddp f4, f4, f5
; PWR9BE-NEXT: xxswapd vs5, v5
; PWR9BE-NEXT: xsadddp f4, f4, v5
; PWR9BE-NEXT: xsadddp f4, f4, f5
; PWR9BE-NEXT: xxswapd vs5, v6
; PWR9BE-NEXT: xsadddp f4, f4, v6
; PWR9BE-NEXT: xsadddp f4, f4, f5
; PWR9BE-NEXT: xxswapd vs5, v7
; PWR9BE-NEXT: xsadddp f4, f4, v7
; PWR9BE-NEXT: xsadddp f4, f4, f5
; PWR9BE-NEXT: xxswapd vs5, v8
; PWR9BE-NEXT: xsadddp f4, f4, v8
; PWR9BE-NEXT: xsadddp f4, f4, f5
; PWR9BE-NEXT: xxswapd vs5, v9
; PWR9BE-NEXT: xsadddp f4, f4, v9
; PWR9BE-NEXT: xsadddp f4, f4, f5
; PWR9BE-NEXT: xxswapd vs5, v10
; PWR9BE-NEXT: xsadddp f4, f4, v10
; PWR9BE-NEXT: xsadddp f4, f4, f5
; PWR9BE-NEXT: xxswapd vs5, v11
; PWR9BE-NEXT: xsadddp f4, f4, v11
; PWR9BE-NEXT: xsadddp f4, f4, f5
; PWR9BE-NEXT: xxswapd vs5, v12
; PWR9BE-NEXT: xsadddp f4, f4, v12
; PWR9BE-NEXT: xsadddp f4, f4, f5
; PWR9BE-NEXT: xxswapd vs5, v13
; PWR9BE-NEXT: xsadddp f4, f4, v13
; PWR9BE-NEXT: xsadddp f4, f4, f5
; PWR9BE-NEXT: xsadddp f4, f4, f3
; PWR9BE-NEXT: xxswapd vs3, vs3
; PWR9BE-NEXT: xsadddp f3, f4, f3
; PWR9BE-NEXT: xsadddp f3, f3, f2
; PWR9BE-NEXT: xxswapd vs2, vs2
; PWR9BE-NEXT: xsadddp f2, f3, f2
; PWR9BE-NEXT: xsadddp f2, f2, f1
; PWR9BE-NEXT: xxswapd vs1, vs1
; PWR9BE-NEXT: xsadddp f1, f2, f1
; PWR9BE-NEXT: xsadddp f1, f1, f0
; PWR9BE-NEXT: xxswapd vs0, vs0
; PWR9BE-NEXT: xsadddp f1, f1, f0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v32f64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs4, v2
; PWR10LE-NEXT: xxswapd vs5, v3
; PWR10LE-NEXT: lxv vs3, 224(r1)
; PWR10LE-NEXT: lxv vs2, 240(r1)
; PWR10LE-NEXT: xsadddp f4, f4, v2
; PWR10LE-NEXT: lxv vs1, 256(r1)
; PWR10LE-NEXT: lxv vs0, 272(r1)
; PWR10LE-NEXT: xsadddp f4, f4, f5
; PWR10LE-NEXT: xxswapd vs5, v4
; PWR10LE-NEXT: xsadddp f4, f4, v3
; PWR10LE-NEXT: xsadddp f4, f4, f5
; PWR10LE-NEXT: xxswapd vs5, v5
; PWR10LE-NEXT: xsadddp f4, f4, v4
; PWR10LE-NEXT: xsadddp f4, f4, f5
; PWR10LE-NEXT: xxswapd vs5, v6
; PWR10LE-NEXT: xsadddp f4, f4, v5
; PWR10LE-NEXT: xsadddp f4, f4, f5
; PWR10LE-NEXT: xxswapd vs5, v7
; PWR10LE-NEXT: xsadddp f4, f4, v6
; PWR10LE-NEXT: xsadddp f4, f4, f5
; PWR10LE-NEXT: xxswapd vs5, v8
; PWR10LE-NEXT: xsadddp f4, f4, v7
; PWR10LE-NEXT: xsadddp f4, f4, f5
; PWR10LE-NEXT: xxswapd vs5, v9
; PWR10LE-NEXT: xsadddp f4, f4, v8
; PWR10LE-NEXT: xsadddp f4, f4, f5
; PWR10LE-NEXT: xxswapd vs5, v10
; PWR10LE-NEXT: xsadddp f4, f4, v9
; PWR10LE-NEXT: xsadddp f4, f4, f5
; PWR10LE-NEXT: xxswapd vs5, v11
; PWR10LE-NEXT: xsadddp f4, f4, v10
; PWR10LE-NEXT: xsadddp f4, f4, f5
; PWR10LE-NEXT: xxswapd vs5, v12
; PWR10LE-NEXT: xsadddp f4, f4, v11
; PWR10LE-NEXT: xsadddp f4, f4, f5
; PWR10LE-NEXT: xxswapd vs5, v13
; PWR10LE-NEXT: xsadddp f4, f4, v12
; PWR10LE-NEXT: xsadddp f4, f4, f5
; PWR10LE-NEXT: xxswapd vs5, vs3
; PWR10LE-NEXT: xsadddp f4, f4, v13
; PWR10LE-NEXT: xsadddp f4, f4, f5
; PWR10LE-NEXT: xsadddp f3, f4, f3
; PWR10LE-NEXT: xxswapd vs4, vs2
; PWR10LE-NEXT: xsadddp f3, f3, f4
; PWR10LE-NEXT: xsadddp f2, f3, f2
; PWR10LE-NEXT: xxswapd vs3, vs1
; PWR10LE-NEXT: xsadddp f2, f2, f3
; PWR10LE-NEXT: xsadddp f1, f2, f1
; PWR10LE-NEXT: xxswapd vs2, vs0
; PWR10LE-NEXT: xsadddp f1, f1, f2
; PWR10LE-NEXT: xsadddp f1, f1, f0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v32f64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd vs4, v2
; PWR10BE-NEXT: xxswapd vs5, v3
; PWR10BE-NEXT: lxv vs3, 240(r1)
; PWR10BE-NEXT: lxv vs2, 256(r1)
; PWR10BE-NEXT: xsadddp f4, v2, f4
; PWR10BE-NEXT: lxv vs1, 272(r1)
; PWR10BE-NEXT: lxv vs0, 288(r1)
; PWR10BE-NEXT: xsadddp f4, f4, v3
; PWR10BE-NEXT: xsadddp f4, f4, f5
; PWR10BE-NEXT: xxswapd vs5, v4
; PWR10BE-NEXT: xsadddp f4, f4, v4
; PWR10BE-NEXT: xsadddp f4, f4, f5
; PWR10BE-NEXT: xxswapd vs5, v5
; PWR10BE-NEXT: xsadddp f4, f4, v5
; PWR10BE-NEXT: xsadddp f4, f4, f5
; PWR10BE-NEXT: xxswapd vs5, v6
; PWR10BE-NEXT: xsadddp f4, f4, v6
; PWR10BE-NEXT: xsadddp f4, f4, f5
; PWR10BE-NEXT: xxswapd vs5, v7
; PWR10BE-NEXT: xsadddp f4, f4, v7
; PWR10BE-NEXT: xsadddp f4, f4, f5
; PWR10BE-NEXT: xxswapd vs5, v8
; PWR10BE-NEXT: xsadddp f4, f4, v8
; PWR10BE-NEXT: xsadddp f4, f4, f5
; PWR10BE-NEXT: xxswapd vs5, v9
; PWR10BE-NEXT: xsadddp f4, f4, v9
; PWR10BE-NEXT: xsadddp f4, f4, f5
; PWR10BE-NEXT: xxswapd vs5, v10
; PWR10BE-NEXT: xsadddp f4, f4, v10
; PWR10BE-NEXT: xsadddp f4, f4, f5
; PWR10BE-NEXT: xxswapd vs5, v11
; PWR10BE-NEXT: xsadddp f4, f4, v11
; PWR10BE-NEXT: xsadddp f4, f4, f5
; PWR10BE-NEXT: xxswapd vs5, v12
; PWR10BE-NEXT: xsadddp f4, f4, v12
; PWR10BE-NEXT: xsadddp f4, f4, f5
; PWR10BE-NEXT: xxswapd vs5, v13
; PWR10BE-NEXT: xsadddp f4, f4, v13
; PWR10BE-NEXT: xsadddp f4, f4, f5
; PWR10BE-NEXT: xsadddp f4, f4, f3
; PWR10BE-NEXT: xxswapd vs3, vs3
; PWR10BE-NEXT: xsadddp f3, f4, f3
; PWR10BE-NEXT: xsadddp f3, f3, f2
; PWR10BE-NEXT: xxswapd vs2, vs2
; PWR10BE-NEXT: xsadddp f2, f3, f2
; PWR10BE-NEXT: xsadddp f2, f2, f1
; PWR10BE-NEXT: xxswapd vs1, vs1
; PWR10BE-NEXT: xsadddp f1, f2, f1
; PWR10BE-NEXT: xsadddp f1, f1, f0
; PWR10BE-NEXT: xxswapd vs0, vs0
; PWR10BE-NEXT: xsadddp f1, f1, f0
; PWR10BE-NEXT: blr
entry:
%0 = call double @llvm.vector.reduce.fadd.v32f64(double -0.000000e+00, <32 x double> %a)
ret double %0
}
define dso_local double @v32f64_b(<32 x double> %a, double %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v32f64_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs5, v2
; PWR9LE-NEXT: lxv vs4, 224(r1)
; PWR9LE-NEXT: lxv vs3, 240(r1)
; PWR9LE-NEXT: lxv vs2, 256(r1)
; PWR9LE-NEXT: lxv vs0, 272(r1)
; PWR9LE-NEXT: xsadddp f1, f1, f5
; PWR9LE-NEXT: xxswapd vs5, v3
; PWR9LE-NEXT: xsadddp f1, f1, v2
; PWR9LE-NEXT: xsadddp f1, f1, f5
; PWR9LE-NEXT: xxswapd vs5, v4
; PWR9LE-NEXT: xsadddp f1, f1, v3
; PWR9LE-NEXT: xsadddp f1, f1, f5
; PWR9LE-NEXT: xxswapd vs5, v5
; PWR9LE-NEXT: xsadddp f1, f1, v4
; PWR9LE-NEXT: xsadddp f1, f1, f5
; PWR9LE-NEXT: xxswapd vs5, v6
; PWR9LE-NEXT: xsadddp f1, f1, v5
; PWR9LE-NEXT: xsadddp f1, f1, f5
; PWR9LE-NEXT: xxswapd vs5, v7
; PWR9LE-NEXT: xsadddp f1, f1, v6
; PWR9LE-NEXT: xsadddp f1, f1, f5
; PWR9LE-NEXT: xxswapd vs5, v8
; PWR9LE-NEXT: xsadddp f1, f1, v7
; PWR9LE-NEXT: xsadddp f1, f1, f5
; PWR9LE-NEXT: xxswapd vs5, v9
; PWR9LE-NEXT: xsadddp f1, f1, v8
; PWR9LE-NEXT: xsadddp f1, f1, f5
; PWR9LE-NEXT: xxswapd vs5, v10
; PWR9LE-NEXT: xsadddp f1, f1, v9
; PWR9LE-NEXT: xsadddp f1, f1, f5
; PWR9LE-NEXT: xxswapd vs5, v11
; PWR9LE-NEXT: xsadddp f1, f1, v10
; PWR9LE-NEXT: xsadddp f1, f1, f5
; PWR9LE-NEXT: xxswapd vs5, v12
; PWR9LE-NEXT: xsadddp f1, f1, v11
; PWR9LE-NEXT: xsadddp f1, f1, f5
; PWR9LE-NEXT: xxswapd vs5, v13
; PWR9LE-NEXT: xsadddp f1, f1, v12
; PWR9LE-NEXT: xsadddp f1, f1, f5
; PWR9LE-NEXT: xxswapd vs5, vs4
; PWR9LE-NEXT: xsadddp f1, f1, v13
; PWR9LE-NEXT: xsadddp f1, f1, f5
; PWR9LE-NEXT: xsadddp f1, f1, f4
; PWR9LE-NEXT: xxswapd vs4, vs3
; PWR9LE-NEXT: xsadddp f1, f1, f4
; PWR9LE-NEXT: xsadddp f1, f1, f3
; PWR9LE-NEXT: xxswapd vs3, vs2
; PWR9LE-NEXT: xsadddp f1, f1, f3
; PWR9LE-NEXT: xsadddp f1, f1, f2
; PWR9LE-NEXT: xxswapd vs2, vs0
; PWR9LE-NEXT: xsadddp f1, f1, f2
; PWR9LE-NEXT: xsadddp f1, f1, f0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v32f64_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xsadddp f1, f1, v2
; PWR9BE-NEXT: xxswapd vs5, v2
; PWR9BE-NEXT: lxv vs4, 240(r1)
; PWR9BE-NEXT: lxv vs3, 256(r1)
; PWR9BE-NEXT: lxv vs2, 272(r1)
; PWR9BE-NEXT: lxv vs0, 288(r1)
; PWR9BE-NEXT: xsadddp f1, f1, f5
; PWR9BE-NEXT: xxswapd vs5, v3
; PWR9BE-NEXT: xsadddp f1, f1, v3
; PWR9BE-NEXT: xsadddp f1, f1, f5
; PWR9BE-NEXT: xxswapd vs5, v4
; PWR9BE-NEXT: xsadddp f1, f1, v4
; PWR9BE-NEXT: xsadddp f1, f1, f5
; PWR9BE-NEXT: xxswapd vs5, v5
; PWR9BE-NEXT: xsadddp f1, f1, v5
; PWR9BE-NEXT: xsadddp f1, f1, f5
; PWR9BE-NEXT: xxswapd vs5, v6
; PWR9BE-NEXT: xsadddp f1, f1, v6
; PWR9BE-NEXT: xsadddp f1, f1, f5
; PWR9BE-NEXT: xxswapd vs5, v7
; PWR9BE-NEXT: xsadddp f1, f1, v7
; PWR9BE-NEXT: xsadddp f1, f1, f5
; PWR9BE-NEXT: xxswapd vs5, v8
; PWR9BE-NEXT: xsadddp f1, f1, v8
; PWR9BE-NEXT: xsadddp f1, f1, f5
; PWR9BE-NEXT: xxswapd vs5, v9
; PWR9BE-NEXT: xsadddp f1, f1, v9
; PWR9BE-NEXT: xsadddp f1, f1, f5
; PWR9BE-NEXT: xxswapd vs5, v10
; PWR9BE-NEXT: xsadddp f1, f1, v10
; PWR9BE-NEXT: xsadddp f1, f1, f5
; PWR9BE-NEXT: xxswapd vs5, v11
; PWR9BE-NEXT: xsadddp f1, f1, v11
; PWR9BE-NEXT: xsadddp f1, f1, f5
; PWR9BE-NEXT: xxswapd vs5, v12
; PWR9BE-NEXT: xsadddp f1, f1, v12
; PWR9BE-NEXT: xsadddp f1, f1, f5
; PWR9BE-NEXT: xxswapd vs5, v13
; PWR9BE-NEXT: xsadddp f1, f1, v13
; PWR9BE-NEXT: xsadddp f1, f1, f5
; PWR9BE-NEXT: xsadddp f1, f1, f4
; PWR9BE-NEXT: xxswapd vs4, vs4
; PWR9BE-NEXT: xsadddp f1, f1, f4
; PWR9BE-NEXT: xsadddp f1, f1, f3
; PWR9BE-NEXT: xxswapd vs3, vs3
; PWR9BE-NEXT: xsadddp f1, f1, f3
; PWR9BE-NEXT: xsadddp f1, f1, f2
; PWR9BE-NEXT: xxswapd vs2, vs2
; PWR9BE-NEXT: xsadddp f1, f1, f2
; PWR9BE-NEXT: xsadddp f1, f1, f0
; PWR9BE-NEXT: xxswapd vs0, vs0
; PWR9BE-NEXT: xsadddp f1, f1, f0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v32f64_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs5, v2
; PWR10LE-NEXT: lxv vs4, 224(r1)
; PWR10LE-NEXT: lxv vs3, 240(r1)
; PWR10LE-NEXT: xsadddp f1, f1, f5
; PWR10LE-NEXT: xxswapd vs5, v3
; PWR10LE-NEXT: lxv vs2, 256(r1)
; PWR10LE-NEXT: lxv vs0, 272(r1)
; PWR10LE-NEXT: xsadddp f1, f1, v2
; PWR10LE-NEXT: xsadddp f1, f1, f5
; PWR10LE-NEXT: xxswapd vs5, v4
; PWR10LE-NEXT: xsadddp f1, f1, v3
; PWR10LE-NEXT: xsadddp f1, f1, f5
; PWR10LE-NEXT: xxswapd vs5, v5
; PWR10LE-NEXT: xsadddp f1, f1, v4
; PWR10LE-NEXT: xsadddp f1, f1, f5
; PWR10LE-NEXT: xxswapd vs5, v6
; PWR10LE-NEXT: xsadddp f1, f1, v5
; PWR10LE-NEXT: xsadddp f1, f1, f5
; PWR10LE-NEXT: xxswapd vs5, v7
; PWR10LE-NEXT: xsadddp f1, f1, v6
; PWR10LE-NEXT: xsadddp f1, f1, f5
; PWR10LE-NEXT: xxswapd vs5, v8
; PWR10LE-NEXT: xsadddp f1, f1, v7
; PWR10LE-NEXT: xsadddp f1, f1, f5
; PWR10LE-NEXT: xxswapd vs5, v9
; PWR10LE-NEXT: xsadddp f1, f1, v8
; PWR10LE-NEXT: xsadddp f1, f1, f5
; PWR10LE-NEXT: xxswapd vs5, v10
; PWR10LE-NEXT: xsadddp f1, f1, v9
; PWR10LE-NEXT: xsadddp f1, f1, f5
; PWR10LE-NEXT: xxswapd vs5, v11
; PWR10LE-NEXT: xsadddp f1, f1, v10
; PWR10LE-NEXT: xsadddp f1, f1, f5
; PWR10LE-NEXT: xxswapd vs5, v12
; PWR10LE-NEXT: xsadddp f1, f1, v11
; PWR10LE-NEXT: xsadddp f1, f1, f5
; PWR10LE-NEXT: xxswapd vs5, v13
; PWR10LE-NEXT: xsadddp f1, f1, v12
; PWR10LE-NEXT: xsadddp f1, f1, f5
; PWR10LE-NEXT: xxswapd vs5, vs4
; PWR10LE-NEXT: xsadddp f1, f1, v13
; PWR10LE-NEXT: xsadddp f1, f1, f5
; PWR10LE-NEXT: xsadddp f1, f1, f4
; PWR10LE-NEXT: xxswapd vs4, vs3
; PWR10LE-NEXT: xsadddp f1, f1, f4
; PWR10LE-NEXT: xsadddp f1, f1, f3
; PWR10LE-NEXT: xxswapd vs3, vs2
; PWR10LE-NEXT: xsadddp f1, f1, f3
; PWR10LE-NEXT: xsadddp f1, f1, f2
; PWR10LE-NEXT: xxswapd vs2, vs0
; PWR10LE-NEXT: xsadddp f1, f1, f2
; PWR10LE-NEXT: xsadddp f1, f1, f0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v32f64_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xsadddp f1, f1, v2
; PWR10BE-NEXT: xxswapd vs5, v2
; PWR10BE-NEXT: lxv vs4, 240(r1)
; PWR10BE-NEXT: lxv vs3, 256(r1)
; PWR10BE-NEXT: xsadddp f1, f1, f5
; PWR10BE-NEXT: xxswapd vs5, v3
; PWR10BE-NEXT: lxv vs2, 272(r1)
; PWR10BE-NEXT: lxv vs0, 288(r1)
; PWR10BE-NEXT: xsadddp f1, f1, v3
; PWR10BE-NEXT: xsadddp f1, f1, f5
; PWR10BE-NEXT: xxswapd vs5, v4
; PWR10BE-NEXT: xsadddp f1, f1, v4
; PWR10BE-NEXT: xsadddp f1, f1, f5
; PWR10BE-NEXT: xxswapd vs5, v5
; PWR10BE-NEXT: xsadddp f1, f1, v5
; PWR10BE-NEXT: xsadddp f1, f1, f5
; PWR10BE-NEXT: xxswapd vs5, v6
; PWR10BE-NEXT: xsadddp f1, f1, v6
; PWR10BE-NEXT: xsadddp f1, f1, f5
; PWR10BE-NEXT: xxswapd vs5, v7
; PWR10BE-NEXT: xsadddp f1, f1, v7
; PWR10BE-NEXT: xsadddp f1, f1, f5
; PWR10BE-NEXT: xxswapd vs5, v8
; PWR10BE-NEXT: xsadddp f1, f1, v8
; PWR10BE-NEXT: xsadddp f1, f1, f5
; PWR10BE-NEXT: xxswapd vs5, v9
; PWR10BE-NEXT: xsadddp f1, f1, v9
; PWR10BE-NEXT: xsadddp f1, f1, f5
; PWR10BE-NEXT: xxswapd vs5, v10
; PWR10BE-NEXT: xsadddp f1, f1, v10
; PWR10BE-NEXT: xsadddp f1, f1, f5
; PWR10BE-NEXT: xxswapd vs5, v11
; PWR10BE-NEXT: xsadddp f1, f1, v11
; PWR10BE-NEXT: xsadddp f1, f1, f5
; PWR10BE-NEXT: xxswapd vs5, v12
; PWR10BE-NEXT: xsadddp f1, f1, v12
; PWR10BE-NEXT: xsadddp f1, f1, f5
; PWR10BE-NEXT: xxswapd vs5, v13
; PWR10BE-NEXT: xsadddp f1, f1, v13
; PWR10BE-NEXT: xsadddp f1, f1, f5
; PWR10BE-NEXT: xsadddp f1, f1, f4
; PWR10BE-NEXT: xxswapd vs4, vs4
; PWR10BE-NEXT: xsadddp f1, f1, f4
; PWR10BE-NEXT: xsadddp f1, f1, f3
; PWR10BE-NEXT: xxswapd vs3, vs3
; PWR10BE-NEXT: xsadddp f1, f1, f3
; PWR10BE-NEXT: xsadddp f1, f1, f2
; PWR10BE-NEXT: xxswapd vs2, vs2
; PWR10BE-NEXT: xsadddp f1, f1, f2
; PWR10BE-NEXT: xsadddp f1, f1, f0
; PWR10BE-NEXT: xxswapd vs0, vs0
; PWR10BE-NEXT: xsadddp f1, f1, f0
; PWR10BE-NEXT: blr
entry:
%0 = call double @llvm.vector.reduce.fadd.v32f64(double %b, <32 x double> %a)
ret double %0
}
;; Reassociable fadd reduction of <32 x double>: the call below carries the
;; `fast` flag and a -0.0 start value. All four check prefixes expect the
;; sixteen 2 x f64 parts to be combined with vector adds (xvadddp), followed by
;; one xxswapd + xvadddp that folds the two doublewords of the last vector; the
;; little-endian runs expect one more xxswapd before the return.
;; Four of the parts are expected to be loaded from the stack with lxv; the
;; other twelve are used directly from v2-v13.
define dso_local double @v32f64_fast(<32 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v32f64_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: lxv vs0, 256(r1)
; PWR9LE-NEXT: lxv vs1, 224(r1)
; PWR9LE-NEXT: lxv vs2, 272(r1)
; PWR9LE-NEXT: lxv vs3, 240(r1)
; PWR9LE-NEXT: xvadddp vs4, v3, v11
; PWR9LE-NEXT: xvadddp vs5, v5, v13
; PWR9LE-NEXT: xvadddp vs6, v2, v10
; PWR9LE-NEXT: xvadddp vs7, v4, v12
; PWR9LE-NEXT: xvadddp vs3, v7, vs3
; PWR9LE-NEXT: xvadddp vs2, v9, vs2
; PWR9LE-NEXT: xvadddp vs1, v6, vs1
; PWR9LE-NEXT: xvadddp vs0, v8, vs0
; PWR9LE-NEXT: xvadddp vs0, vs7, vs0
; PWR9LE-NEXT: xvadddp vs1, vs6, vs1
; PWR9LE-NEXT: xvadddp vs2, vs5, vs2
; PWR9LE-NEXT: xvadddp vs3, vs4, vs3
; PWR9LE-NEXT: xvadddp vs2, vs3, vs2
; PWR9LE-NEXT: xvadddp vs0, vs1, vs0
; PWR9LE-NEXT: xvadddp vs0, vs0, vs2
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: xvadddp vs0, vs0, vs1
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v32f64_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: lxv vs0, 272(r1)
; PWR9BE-NEXT: lxv vs1, 240(r1)
; PWR9BE-NEXT: lxv vs2, 288(r1)
; PWR9BE-NEXT: lxv vs3, 256(r1)
; PWR9BE-NEXT: xvadddp vs4, v3, v11
; PWR9BE-NEXT: xvadddp vs5, v5, v13
; PWR9BE-NEXT: xvadddp vs6, v2, v10
; PWR9BE-NEXT: xvadddp vs7, v4, v12
; PWR9BE-NEXT: xvadddp vs3, v7, vs3
; PWR9BE-NEXT: xvadddp vs2, v9, vs2
; PWR9BE-NEXT: xvadddp vs1, v6, vs1
; PWR9BE-NEXT: xvadddp vs0, v8, vs0
; PWR9BE-NEXT: xvadddp vs0, vs7, vs0
; PWR9BE-NEXT: xvadddp vs1, vs6, vs1
; PWR9BE-NEXT: xvadddp vs2, vs5, vs2
; PWR9BE-NEXT: xvadddp vs3, vs4, vs3
; PWR9BE-NEXT: xvadddp vs2, vs3, vs2
; PWR9BE-NEXT: xvadddp vs0, vs1, vs0
; PWR9BE-NEXT: xvadddp vs0, vs0, vs2
; PWR9BE-NEXT: xxswapd vs1, vs0
; PWR9BE-NEXT: xvadddp vs1, vs0, vs1
; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v32f64_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: lxv vs0, 256(r1)
; PWR10LE-NEXT: lxv vs1, 224(r1)
; PWR10LE-NEXT: xvadddp vs4, v3, v11
; PWR10LE-NEXT: xvadddp vs5, v5, v13
; PWR10LE-NEXT: xvadddp vs6, v2, v10
; PWR10LE-NEXT: xvadddp vs7, v4, v12
; PWR10LE-NEXT: xvadddp vs1, v6, vs1
; PWR10LE-NEXT: lxv vs2, 272(r1)
; PWR10LE-NEXT: lxv vs3, 240(r1)
; PWR10LE-NEXT: xvadddp vs3, v7, vs3
; PWR10LE-NEXT: xvadddp vs2, v9, vs2
; PWR10LE-NEXT: xvadddp vs0, v8, vs0
; PWR10LE-NEXT: xvadddp vs0, vs7, vs0
; PWR10LE-NEXT: xvadddp vs1, vs6, vs1
; PWR10LE-NEXT: xvadddp vs2, vs5, vs2
; PWR10LE-NEXT: xvadddp vs3, vs4, vs3
; PWR10LE-NEXT: xvadddp vs2, vs3, vs2
; PWR10LE-NEXT: xvadddp vs0, vs1, vs0
; PWR10LE-NEXT: xvadddp vs0, vs0, vs2
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: xvadddp vs0, vs0, vs1
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v32f64_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: lxv vs0, 272(r1)
; PWR10BE-NEXT: lxv vs1, 240(r1)
; PWR10BE-NEXT: xvadddp vs4, v3, v11
; PWR10BE-NEXT: xvadddp vs5, v5, v13
; PWR10BE-NEXT: xvadddp vs6, v2, v10
; PWR10BE-NEXT: xvadddp vs7, v4, v12
; PWR10BE-NEXT: xvadddp vs1, v6, vs1
; PWR10BE-NEXT: lxv vs2, 288(r1)
; PWR10BE-NEXT: lxv vs3, 256(r1)
; PWR10BE-NEXT: xvadddp vs3, v7, vs3
; PWR10BE-NEXT: xvadddp vs2, v9, vs2
; PWR10BE-NEXT: xvadddp vs0, v8, vs0
; PWR10BE-NEXT: xvadddp vs0, vs7, vs0
; PWR10BE-NEXT: xvadddp vs1, vs6, vs1
; PWR10BE-NEXT: xvadddp vs2, vs5, vs2
; PWR10BE-NEXT: xvadddp vs3, vs4, vs3
; PWR10BE-NEXT: xvadddp vs2, vs3, vs2
; PWR10BE-NEXT: xvadddp vs0, vs1, vs0
; PWR10BE-NEXT: xvadddp vs0, vs0, vs2
; PWR10BE-NEXT: xxswapd vs1, vs0
; PWR10BE-NEXT: xvadddp vs1, vs0, vs1
; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10BE-NEXT: blr
entry:
%0 = call fast double @llvm.vector.reduce.fadd.v32f64(double -0.000000e+00, <32 x double> %a)
ret double %0
}
;; Ordered fadd reduction of <64 x double>: the call below has no fast-math
;; flags and a -0.0 start value. All four check prefixes expect one serial
;; chain of scalar adds (xsadddp), with an xxswapd for each vector to reach its
;; other doubleword, and no vector adds.
;; Twenty of the thirty-two 2 x f64 parts are expected to be loaded from the
;; stack with lxv (offsets 224-528 from r1 on little-endian, 240-544 on
;; big-endian); the other twelve are used directly from v2-v13.
define dso_local double @v64f64(<64 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v64f64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd v18, v2
; PWR9LE-NEXT: lxv v17, 224(r1)
; PWR9LE-NEXT: lxv v16, 240(r1)
; PWR9LE-NEXT: lxv v15, 256(r1)
; PWR9LE-NEXT: lxv v14, 272(r1)
; PWR9LE-NEXT: xsadddp v2, v18, v2
; PWR9LE-NEXT: xxswapd v18, v3
; PWR9LE-NEXT: lxv v1, 288(r1)
; PWR9LE-NEXT: lxv v0, 304(r1)
; PWR9LE-NEXT: lxv vs13, 320(r1)
; PWR9LE-NEXT: lxv vs12, 336(r1)
; PWR9LE-NEXT: lxv vs11, 352(r1)
; PWR9LE-NEXT: lxv vs10, 368(r1)
; PWR9LE-NEXT: lxv vs9, 384(r1)
; PWR9LE-NEXT: lxv vs8, 400(r1)
; PWR9LE-NEXT: lxv vs7, 416(r1)
; PWR9LE-NEXT: lxv vs6, 432(r1)
; PWR9LE-NEXT: lxv vs5, 448(r1)
; PWR9LE-NEXT: lxv vs4, 464(r1)
; PWR9LE-NEXT: xsadddp v2, v2, v18
; PWR9LE-NEXT: lxv vs3, 480(r1)
; PWR9LE-NEXT: lxv vs2, 496(r1)
; PWR9LE-NEXT: lxv vs1, 512(r1)
; PWR9LE-NEXT: lxv vs0, 528(r1)
; PWR9LE-NEXT: xsadddp v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v4
; PWR9LE-NEXT: xsadddp v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v5
; PWR9LE-NEXT: xsadddp v2, v2, v4
; PWR9LE-NEXT: xsadddp v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v6
; PWR9LE-NEXT: xsadddp v2, v2, v5
; PWR9LE-NEXT: xsadddp v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v7
; PWR9LE-NEXT: xsadddp v2, v2, v6
; PWR9LE-NEXT: xsadddp v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v8
; PWR9LE-NEXT: xsadddp v2, v2, v7
; PWR9LE-NEXT: xsadddp v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v9
; PWR9LE-NEXT: xsadddp v2, v2, v8
; PWR9LE-NEXT: xsadddp v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v10
; PWR9LE-NEXT: xsadddp v2, v2, v9
; PWR9LE-NEXT: xsadddp v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v11
; PWR9LE-NEXT: xsadddp v2, v2, v10
; PWR9LE-NEXT: xsadddp v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v12
; PWR9LE-NEXT: xsadddp v2, v2, v11
; PWR9LE-NEXT: xsadddp v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v13
; PWR9LE-NEXT: xsadddp v2, v2, v12
; PWR9LE-NEXT: xsadddp v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v17
; PWR9LE-NEXT: xsadddp v2, v2, v13
; PWR9LE-NEXT: xsadddp v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v16
; PWR9LE-NEXT: xsadddp v2, v2, v17
; PWR9LE-NEXT: xsadddp v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v15
; PWR9LE-NEXT: xsadddp v2, v2, v16
; PWR9LE-NEXT: xsadddp v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v14
; PWR9LE-NEXT: xsadddp v2, v2, v15
; PWR9LE-NEXT: xsadddp v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v1
; PWR9LE-NEXT: xsadddp v2, v2, v14
; PWR9LE-NEXT: xsadddp v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, v0
; PWR9LE-NEXT: xsadddp v2, v2, v1
; PWR9LE-NEXT: xsadddp v2, v2, v3
; PWR9LE-NEXT: xxswapd v3, vs13
; PWR9LE-NEXT: xsadddp v2, v2, v0
; PWR9LE-NEXT: xsadddp v2, v2, v3
; PWR9LE-NEXT: xsadddp f13, v2, f13
; PWR9LE-NEXT: xxswapd v2, vs12
; PWR9LE-NEXT: xsadddp f13, f13, v2
; PWR9LE-NEXT: xsadddp f12, f13, f12
; PWR9LE-NEXT: xxswapd vs13, vs11
; PWR9LE-NEXT: xsadddp f12, f12, f13
; PWR9LE-NEXT: xsadddp f11, f12, f11
; PWR9LE-NEXT: xxswapd vs12, vs10
; PWR9LE-NEXT: xsadddp f11, f11, f12
; PWR9LE-NEXT: xsadddp f10, f11, f10
; PWR9LE-NEXT: xxswapd vs11, vs9
; PWR9LE-NEXT: xsadddp f10, f10, f11
; PWR9LE-NEXT: xsadddp f9, f10, f9
; PWR9LE-NEXT: xxswapd vs10, vs8
; PWR9LE-NEXT: xsadddp f9, f9, f10
; PWR9LE-NEXT: xsadddp f8, f9, f8
; PWR9LE-NEXT: xxswapd vs9, vs7
; PWR9LE-NEXT: xsadddp f8, f8, f9
; PWR9LE-NEXT: xsadddp f7, f8, f7
; PWR9LE-NEXT: xxswapd vs8, vs6
; PWR9LE-NEXT: xsadddp f7, f7, f8
; PWR9LE-NEXT: xsadddp f6, f7, f6
; PWR9LE-NEXT: xxswapd vs7, vs5
; PWR9LE-NEXT: xsadddp f6, f6, f7
; PWR9LE-NEXT: xsadddp f5, f6, f5
; PWR9LE-NEXT: xxswapd vs6, vs4
; PWR9LE-NEXT: xsadddp f5, f5, f6
; PWR9LE-NEXT: xsadddp f4, f5, f4
; PWR9LE-NEXT: xxswapd vs5, vs3
; PWR9LE-NEXT: xsadddp f4, f4, f5
; PWR9LE-NEXT: xsadddp f3, f4, f3
; PWR9LE-NEXT: xxswapd vs4, vs2
; PWR9LE-NEXT: xsadddp f3, f3, f4
; PWR9LE-NEXT: xsadddp f2, f3, f2
; PWR9LE-NEXT: xxswapd vs3, vs1
; PWR9LE-NEXT: xsadddp f2, f2, f3
; PWR9LE-NEXT: xsadddp f1, f2, f1
; PWR9LE-NEXT: xxswapd vs2, vs0
; PWR9LE-NEXT: xsadddp f1, f1, f2
; PWR9LE-NEXT: xsadddp f1, f1, f0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v64f64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd v18, v2
; PWR9BE-NEXT: lxv v17, 240(r1)
; PWR9BE-NEXT: lxv v16, 256(r1)
; PWR9BE-NEXT: lxv v15, 272(r1)
; PWR9BE-NEXT: lxv v14, 288(r1)
; PWR9BE-NEXT: xsadddp v2, v2, v18
; PWR9BE-NEXT: lxv v1, 304(r1)
; PWR9BE-NEXT: lxv v0, 320(r1)
; PWR9BE-NEXT: lxv vs13, 336(r1)
; PWR9BE-NEXT: lxv vs12, 352(r1)
; PWR9BE-NEXT: lxv vs11, 368(r1)
; PWR9BE-NEXT: lxv vs10, 384(r1)
; PWR9BE-NEXT: lxv vs9, 400(r1)
; PWR9BE-NEXT: lxv vs8, 416(r1)
; PWR9BE-NEXT: lxv vs7, 432(r1)
; PWR9BE-NEXT: lxv vs6, 448(r1)
; PWR9BE-NEXT: lxv vs5, 464(r1)
; PWR9BE-NEXT: lxv vs4, 480(r1)
; PWR9BE-NEXT: xsadddp v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v3
; PWR9BE-NEXT: lxv vs3, 496(r1)
; PWR9BE-NEXT: lxv vs2, 512(r1)
; PWR9BE-NEXT: lxv vs1, 528(r1)
; PWR9BE-NEXT: lxv vs0, 544(r1)
; PWR9BE-NEXT: xsadddp v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v4
; PWR9BE-NEXT: xsadddp v2, v2, v4
; PWR9BE-NEXT: xsadddp v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v5
; PWR9BE-NEXT: xsadddp v2, v2, v5
; PWR9BE-NEXT: xsadddp v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v6
; PWR9BE-NEXT: xsadddp v2, v2, v6
; PWR9BE-NEXT: xsadddp v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v7
; PWR9BE-NEXT: xsadddp v2, v2, v7
; PWR9BE-NEXT: xsadddp v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v8
; PWR9BE-NEXT: xsadddp v2, v2, v8
; PWR9BE-NEXT: xsadddp v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v9
; PWR9BE-NEXT: xsadddp v2, v2, v9
; PWR9BE-NEXT: xsadddp v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v10
; PWR9BE-NEXT: xsadddp v2, v2, v10
; PWR9BE-NEXT: xsadddp v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v11
; PWR9BE-NEXT: xsadddp v2, v2, v11
; PWR9BE-NEXT: xsadddp v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v12
; PWR9BE-NEXT: xsadddp v2, v2, v12
; PWR9BE-NEXT: xsadddp v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v13
; PWR9BE-NEXT: xsadddp v2, v2, v13
; PWR9BE-NEXT: xsadddp v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v17
; PWR9BE-NEXT: xsadddp v2, v2, v17
; PWR9BE-NEXT: xsadddp v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v16
; PWR9BE-NEXT: xsadddp v2, v2, v16
; PWR9BE-NEXT: xsadddp v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v15
; PWR9BE-NEXT: xsadddp v2, v2, v15
; PWR9BE-NEXT: xsadddp v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v14
; PWR9BE-NEXT: xsadddp v2, v2, v14
; PWR9BE-NEXT: xsadddp v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v1
; PWR9BE-NEXT: xsadddp v2, v2, v1
; PWR9BE-NEXT: xsadddp v2, v2, v3
; PWR9BE-NEXT: xxswapd v3, v0
; PWR9BE-NEXT: xsadddp v2, v2, v0
; PWR9BE-NEXT: xsadddp v2, v2, v3
; PWR9BE-NEXT: xsadddp v2, v2, f13
; PWR9BE-NEXT: xxswapd vs13, vs13
; PWR9BE-NEXT: xsadddp f13, v2, f13
; PWR9BE-NEXT: xsadddp f13, f13, f12
; PWR9BE-NEXT: xxswapd vs12, vs12
; PWR9BE-NEXT: xsadddp f12, f13, f12
; PWR9BE-NEXT: xsadddp f12, f12, f11
; PWR9BE-NEXT: xxswapd vs11, vs11
; PWR9BE-NEXT: xsadddp f11, f12, f11
; PWR9BE-NEXT: xsadddp f11, f11, f10
; PWR9BE-NEXT: xxswapd vs10, vs10
; PWR9BE-NEXT: xsadddp f10, f11, f10
; PWR9BE-NEXT: xsadddp f10, f10, f9
; PWR9BE-NEXT: xxswapd vs9, vs9
; PWR9BE-NEXT: xsadddp f9, f10, f9
; PWR9BE-NEXT: xsadddp f9, f9, f8
; PWR9BE-NEXT: xxswapd vs8, vs8
; PWR9BE-NEXT: xsadddp f8, f9, f8
; PWR9BE-NEXT: xsadddp f8, f8, f7
; PWR9BE-NEXT: xxswapd vs7, vs7
; PWR9BE-NEXT: xsadddp f7, f8, f7
; PWR9BE-NEXT: xsadddp f7, f7, f6
; PWR9BE-NEXT: xxswapd vs6, vs6
; PWR9BE-NEXT: xsadddp f6, f7, f6
; PWR9BE-NEXT: xsadddp f6, f6, f5
; PWR9BE-NEXT: xxswapd vs5, vs5
; PWR9BE-NEXT: xsadddp f5, f6, f5
; PWR9BE-NEXT: xsadddp f5, f5, f4
; PWR9BE-NEXT: xxswapd vs4, vs4
; PWR9BE-NEXT: xsadddp f4, f5, f4
; PWR9BE-NEXT: xsadddp f4, f4, f3
; PWR9BE-NEXT: xxswapd vs3, vs3
; PWR9BE-NEXT: xsadddp f3, f4, f3
; PWR9BE-NEXT: xsadddp f3, f3, f2
; PWR9BE-NEXT: xxswapd vs2, vs2
; PWR9BE-NEXT: xsadddp f2, f3, f2
; PWR9BE-NEXT: xsadddp f2, f2, f1
; PWR9BE-NEXT: xxswapd vs1, vs1
; PWR9BE-NEXT: xsadddp f1, f2, f1
; PWR9BE-NEXT: xsadddp f1, f1, f0
; PWR9BE-NEXT: xxswapd vs0, vs0
; PWR9BE-NEXT: xsadddp f1, f1, f0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v64f64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd v18, v2
; PWR10LE-NEXT: lxv v17, 224(r1)
; PWR10LE-NEXT: lxv v16, 240(r1)
; PWR10LE-NEXT: xsadddp v2, v18, v2
; PWR10LE-NEXT: xxswapd v18, v3
; PWR10LE-NEXT: lxv v15, 256(r1)
; PWR10LE-NEXT: lxv v14, 272(r1)
; PWR10LE-NEXT: lxv v1, 288(r1)
; PWR10LE-NEXT: lxv v0, 304(r1)
; PWR10LE-NEXT: lxv vs13, 320(r1)
; PWR10LE-NEXT: lxv vs12, 336(r1)
; PWR10LE-NEXT: lxv vs11, 352(r1)
; PWR10LE-NEXT: lxv vs10, 368(r1)
; PWR10LE-NEXT: xsadddp v2, v2, v18
; PWR10LE-NEXT: lxv vs9, 384(r1)
; PWR10LE-NEXT: lxv vs8, 400(r1)
; PWR10LE-NEXT: lxv vs7, 416(r1)
; PWR10LE-NEXT: lxv vs6, 432(r1)
; PWR10LE-NEXT: lxv vs5, 448(r1)
; PWR10LE-NEXT: lxv vs4, 464(r1)
; PWR10LE-NEXT: lxv vs3, 480(r1)
; PWR10LE-NEXT: lxv vs2, 496(r1)
; PWR10LE-NEXT: lxv vs1, 512(r1)
; PWR10LE-NEXT: lxv vs0, 528(r1)
; PWR10LE-NEXT: xsadddp v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v4
; PWR10LE-NEXT: xsadddp v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v5
; PWR10LE-NEXT: xsadddp v2, v2, v4
; PWR10LE-NEXT: xsadddp v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v6
; PWR10LE-NEXT: xsadddp v2, v2, v5
; PWR10LE-NEXT: xsadddp v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v7
; PWR10LE-NEXT: xsadddp v2, v2, v6
; PWR10LE-NEXT: xsadddp v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v8
; PWR10LE-NEXT: xsadddp v2, v2, v7
; PWR10LE-NEXT: xsadddp v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v9
; PWR10LE-NEXT: xsadddp v2, v2, v8
; PWR10LE-NEXT: xsadddp v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v10
; PWR10LE-NEXT: xsadddp v2, v2, v9
; PWR10LE-NEXT: xsadddp v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v11
; PWR10LE-NEXT: xsadddp v2, v2, v10
; PWR10LE-NEXT: xsadddp v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v12
; PWR10LE-NEXT: xsadddp v2, v2, v11
; PWR10LE-NEXT: xsadddp v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v13
; PWR10LE-NEXT: xsadddp v2, v2, v12
; PWR10LE-NEXT: xsadddp v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v17
; PWR10LE-NEXT: xsadddp v2, v2, v13
; PWR10LE-NEXT: xsadddp v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v16
; PWR10LE-NEXT: xsadddp v2, v2, v17
; PWR10LE-NEXT: xsadddp v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v15
; PWR10LE-NEXT: xsadddp v2, v2, v16
; PWR10LE-NEXT: xsadddp v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v14
; PWR10LE-NEXT: xsadddp v2, v2, v15
; PWR10LE-NEXT: xsadddp v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v1
; PWR10LE-NEXT: xsadddp v2, v2, v14
; PWR10LE-NEXT: xsadddp v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, v0
; PWR10LE-NEXT: xsadddp v2, v2, v1
; PWR10LE-NEXT: xsadddp v2, v2, v3
; PWR10LE-NEXT: xxswapd v3, vs13
; PWR10LE-NEXT: xsadddp v2, v2, v0
; PWR10LE-NEXT: xsadddp v2, v2, v3
; PWR10LE-NEXT: xsadddp f13, v2, f13
; PWR10LE-NEXT: xxswapd v2, vs12
; PWR10LE-NEXT: xsadddp f13, f13, v2
; PWR10LE-NEXT: xsadddp f12, f13, f12
; PWR10LE-NEXT: xxswapd vs13, vs11
; PWR10LE-NEXT: xsadddp f12, f12, f13
; PWR10LE-NEXT: xsadddp f11, f12, f11
; PWR10LE-NEXT: xxswapd vs12, vs10
; PWR10LE-NEXT: xsadddp f11, f11, f12
; PWR10LE-NEXT: xsadddp f10, f11, f10
; PWR10LE-NEXT: xxswapd vs11, vs9
; PWR10LE-NEXT: xsadddp f10, f10, f11
; PWR10LE-NEXT: xsadddp f9, f10, f9
; PWR10LE-NEXT: xxswapd vs10, vs8
; PWR10LE-NEXT: xsadddp f9, f9, f10
; PWR10LE-NEXT: xsadddp f8, f9, f8
; PWR10LE-NEXT: xxswapd vs9, vs7
; PWR10LE-NEXT: xsadddp f8, f8, f9
; PWR10LE-NEXT: xsadddp f7, f8, f7
; PWR10LE-NEXT: xxswapd vs8, vs6
; PWR10LE-NEXT: xsadddp f7, f7, f8
; PWR10LE-NEXT: xsadddp f6, f7, f6
; PWR10LE-NEXT: xxswapd vs7, vs5
; PWR10LE-NEXT: xsadddp f6, f6, f7
; PWR10LE-NEXT: xsadddp f5, f6, f5
; PWR10LE-NEXT: xxswapd vs6, vs4
; PWR10LE-NEXT: xsadddp f5, f5, f6
; PWR10LE-NEXT: xsadddp f4, f5, f4
; PWR10LE-NEXT: xxswapd vs5, vs3
; PWR10LE-NEXT: xsadddp f4, f4, f5
; PWR10LE-NEXT: xsadddp f3, f4, f3
; PWR10LE-NEXT: xxswapd vs4, vs2
; PWR10LE-NEXT: xsadddp f3, f3, f4
; PWR10LE-NEXT: xsadddp f2, f3, f2
; PWR10LE-NEXT: xxswapd vs3, vs1
; PWR10LE-NEXT: xsadddp f2, f2, f3
; PWR10LE-NEXT: xsadddp f1, f2, f1
; PWR10LE-NEXT: xxswapd vs2, vs0
; PWR10LE-NEXT: xsadddp f1, f1, f2
; PWR10LE-NEXT: xsadddp f1, f1, f0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v64f64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd v18, v2
; PWR10BE-NEXT: lxv v17, 240(r1)
; PWR10BE-NEXT: lxv v16, 256(r1)
; PWR10BE-NEXT: xsadddp v2, v2, v18
; PWR10BE-NEXT: lxv v15, 272(r1)
; PWR10BE-NEXT: lxv v14, 288(r1)
; PWR10BE-NEXT: lxv v1, 304(r1)
; PWR10BE-NEXT: lxv v0, 320(r1)
; PWR10BE-NEXT: lxv vs13, 336(r1)
; PWR10BE-NEXT: lxv vs12, 352(r1)
; PWR10BE-NEXT: lxv vs11, 368(r1)
; PWR10BE-NEXT: lxv vs10, 384(r1)
; PWR10BE-NEXT: xsadddp v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v3
; PWR10BE-NEXT: lxv vs9, 400(r1)
; PWR10BE-NEXT: lxv vs8, 416(r1)
; PWR10BE-NEXT: lxv vs7, 432(r1)
; PWR10BE-NEXT: lxv vs6, 448(r1)
; PWR10BE-NEXT: lxv vs5, 464(r1)
; PWR10BE-NEXT: lxv vs4, 480(r1)
; PWR10BE-NEXT: lxv vs3, 496(r1)
; PWR10BE-NEXT: lxv vs2, 512(r1)
; PWR10BE-NEXT: lxv vs1, 528(r1)
; PWR10BE-NEXT: lxv vs0, 544(r1)
; PWR10BE-NEXT: xsadddp v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v4
; PWR10BE-NEXT: xsadddp v2, v2, v4
; PWR10BE-NEXT: xsadddp v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v5
; PWR10BE-NEXT: xsadddp v2, v2, v5
; PWR10BE-NEXT: xsadddp v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v6
; PWR10BE-NEXT: xsadddp v2, v2, v6
; PWR10BE-NEXT: xsadddp v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v7
; PWR10BE-NEXT: xsadddp v2, v2, v7
; PWR10BE-NEXT: xsadddp v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v8
; PWR10BE-NEXT: xsadddp v2, v2, v8
; PWR10BE-NEXT: xsadddp v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v9
; PWR10BE-NEXT: xsadddp v2, v2, v9
; PWR10BE-NEXT: xsadddp v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v10
; PWR10BE-NEXT: xsadddp v2, v2, v10
; PWR10BE-NEXT: xsadddp v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v11
; PWR10BE-NEXT: xsadddp v2, v2, v11
; PWR10BE-NEXT: xsadddp v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v12
; PWR10BE-NEXT: xsadddp v2, v2, v12
; PWR10BE-NEXT: xsadddp v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v13
; PWR10BE-NEXT: xsadddp v2, v2, v13
; PWR10BE-NEXT: xsadddp v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v17
; PWR10BE-NEXT: xsadddp v2, v2, v17
; PWR10BE-NEXT: xsadddp v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v16
; PWR10BE-NEXT: xsadddp v2, v2, v16
; PWR10BE-NEXT: xsadddp v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v15
; PWR10BE-NEXT: xsadddp v2, v2, v15
; PWR10BE-NEXT: xsadddp v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v14
; PWR10BE-NEXT: xsadddp v2, v2, v14
; PWR10BE-NEXT: xsadddp v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v1
; PWR10BE-NEXT: xsadddp v2, v2, v1
; PWR10BE-NEXT: xsadddp v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v0
; PWR10BE-NEXT: xsadddp v2, v2, v0
; PWR10BE-NEXT: xsadddp v2, v2, v3
; PWR10BE-NEXT: xsadddp v2, v2, f13
; PWR10BE-NEXT: xxswapd vs13, vs13
; PWR10BE-NEXT: xsadddp f13, v2, f13
; PWR10BE-NEXT: xsadddp f13, f13, f12
; PWR10BE-NEXT: xxswapd vs12, vs12
; PWR10BE-NEXT: xsadddp f12, f13, f12
; PWR10BE-NEXT: xsadddp f12, f12, f11
; PWR10BE-NEXT: xxswapd vs11, vs11
; PWR10BE-NEXT: xsadddp f11, f12, f11
; PWR10BE-NEXT: xsadddp f11, f11, f10
; PWR10BE-NEXT: xxswapd vs10, vs10
; PWR10BE-NEXT: xsadddp f10, f11, f10
; PWR10BE-NEXT: xsadddp f10, f10, f9
; PWR10BE-NEXT: xxswapd vs9, vs9
; PWR10BE-NEXT: xsadddp f9, f10, f9
; PWR10BE-NEXT: xsadddp f9, f9, f8
; PWR10BE-NEXT: xxswapd vs8, vs8
; PWR10BE-NEXT: xsadddp f8, f9, f8
; PWR10BE-NEXT: xsadddp f8, f8, f7
; PWR10BE-NEXT: xxswapd vs7, vs7
; PWR10BE-NEXT: xsadddp f7, f8, f7
; PWR10BE-NEXT: xsadddp f7, f7, f6
; PWR10BE-NEXT: xxswapd vs6, vs6
; PWR10BE-NEXT: xsadddp f6, f7, f6
; PWR10BE-NEXT: xsadddp f6, f6, f5
; PWR10BE-NEXT: xxswapd vs5, vs5
; PWR10BE-NEXT: xsadddp f5, f6, f5
; PWR10BE-NEXT: xsadddp f5, f5, f4
; PWR10BE-NEXT: xxswapd vs4, vs4
; PWR10BE-NEXT: xsadddp f4, f5, f4
; PWR10BE-NEXT: xsadddp f4, f4, f3
; PWR10BE-NEXT: xxswapd vs3, vs3
; PWR10BE-NEXT: xsadddp f3, f4, f3
; PWR10BE-NEXT: xsadddp f3, f3, f2
; PWR10BE-NEXT: xxswapd vs2, vs2
; PWR10BE-NEXT: xsadddp f2, f3, f2
; PWR10BE-NEXT: xsadddp f2, f2, f1
; PWR10BE-NEXT: xxswapd vs1, vs1
; PWR10BE-NEXT: xsadddp f1, f2, f1
; PWR10BE-NEXT: xsadddp f1, f1, f0
; PWR10BE-NEXT: xxswapd vs0, vs0
; PWR10BE-NEXT: xsadddp f1, f1, f0
; PWR10BE-NEXT: blr
entry:
%0 = call double @llvm.vector.reduce.fadd.v64f64(double -0.000000e+00, <64 x double> %a)
ret double %0
}
define dso_local double @v64f64_b(<64 x double> %a, double %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v64f64_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd v19, v2
; PWR9LE-NEXT: lxv v18, 224(r1)
; PWR9LE-NEXT: lxv v17, 240(r1)
; PWR9LE-NEXT: lxv v16, 256(r1)
; PWR9LE-NEXT: lxv v15, 272(r1)
; PWR9LE-NEXT: xsadddp f1, f1, v19
; PWR9LE-NEXT: lxv v14, 288(r1)
; PWR9LE-NEXT: lxv v1, 304(r1)
; PWR9LE-NEXT: lxv v0, 320(r1)
; PWR9LE-NEXT: lxv vs13, 336(r1)
; PWR9LE-NEXT: lxv vs12, 352(r1)
; PWR9LE-NEXT: lxv vs11, 368(r1)
; PWR9LE-NEXT: lxv vs10, 384(r1)
; PWR9LE-NEXT: lxv vs9, 400(r1)
; PWR9LE-NEXT: lxv vs8, 416(r1)
; PWR9LE-NEXT: lxv vs7, 432(r1)
; PWR9LE-NEXT: lxv vs6, 448(r1)
; PWR9LE-NEXT: lxv vs5, 464(r1)
; PWR9LE-NEXT: xsadddp f1, f1, v2
; PWR9LE-NEXT: xxswapd v2, v3
; PWR9LE-NEXT: lxv vs4, 480(r1)
; PWR9LE-NEXT: lxv vs3, 496(r1)
; PWR9LE-NEXT: lxv vs2, 512(r1)
; PWR9LE-NEXT: lxv vs0, 528(r1)
; PWR9LE-NEXT: xsadddp f1, f1, v2
; PWR9LE-NEXT: xxswapd v2, v4
; PWR9LE-NEXT: xsadddp f1, f1, v3
; PWR9LE-NEXT: xsadddp f1, f1, v2
; PWR9LE-NEXT: xxswapd v2, v5
; PWR9LE-NEXT: xsadddp f1, f1, v4
; PWR9LE-NEXT: xsadddp f1, f1, v2
; PWR9LE-NEXT: xxswapd v2, v6
; PWR9LE-NEXT: xsadddp f1, f1, v5
; PWR9LE-NEXT: xsadddp f1, f1, v2
; PWR9LE-NEXT: xxswapd v2, v7
; PWR9LE-NEXT: xsadddp f1, f1, v6
; PWR9LE-NEXT: xsadddp f1, f1, v2
; PWR9LE-NEXT: xxswapd v2, v8
; PWR9LE-NEXT: xsadddp f1, f1, v7
; PWR9LE-NEXT: xsadddp f1, f1, v2
; PWR9LE-NEXT: xxswapd v2, v9
; PWR9LE-NEXT: xsadddp f1, f1, v8
; PWR9LE-NEXT: xsadddp f1, f1, v2
; PWR9LE-NEXT: xxswapd v2, v10
; PWR9LE-NEXT: xsadddp f1, f1, v9
; PWR9LE-NEXT: xsadddp f1, f1, v2
; PWR9LE-NEXT: xxswapd v2, v11
; PWR9LE-NEXT: xsadddp f1, f1, v10
; PWR9LE-NEXT: xsadddp f1, f1, v2
; PWR9LE-NEXT: xxswapd v2, v12
; PWR9LE-NEXT: xsadddp f1, f1, v11
; PWR9LE-NEXT: xsadddp f1, f1, v2
; PWR9LE-NEXT: xxswapd v2, v13
; PWR9LE-NEXT: xsadddp f1, f1, v12
; PWR9LE-NEXT: xsadddp f1, f1, v2
; PWR9LE-NEXT: xxswapd v2, v18
; PWR9LE-NEXT: xsadddp f1, f1, v13
; PWR9LE-NEXT: xsadddp f1, f1, v2
; PWR9LE-NEXT: xxswapd v2, v17
; PWR9LE-NEXT: xsadddp f1, f1, v18
; PWR9LE-NEXT: xsadddp f1, f1, v2
; PWR9LE-NEXT: xxswapd v2, v16
; PWR9LE-NEXT: xsadddp f1, f1, v17
; PWR9LE-NEXT: xsadddp f1, f1, v2
; PWR9LE-NEXT: xxswapd v2, v15
; PWR9LE-NEXT: xsadddp f1, f1, v16
; PWR9LE-NEXT: xsadddp f1, f1, v2
; PWR9LE-NEXT: xxswapd v2, v14
; PWR9LE-NEXT: xsadddp f1, f1, v15
; PWR9LE-NEXT: xsadddp f1, f1, v2
; PWR9LE-NEXT: xxswapd v2, v1
; PWR9LE-NEXT: xsadddp f1, f1, v14
; PWR9LE-NEXT: xsadddp f1, f1, v2
; PWR9LE-NEXT: xxswapd v2, v0
; PWR9LE-NEXT: xsadddp f1, f1, v1
; PWR9LE-NEXT: xsadddp f1, f1, v2
; PWR9LE-NEXT: xxswapd v2, vs13
; PWR9LE-NEXT: xsadddp f1, f1, v0
; PWR9LE-NEXT: xsadddp f1, f1, v2
; PWR9LE-NEXT: xsadddp f1, f1, f13
; PWR9LE-NEXT: xxswapd vs13, vs12
; PWR9LE-NEXT: xsadddp f1, f1, f13
; PWR9LE-NEXT: xsadddp f1, f1, f12
; PWR9LE-NEXT: xxswapd vs12, vs11
; PWR9LE-NEXT: xsadddp f1, f1, f12
; PWR9LE-NEXT: xsadddp f1, f1, f11
; PWR9LE-NEXT: xxswapd vs11, vs10
; PWR9LE-NEXT: xsadddp f1, f1, f11
; PWR9LE-NEXT: xsadddp f1, f1, f10
; PWR9LE-NEXT: xxswapd vs10, vs9
; PWR9LE-NEXT: xsadddp f1, f1, f10
; PWR9LE-NEXT: xsadddp f1, f1, f9
; PWR9LE-NEXT: xxswapd vs9, vs8
; PWR9LE-NEXT: xsadddp f1, f1, f9
; PWR9LE-NEXT: xsadddp f1, f1, f8
; PWR9LE-NEXT: xxswapd vs8, vs7
; PWR9LE-NEXT: xsadddp f1, f1, f8
; PWR9LE-NEXT: xsadddp f1, f1, f7
; PWR9LE-NEXT: xxswapd vs7, vs6
; PWR9LE-NEXT: xsadddp f1, f1, f7
; PWR9LE-NEXT: xsadddp f1, f1, f6
; PWR9LE-NEXT: xxswapd vs6, vs5
; PWR9LE-NEXT: xsadddp f1, f1, f6
; PWR9LE-NEXT: xsadddp f1, f1, f5
; PWR9LE-NEXT: xxswapd vs5, vs4
; PWR9LE-NEXT: xsadddp f1, f1, f5
; PWR9LE-NEXT: xsadddp f1, f1, f4
; PWR9LE-NEXT: xxswapd vs4, vs3
; PWR9LE-NEXT: xsadddp f1, f1, f4
; PWR9LE-NEXT: xsadddp f1, f1, f3
; PWR9LE-NEXT: xxswapd vs3, vs2
; PWR9LE-NEXT: xsadddp f1, f1, f3
; PWR9LE-NEXT: xsadddp f1, f1, f2
; PWR9LE-NEXT: xxswapd vs2, vs0
; PWR9LE-NEXT: xsadddp f1, f1, f2
; PWR9LE-NEXT: xsadddp f1, f1, f0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v64f64_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xsadddp f1, f1, v2
; PWR9BE-NEXT: xxswapd v2, v2
; PWR9BE-NEXT: lxv v18, 240(r1)
; PWR9BE-NEXT: lxv v17, 256(r1)
; PWR9BE-NEXT: lxv v16, 272(r1)
; PWR9BE-NEXT: lxv v15, 288(r1)
; PWR9BE-NEXT: lxv v14, 304(r1)
; PWR9BE-NEXT: xsadddp f1, f1, v2
; PWR9BE-NEXT: xxswapd v2, v3
; PWR9BE-NEXT: lxv v1, 320(r1)
; PWR9BE-NEXT: lxv v0, 336(r1)
; PWR9BE-NEXT: lxv vs13, 352(r1)
; PWR9BE-NEXT: lxv vs12, 368(r1)
; PWR9BE-NEXT: lxv vs11, 384(r1)
; PWR9BE-NEXT: lxv vs10, 400(r1)
; PWR9BE-NEXT: lxv vs9, 416(r1)
; PWR9BE-NEXT: lxv vs8, 432(r1)
; PWR9BE-NEXT: lxv vs7, 448(r1)
; PWR9BE-NEXT: lxv vs6, 464(r1)
; PWR9BE-NEXT: lxv vs5, 480(r1)
; PWR9BE-NEXT: lxv vs4, 496(r1)
; PWR9BE-NEXT: lxv vs3, 512(r1)
; PWR9BE-NEXT: lxv vs2, 528(r1)
; PWR9BE-NEXT: lxv vs0, 544(r1)
; PWR9BE-NEXT: xsadddp f1, f1, v3
; PWR9BE-NEXT: xsadddp f1, f1, v2
; PWR9BE-NEXT: xxswapd v2, v4
; PWR9BE-NEXT: xsadddp f1, f1, v4
; PWR9BE-NEXT: xsadddp f1, f1, v2
; PWR9BE-NEXT: xxswapd v2, v5
; PWR9BE-NEXT: xsadddp f1, f1, v5
; PWR9BE-NEXT: xsadddp f1, f1, v2
; PWR9BE-NEXT: xxswapd v2, v6
; PWR9BE-NEXT: xsadddp f1, f1, v6
; PWR9BE-NEXT: xsadddp f1, f1, v2
; PWR9BE-NEXT: xxswapd v2, v7
; PWR9BE-NEXT: xsadddp f1, f1, v7
; PWR9BE-NEXT: xsadddp f1, f1, v2
; PWR9BE-NEXT: xxswapd v2, v8
; PWR9BE-NEXT: xsadddp f1, f1, v8
; PWR9BE-NEXT: xsadddp f1, f1, v2
; PWR9BE-NEXT: xxswapd v2, v9
; PWR9BE-NEXT: xsadddp f1, f1, v9
; PWR9BE-NEXT: xsadddp f1, f1, v2
; PWR9BE-NEXT: xxswapd v2, v10
; PWR9BE-NEXT: xsadddp f1, f1, v10
; PWR9BE-NEXT: xsadddp f1, f1, v2
; PWR9BE-NEXT: xxswapd v2, v11
; PWR9BE-NEXT: xsadddp f1, f1, v11
; PWR9BE-NEXT: xsadddp f1, f1, v2
; PWR9BE-NEXT: xxswapd v2, v12
; PWR9BE-NEXT: xsadddp f1, f1, v12
; PWR9BE-NEXT: xsadddp f1, f1, v2
; PWR9BE-NEXT: xxswapd v2, v13
; PWR9BE-NEXT: xsadddp f1, f1, v13
; PWR9BE-NEXT: xsadddp f1, f1, v2
; PWR9BE-NEXT: xxswapd v2, v18
; PWR9BE-NEXT: xsadddp f1, f1, v18
; PWR9BE-NEXT: xsadddp f1, f1, v2
; PWR9BE-NEXT: xxswapd v2, v17
; PWR9BE-NEXT: xsadddp f1, f1, v17
; PWR9BE-NEXT: xsadddp f1, f1, v2
; PWR9BE-NEXT: xxswapd v2, v16
; PWR9BE-NEXT: xsadddp f1, f1, v16
; PWR9BE-NEXT: xsadddp f1, f1, v2
; PWR9BE-NEXT: xxswapd v2, v15
; PWR9BE-NEXT: xsadddp f1, f1, v15
; PWR9BE-NEXT: xsadddp f1, f1, v2
; PWR9BE-NEXT: xxswapd v2, v14
; PWR9BE-NEXT: xsadddp f1, f1, v14
; PWR9BE-NEXT: xsadddp f1, f1, v2
; PWR9BE-NEXT: xxswapd v2, v1
; PWR9BE-NEXT: xsadddp f1, f1, v1
; PWR9BE-NEXT: xsadddp f1, f1, v2
; PWR9BE-NEXT: xxswapd v2, v0
; PWR9BE-NEXT: xsadddp f1, f1, v0
; PWR9BE-NEXT: xsadddp f1, f1, v2
; PWR9BE-NEXT: xsadddp f1, f1, f13
; PWR9BE-NEXT: xxswapd vs13, vs13
; PWR9BE-NEXT: xsadddp f1, f1, f13
; PWR9BE-NEXT: xsadddp f1, f1, f12
; PWR9BE-NEXT: xxswapd vs12, vs12
; PWR9BE-NEXT: xsadddp f1, f1, f12
; PWR9BE-NEXT: xsadddp f1, f1, f11
; PWR9BE-NEXT: xxswapd vs11, vs11
; PWR9BE-NEXT: xsadddp f1, f1, f11
; PWR9BE-NEXT: xsadddp f1, f1, f10
; PWR9BE-NEXT: xxswapd vs10, vs10
; PWR9BE-NEXT: xsadddp f1, f1, f10
; PWR9BE-NEXT: xsadddp f1, f1, f9
; PWR9BE-NEXT: xxswapd vs9, vs9
; PWR9BE-NEXT: xsadddp f1, f1, f9
; PWR9BE-NEXT: xsadddp f1, f1, f8
; PWR9BE-NEXT: xxswapd vs8, vs8
; PWR9BE-NEXT: xsadddp f1, f1, f8
; PWR9BE-NEXT: xsadddp f1, f1, f7
; PWR9BE-NEXT: xxswapd vs7, vs7
; PWR9BE-NEXT: xsadddp f1, f1, f7
; PWR9BE-NEXT: xsadddp f1, f1, f6
; PWR9BE-NEXT: xxswapd vs6, vs6
; PWR9BE-NEXT: xsadddp f1, f1, f6
; PWR9BE-NEXT: xsadddp f1, f1, f5
; PWR9BE-NEXT: xxswapd vs5, vs5
; PWR9BE-NEXT: xsadddp f1, f1, f5
; PWR9BE-NEXT: xsadddp f1, f1, f4
; PWR9BE-NEXT: xxswapd vs4, vs4
; PWR9BE-NEXT: xsadddp f1, f1, f4
; PWR9BE-NEXT: xsadddp f1, f1, f3
; PWR9BE-NEXT: xxswapd vs3, vs3
; PWR9BE-NEXT: xsadddp f1, f1, f3
; PWR9BE-NEXT: xsadddp f1, f1, f2
; PWR9BE-NEXT: xxswapd vs2, vs2
; PWR9BE-NEXT: xsadddp f1, f1, f2
; PWR9BE-NEXT: xsadddp f1, f1, f0
; PWR9BE-NEXT: xxswapd vs0, vs0
; PWR9BE-NEXT: xsadddp f1, f1, f0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v64f64_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd v19, v2
; PWR10LE-NEXT: lxv v18, 224(r1)
; PWR10LE-NEXT: lxv v17, 240(r1)
; PWR10LE-NEXT: xsadddp f1, f1, v19
; PWR10LE-NEXT: lxv v16, 256(r1)
; PWR10LE-NEXT: lxv v15, 272(r1)
; PWR10LE-NEXT: lxv v14, 288(r1)
; PWR10LE-NEXT: lxv v1, 304(r1)
; PWR10LE-NEXT: lxv v0, 320(r1)
; PWR10LE-NEXT: lxv vs13, 336(r1)
; PWR10LE-NEXT: lxv vs12, 352(r1)
; PWR10LE-NEXT: lxv vs11, 368(r1)
; PWR10LE-NEXT: xsadddp f1, f1, v2
; PWR10LE-NEXT: xxswapd v2, v3
; PWR10LE-NEXT: lxv vs10, 384(r1)
; PWR10LE-NEXT: lxv vs9, 400(r1)
; PWR10LE-NEXT: lxv vs8, 416(r1)
; PWR10LE-NEXT: lxv vs7, 432(r1)
; PWR10LE-NEXT: lxv vs6, 448(r1)
; PWR10LE-NEXT: lxv vs5, 464(r1)
; PWR10LE-NEXT: lxv vs4, 480(r1)
; PWR10LE-NEXT: lxv vs3, 496(r1)
; PWR10LE-NEXT: lxv vs2, 512(r1)
; PWR10LE-NEXT: lxv vs0, 528(r1)
; PWR10LE-NEXT: xsadddp f1, f1, v2
; PWR10LE-NEXT: xxswapd v2, v4
; PWR10LE-NEXT: xsadddp f1, f1, v3
; PWR10LE-NEXT: xsadddp f1, f1, v2
; PWR10LE-NEXT: xxswapd v2, v5
; PWR10LE-NEXT: xsadddp f1, f1, v4
; PWR10LE-NEXT: xsadddp f1, f1, v2
; PWR10LE-NEXT: xxswapd v2, v6
; PWR10LE-NEXT: xsadddp f1, f1, v5
; PWR10LE-NEXT: xsadddp f1, f1, v2
; PWR10LE-NEXT: xxswapd v2, v7
; PWR10LE-NEXT: xsadddp f1, f1, v6
; PWR10LE-NEXT: xsadddp f1, f1, v2
; PWR10LE-NEXT: xxswapd v2, v8
; PWR10LE-NEXT: xsadddp f1, f1, v7
; PWR10LE-NEXT: xsadddp f1, f1, v2
; PWR10LE-NEXT: xxswapd v2, v9
; PWR10LE-NEXT: xsadddp f1, f1, v8
; PWR10LE-NEXT: xsadddp f1, f1, v2
; PWR10LE-NEXT: xxswapd v2, v10
; PWR10LE-NEXT: xsadddp f1, f1, v9
; PWR10LE-NEXT: xsadddp f1, f1, v2
; PWR10LE-NEXT: xxswapd v2, v11
; PWR10LE-NEXT: xsadddp f1, f1, v10
; PWR10LE-NEXT: xsadddp f1, f1, v2
; PWR10LE-NEXT: xxswapd v2, v12
; PWR10LE-NEXT: xsadddp f1, f1, v11
; PWR10LE-NEXT: xsadddp f1, f1, v2
; PWR10LE-NEXT: xxswapd v2, v13
; PWR10LE-NEXT: xsadddp f1, f1, v12
; PWR10LE-NEXT: xsadddp f1, f1, v2
; PWR10LE-NEXT: xxswapd v2, v18
; PWR10LE-NEXT: xsadddp f1, f1, v13
; PWR10LE-NEXT: xsadddp f1, f1, v2
; PWR10LE-NEXT: xxswapd v2, v17
; PWR10LE-NEXT: xsadddp f1, f1, v18
; PWR10LE-NEXT: xsadddp f1, f1, v2
; PWR10LE-NEXT: xxswapd v2, v16
; PWR10LE-NEXT: xsadddp f1, f1, v17
; PWR10LE-NEXT: xsadddp f1, f1, v2
; PWR10LE-NEXT: xxswapd v2, v15
; PWR10LE-NEXT: xsadddp f1, f1, v16
; PWR10LE-NEXT: xsadddp f1, f1, v2
; PWR10LE-NEXT: xxswapd v2, v14
; PWR10LE-NEXT: xsadddp f1, f1, v15
; PWR10LE-NEXT: xsadddp f1, f1, v2
; PWR10LE-NEXT: xxswapd v2, v1
; PWR10LE-NEXT: xsadddp f1, f1, v14
; PWR10LE-NEXT: xsadddp f1, f1, v2
; PWR10LE-NEXT: xxswapd v2, v0
; PWR10LE-NEXT: xsadddp f1, f1, v1
; PWR10LE-NEXT: xsadddp f1, f1, v2
; PWR10LE-NEXT: xxswapd v2, vs13
; PWR10LE-NEXT: xsadddp f1, f1, v0
; PWR10LE-NEXT: xsadddp f1, f1, v2
; PWR10LE-NEXT: xsadddp f1, f1, f13
; PWR10LE-NEXT: xxswapd vs13, vs12
; PWR10LE-NEXT: xsadddp f1, f1, f13
; PWR10LE-NEXT: xsadddp f1, f1, f12
; PWR10LE-NEXT: xxswapd vs12, vs11
; PWR10LE-NEXT: xsadddp f1, f1, f12
; PWR10LE-NEXT: xsadddp f1, f1, f11
; PWR10LE-NEXT: xxswapd vs11, vs10
; PWR10LE-NEXT: xsadddp f1, f1, f11
; PWR10LE-NEXT: xsadddp f1, f1, f10
; PWR10LE-NEXT: xxswapd vs10, vs9
; PWR10LE-NEXT: xsadddp f1, f1, f10
; PWR10LE-NEXT: xsadddp f1, f1, f9
; PWR10LE-NEXT: xxswapd vs9, vs8
; PWR10LE-NEXT: xsadddp f1, f1, f9
; PWR10LE-NEXT: xsadddp f1, f1, f8
; PWR10LE-NEXT: xxswapd vs8, vs7
; PWR10LE-NEXT: xsadddp f1, f1, f8
; PWR10LE-NEXT: xsadddp f1, f1, f7
; PWR10LE-NEXT: xxswapd vs7, vs6
; PWR10LE-NEXT: xsadddp f1, f1, f7
; PWR10LE-NEXT: xsadddp f1, f1, f6
; PWR10LE-NEXT: xxswapd vs6, vs5
; PWR10LE-NEXT: xsadddp f1, f1, f6
; PWR10LE-NEXT: xsadddp f1, f1, f5
; PWR10LE-NEXT: xxswapd vs5, vs4
; PWR10LE-NEXT: xsadddp f1, f1, f5
; PWR10LE-NEXT: xsadddp f1, f1, f4
; PWR10LE-NEXT: xxswapd vs4, vs3
; PWR10LE-NEXT: xsadddp f1, f1, f4
; PWR10LE-NEXT: xsadddp f1, f1, f3
; PWR10LE-NEXT: xxswapd vs3, vs2
; PWR10LE-NEXT: xsadddp f1, f1, f3
; PWR10LE-NEXT: xsadddp f1, f1, f2
; PWR10LE-NEXT: xxswapd vs2, vs0
; PWR10LE-NEXT: xsadddp f1, f1, f2
; PWR10LE-NEXT: xsadddp f1, f1, f0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v64f64_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xsadddp f1, f1, v2
; PWR10BE-NEXT: xxswapd v2, v2
; PWR10BE-NEXT: lxv v18, 240(r1)
; PWR10BE-NEXT: lxv v17, 256(r1)
; PWR10BE-NEXT: xsadddp f1, f1, v2
; PWR10BE-NEXT: xxswapd v2, v3
; PWR10BE-NEXT: lxv v16, 272(r1)
; PWR10BE-NEXT: lxv v15, 288(r1)
; PWR10BE-NEXT: lxv v14, 304(r1)
; PWR10BE-NEXT: lxv v1, 320(r1)
; PWR10BE-NEXT: lxv v0, 336(r1)
; PWR10BE-NEXT: lxv vs13, 352(r1)
; PWR10BE-NEXT: lxv vs12, 368(r1)
; PWR10BE-NEXT: lxv vs11, 384(r1)
; PWR10BE-NEXT: lxv vs10, 400(r1)
; PWR10BE-NEXT: lxv vs9, 416(r1)
; PWR10BE-NEXT: xsadddp f1, f1, v3
; PWR10BE-NEXT: lxv vs8, 432(r1)
; PWR10BE-NEXT: lxv vs7, 448(r1)
; PWR10BE-NEXT: lxv vs6, 464(r1)
; PWR10BE-NEXT: lxv vs5, 480(r1)
; PWR10BE-NEXT: lxv vs4, 496(r1)
; PWR10BE-NEXT: lxv vs3, 512(r1)
; PWR10BE-NEXT: lxv vs2, 528(r1)
; PWR10BE-NEXT: lxv vs0, 544(r1)
; PWR10BE-NEXT: xsadddp f1, f1, v2
; PWR10BE-NEXT: xxswapd v2, v4
; PWR10BE-NEXT: xsadddp f1, f1, v4
; PWR10BE-NEXT: xsadddp f1, f1, v2
; PWR10BE-NEXT: xxswapd v2, v5
; PWR10BE-NEXT: xsadddp f1, f1, v5
; PWR10BE-NEXT: xsadddp f1, f1, v2
; PWR10BE-NEXT: xxswapd v2, v6
; PWR10BE-NEXT: xsadddp f1, f1, v6
; PWR10BE-NEXT: xsadddp f1, f1, v2
; PWR10BE-NEXT: xxswapd v2, v7
; PWR10BE-NEXT: xsadddp f1, f1, v7
; PWR10BE-NEXT: xsadddp f1, f1, v2
; PWR10BE-NEXT: xxswapd v2, v8
; PWR10BE-NEXT: xsadddp f1, f1, v8
; PWR10BE-NEXT: xsadddp f1, f1, v2
; PWR10BE-NEXT: xxswapd v2, v9
; PWR10BE-NEXT: xsadddp f1, f1, v9
; PWR10BE-NEXT: xsadddp f1, f1, v2
; PWR10BE-NEXT: xxswapd v2, v10
; PWR10BE-NEXT: xsadddp f1, f1, v10
; PWR10BE-NEXT: xsadddp f1, f1, v2
; PWR10BE-NEXT: xxswapd v2, v11
; PWR10BE-NEXT: xsadddp f1, f1, v11
; PWR10BE-NEXT: xsadddp f1, f1, v2
; PWR10BE-NEXT: xxswapd v2, v12
; PWR10BE-NEXT: xsadddp f1, f1, v12
; PWR10BE-NEXT: xsadddp f1, f1, v2
; PWR10BE-NEXT: xxswapd v2, v13
; PWR10BE-NEXT: xsadddp f1, f1, v13
; PWR10BE-NEXT: xsadddp f1, f1, v2
; PWR10BE-NEXT: xxswapd v2, v18
; PWR10BE-NEXT: xsadddp f1, f1, v18
; PWR10BE-NEXT: xsadddp f1, f1, v2
; PWR10BE-NEXT: xxswapd v2, v17
; PWR10BE-NEXT: xsadddp f1, f1, v17
; PWR10BE-NEXT: xsadddp f1, f1, v2
; PWR10BE-NEXT: xxswapd v2, v16
; PWR10BE-NEXT: xsadddp f1, f1, v16
; PWR10BE-NEXT: xsadddp f1, f1, v2
; PWR10BE-NEXT: xxswapd v2, v15
; PWR10BE-NEXT: xsadddp f1, f1, v15
; PWR10BE-NEXT: xsadddp f1, f1, v2
; PWR10BE-NEXT: xxswapd v2, v14
; PWR10BE-NEXT: xsadddp f1, f1, v14
; PWR10BE-NEXT: xsadddp f1, f1, v2
; PWR10BE-NEXT: xxswapd v2, v1
; PWR10BE-NEXT: xsadddp f1, f1, v1
; PWR10BE-NEXT: xsadddp f1, f1, v2
; PWR10BE-NEXT: xxswapd v2, v0
; PWR10BE-NEXT: xsadddp f1, f1, v0
; PWR10BE-NEXT: xsadddp f1, f1, v2
; PWR10BE-NEXT: xsadddp f1, f1, f13
; PWR10BE-NEXT: xxswapd vs13, vs13
; PWR10BE-NEXT: xsadddp f1, f1, f13
; PWR10BE-NEXT: xsadddp f1, f1, f12
; PWR10BE-NEXT: xxswapd vs12, vs12
; PWR10BE-NEXT: xsadddp f1, f1, f12
; PWR10BE-NEXT: xsadddp f1, f1, f11
; PWR10BE-NEXT: xxswapd vs11, vs11
; PWR10BE-NEXT: xsadddp f1, f1, f11
; PWR10BE-NEXT: xsadddp f1, f1, f10
; PWR10BE-NEXT: xxswapd vs10, vs10
; PWR10BE-NEXT: xsadddp f1, f1, f10
; PWR10BE-NEXT: xsadddp f1, f1, f9
; PWR10BE-NEXT: xxswapd vs9, vs9
; PWR10BE-NEXT: xsadddp f1, f1, f9
; PWR10BE-NEXT: xsadddp f1, f1, f8
; PWR10BE-NEXT: xxswapd vs8, vs8
; PWR10BE-NEXT: xsadddp f1, f1, f8
; PWR10BE-NEXT: xsadddp f1, f1, f7
; PWR10BE-NEXT: xxswapd vs7, vs7
; PWR10BE-NEXT: xsadddp f1, f1, f7
; PWR10BE-NEXT: xsadddp f1, f1, f6
; PWR10BE-NEXT: xxswapd vs6, vs6
; PWR10BE-NEXT: xsadddp f1, f1, f6
; PWR10BE-NEXT: xsadddp f1, f1, f5
; PWR10BE-NEXT: xxswapd vs5, vs5
; PWR10BE-NEXT: xsadddp f1, f1, f5
; PWR10BE-NEXT: xsadddp f1, f1, f4
; PWR10BE-NEXT: xxswapd vs4, vs4
; PWR10BE-NEXT: xsadddp f1, f1, f4
; PWR10BE-NEXT: xsadddp f1, f1, f3
; PWR10BE-NEXT: xxswapd vs3, vs3
; PWR10BE-NEXT: xsadddp f1, f1, f3
; PWR10BE-NEXT: xsadddp f1, f1, f2
; PWR10BE-NEXT: xxswapd vs2, vs2
; PWR10BE-NEXT: xsadddp f1, f1, f2
; PWR10BE-NEXT: xsadddp f1, f1, f0
; PWR10BE-NEXT: xxswapd vs0, vs0
; PWR10BE-NEXT: xsadddp f1, f1, f0
; PWR10BE-NEXT: blr
entry:
%0 = call double @llvm.vector.reduce.fadd.v64f64(double %b, <64 x double> %a)
ret double %0
}
; Reduces the 64 doubles of %a with llvm.vector.reduce.fadd, called with the
; `fast` flag and a start value of -0.0.
; In every configuration the expected code loads the vectors that are not
; already in v2-v13 from offsets off r1 (lxv), combines whole vectors pairwise
; with xvadddp, and ends with an xxswapd/xvadddp step that adds the two lanes
; of the last remaining vector.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define dso_local double @v64f64_fast(<64 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v64f64_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: lxv vs0, 368(r1)
; PWR9LE-NEXT: lxv vs1, 496(r1)
; PWR9LE-NEXT: lxv vs2, 240(r1)
; PWR9LE-NEXT: lxv vs3, 304(r1)
; PWR9LE-NEXT: xvadddp vs3, v3, vs3
; PWR9LE-NEXT: lxv vs4, 432(r1)
; PWR9LE-NEXT: lxv vs5, 400(r1)
; PWR9LE-NEXT: lxv vs6, 528(r1)
; PWR9LE-NEXT: lxv vs7, 272(r1)
; PWR9LE-NEXT: lxv vs8, 336(r1)
; PWR9LE-NEXT: lxv vs9, 464(r1)
; PWR9LE-NEXT: lxv vs10, 352(r1)
; PWR9LE-NEXT: lxv vs11, 480(r1)
; PWR9LE-NEXT: lxv vs12, 224(r1)
; PWR9LE-NEXT: lxv vs13, 288(r1)
; PWR9LE-NEXT: lxv v0, 416(r1)
; PWR9LE-NEXT: lxv v1, 384(r1)
; PWR9LE-NEXT: lxv v14, 512(r1)
; PWR9LE-NEXT: lxv v15, 256(r1)
; PWR9LE-NEXT: lxv v16, 320(r1)
; PWR9LE-NEXT: lxv v17, 448(r1)
; PWR9LE-NEXT: xvadddp v12, v12, v17
; PWR9LE-NEXT: xvadddp v4, v4, v16
; PWR9LE-NEXT: xvadddp v14, v15, v14
; PWR9LE-NEXT: xvadddp v1, v8, v1
; PWR9LE-NEXT: xvadddp v0, v10, v0
; PWR9LE-NEXT: xvadddp vs13, v2, vs13
; PWR9LE-NEXT: xvadddp vs11, vs12, vs11
; PWR9LE-NEXT: xvadddp vs10, v6, vs10
; PWR9LE-NEXT: xvadddp vs9, v13, vs9
; PWR9LE-NEXT: xvadddp vs8, v5, vs8
; PWR9LE-NEXT: xvadddp vs6, vs7, vs6
; PWR9LE-NEXT: xvadddp vs5, v9, vs5
; PWR9LE-NEXT: xvadddp vs4, v11, vs4
; PWR9LE-NEXT: xvadddp vs1, vs2, vs1
; PWR9LE-NEXT: xvadddp vs0, v7, vs0
; PWR9LE-NEXT: xvadddp vs0, vs0, vs1
; PWR9LE-NEXT: xvadddp vs1, vs3, vs4
; PWR9LE-NEXT: xvadddp vs2, vs5, vs6
; PWR9LE-NEXT: xvadddp vs3, vs8, vs9
; PWR9LE-NEXT: xvadddp vs4, vs10, vs11
; PWR9LE-NEXT: xvadddp vs5, vs13, v0
; PWR9LE-NEXT: xvadddp vs6, v1, v14
; PWR9LE-NEXT: xvadddp vs7, v4, v12
; PWR9LE-NEXT: xvadddp vs6, vs7, vs6
; PWR9LE-NEXT: xvadddp vs4, vs5, vs4
; PWR9LE-NEXT: xvadddp vs2, vs3, vs2
; PWR9LE-NEXT: xvadddp vs0, vs1, vs0
; PWR9LE-NEXT: xvadddp vs0, vs0, vs2
; PWR9LE-NEXT: xvadddp vs1, vs4, vs6
; PWR9LE-NEXT: xvadddp vs0, vs1, vs0
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: xvadddp vs0, vs0, vs1
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v64f64_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: lxv vs0, 384(r1)
; PWR9BE-NEXT: lxv vs1, 512(r1)
; PWR9BE-NEXT: lxv vs2, 256(r1)
; PWR9BE-NEXT: lxv vs3, 320(r1)
; PWR9BE-NEXT: xvadddp vs3, v3, vs3
; PWR9BE-NEXT: lxv vs4, 448(r1)
; PWR9BE-NEXT: lxv vs5, 416(r1)
; PWR9BE-NEXT: lxv vs6, 544(r1)
; PWR9BE-NEXT: lxv vs7, 288(r1)
; PWR9BE-NEXT: lxv vs8, 352(r1)
; PWR9BE-NEXT: lxv vs9, 480(r1)
; PWR9BE-NEXT: lxv vs10, 368(r1)
; PWR9BE-NEXT: lxv vs11, 496(r1)
; PWR9BE-NEXT: lxv vs12, 240(r1)
; PWR9BE-NEXT: lxv vs13, 304(r1)
; PWR9BE-NEXT: lxv v0, 432(r1)
; PWR9BE-NEXT: lxv v1, 400(r1)
; PWR9BE-NEXT: lxv v14, 528(r1)
; PWR9BE-NEXT: lxv v15, 272(r1)
; PWR9BE-NEXT: lxv v16, 336(r1)
; PWR9BE-NEXT: lxv v17, 464(r1)
; PWR9BE-NEXT: xvadddp v12, v12, v17
; PWR9BE-NEXT: xvadddp v4, v4, v16
; PWR9BE-NEXT: xvadddp v14, v15, v14
; PWR9BE-NEXT: xvadddp v1, v8, v1
; PWR9BE-NEXT: xvadddp v0, v10, v0
; PWR9BE-NEXT: xvadddp vs13, v2, vs13
; PWR9BE-NEXT: xvadddp vs11, vs12, vs11
; PWR9BE-NEXT: xvadddp vs10, v6, vs10
; PWR9BE-NEXT: xvadddp vs9, v13, vs9
; PWR9BE-NEXT: xvadddp vs8, v5, vs8
; PWR9BE-NEXT: xvadddp vs6, vs7, vs6
; PWR9BE-NEXT: xvadddp vs5, v9, vs5
; PWR9BE-NEXT: xvadddp vs4, v11, vs4
; PWR9BE-NEXT: xvadddp vs1, vs2, vs1
; PWR9BE-NEXT: xvadddp vs0, v7, vs0
; PWR9BE-NEXT: xvadddp vs0, vs0, vs1
; PWR9BE-NEXT: xvadddp vs1, vs3, vs4
; PWR9BE-NEXT: xvadddp vs2, vs5, vs6
; PWR9BE-NEXT: xvadddp vs3, vs8, vs9
; PWR9BE-NEXT: xvadddp vs4, vs10, vs11
; PWR9BE-NEXT: xvadddp vs5, vs13, v0
; PWR9BE-NEXT: xvadddp vs6, v1, v14
; PWR9BE-NEXT: xvadddp vs7, v4, v12
; PWR9BE-NEXT: xvadddp vs6, vs7, vs6
; PWR9BE-NEXT: xvadddp vs4, vs5, vs4
; PWR9BE-NEXT: xvadddp vs2, vs3, vs2
; PWR9BE-NEXT: xvadddp vs0, vs1, vs0
; PWR9BE-NEXT: xvadddp vs0, vs0, vs2
; PWR9BE-NEXT: xvadddp vs1, vs4, vs6
; PWR9BE-NEXT: xvadddp vs0, vs1, vs0
; PWR9BE-NEXT: xxswapd vs1, vs0
; PWR9BE-NEXT: xvadddp vs1, vs0, vs1
; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v64f64_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: lxv vs0, 368(r1)
; PWR10LE-NEXT: lxv vs1, 496(r1)
; PWR10LE-NEXT: xvadddp vs0, v7, vs0
; PWR10LE-NEXT: lxv vs2, 240(r1)
; PWR10LE-NEXT: lxv vs3, 304(r1)
; PWR10LE-NEXT: lxv vs4, 432(r1)
; PWR10LE-NEXT: lxv vs5, 400(r1)
; PWR10LE-NEXT: lxv vs6, 528(r1)
; PWR10LE-NEXT: lxv vs7, 272(r1)
; PWR10LE-NEXT: lxv vs8, 336(r1)
; PWR10LE-NEXT: lxv vs9, 464(r1)
; PWR10LE-NEXT: lxv vs10, 352(r1)
; PWR10LE-NEXT: lxv vs11, 480(r1)
; PWR10LE-NEXT: lxv vs12, 224(r1)
; PWR10LE-NEXT: lxv vs13, 288(r1)
; PWR10LE-NEXT: xvadddp vs13, v2, vs13
; PWR10LE-NEXT: xvadddp vs11, vs12, vs11
; PWR10LE-NEXT: xvadddp vs10, v6, vs10
; PWR10LE-NEXT: xvadddp vs9, v13, vs9
; PWR10LE-NEXT: xvadddp vs8, v5, vs8
; PWR10LE-NEXT: xvadddp vs6, vs7, vs6
; PWR10LE-NEXT: xvadddp vs5, v9, vs5
; PWR10LE-NEXT: xvadddp vs4, v11, vs4
; PWR10LE-NEXT: xvadddp vs3, v3, vs3
; PWR10LE-NEXT: xvadddp vs1, vs2, vs1
; PWR10LE-NEXT: xvadddp vs0, vs0, vs1
; PWR10LE-NEXT: lxv v0, 416(r1)
; PWR10LE-NEXT: lxv v1, 384(r1)
; PWR10LE-NEXT: lxv v14, 512(r1)
; PWR10LE-NEXT: lxv v15, 256(r1)
; PWR10LE-NEXT: lxv v16, 320(r1)
; PWR10LE-NEXT: lxv v17, 448(r1)
; PWR10LE-NEXT: xvadddp v12, v12, v17
; PWR10LE-NEXT: xvadddp v4, v4, v16
; PWR10LE-NEXT: xvadddp v14, v15, v14
; PWR10LE-NEXT: xvadddp v1, v8, v1
; PWR10LE-NEXT: xvadddp v0, v10, v0
; PWR10LE-NEXT: xvadddp vs1, vs3, vs4
; PWR10LE-NEXT: xvadddp vs2, vs5, vs6
; PWR10LE-NEXT: xvadddp vs3, vs8, vs9
; PWR10LE-NEXT: xvadddp vs4, vs10, vs11
; PWR10LE-NEXT: xvadddp vs5, vs13, v0
; PWR10LE-NEXT: xvadddp vs6, v1, v14
; PWR10LE-NEXT: xvadddp vs7, v4, v12
; PWR10LE-NEXT: xvadddp vs6, vs7, vs6
; PWR10LE-NEXT: xvadddp vs4, vs5, vs4
; PWR10LE-NEXT: xvadddp vs2, vs3, vs2
; PWR10LE-NEXT: xvadddp vs0, vs1, vs0
; PWR10LE-NEXT: xvadddp vs0, vs0, vs2
; PWR10LE-NEXT: xvadddp vs1, vs4, vs6
; PWR10LE-NEXT: xvadddp vs0, vs1, vs0
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: xvadddp vs0, vs0, vs1
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v64f64_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: lxv vs0, 384(r1)
; PWR10BE-NEXT: lxv vs1, 512(r1)
; PWR10BE-NEXT: xvadddp vs0, v7, vs0
; PWR10BE-NEXT: lxv vs2, 256(r1)
; PWR10BE-NEXT: lxv vs3, 320(r1)
; PWR10BE-NEXT: lxv vs4, 448(r1)
; PWR10BE-NEXT: lxv vs5, 416(r1)
; PWR10BE-NEXT: lxv vs6, 544(r1)
; PWR10BE-NEXT: lxv vs7, 288(r1)
; PWR10BE-NEXT: lxv vs8, 352(r1)
; PWR10BE-NEXT: lxv vs9, 480(r1)
; PWR10BE-NEXT: lxv vs10, 368(r1)
; PWR10BE-NEXT: lxv vs11, 496(r1)
; PWR10BE-NEXT: lxv vs12, 240(r1)
; PWR10BE-NEXT: lxv vs13, 304(r1)
; PWR10BE-NEXT: xvadddp vs13, v2, vs13
; PWR10BE-NEXT: xvadddp vs11, vs12, vs11
; PWR10BE-NEXT: xvadddp vs10, v6, vs10
; PWR10BE-NEXT: xvadddp vs9, v13, vs9
; PWR10BE-NEXT: xvadddp vs8, v5, vs8
; PWR10BE-NEXT: xvadddp vs6, vs7, vs6
; PWR10BE-NEXT: xvadddp vs5, v9, vs5
; PWR10BE-NEXT: xvadddp vs4, v11, vs4
; PWR10BE-NEXT: xvadddp vs3, v3, vs3
; PWR10BE-NEXT: xvadddp vs1, vs2, vs1
; PWR10BE-NEXT: xvadddp vs0, vs0, vs1
; PWR10BE-NEXT: lxv v0, 432(r1)
; PWR10BE-NEXT: lxv v1, 400(r1)
; PWR10BE-NEXT: lxv v14, 528(r1)
; PWR10BE-NEXT: lxv v15, 272(r1)
; PWR10BE-NEXT: lxv v16, 336(r1)
; PWR10BE-NEXT: lxv v17, 464(r1)
; PWR10BE-NEXT: xvadddp v12, v12, v17
; PWR10BE-NEXT: xvadddp v4, v4, v16
; PWR10BE-NEXT: xvadddp v14, v15, v14
; PWR10BE-NEXT: xvadddp v1, v8, v1
; PWR10BE-NEXT: xvadddp v0, v10, v0
; PWR10BE-NEXT: xvadddp vs1, vs3, vs4
; PWR10BE-NEXT: xvadddp vs2, vs5, vs6
; PWR10BE-NEXT: xvadddp vs3, vs8, vs9
; PWR10BE-NEXT: xvadddp vs4, vs10, vs11
; PWR10BE-NEXT: xvadddp vs5, vs13, v0
; PWR10BE-NEXT: xvadddp vs6, v1, v14
; PWR10BE-NEXT: xvadddp vs7, v4, v12
; PWR10BE-NEXT: xvadddp vs6, vs7, vs6
; PWR10BE-NEXT: xvadddp vs4, vs5, vs4
; PWR10BE-NEXT: xvadddp vs2, vs3, vs2
; PWR10BE-NEXT: xvadddp vs0, vs1, vs0
; PWR10BE-NEXT: xvadddp vs0, vs0, vs2
; PWR10BE-NEXT: xvadddp vs1, vs4, vs6
; PWR10BE-NEXT: xvadddp vs0, vs1, vs0
; PWR10BE-NEXT: xxswapd vs1, vs0
; PWR10BE-NEXT: xvadddp vs1, vs0, vs1
; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10BE-NEXT: blr
entry:
%0 = call fast double @llvm.vector.reduce.fadd.v64f64(double -0.000000e+00, <64 x double> %a)
ret double %0
}
; Declarations of the llvm.vector.reduce.fadd intrinsic for double vectors of
; 2, 4, 8, 16, 32 and 64 elements. Each takes the scalar start value first and
; the vector to reduce second.
declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>) #0
declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>) #0
declare double @llvm.vector.reduce.fadd.v8f64(double, <8 x double>) #0
declare double @llvm.vector.reduce.fadd.v16f64(double, <16 x double>) #0
declare double @llvm.vector.reduce.fadd.v32f64(double, <32 x double>) #0
declare double @llvm.vector.reduce.fadd.v64f64(double, <64 x double>) #0
;;
;; Vectors of ppc_fp128
;;
; fadd reduction of <2 x ppc_fp128> without fast-math flags, using a start
; value of -0.0 (written in ppc_fp128 hex form).
; Every configuration is expected to make exactly one __gcc_qadd call, with no
; register moves around it; only the link register save/restore and the stack
; frame setup surround the call.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define dso_local ppc_fp128 @v2ppcf128(<2 x ppc_fp128> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2ppcf128:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: mflr r0
; PWR9LE-NEXT: std r0, 16(r1)
; PWR9LE-NEXT: stdu r1, -32(r1)
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: addi r1, r1, 32
; PWR9LE-NEXT: ld r0, 16(r1)
; PWR9LE-NEXT: mtlr r0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2ppcf128:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: mflr r0
; PWR9BE-NEXT: std r0, 16(r1)
; PWR9BE-NEXT: stdu r1, -112(r1)
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: addi r1, r1, 112
; PWR9BE-NEXT: ld r0, 16(r1)
; PWR9BE-NEXT: mtlr r0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2ppcf128:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: mflr r0
; PWR10LE-NEXT: std r0, 16(r1)
; PWR10LE-NEXT: stdu r1, -32(r1)
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: addi r1, r1, 32
; PWR10LE-NEXT: ld r0, 16(r1)
; PWR10LE-NEXT: mtlr r0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2ppcf128:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: mflr r0
; PWR10BE-NEXT: std r0, 16(r1)
; PWR10BE-NEXT: stdu r1, -112(r1)
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: addi r1, r1, 112
; PWR10BE-NEXT: ld r0, 16(r1)
; PWR10BE-NEXT: mtlr r0
; PWR10BE-NEXT: blr
entry:
%0 = call ppc_fp128 @llvm.vector.reduce.fadd.v2ppcf128(ppc_fp128 0xM80000000000000000000000000000000, <2 x ppc_fp128> %a)
ret ppc_fp128 %0
}
; fadd reduction of <2 x ppc_fp128> without fast-math flags, using the extra
; argument %b as the start value.
; Every configuration is expected to make two __gcc_qadd calls. Before the
; first call the incoming f3/f4 are parked in f30/f31 (which are spilled and
; reloaded around the body), f1/f2 are moved to f3/f4 and f5/f6 to f1/f2.
; The second call takes the first call's result together with the parked
; f30/f31 pair.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define dso_local ppc_fp128 @v2ppcf128_b(<2 x ppc_fp128> %a, ppc_fp128 %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2ppcf128_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: mflr r0
; PWR9LE-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: std r0, 16(r1)
; PWR9LE-NEXT: stdu r1, -48(r1)
; PWR9LE-NEXT: fmr f31, f4
; PWR9LE-NEXT: fmr f30, f3
; PWR9LE-NEXT: fmr f4, f2
; PWR9LE-NEXT: fmr f3, f1
; PWR9LE-NEXT: fmr f1, f5
; PWR9LE-NEXT: fmr f2, f6
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: fmr f3, f30
; PWR9LE-NEXT: fmr f4, f31
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: addi r1, r1, 48
; PWR9LE-NEXT: ld r0, 16(r1)
; PWR9LE-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: mtlr r0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2ppcf128_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: mflr r0
; PWR9BE-NEXT: std r0, 16(r1)
; PWR9BE-NEXT: stdu r1, -128(r1)
; PWR9BE-NEXT: stfd f30, 112(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: stfd f31, 120(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: fmr f31, f4
; PWR9BE-NEXT: fmr f30, f3
; PWR9BE-NEXT: fmr f4, f2
; PWR9BE-NEXT: fmr f3, f1
; PWR9BE-NEXT: fmr f1, f5
; PWR9BE-NEXT: fmr f2, f6
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: fmr f3, f30
; PWR9BE-NEXT: fmr f4, f31
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: lfd f31, 120(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f30, 112(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: addi r1, r1, 128
; PWR9BE-NEXT: ld r0, 16(r1)
; PWR9BE-NEXT: mtlr r0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2ppcf128_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: mflr r0
; PWR10LE-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: std r0, 16(r1)
; PWR10LE-NEXT: stdu r1, -48(r1)
; PWR10LE-NEXT: fmr f31, f4
; PWR10LE-NEXT: fmr f30, f3
; PWR10LE-NEXT: fmr f4, f2
; PWR10LE-NEXT: fmr f3, f1
; PWR10LE-NEXT: fmr f1, f5
; PWR10LE-NEXT: fmr f2, f6
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: fmr f3, f30
; PWR10LE-NEXT: fmr f4, f31
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: addi r1, r1, 48
; PWR10LE-NEXT: ld r0, 16(r1)
; PWR10LE-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: mtlr r0
; PWR10LE-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2ppcf128_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: mflr r0
; PWR10BE-NEXT: std r0, 16(r1)
; PWR10BE-NEXT: stdu r1, -128(r1)
; PWR10BE-NEXT: stfd f30, 112(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: stfd f31, 120(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: fmr f31, f4
; PWR10BE-NEXT: fmr f30, f3
; PWR10BE-NEXT: fmr f4, f2
; PWR10BE-NEXT: fmr f3, f1
; PWR10BE-NEXT: fmr f1, f5
; PWR10BE-NEXT: fmr f2, f6
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: fmr f3, f30
; PWR10BE-NEXT: fmr f4, f31
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: lfd f31, 120(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f30, 112(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: addi r1, r1, 128
; PWR10BE-NEXT: ld r0, 16(r1)
; PWR10BE-NEXT: mtlr r0
; PWR10BE-NEXT: blr
entry:
%0 = call ppc_fp128 @llvm.vector.reduce.fadd.v2ppcf128(ppc_fp128 %b, <2 x ppc_fp128> %a)
ret ppc_fp128 %0
}
; fadd reduction of <2 x ppc_fp128> with the `fast` flag and a start value of
; -0.0 (written in ppc_fp128 hex form).
; Every configuration is expected to make one __gcc_qadd call. Unlike the
; non-fast variant, the expected code then stores the f1/f2 result to the
; stack, reloads it as one vector with lxv, and uses xxswapd to produce f2
; again before returning.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define dso_local ppc_fp128 @v2ppcf128_fast(<2 x ppc_fp128> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2ppcf128_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: mflr r0
; PWR9LE-NEXT: std r0, 16(r1)
; PWR9LE-NEXT: stdu r1, -64(r1)
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: stfd f2, 40(r1)
; PWR9LE-NEXT: stfd f1, 32(r1)
; PWR9LE-NEXT: lxv vs1, 32(r1)
; PWR9LE-NEXT: xxswapd vs2, vs1
; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
; PWR9LE-NEXT: addi r1, r1, 64
; PWR9LE-NEXT: ld r0, 16(r1)
; PWR9LE-NEXT: mtlr r0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2ppcf128_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: mflr r0
; PWR9BE-NEXT: std r0, 16(r1)
; PWR9BE-NEXT: stdu r1, -144(r1)
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: stfd f2, 120(r1)
; PWR9BE-NEXT: stfd f1, 112(r1)
; PWR9BE-NEXT: lxv vs1, 112(r1)
; PWR9BE-NEXT: xxswapd vs2, vs1
; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9BE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
; PWR9BE-NEXT: addi r1, r1, 144
; PWR9BE-NEXT: ld r0, 16(r1)
; PWR9BE-NEXT: mtlr r0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2ppcf128_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: mflr r0
; PWR10LE-NEXT: std r0, 16(r1)
; PWR10LE-NEXT: stdu r1, -64(r1)
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: stfd f2, 40(r1)
; PWR10LE-NEXT: stfd f1, 32(r1)
; PWR10LE-NEXT: lxv vs1, 32(r1)
; PWR10LE-NEXT: xxswapd vs2, vs1
; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
; PWR10LE-NEXT: addi r1, r1, 64
; PWR10LE-NEXT: ld r0, 16(r1)
; PWR10LE-NEXT: mtlr r0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2ppcf128_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: mflr r0
; PWR10BE-NEXT: std r0, 16(r1)
; PWR10BE-NEXT: stdu r1, -144(r1)
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: stfd f2, 120(r1)
; PWR10BE-NEXT: stfd f1, 112(r1)
; PWR10BE-NEXT: lxv vs1, 112(r1)
; PWR10BE-NEXT: xxswapd vs2, vs1
; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10BE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
; PWR10BE-NEXT: addi r1, r1, 144
; PWR10BE-NEXT: ld r0, 16(r1)
; PWR10BE-NEXT: mtlr r0
; PWR10BE-NEXT: blr
entry:
%0 = call fast ppc_fp128 @llvm.vector.reduce.fadd.v2ppcf128(ppc_fp128 0xM80000000000000000000000000000000, <2 x ppc_fp128> %a)
ret ppc_fp128 %0
}
; fadd reduction of <4 x ppc_fp128> without fast-math flags, using a start
; value of -0.0 (written in ppc_fp128 hex form).
; Every configuration is expected to make three __gcc_qadd calls in sequence.
; The incoming f5-f8 are parked in f28-f31 (spilled and reloaded around the
; body) so they survive the calls; f28/f29 feed the second call and f30/f31
; the third, each time as the f3/f4 operand.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define dso_local ppc_fp128 @v4ppcf128(<4 x ppc_fp128> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4ppcf128:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: mflr r0
; PWR9LE-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: std r0, 16(r1)
; PWR9LE-NEXT: stdu r1, -64(r1)
; PWR9LE-NEXT: fmr f31, f8
; PWR9LE-NEXT: fmr f30, f7
; PWR9LE-NEXT: fmr f29, f6
; PWR9LE-NEXT: fmr f28, f5
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: fmr f3, f28
; PWR9LE-NEXT: fmr f4, f29
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: fmr f3, f30
; PWR9LE-NEXT: fmr f4, f31
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: addi r1, r1, 64
; PWR9LE-NEXT: ld r0, 16(r1)
; PWR9LE-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: mtlr r0
; PWR9LE-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4ppcf128:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: mflr r0
; PWR9BE-NEXT: std r0, 16(r1)
; PWR9BE-NEXT: stdu r1, -144(r1)
; PWR9BE-NEXT: stfd f28, 112(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: stfd f29, 120(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: stfd f30, 128(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: stfd f31, 136(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: fmr f31, f8
; PWR9BE-NEXT: fmr f30, f7
; PWR9BE-NEXT: fmr f29, f6
; PWR9BE-NEXT: fmr f28, f5
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: fmr f3, f28
; PWR9BE-NEXT: fmr f4, f29
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: fmr f3, f30
; PWR9BE-NEXT: fmr f4, f31
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: lfd f31, 136(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f30, 128(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f29, 120(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f28, 112(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: addi r1, r1, 144
; PWR9BE-NEXT: ld r0, 16(r1)
; PWR9BE-NEXT: mtlr r0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4ppcf128:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: mflr r0
; PWR10LE-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: std r0, 16(r1)
; PWR10LE-NEXT: stdu r1, -64(r1)
; PWR10LE-NEXT: fmr f31, f8
; PWR10LE-NEXT: fmr f30, f7
; PWR10LE-NEXT: fmr f29, f6
; PWR10LE-NEXT: fmr f28, f5
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: fmr f3, f28
; PWR10LE-NEXT: fmr f4, f29
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: fmr f3, f30
; PWR10LE-NEXT: fmr f4, f31
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: addi r1, r1, 64
; PWR10LE-NEXT: ld r0, 16(r1)
; PWR10LE-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: mtlr r0
; PWR10LE-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4ppcf128:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: mflr r0
; PWR10BE-NEXT: std r0, 16(r1)
; PWR10BE-NEXT: stdu r1, -144(r1)
; PWR10BE-NEXT: stfd f28, 112(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: stfd f29, 120(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: fmr f29, f6
; PWR10BE-NEXT: fmr f28, f5
; PWR10BE-NEXT: stfd f30, 128(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: stfd f31, 136(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: fmr f31, f8
; PWR10BE-NEXT: fmr f30, f7
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: fmr f3, f28
; PWR10BE-NEXT: fmr f4, f29
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: fmr f3, f30
; PWR10BE-NEXT: fmr f4, f31
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: lfd f31, 136(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f30, 128(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f29, 120(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f28, 112(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: addi r1, r1, 144
; PWR10BE-NEXT: ld r0, 16(r1)
; PWR10BE-NEXT: mtlr r0
; PWR10BE-NEXT: blr
entry:
%0 = call ppc_fp128 @llvm.vector.reduce.fadd.v4ppcf128(ppc_fp128 0xM80000000000000000000000000000000, <4 x ppc_fp128> %a)
ret ppc_fp128 %0
}
define dso_local ppc_fp128 @v4ppcf128_b(<4 x ppc_fp128> %a, ppc_fp128 %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4ppcf128_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: mflr r0
; PWR9LE-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: std r0, 16(r1)
; PWR9LE-NEXT: stdu r1, -80(r1)
; PWR9LE-NEXT: fmr f27, f4
; PWR9LE-NEXT: fmr f26, f3
; PWR9LE-NEXT: fmr f4, f2
; PWR9LE-NEXT: fmr f3, f1
; PWR9LE-NEXT: fmr f1, f9
; PWR9LE-NEXT: fmr f2, f10
; PWR9LE-NEXT: fmr f31, f8
; PWR9LE-NEXT: fmr f30, f7
; PWR9LE-NEXT: fmr f29, f6
; PWR9LE-NEXT: fmr f28, f5
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: fmr f3, f26
; PWR9LE-NEXT: fmr f4, f27
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: fmr f3, f28
; PWR9LE-NEXT: fmr f4, f29
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: fmr f3, f30
; PWR9LE-NEXT: fmr f4, f31
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: addi r1, r1, 80
; PWR9LE-NEXT: ld r0, 16(r1)
; PWR9LE-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: mtlr r0
; PWR9LE-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4ppcf128_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: mflr r0
; PWR9BE-NEXT: std r0, 16(r1)
; PWR9BE-NEXT: stdu r1, -160(r1)
; PWR9BE-NEXT: stfd f26, 112(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: stfd f27, 120(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: fmr f27, f4
; PWR9BE-NEXT: fmr f26, f3
; PWR9BE-NEXT: fmr f4, f2
; PWR9BE-NEXT: fmr f3, f1
; PWR9BE-NEXT: fmr f1, f9
; PWR9BE-NEXT: fmr f2, f10
; PWR9BE-NEXT: stfd f28, 128(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: stfd f29, 136(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: stfd f30, 144(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: stfd f31, 152(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: fmr f31, f8
; PWR9BE-NEXT: fmr f30, f7
; PWR9BE-NEXT: fmr f29, f6
; PWR9BE-NEXT: fmr f28, f5
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: fmr f3, f26
; PWR9BE-NEXT: fmr f4, f27
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: fmr f3, f28
; PWR9BE-NEXT: fmr f4, f29
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: fmr f3, f30
; PWR9BE-NEXT: fmr f4, f31
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: lfd f31, 152(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f30, 144(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f29, 136(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f28, 128(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f27, 120(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f26, 112(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: addi r1, r1, 160
; PWR9BE-NEXT: ld r0, 16(r1)
; PWR9BE-NEXT: mtlr r0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4ppcf128_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: mflr r0
; PWR10LE-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: std r0, 16(r1)
; PWR10LE-NEXT: stdu r1, -80(r1)
; PWR10LE-NEXT: fmr f27, f4
; PWR10LE-NEXT: fmr f26, f3
; PWR10LE-NEXT: fmr f4, f2
; PWR10LE-NEXT: fmr f3, f1
; PWR10LE-NEXT: fmr f1, f9
; PWR10LE-NEXT: fmr f2, f10
; PWR10LE-NEXT: fmr f31, f8
; PWR10LE-NEXT: fmr f30, f7
; PWR10LE-NEXT: fmr f29, f6
; PWR10LE-NEXT: fmr f28, f5
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: fmr f3, f26
; PWR10LE-NEXT: fmr f4, f27
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: fmr f3, f28
; PWR10LE-NEXT: fmr f4, f29
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: fmr f3, f30
; PWR10LE-NEXT: fmr f4, f31
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: addi r1, r1, 80
; PWR10LE-NEXT: ld r0, 16(r1)
; PWR10LE-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: mtlr r0
; PWR10LE-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4ppcf128_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: mflr r0
; PWR10BE-NEXT: std r0, 16(r1)
; PWR10BE-NEXT: stdu r1, -160(r1)
; PWR10BE-NEXT: stfd f26, 112(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: stfd f27, 120(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: fmr f27, f4
; PWR10BE-NEXT: fmr f26, f3
; PWR10BE-NEXT: fmr f4, f2
; PWR10BE-NEXT: fmr f3, f1
; PWR10BE-NEXT: fmr f1, f9
; PWR10BE-NEXT: stfd f28, 128(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: stfd f29, 136(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: fmr f2, f10
; PWR10BE-NEXT: fmr f29, f6
; PWR10BE-NEXT: fmr f28, f5
; PWR10BE-NEXT: stfd f30, 144(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: stfd f31, 152(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: fmr f31, f8
; PWR10BE-NEXT: fmr f30, f7
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: fmr f3, f26
; PWR10BE-NEXT: fmr f4, f27
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: fmr f3, f28
; PWR10BE-NEXT: fmr f4, f29
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: fmr f3, f30
; PWR10BE-NEXT: fmr f4, f31
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: lfd f31, 152(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f30, 144(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f29, 136(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f28, 128(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f27, 120(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f26, 112(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: addi r1, r1, 160
; PWR10BE-NEXT: ld r0, 16(r1)
; PWR10BE-NEXT: mtlr r0
; PWR10BE-NEXT: blr
entry:
%0 = call ppc_fp128 @llvm.vector.reduce.fadd.v4ppcf128(ppc_fp128 %b, <4 x ppc_fp128> %a)
ret ppc_fp128 %0
}
;; Fast-math fadd reduction of a <4 x ppc_fp128> vector.
;; The call below carries the `fast` flag, and the expected code for every run
;; configuration contains three __gcc_qadd calls arranged as a tree:
;;   1. f1/f2 with f5/f6 (f3/f4 are overwritten from f5/f6 before the call),
;;   2. the saved f3/f4 pair (kept in f28/f29) with the saved f7/f8 pair
;;      (kept in f30/f31),
;;   3. the two partial sums (the first one is parked in f27/f26 meanwhile).
;; No separate libcall is expected for the start value.
;; NOTE(review): the start constant has only its top bit set, so it is
;; presumably -0.0, matching the float tests at the top of the file - confirm.
;; The check lines are autogenerated; regenerate them with
;; utils/update_llc_test_checks.py instead of editing them by hand.
define dso_local ppc_fp128 @v4ppcf128_fast(<4 x ppc_fp128> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4ppcf128_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: mflr r0
; PWR9LE-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; PWR9LE-NEXT: std r0, 16(r1)
; PWR9LE-NEXT: stdu r1, -96(r1)
; PWR9LE-NEXT: fmr f29, f4
; PWR9LE-NEXT: fmr f28, f3
; PWR9LE-NEXT: fmr f3, f5
; PWR9LE-NEXT: fmr f4, f6
; PWR9LE-NEXT: fmr f31, f8
; PWR9LE-NEXT: fmr f30, f7
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: fmr f27, f1
; PWR9LE-NEXT: fmr f26, f2
; PWR9LE-NEXT: fmr f1, f28
; PWR9LE-NEXT: fmr f2, f29
; PWR9LE-NEXT: fmr f3, f30
; PWR9LE-NEXT: fmr f4, f31
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: fmr f3, f1
; PWR9LE-NEXT: fmr f4, f2
; PWR9LE-NEXT: fmr f1, f27
; PWR9LE-NEXT: fmr f2, f26
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: stfd f2, 40(r1)
; PWR9LE-NEXT: stfd f1, 32(r1)
; PWR9LE-NEXT: lxv vs1, 32(r1)
; PWR9LE-NEXT: xxswapd vs2, vs1
; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
; PWR9LE-NEXT: addi r1, r1, 96
; PWR9LE-NEXT: ld r0, 16(r1)
; PWR9LE-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: mtlr r0
; PWR9LE-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4ppcf128_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: mflr r0
; PWR9BE-NEXT: std r0, 16(r1)
; PWR9BE-NEXT: stdu r1, -176(r1)
; PWR9BE-NEXT: stfd f28, 144(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: stfd f29, 152(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: fmr f29, f4
; PWR9BE-NEXT: fmr f28, f3
; PWR9BE-NEXT: fmr f3, f5
; PWR9BE-NEXT: fmr f4, f6
; PWR9BE-NEXT: stfd f26, 128(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: stfd f27, 136(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: stfd f30, 160(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: stfd f31, 168(r1) # 8-byte Folded Spill
; PWR9BE-NEXT: fmr f31, f8
; PWR9BE-NEXT: fmr f30, f7
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: fmr f27, f1
; PWR9BE-NEXT: fmr f26, f2
; PWR9BE-NEXT: fmr f1, f28
; PWR9BE-NEXT: fmr f2, f29
; PWR9BE-NEXT: fmr f3, f30
; PWR9BE-NEXT: fmr f4, f31
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: fmr f3, f1
; PWR9BE-NEXT: fmr f4, f2
; PWR9BE-NEXT: fmr f1, f27
; PWR9BE-NEXT: fmr f2, f26
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: stfd f2, 120(r1)
; PWR9BE-NEXT: stfd f1, 112(r1)
; PWR9BE-NEXT: lxv vs1, 112(r1)
; PWR9BE-NEXT: lfd f31, 168(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f30, 160(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: xxswapd vs2, vs1
; PWR9BE-NEXT: lfd f29, 152(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f28, 144(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f27, 136(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: lfd f26, 128(r1) # 8-byte Folded Reload
; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9BE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
; PWR9BE-NEXT: addi r1, r1, 176
; PWR9BE-NEXT: ld r0, 16(r1)
; PWR9BE-NEXT: mtlr r0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4ppcf128_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: mflr r0
; PWR10LE-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; PWR10LE-NEXT: std r0, 16(r1)
; PWR10LE-NEXT: stdu r1, -96(r1)
; PWR10LE-NEXT: fmr f29, f4
; PWR10LE-NEXT: fmr f28, f3
; PWR10LE-NEXT: fmr f3, f5
; PWR10LE-NEXT: fmr f4, f6
; PWR10LE-NEXT: fmr f31, f8
; PWR10LE-NEXT: fmr f30, f7
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: fmr f27, f1
; PWR10LE-NEXT: fmr f26, f2
; PWR10LE-NEXT: fmr f1, f28
; PWR10LE-NEXT: fmr f2, f29
; PWR10LE-NEXT: fmr f3, f30
; PWR10LE-NEXT: fmr f4, f31
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: fmr f3, f1
; PWR10LE-NEXT: fmr f4, f2
; PWR10LE-NEXT: fmr f1, f27
; PWR10LE-NEXT: fmr f2, f26
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: stfd f2, 40(r1)
; PWR10LE-NEXT: stfd f1, 32(r1)
; PWR10LE-NEXT: lxv vs1, 32(r1)
; PWR10LE-NEXT: xxswapd vs2, vs1
; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
; PWR10LE-NEXT: addi r1, r1, 96
; PWR10LE-NEXT: ld r0, 16(r1)
; PWR10LE-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: mtlr r0
; PWR10LE-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4ppcf128_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: mflr r0
; PWR10BE-NEXT: std r0, 16(r1)
; PWR10BE-NEXT: stdu r1, -176(r1)
; PWR10BE-NEXT: stfd f28, 144(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: stfd f29, 152(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: fmr f29, f4
; PWR10BE-NEXT: fmr f28, f3
; PWR10BE-NEXT: fmr f3, f5
; PWR10BE-NEXT: fmr f4, f6
; PWR10BE-NEXT: stfd f26, 128(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: stfd f27, 136(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: stfd f30, 160(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: stfd f31, 168(r1) # 8-byte Folded Spill
; PWR10BE-NEXT: fmr f31, f8
; PWR10BE-NEXT: fmr f30, f7
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: fmr f27, f1
; PWR10BE-NEXT: fmr f26, f2
; PWR10BE-NEXT: fmr f1, f28
; PWR10BE-NEXT: fmr f2, f29
; PWR10BE-NEXT: fmr f3, f30
; PWR10BE-NEXT: fmr f4, f31
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: fmr f3, f1
; PWR10BE-NEXT: fmr f4, f2
; PWR10BE-NEXT: fmr f1, f27
; PWR10BE-NEXT: fmr f2, f26
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: stfd f2, 120(r1)
; PWR10BE-NEXT: stfd f1, 112(r1)
; PWR10BE-NEXT: lfd f31, 168(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f30, 160(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f29, 152(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f28, 144(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f27, 136(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lfd f26, 128(r1) # 8-byte Folded Reload
; PWR10BE-NEXT: lxv vs1, 112(r1)
; PWR10BE-NEXT: xxswapd vs2, vs1
; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10BE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
; PWR10BE-NEXT: addi r1, r1, 176
; PWR10BE-NEXT: ld r0, 16(r1)
; PWR10BE-NEXT: mtlr r0
; PWR10BE-NEXT: blr
entry:
%0 = call fast ppc_fp128 @llvm.vector.reduce.fadd.v4ppcf128(ppc_fp128 0xM80000000000000000000000000000000, <4 x ppc_fp128> %a)
ret ppc_fp128 %0
}
;; Declarations of the ppc_fp128 fadd reduction intrinsics for 2- and 4-element
;; vectors; each takes a scalar start value followed by the vector operand.
declare ppc_fp128 @llvm.vector.reduce.fadd.v2ppcf128(ppc_fp128, <2 x ppc_fp128>) #0
declare ppc_fp128 @llvm.vector.reduce.fadd.v4ppcf128(ppc_fp128, <4 x ppc_fp128>) #0
;; Attribute group #0 is attached to the declarations above and to the test
;; functions that reference it.
attributes #0 = { nounwind }