| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ |
| ; RUN: -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE |
| ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ |
| ; RUN: -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE |
| ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ |
| ; RUN: -mcpu=pwr10 -mattr=-paired-vector-memops -mtriple=powerpc64le < %s | \ |
| ; RUN: FileCheck %s --check-prefix=PWR10LE |
| ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ |
| ; RUN: -mcpu=pwr10 -mattr=-paired-vector-memops -mtriple=powerpc64 < %s | \ |
| ; RUN: FileCheck %s --check-prefix=PWR10BE |
| |
| ;; |
| ;; Vectors of f32 |
| ;; |
; Ordered (strict-FP) product reduction of a <2 x float> with start value 1.0.
; 1.0 is the fmul identity, so codegen folds it away: each lane is extracted,
; converted single->double precision register form (xscvspdpn), and a single
; scalar multiply (xsmulsp) combines the two lanes. Lane extraction shifts
; differ between LE and BE due to element ordering in the VSX register.
define dso_local float @v2f32(<2 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
%0 = call float @llvm.vector.reduce.fmul.v2f32(float 1.000000e+00, <2 x float> %a)
ret float %0
}
| |
; Same <2 x float> ordered reduction, but with a runtime start value %b
; (arrives in f1). Unlike the 1.0 case above, the initial multiply cannot be
; folded: the first xsmulsp combines %b with lane 0, then the remaining lane
; is multiplied in sequence, preserving strict-FP evaluation order.
define dso_local float @v2f32_b(<2 x float> %a, float %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f32_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xsmulsp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f32_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xsmulsp f0, f1, f0
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f32_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xsmulsp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f32_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xsmulsp f0, f1, f0
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
%0 = call float @llvm.vector.reduce.fmul.v2f32(float %b, <2 x float> %a)
ret float %0
}
| |
; Fast-math <2 x float> product reduction: reassociation is allowed, so the
; backend can use a vector multiply (xvmulsp) against a splat of the other
; lane and then extract the scalar result, instead of sequential scalar
; multiplies. LE needs an extra xxsldwi to move the result lane into f1.
define dso_local float @v2f32_fast(<2 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f32_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxspltw vs0, v2, 2
; PWR9LE-NEXT: xvmulsp vs0, v2, vs0
; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT: xscvspdpn f1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f32_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxspltw vs0, v2, 1
; PWR9BE-NEXT: xvmulsp vs0, v2, vs0
; PWR9BE-NEXT: xscvspdpn f1, vs0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f32_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxspltw vs0, v2, 2
; PWR10LE-NEXT: xvmulsp vs0, v2, vs0
; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT: xscvspdpn f1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f32_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxspltw vs0, v2, 1
; PWR10BE-NEXT: xvmulsp vs0, v2, vs0
; PWR10BE-NEXT: xscvspdpn f1, vs0
; PWR10BE-NEXT: blr
entry:
%0 = call fast float @llvm.vector.reduce.fmul.v2f32(float 1.000000e+00, <2 x float> %a)
ret float %0
}
| |
; Ordered (strict-FP) product reduction of a <4 x float>, start value 1.0
; (folded away as the fmul identity). Four lanes are extracted/converted and
; multiplied strictly in lane order; the sequence of xxsldwi shift amounts is
; mirrored between LE and BE because of opposite element numbering.
define dso_local float @v4f32(<4 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v2
; PWR9LE-NEXT: xsmulsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4f32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4f32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v2
; PWR10LE-NEXT: xsmulsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4f32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
%0 = call float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> %a)
ret float %0
}
| |
; Ordered <4 x float> product reduction with runtime start value %b (in f1).
; The start multiply is emitted explicitly (xsmulsp f0, f1, f0), then the
; remaining three lanes are folded in strict lane order.
define dso_local float @v4f32_b(<4 x float> %a, float %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f32_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xsmulsp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v2
; PWR9LE-NEXT: xsmulsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4f32_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xsmulsp f0, f1, f0
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4f32_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xsmulsp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v2
; PWR10LE-NEXT: xsmulsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4f32_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xsmulsp f0, f1, f0
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
%0 = call float @llvm.vector.reduce.fmul.v4f32(float %b, <4 x float> %a)
ret float %0
}
| |
; Fast-math <4 x float> product reduction: log2(4) = 2 vector-multiply
; shuffle steps (swap halves, then splat) followed by a scalar extract —
; the classic tree reduction enabled by reassociation.
define dso_local float @v4f32_fast(<4 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f32_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: xvmulsp vs0, v2, v3
; PWR9LE-NEXT: xxspltw vs1, vs0, 2
; PWR9LE-NEXT: xvmulsp vs0, vs0, vs1
; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT: xscvspdpn f1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4f32_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: xvmulsp vs0, v2, v3
; PWR9BE-NEXT: xxspltw vs1, vs0, 1
; PWR9BE-NEXT: xvmulsp vs0, vs0, vs1
; PWR9BE-NEXT: xscvspdpn f1, vs0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4f32_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: xvmulsp vs0, v2, v3
; PWR10LE-NEXT: xxspltw vs1, vs0, 2
; PWR10LE-NEXT: xvmulsp vs0, vs0, vs1
; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT: xscvspdpn f1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4f32_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: xvmulsp vs0, v2, v3
; PWR10BE-NEXT: xxspltw vs1, vs0, 1
; PWR10BE-NEXT: xvmulsp vs0, vs0, vs1
; PWR10BE-NEXT: xscvspdpn f1, vs0
; PWR10BE-NEXT: blr
entry:
%0 = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> %a)
ret float %0
}
| |
; Ordered (strict-FP) product reduction of an <8 x float> (two vector args,
; v2 and v3), start value 1.0 folded away. All eight lanes are extracted,
; converted, and multiplied strictly in order — no vectorization is legal
; without fast-math flags.
define dso_local float @v8f32(<8 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v2
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v3
; PWR9LE-NEXT: xsmulsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v3
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v2
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v3
; PWR10LE-NEXT: xsmulsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v3
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
%0 = call float @llvm.vector.reduce.fmul.v8f32(float 1.000000e+00, <8 x float> %a)
ret float %0
}
| |
; Ordered <8 x float> product reduction with runtime start value %b (in f1);
; the explicit start multiply comes first, then all eight lanes in strict
; lane order across v2 and v3.
define dso_local float @v8f32_b(<8 x float> %a, float %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f32_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xsmulsp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v2
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v3
; PWR9LE-NEXT: xsmulsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f32_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xsmulsp f0, f1, f0
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v3
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f32_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xsmulsp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v2
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v3
; PWR10LE-NEXT: xsmulsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f32_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xsmulsp f0, f1, f0
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v3
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
%0 = call float @llvm.vector.reduce.fmul.v8f32(float %b, <8 x float> %a)
ret float %0
}
| |
; Fast-math <8 x float> product reduction: first multiply the two vector
; halves together (xvmulsp v2, v3), then tree-reduce the resulting <4 x float>
; with two more shuffle+multiply steps and extract the scalar.
define dso_local float @v8f32_fast(<8 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f32_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xvmulsp vs0, v2, v3
; PWR9LE-NEXT: xxswapd v2, vs0
; PWR9LE-NEXT: xvmulsp vs0, vs0, v2
; PWR9LE-NEXT: xxspltw vs1, vs0, 2
; PWR9LE-NEXT: xvmulsp vs0, vs0, vs1
; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT: xscvspdpn f1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f32_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xvmulsp vs0, v2, v3
; PWR9BE-NEXT: xxswapd v2, vs0
; PWR9BE-NEXT: xvmulsp vs0, vs0, v2
; PWR9BE-NEXT: xxspltw vs1, vs0, 1
; PWR9BE-NEXT: xvmulsp vs0, vs0, vs1
; PWR9BE-NEXT: xscvspdpn f1, vs0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f32_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xvmulsp vs0, v2, v3
; PWR10LE-NEXT: xxswapd v2, vs0
; PWR10LE-NEXT: xvmulsp vs0, vs0, v2
; PWR10LE-NEXT: xxspltw vs1, vs0, 2
; PWR10LE-NEXT: xvmulsp vs0, vs0, vs1
; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT: xscvspdpn f1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f32_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xvmulsp vs0, v2, v3
; PWR10BE-NEXT: xxswapd v2, vs0
; PWR10BE-NEXT: xvmulsp vs0, vs0, v2
; PWR10BE-NEXT: xxspltw vs1, vs0, 1
; PWR10BE-NEXT: xvmulsp vs0, vs0, vs1
; PWR10BE-NEXT: xscvspdpn f1, vs0
; PWR10BE-NEXT: blr
entry:
%0 = call fast float @llvm.vector.reduce.fmul.v8f32(float 1.000000e+00, <8 x float> %a)
ret float %0
}
| |
; Ordered (strict-FP) product reduction of a <16 x float> (four vector args,
; v2-v5), start value 1.0 folded away. All sixteen lanes are extracted,
; converted, and multiplied strictly in lane order — the long scalar chain is
; required because no reassociation flags are present.
define dso_local float @v16f32(<16 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v2
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v3
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v4
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v4
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v5
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v5
; PWR9LE-NEXT: xsmulsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v3
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v4
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v4
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v5
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v5
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v2
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v3
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v4
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v4
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v5
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v5
; PWR10LE-NEXT: xsmulsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v3
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v4
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v4
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v5
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v5
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
%0 = call float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %a)
ret float %0
}
| |
| define dso_local float @v16f32_b(<16 x float> %a, float %b) local_unnamed_addr #0 { |
| ; PWR9LE-LABEL: v16f32_b: |
| ; PWR9LE: # %bb.0: # %entry |
| ; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3 |
| ; PWR9LE-NEXT: xscvspdpn f0, vs0 |
| ; PWR9LE-NEXT: xsmulsp f0, f1, f0 |
| ; PWR9LE-NEXT: xxswapd vs1, v2 |
| ; PWR9LE-NEXT: xscvspdpn f1, vs1 |
| ; PWR9LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1 |
| ; PWR9LE-NEXT: xscvspdpn f1, vs1 |
| ; PWR9LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9LE-NEXT: xscvspdpn f1, v2 |
| ; PWR9LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 3 |
| ; PWR9LE-NEXT: xscvspdpn f1, vs1 |
| ; PWR9LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9LE-NEXT: xxswapd vs1, v3 |
| ; PWR9LE-NEXT: xscvspdpn f1, vs1 |
| ; PWR9LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 1 |
| ; PWR9LE-NEXT: xscvspdpn f1, vs1 |
| ; PWR9LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9LE-NEXT: xscvspdpn f1, v3 |
| ; PWR9LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9LE-NEXT: xxsldwi vs1, v4, v4, 3 |
| ; PWR9LE-NEXT: xscvspdpn f1, vs1 |
| ; PWR9LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9LE-NEXT: xxswapd vs1, v4 |
| ; PWR9LE-NEXT: xscvspdpn f1, vs1 |
| ; PWR9LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9LE-NEXT: xxsldwi vs1, v4, v4, 1 |
| ; PWR9LE-NEXT: xscvspdpn f1, vs1 |
| ; PWR9LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9LE-NEXT: xscvspdpn f1, v4 |
| ; PWR9LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9LE-NEXT: xxsldwi vs1, v5, v5, 3 |
| ; PWR9LE-NEXT: xscvspdpn f1, vs1 |
| ; PWR9LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9LE-NEXT: xxswapd vs1, v5 |
| ; PWR9LE-NEXT: xscvspdpn f1, vs1 |
| ; PWR9LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9LE-NEXT: xxsldwi vs1, v5, v5, 1 |
| ; PWR9LE-NEXT: xscvspdpn f1, vs1 |
| ; PWR9LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9LE-NEXT: xscvspdpn f1, v5 |
| ; PWR9LE-NEXT: xsmulsp f1, f0, f1 |
| ; PWR9LE-NEXT: blr |
| ; |
| ; PWR9BE-LABEL: v16f32_b: |
| ; PWR9BE: # %bb.0: # %entry |
| ; PWR9BE-NEXT: xscvspdpn f0, v2 |
| ; PWR9BE-NEXT: xsmulsp f0, f1, f0 |
| ; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1 |
| ; PWR9BE-NEXT: xscvspdpn f1, vs1 |
| ; PWR9BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9BE-NEXT: xxswapd vs1, v2 |
| ; PWR9BE-NEXT: xscvspdpn f1, vs1 |
| ; PWR9BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3 |
| ; PWR9BE-NEXT: xscvspdpn f1, vs1 |
| ; PWR9BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9BE-NEXT: xscvspdpn f1, v3 |
| ; PWR9BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 1 |
| ; PWR9BE-NEXT: xscvspdpn f1, vs1 |
| ; PWR9BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9BE-NEXT: xxswapd vs1, v3 |
| ; PWR9BE-NEXT: xscvspdpn f1, vs1 |
| ; PWR9BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 3 |
| ; PWR9BE-NEXT: xscvspdpn f1, vs1 |
| ; PWR9BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9BE-NEXT: xscvspdpn f1, v4 |
| ; PWR9BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9BE-NEXT: xxsldwi vs1, v4, v4, 1 |
| ; PWR9BE-NEXT: xscvspdpn f1, vs1 |
| ; PWR9BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9BE-NEXT: xxswapd vs1, v4 |
| ; PWR9BE-NEXT: xscvspdpn f1, vs1 |
| ; PWR9BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9BE-NEXT: xxsldwi vs1, v4, v4, 3 |
| ; PWR9BE-NEXT: xscvspdpn f1, vs1 |
| ; PWR9BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9BE-NEXT: xscvspdpn f1, v5 |
| ; PWR9BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9BE-NEXT: xxsldwi vs1, v5, v5, 1 |
| ; PWR9BE-NEXT: xscvspdpn f1, vs1 |
| ; PWR9BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9BE-NEXT: xxswapd vs1, v5 |
| ; PWR9BE-NEXT: xscvspdpn f1, vs1 |
| ; PWR9BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR9BE-NEXT: xxsldwi vs1, v5, v5, 3 |
| ; PWR9BE-NEXT: xscvspdpn f1, vs1 |
| ; PWR9BE-NEXT: xsmulsp f1, f0, f1 |
| ; PWR9BE-NEXT: blr |
| ; |
| ; PWR10LE-LABEL: v16f32_b: |
| ; PWR10LE: # %bb.0: # %entry |
| ; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3 |
| ; PWR10LE-NEXT: xscvspdpn f0, vs0 |
| ; PWR10LE-NEXT: xsmulsp f0, f1, f0 |
| ; PWR10LE-NEXT: xxswapd vs1, v2 |
| ; PWR10LE-NEXT: xscvspdpn f1, vs1 |
| ; PWR10LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1 |
| ; PWR10LE-NEXT: xscvspdpn f1, vs1 |
| ; PWR10LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10LE-NEXT: xscvspdpn f1, v2 |
| ; PWR10LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 3 |
| ; PWR10LE-NEXT: xscvspdpn f1, vs1 |
| ; PWR10LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10LE-NEXT: xxswapd vs1, v3 |
| ; PWR10LE-NEXT: xscvspdpn f1, vs1 |
| ; PWR10LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 1 |
| ; PWR10LE-NEXT: xscvspdpn f1, vs1 |
| ; PWR10LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10LE-NEXT: xscvspdpn f1, v3 |
| ; PWR10LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10LE-NEXT: xxsldwi vs1, v4, v4, 3 |
| ; PWR10LE-NEXT: xscvspdpn f1, vs1 |
| ; PWR10LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10LE-NEXT: xxswapd vs1, v4 |
| ; PWR10LE-NEXT: xscvspdpn f1, vs1 |
| ; PWR10LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10LE-NEXT: xxsldwi vs1, v4, v4, 1 |
| ; PWR10LE-NEXT: xscvspdpn f1, vs1 |
| ; PWR10LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10LE-NEXT: xscvspdpn f1, v4 |
| ; PWR10LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10LE-NEXT: xxsldwi vs1, v5, v5, 3 |
| ; PWR10LE-NEXT: xscvspdpn f1, vs1 |
| ; PWR10LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10LE-NEXT: xxswapd vs1, v5 |
| ; PWR10LE-NEXT: xscvspdpn f1, vs1 |
| ; PWR10LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10LE-NEXT: xxsldwi vs1, v5, v5, 1 |
| ; PWR10LE-NEXT: xscvspdpn f1, vs1 |
| ; PWR10LE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10LE-NEXT: xscvspdpn f1, v5 |
| ; PWR10LE-NEXT: xsmulsp f1, f0, f1 |
| ; PWR10LE-NEXT: blr |
| ; |
| ; PWR10BE-LABEL: v16f32_b: |
| ; PWR10BE: # %bb.0: # %entry |
| ; PWR10BE-NEXT: xscvspdpn f0, v2 |
| ; PWR10BE-NEXT: xsmulsp f0, f1, f0 |
| ; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1 |
| ; PWR10BE-NEXT: xscvspdpn f1, vs1 |
| ; PWR10BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10BE-NEXT: xxswapd vs1, v2 |
| ; PWR10BE-NEXT: xscvspdpn f1, vs1 |
| ; PWR10BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3 |
| ; PWR10BE-NEXT: xscvspdpn f1, vs1 |
| ; PWR10BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10BE-NEXT: xscvspdpn f1, v3 |
| ; PWR10BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 1 |
| ; PWR10BE-NEXT: xscvspdpn f1, vs1 |
| ; PWR10BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10BE-NEXT: xxswapd vs1, v3 |
| ; PWR10BE-NEXT: xscvspdpn f1, vs1 |
| ; PWR10BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 3 |
| ; PWR10BE-NEXT: xscvspdpn f1, vs1 |
| ; PWR10BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10BE-NEXT: xscvspdpn f1, v4 |
| ; PWR10BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10BE-NEXT: xxsldwi vs1, v4, v4, 1 |
| ; PWR10BE-NEXT: xscvspdpn f1, vs1 |
| ; PWR10BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10BE-NEXT: xxswapd vs1, v4 |
| ; PWR10BE-NEXT: xscvspdpn f1, vs1 |
| ; PWR10BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10BE-NEXT: xxsldwi vs1, v4, v4, 3 |
| ; PWR10BE-NEXT: xscvspdpn f1, vs1 |
| ; PWR10BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10BE-NEXT: xscvspdpn f1, v5 |
| ; PWR10BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10BE-NEXT: xxsldwi vs1, v5, v5, 1 |
| ; PWR10BE-NEXT: xscvspdpn f1, vs1 |
| ; PWR10BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10BE-NEXT: xxswapd vs1, v5 |
| ; PWR10BE-NEXT: xscvspdpn f1, vs1 |
| ; PWR10BE-NEXT: xsmulsp f0, f0, f1 |
| ; PWR10BE-NEXT: xxsldwi vs1, v5, v5, 3 |
| ; PWR10BE-NEXT: xscvspdpn f1, vs1 |
| ; PWR10BE-NEXT: xsmulsp f1, f0, f1 |
| ; PWR10BE-NEXT: blr |
| entry: |
| %0 = call float @llvm.vector.reduce.fmul.v16f32(float %b, <16 x float> %a) |
| ret float %0 |
| } |
| |
; Reassociable fmul reduction of <16 x float> (start value 1.0, 'fast' flag):
; the flag permits the pairwise/tree vector lowering checked below instead of a
; strict in-order scalar chain. CHECK lines are autogenerated; regenerate with
; utils/update_llc_test_checks.py rather than editing by hand.
define dso_local float @v16f32_fast(<16 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f32_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xvmulsp vs0, v3, v5
; PWR9LE-NEXT: xvmulsp vs1, v2, v4
; PWR9LE-NEXT: xvmulsp vs0, vs1, vs0
; PWR9LE-NEXT: xxswapd v2, vs0
; PWR9LE-NEXT: xvmulsp vs0, vs0, v2
; PWR9LE-NEXT: xxspltw vs1, vs0, 2
; PWR9LE-NEXT: xvmulsp vs0, vs0, vs1
; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT: xscvspdpn f1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f32_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xvmulsp vs0, v3, v5
; PWR9BE-NEXT: xvmulsp vs1, v2, v4
; PWR9BE-NEXT: xvmulsp vs0, vs1, vs0
; PWR9BE-NEXT: xxswapd v2, vs0
; PWR9BE-NEXT: xvmulsp vs0, vs0, v2
; PWR9BE-NEXT: xxspltw vs1, vs0, 1
; PWR9BE-NEXT: xvmulsp vs0, vs0, vs1
; PWR9BE-NEXT: xscvspdpn f1, vs0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f32_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xvmulsp vs0, v3, v5
; PWR10LE-NEXT: xvmulsp vs1, v2, v4
; PWR10LE-NEXT: xvmulsp vs0, vs1, vs0
; PWR10LE-NEXT: xxswapd v2, vs0
; PWR10LE-NEXT: xvmulsp vs0, vs0, v2
; PWR10LE-NEXT: xxspltw vs1, vs0, 2
; PWR10LE-NEXT: xvmulsp vs0, vs0, vs1
; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT: xscvspdpn f1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f32_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xvmulsp vs0, v3, v5
; PWR10BE-NEXT: xvmulsp vs1, v2, v4
; PWR10BE-NEXT: xvmulsp vs0, vs1, vs0
; PWR10BE-NEXT: xxswapd v2, vs0
; PWR10BE-NEXT: xvmulsp vs0, vs0, v2
; PWR10BE-NEXT: xxspltw vs1, vs0, 1
; PWR10BE-NEXT: xvmulsp vs0, vs0, vs1
; PWR10BE-NEXT: xscvspdpn f1, vs0
; PWR10BE-NEXT: blr
entry:
  %0 = call fast float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %a)
  ret float %0
}
| |
| declare float @llvm.vector.reduce.fmul.v2f32(float, <2 x float>) #0 |
| declare float @llvm.vector.reduce.fmul.v4f32(float, <4 x float>) #0 |
| declare float @llvm.vector.reduce.fmul.v8f32(float, <8 x float>) #0 |
| declare float @llvm.vector.reduce.fmul.v16f32(float, <16 x float>) #0 |
| |
| ;; |
| ;; Vectors of f64 |
| ;; |
; Strict (in-order, no fast-math flags) fmul reduction of <2 x double> with a
; 1.0 start value; lowered to a scalar xsmuldp chain. Autogenerated CHECK
; lines -- regenerate with utils/update_llc_test_checks.py.
define dso_local double @v2f64(<2 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xsmuldp f1, f0, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd vs0, v2
; PWR9BE-NEXT: xsmuldp f1, v2, f0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xsmuldp f1, f0, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd vs0, v2
; PWR10BE-NEXT: xsmuldp f1, v2, f0
; PWR10BE-NEXT: blr
entry:
  %0 = call double @llvm.vector.reduce.fmul.v2f64(double 1.000000e+00, <2 x double> %a)
  ret double %0
}
| |
; Strict fmul reduction of <2 x double> seeded with the scalar argument %b
; (no fast-math flags, so the multiply order must be preserved).
; Autogenerated CHECK lines -- regenerate with utils/update_llc_test_checks.py.
define dso_local double @v2f64_b(<2 x double> %a, double %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f64_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xsmuldp f0, f1, f0
; PWR9LE-NEXT: xsmuldp f1, f0, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f64_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xsmuldp f0, f1, v2
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xsmuldp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f64_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xsmuldp f0, f1, f0
; PWR10LE-NEXT: xsmuldp f1, f0, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f64_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xsmuldp f0, f1, v2
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xsmuldp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call double @llvm.vector.reduce.fmul.v2f64(double %b, <2 x double> %a)
  ret double %0
}
| |
; Reassociable fmul reduction of <2 x double> (start value 1.0, 'fast' flag):
; lowered with a vector xvmuldp instead of the strict scalar chain.
; Autogenerated CHECK lines -- regenerate with utils/update_llc_test_checks.py.
define dso_local double @v2f64_fast(<2 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f64_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xvmuldp vs0, v2, vs0
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f64_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd vs0, v2
; PWR9BE-NEXT: xvmuldp vs1, v2, vs0
; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f64_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xvmuldp vs0, v2, vs0
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f64_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd vs0, v2
; PWR10BE-NEXT: xvmuldp vs1, v2, vs0
; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10BE-NEXT: blr
entry:
  %0 = call fast double @llvm.vector.reduce.fmul.v2f64(double 1.000000e+00, <2 x double> %a)
  ret double %0
}
| |
; Strict (in-order) fmul reduction of <4 x double>, start value 1.0; scalar
; xsmuldp chain across both input vector registers.
; Autogenerated CHECK lines -- regenerate with utils/update_llc_test_checks.py.
define dso_local double @v4f64(<4 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xsmuldp f0, f0, v2
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xsmuldp f1, f0, v3
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4f64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd vs0, v2
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xsmuldp f0, v2, f0
; PWR9BE-NEXT: xsmuldp f0, f0, v3
; PWR9BE-NEXT: xsmuldp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4f64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xsmuldp f0, f0, v2
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xsmuldp f1, f0, v3
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4f64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd vs0, v2
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xsmuldp f0, v2, f0
; PWR10BE-NEXT: xsmuldp f0, f0, v3
; PWR10BE-NEXT: xsmuldp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call double @llvm.vector.reduce.fmul.v4f64(double 1.000000e+00, <4 x double> %a)
  ret double %0
}
| |
; Strict fmul reduction of <4 x double> seeded with scalar %b (order-preserving
; scalar chain). Autogenerated CHECK lines -- regenerate with
; utils/update_llc_test_checks.py.
define dso_local double @v4f64_b(<4 x double> %a, double %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f64_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xsmuldp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xsmuldp f0, f0, v2
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xsmuldp f1, f0, v3
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4f64_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xsmuldp f0, f1, v2
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xsmuldp f0, f0, v3
; PWR9BE-NEXT: xsmuldp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4f64_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xsmuldp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xsmuldp f0, f0, v2
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xsmuldp f1, f0, v3
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4f64_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xsmuldp f0, f1, v2
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xsmuldp f0, f0, v3
; PWR10BE-NEXT: xsmuldp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call double @llvm.vector.reduce.fmul.v4f64(double %b, <4 x double> %a)
  ret double %0
}
| |
; Reassociable fmul reduction of <4 x double> (start value 1.0, 'fast' flag):
; pairwise xvmuldp tree instead of the strict scalar chain.
; Autogenerated CHECK lines -- regenerate with utils/update_llc_test_checks.py.
define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f64_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xvmuldp vs0, v2, v3
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: xvmuldp vs0, vs0, vs1
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4f64_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xvmuldp vs0, v2, v3
; PWR9BE-NEXT: xxswapd vs1, vs0
; PWR9BE-NEXT: xvmuldp vs1, vs0, vs1
; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4f64_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xvmuldp vs0, v2, v3
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: xvmuldp vs0, vs0, vs1
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4f64_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xvmuldp vs0, v2, v3
; PWR10BE-NEXT: xxswapd vs1, vs0
; PWR10BE-NEXT: xvmuldp vs1, vs0, vs1
; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10BE-NEXT: blr
entry:
  %0 = call fast double @llvm.vector.reduce.fmul.v4f64(double 1.000000e+00, <4 x double> %a)
  ret double %0
}
| |
; Strict (in-order) fmul reduction of <8 x double>, start value 1.0; scalar
; xsmuldp chain across the four input vector registers v2-v5.
; Autogenerated CHECK lines -- regenerate with utils/update_llc_test_checks.py.
define dso_local double @v8f64(<8 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xsmuldp f0, f0, v2
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v4
; PWR9LE-NEXT: xsmuldp f0, f0, v3
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v5
; PWR9LE-NEXT: xsmuldp f0, f0, v4
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xsmuldp f1, f0, v5
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd vs0, v2
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xsmuldp f0, v2, f0
; PWR9BE-NEXT: xsmuldp f0, f0, v3
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v4
; PWR9BE-NEXT: xsmuldp f0, f0, v4
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v5
; PWR9BE-NEXT: xsmuldp f0, f0, v5
; PWR9BE-NEXT: xsmuldp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xsmuldp f0, f0, v2
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v4
; PWR10LE-NEXT: xsmuldp f0, f0, v3
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v5
; PWR10LE-NEXT: xsmuldp f0, f0, v4
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xsmuldp f1, f0, v5
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd vs0, v2
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xsmuldp f0, v2, f0
; PWR10BE-NEXT: xsmuldp f0, f0, v3
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v4
; PWR10BE-NEXT: xsmuldp f0, f0, v4
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v5
; PWR10BE-NEXT: xsmuldp f0, f0, v5
; PWR10BE-NEXT: xsmuldp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %a)
  ret double %0
}
| |
; Strict fmul reduction of <8 x double> seeded with scalar %b (order-preserving
; scalar chain across v2-v5). Autogenerated CHECK lines -- regenerate with
; utils/update_llc_test_checks.py.
define dso_local double @v8f64_b(<8 x double> %a, double %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f64_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xsmuldp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xsmuldp f0, f0, v2
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v4
; PWR9LE-NEXT: xsmuldp f0, f0, v3
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v5
; PWR9LE-NEXT: xsmuldp f0, f0, v4
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xsmuldp f1, f0, v5
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f64_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xsmuldp f0, f1, v2
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xsmuldp f0, f0, v3
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v4
; PWR9BE-NEXT: xsmuldp f0, f0, v4
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v5
; PWR9BE-NEXT: xsmuldp f0, f0, v5
; PWR9BE-NEXT: xsmuldp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f64_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xsmuldp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xsmuldp f0, f0, v2
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v4
; PWR10LE-NEXT: xsmuldp f0, f0, v3
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v5
; PWR10LE-NEXT: xsmuldp f0, f0, v4
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xsmuldp f1, f0, v5
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f64_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xsmuldp f0, f1, v2
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xsmuldp f0, f0, v3
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v4
; PWR10BE-NEXT: xsmuldp f0, f0, v4
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v5
; PWR10BE-NEXT: xsmuldp f0, f0, v5
; PWR10BE-NEXT: xsmuldp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call double @llvm.vector.reduce.fmul.v8f64(double %b, <8 x double> %a)
  ret double %0
}
| |
; Reassociable fmul reduction of <8 x double> (start value 1.0, 'fast' flag):
; pairwise xvmuldp tree followed by a final in-register swap/multiply.
; Autogenerated CHECK lines -- regenerate with utils/update_llc_test_checks.py.
define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f64_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xvmuldp vs0, v3, v5
; PWR9LE-NEXT: xvmuldp vs1, v2, v4
; PWR9LE-NEXT: xvmuldp vs0, vs1, vs0
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: xvmuldp vs0, vs0, vs1
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f64_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xvmuldp vs0, v3, v5
; PWR9BE-NEXT: xvmuldp vs1, v2, v4
; PWR9BE-NEXT: xvmuldp vs0, vs1, vs0
; PWR9BE-NEXT: xxswapd vs1, vs0
; PWR9BE-NEXT: xvmuldp vs1, vs0, vs1
; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f64_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xvmuldp vs0, v3, v5
; PWR10LE-NEXT: xvmuldp vs1, v2, v4
; PWR10LE-NEXT: xvmuldp vs0, vs1, vs0
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: xvmuldp vs0, vs0, vs1
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f64_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xvmuldp vs0, v3, v5
; PWR10BE-NEXT: xvmuldp vs1, v2, v4
; PWR10BE-NEXT: xvmuldp vs0, vs1, vs0
; PWR10BE-NEXT: xxswapd vs1, vs0
; PWR10BE-NEXT: xvmuldp vs1, vs0, vs1
; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10BE-NEXT: blr
entry:
  %0 = call fast double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %a)
  ret double %0
}
| |
; Strict (in-order) fmul reduction of <16 x double>, start value 1.0; scalar
; xsmuldp chain across the eight input vector registers v2-v9.
; Autogenerated CHECK lines -- regenerate with utils/update_llc_test_checks.py.
define dso_local double @v16f64(<16 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xsmuldp f0, f0, v2
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v4
; PWR9LE-NEXT: xsmuldp f0, f0, v3
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v5
; PWR9LE-NEXT: xsmuldp f0, f0, v4
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v6
; PWR9LE-NEXT: xsmuldp f0, f0, v5
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v7
; PWR9LE-NEXT: xsmuldp f0, f0, v6
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v8
; PWR9LE-NEXT: xsmuldp f0, f0, v7
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v9
; PWR9LE-NEXT: xsmuldp f0, f0, v8
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xsmuldp f1, f0, v9
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd vs0, v2
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xsmuldp f0, v2, f0
; PWR9BE-NEXT: xsmuldp f0, f0, v3
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v4
; PWR9BE-NEXT: xsmuldp f0, f0, v4
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v5
; PWR9BE-NEXT: xsmuldp f0, f0, v5
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v6
; PWR9BE-NEXT: xsmuldp f0, f0, v6
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v7
; PWR9BE-NEXT: xsmuldp f0, f0, v7
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v8
; PWR9BE-NEXT: xsmuldp f0, f0, v8
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v9
; PWR9BE-NEXT: xsmuldp f0, f0, v9
; PWR9BE-NEXT: xsmuldp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xsmuldp f0, f0, v2
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v4
; PWR10LE-NEXT: xsmuldp f0, f0, v3
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v5
; PWR10LE-NEXT: xsmuldp f0, f0, v4
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v6
; PWR10LE-NEXT: xsmuldp f0, f0, v5
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v7
; PWR10LE-NEXT: xsmuldp f0, f0, v6
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v8
; PWR10LE-NEXT: xsmuldp f0, f0, v7
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v9
; PWR10LE-NEXT: xsmuldp f0, f0, v8
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xsmuldp f1, f0, v9
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd vs0, v2
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xsmuldp f0, v2, f0
; PWR10BE-NEXT: xsmuldp f0, f0, v3
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v4
; PWR10BE-NEXT: xsmuldp f0, f0, v4
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v5
; PWR10BE-NEXT: xsmuldp f0, f0, v5
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v6
; PWR10BE-NEXT: xsmuldp f0, f0, v6
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v7
; PWR10BE-NEXT: xsmuldp f0, f0, v7
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v8
; PWR10BE-NEXT: xsmuldp f0, f0, v8
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v9
; PWR10BE-NEXT: xsmuldp f0, f0, v9
; PWR10BE-NEXT: xsmuldp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call double @llvm.vector.reduce.fmul.v16f64(double 1.000000e+00, <16 x double> %a)
  ret double %0
}
| |
; Strict fmul reduction of <16 x double> seeded with scalar %b
; (order-preserving scalar chain across v2-v9).
; Autogenerated CHECK lines -- regenerate with utils/update_llc_test_checks.py.
define dso_local double @v16f64_b(<16 x double> %a, double %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f64_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xsmuldp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xsmuldp f0, f0, v2
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v4
; PWR9LE-NEXT: xsmuldp f0, f0, v3
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v5
; PWR9LE-NEXT: xsmuldp f0, f0, v4
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v6
; PWR9LE-NEXT: xsmuldp f0, f0, v5
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v7
; PWR9LE-NEXT: xsmuldp f0, f0, v6
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v8
; PWR9LE-NEXT: xsmuldp f0, f0, v7
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v9
; PWR9LE-NEXT: xsmuldp f0, f0, v8
; PWR9LE-NEXT: xsmuldp f0, f0, f1
; PWR9LE-NEXT: xsmuldp f1, f0, v9
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f64_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xsmuldp f0, f1, v2
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xsmuldp f0, f0, v3
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v4
; PWR9BE-NEXT: xsmuldp f0, f0, v4
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v5
; PWR9BE-NEXT: xsmuldp f0, f0, v5
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v6
; PWR9BE-NEXT: xsmuldp f0, f0, v6
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v7
; PWR9BE-NEXT: xsmuldp f0, f0, v7
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v8
; PWR9BE-NEXT: xsmuldp f0, f0, v8
; PWR9BE-NEXT: xsmuldp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v9
; PWR9BE-NEXT: xsmuldp f0, f0, v9
; PWR9BE-NEXT: xsmuldp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f64_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xsmuldp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xsmuldp f0, f0, v2
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v4
; PWR10LE-NEXT: xsmuldp f0, f0, v3
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v5
; PWR10LE-NEXT: xsmuldp f0, f0, v4
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v6
; PWR10LE-NEXT: xsmuldp f0, f0, v5
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v7
; PWR10LE-NEXT: xsmuldp f0, f0, v6
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v8
; PWR10LE-NEXT: xsmuldp f0, f0, v7
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v9
; PWR10LE-NEXT: xsmuldp f0, f0, v8
; PWR10LE-NEXT: xsmuldp f0, f0, f1
; PWR10LE-NEXT: xsmuldp f1, f0, v9
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f64_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xsmuldp f0, f1, v2
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xsmuldp f0, f0, v3
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v4
; PWR10BE-NEXT: xsmuldp f0, f0, v4
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v5
; PWR10BE-NEXT: xsmuldp f0, f0, v5
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v6
; PWR10BE-NEXT: xsmuldp f0, f0, v6
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v7
; PWR10BE-NEXT: xsmuldp f0, f0, v7
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v8
; PWR10BE-NEXT: xsmuldp f0, f0, v8
; PWR10BE-NEXT: xsmuldp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v9
; PWR10BE-NEXT: xsmuldp f0, f0, v9
; PWR10BE-NEXT: xsmuldp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call double @llvm.vector.reduce.fmul.v16f64(double %b, <16 x double> %a)
  ret double %0
}
| |
; Reassociable fmul reduction of <16 x double> (start value 1.0, 'fast' flag):
; log2-depth pairwise xvmuldp tree across v2-v9.
; Autogenerated CHECK lines -- regenerate with utils/update_llc_test_checks.py.
define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f64_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xvmuldp vs0, v4, v8
; PWR9LE-NEXT: xvmuldp vs1, v2, v6
; PWR9LE-NEXT: xvmuldp vs2, v5, v9
; PWR9LE-NEXT: xvmuldp vs3, v3, v7
; PWR9LE-NEXT: xvmuldp vs2, vs3, vs2
; PWR9LE-NEXT: xvmuldp vs0, vs1, vs0
; PWR9LE-NEXT: xvmuldp vs0, vs0, vs2
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: xvmuldp vs0, vs0, vs1
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f64_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xvmuldp vs0, v4, v8
; PWR9BE-NEXT: xvmuldp vs1, v2, v6
; PWR9BE-NEXT: xvmuldp vs2, v5, v9
; PWR9BE-NEXT: xvmuldp vs3, v3, v7
; PWR9BE-NEXT: xvmuldp vs2, vs3, vs2
; PWR9BE-NEXT: xvmuldp vs0, vs1, vs0
; PWR9BE-NEXT: xvmuldp vs0, vs0, vs2
; PWR9BE-NEXT: xxswapd vs1, vs0
; PWR9BE-NEXT: xvmuldp vs1, vs0, vs1
; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f64_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xvmuldp vs0, v4, v8
; PWR10LE-NEXT: xvmuldp vs1, v2, v6
; PWR10LE-NEXT: xvmuldp vs2, v5, v9
; PWR10LE-NEXT: xvmuldp vs3, v3, v7
; PWR10LE-NEXT: xvmuldp vs2, vs3, vs2
; PWR10LE-NEXT: xvmuldp vs0, vs1, vs0
; PWR10LE-NEXT: xvmuldp vs0, vs0, vs2
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: xvmuldp vs0, vs0, vs1
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f64_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xvmuldp vs0, v4, v8
; PWR10BE-NEXT: xvmuldp vs1, v2, v6
; PWR10BE-NEXT: xvmuldp vs2, v5, v9
; PWR10BE-NEXT: xvmuldp vs3, v3, v7
; PWR10BE-NEXT: xvmuldp vs2, vs3, vs2
; PWR10BE-NEXT: xvmuldp vs0, vs1, vs0
; PWR10BE-NEXT: xvmuldp vs0, vs0, vs2
; PWR10BE-NEXT: xxswapd vs1, vs0
; PWR10BE-NEXT: xvmuldp vs1, vs0, vs1
; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10BE-NEXT: blr
entry:
  %0 = call fast double @llvm.vector.reduce.fmul.v16f64(double 1.000000e+00, <16 x double> %a)
  ret double %0
}
| |
| declare double @llvm.vector.reduce.fmul.v2f64(double, <2 x double>) #0 |
| declare double @llvm.vector.reduce.fmul.v4f64(double, <4 x double>) #0 |
| declare double @llvm.vector.reduce.fmul.v8f64(double, <8 x double>) #0 |
| declare double @llvm.vector.reduce.fmul.v16f64(double, <16 x double>) #0 |
| |
| attributes #0 = { nounwind } |