; blob: cbd1fde866f2ad4b579e80e7a2472913858f87cf [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \
; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck --check-prefix=BE64 %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix \
; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck --check-prefix=BE32 %s
; tuh: sum of eight unsigned halfwords.
; Loads <8 x i16> from %a, zero-extends it to <8 x i32>, folds it into a zeroed
; <4 x i32> accumulator with llvm.vector.partial.reduce.add, then adds the four
; lanes together with llvm.vector.reduce.add.
; Expected lowering on all three targets: vmsumuhm of the loaded vector against
; a halfword splat of 1 with a zero accumulator, then two vadduwm steps for the
; final lane reduction.
; No return range is declared: the sum can reach 8 * 65535 = 524280 and is
; never negative.
define dso_local signext i32 @tuh(ptr noundef readonly captures(none) %a) local_unnamed_addr {
; CHECK-LABEL: tuh:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvd2x vs0, 0, r3
; CHECK-NEXT: vspltish v3, 1
; CHECK-NEXT: xxlxor v4, v4, v4
; CHECK-NEXT: xxswapd v2, vs0
; CHECK-NEXT: vmsumuhm v2, v2, v3, v4
; CHECK-NEXT: xxswapd v3, v2
; CHECK-NEXT: vadduwm v2, v2, v3
; CHECK-NEXT: xxspltw v3, v2, 2
; CHECK-NEXT: vadduwm v2, v2, v3
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: extsw r3, r3
; CHECK-NEXT: blr
;
; BE64-LABEL: tuh:
; BE64: # %bb.0: # %entry
; BE64-NEXT: lxvw4x v3, 0, r3
; BE64-NEXT: vspltish v2, 1
; BE64-NEXT: xxlxor v4, v4, v4
; BE64-NEXT: vmsumuhm v2, v3, v2, v4
; BE64-NEXT: xxswapd v3, v2
; BE64-NEXT: vadduwm v2, v2, v3
; BE64-NEXT: xxspltw v3, v2, 1
; BE64-NEXT: vadduwm v2, v2, v3
; BE64-NEXT: xxsldwi vs0, v2, v2, 3
; BE64-NEXT: mffprwz r3, f0
; BE64-NEXT: extsw r3, r3
; BE64-NEXT: blr
;
; BE32-LABEL: tuh:
; BE32: # %bb.0: # %entry
; BE32-NEXT: lxvw4x v3, 0, r3
; BE32-NEXT: vspltish v2, 1
; BE32-NEXT: xxlxor v4, v4, v4
; BE32-NEXT: addi r3, r1, -16
; BE32-NEXT: vmsumuhm v2, v3, v2, v4
; BE32-NEXT: xxswapd v3, v2
; BE32-NEXT: vadduwm v2, v2, v3
; BE32-NEXT: xxspltw v3, v2, 1
; BE32-NEXT: vadduwm v2, v2, v3
; BE32-NEXT: stxvw4x v2, 0, r3
; BE32-NEXT: lwz r3, -16(r1)
; BE32-NEXT: blr
entry:
  %0 = load <8 x i16>, ptr %a, align 2
  %conv = zext <8 x i16> %0 to <8 x i32>
  %pred = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v4i32.v8i32(<4 x i32> zeroinitializer, <8 x i32> %conv)
  %red = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %pred)
  ret i32 %red
}
; tuh2: dot product of two vectors of eight unsigned halfwords.
; Loads <8 x i16> from %a and %b, zero-extends both to <8 x i32>, multiplies
; them lane-wise, folds the products into a zeroed <4 x i32> accumulator with
; llvm.vector.partial.reduce.add, then adds the four lanes together with
; llvm.vector.reduce.add.
; Expected lowering on all three targets: a single vmsumuhm of the two loaded
; vectors with a zero accumulator (no splat of 1 is needed), then two vadduwm
; steps for the final lane reduction.
; No return range is declared: one product alone can be 65535 * 65535, so the
; i32 sum covers the whole 32-bit range.
define dso_local signext i32 @tuh2(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b) local_unnamed_addr {
; CHECK-LABEL: tuh2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvd2x vs0, 0, r3
; CHECK-NEXT: xxlxor v4, v4, v4
; CHECK-NEXT: xxswapd v2, vs0
; CHECK-NEXT: lxvd2x vs0, 0, r4
; CHECK-NEXT: xxswapd v3, vs0
; CHECK-NEXT: vmsumuhm v2, v2, v3, v4
; CHECK-NEXT: xxswapd v3, v2
; CHECK-NEXT: vadduwm v2, v2, v3
; CHECK-NEXT: xxspltw v3, v2, 2
; CHECK-NEXT: vadduwm v2, v2, v3
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: extsw r3, r3
; CHECK-NEXT: blr
;
; BE64-LABEL: tuh2:
; BE64: # %bb.0: # %entry
; BE64-NEXT: lxvw4x v2, 0, r3
; BE64-NEXT: lxvw4x v3, 0, r4
; BE64-NEXT: xxlxor v4, v4, v4
; BE64-NEXT: vmsumuhm v2, v2, v3, v4
; BE64-NEXT: xxswapd v3, v2
; BE64-NEXT: vadduwm v2, v2, v3
; BE64-NEXT: xxspltw v3, v2, 1
; BE64-NEXT: vadduwm v2, v2, v3
; BE64-NEXT: xxsldwi vs0, v2, v2, 3
; BE64-NEXT: mffprwz r3, f0
; BE64-NEXT: extsw r3, r3
; BE64-NEXT: blr
;
; BE32-LABEL: tuh2:
; BE32: # %bb.0: # %entry
; BE32-NEXT: lxvw4x v2, 0, r3
; BE32-NEXT: lxvw4x v3, 0, r4
; BE32-NEXT: xxlxor v4, v4, v4
; BE32-NEXT: addi r3, r1, -16
; BE32-NEXT: vmsumuhm v2, v2, v3, v4
; BE32-NEXT: xxswapd v3, v2
; BE32-NEXT: vadduwm v2, v2, v3
; BE32-NEXT: xxspltw v3, v2, 1
; BE32-NEXT: vadduwm v2, v2, v3
; BE32-NEXT: stxvw4x v2, 0, r3
; BE32-NEXT: lwz r3, -16(r1)
; BE32-NEXT: blr
entry:
  %0 = load <8 x i16>, ptr %a, align 2
  %conv = zext <8 x i16> %0 to <8 x i32>
  %lb = load <8 x i16>, ptr %b, align 2
  %conv2 = zext <8 x i16> %lb to <8 x i32>
  %mul = mul <8 x i32> %conv, %conv2
  %pred = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v4i32.v8i32(<4 x i32> zeroinitializer, <8 x i32> %mul)
  %red = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %pred)
  ret i32 %red
}
; tsh: sum of eight signed halfwords.
; Loads <8 x i16> from %a, sign-extends it to <8 x i32>, folds it into a zeroed
; <4 x i32> accumulator with llvm.vector.partial.reduce.add, then adds the four
; lanes together with llvm.vector.reduce.add.
; Expected lowering on all three targets: vmsumshm of the loaded vector against
; a halfword splat of 1 with a zero accumulator, then two vadduwm steps for the
; final lane reduction.
; The declared return range matches the arithmetic: 8 * -32768 = -262144 up to
; 8 * 32767 = 262136.
define dso_local signext range(i32 -262144, 262137) i32 @tsh(ptr noundef readonly captures(none) %a) local_unnamed_addr {
; CHECK-LABEL: tsh:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvd2x vs0, 0, r3
; CHECK-NEXT: vspltish v3, 1
; CHECK-NEXT: xxlxor v4, v4, v4
; CHECK-NEXT: xxswapd v2, vs0
; CHECK-NEXT: vmsumshm v2, v2, v3, v4
; CHECK-NEXT: xxswapd v3, v2
; CHECK-NEXT: vadduwm v2, v2, v3
; CHECK-NEXT: xxspltw v3, v2, 2
; CHECK-NEXT: vadduwm v2, v2, v3
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: extsw r3, r3
; CHECK-NEXT: blr
;
; BE64-LABEL: tsh:
; BE64: # %bb.0: # %entry
; BE64-NEXT: lxvw4x v3, 0, r3
; BE64-NEXT: vspltish v2, 1
; BE64-NEXT: xxlxor v4, v4, v4
; BE64-NEXT: vmsumshm v2, v3, v2, v4
; BE64-NEXT: xxswapd v3, v2
; BE64-NEXT: vadduwm v2, v2, v3
; BE64-NEXT: xxspltw v3, v2, 1
; BE64-NEXT: vadduwm v2, v2, v3
; BE64-NEXT: xxsldwi vs0, v2, v2, 3
; BE64-NEXT: mffprwz r3, f0
; BE64-NEXT: extsw r3, r3
; BE64-NEXT: blr
;
; BE32-LABEL: tsh:
; BE32: # %bb.0: # %entry
; BE32-NEXT: lxvw4x v3, 0, r3
; BE32-NEXT: vspltish v2, 1
; BE32-NEXT: xxlxor v4, v4, v4
; BE32-NEXT: addi r3, r1, -16
; BE32-NEXT: vmsumshm v2, v3, v2, v4
; BE32-NEXT: xxswapd v3, v2
; BE32-NEXT: vadduwm v2, v2, v3
; BE32-NEXT: xxspltw v3, v2, 1
; BE32-NEXT: vadduwm v2, v2, v3
; BE32-NEXT: stxvw4x v2, 0, r3
; BE32-NEXT: lwz r3, -16(r1)
; BE32-NEXT: blr
entry:
  %halfwords = load <8 x i16>, ptr %a, align 2
  %widened = sext <8 x i16> %halfwords to <8 x i32>
  %partial = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v4i32.v8i32(<4 x i32> zeroinitializer, <8 x i32> %widened)
  %sum = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %partial)
  ret i32 %sum
}
; tsh2: dot product of two vectors of eight signed halfwords.
; Loads <8 x i16> from %a and %b, sign-extends both to <8 x i32>, multiplies
; them lane-wise, folds the products into a zeroed <4 x i32> accumulator with
; llvm.vector.partial.reduce.add, then adds the four lanes together with
; llvm.vector.reduce.add.
; Expected lowering on all three targets: a single vmsumshm of the two loaded
; vectors with a zero accumulator, then two vadduwm steps for the final lane
; reduction.
; No return range is declared: one product alone can be -32768 * -32768 = 2^30,
; so the sum of eight products is not confined to any narrow interval.
define dso_local signext i32 @tsh2(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b) local_unnamed_addr {
; CHECK-LABEL: tsh2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvd2x vs0, 0, r3
; CHECK-NEXT: xxlxor v4, v4, v4
; CHECK-NEXT: xxswapd v2, vs0
; CHECK-NEXT: lxvd2x vs0, 0, r4
; CHECK-NEXT: xxswapd v3, vs0
; CHECK-NEXT: vmsumshm v2, v2, v3, v4
; CHECK-NEXT: xxswapd v3, v2
; CHECK-NEXT: vadduwm v2, v2, v3
; CHECK-NEXT: xxspltw v3, v2, 2
; CHECK-NEXT: vadduwm v2, v2, v3
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: extsw r3, r3
; CHECK-NEXT: blr
;
; BE64-LABEL: tsh2:
; BE64: # %bb.0: # %entry
; BE64-NEXT: lxvw4x v2, 0, r3
; BE64-NEXT: lxvw4x v3, 0, r4
; BE64-NEXT: xxlxor v4, v4, v4
; BE64-NEXT: vmsumshm v2, v2, v3, v4
; BE64-NEXT: xxswapd v3, v2
; BE64-NEXT: vadduwm v2, v2, v3
; BE64-NEXT: xxspltw v3, v2, 1
; BE64-NEXT: vadduwm v2, v2, v3
; BE64-NEXT: xxsldwi vs0, v2, v2, 3
; BE64-NEXT: mffprwz r3, f0
; BE64-NEXT: extsw r3, r3
; BE64-NEXT: blr
;
; BE32-LABEL: tsh2:
; BE32: # %bb.0: # %entry
; BE32-NEXT: lxvw4x v2, 0, r3
; BE32-NEXT: lxvw4x v3, 0, r4
; BE32-NEXT: xxlxor v4, v4, v4
; BE32-NEXT: addi r3, r1, -16
; BE32-NEXT: vmsumshm v2, v2, v3, v4
; BE32-NEXT: xxswapd v3, v2
; BE32-NEXT: vadduwm v2, v2, v3
; BE32-NEXT: xxspltw v3, v2, 1
; BE32-NEXT: vadduwm v2, v2, v3
; BE32-NEXT: stxvw4x v2, 0, r3
; BE32-NEXT: lwz r3, -16(r1)
; BE32-NEXT: blr
entry:
  %0 = load <8 x i16>, ptr %a, align 2
  %conv = sext <8 x i16> %0 to <8 x i32>
  %lb = load <8 x i16>, ptr %b, align 2
  %conv2 = sext <8 x i16> %lb to <8 x i32>
  %mul = mul <8 x i32> %conv, %conv2
  %pred = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v4i32.v8i32(<4 x i32> zeroinitializer, <8 x i32> %mul)
  %red = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %pred)
  ret i32 %red
}
; tub: sum of sixteen unsigned bytes.
; Loads <16 x i8> from %a, zero-extends it to <16 x i32>, folds it into a
; zeroed <4 x i32> accumulator with llvm.vector.partial.reduce.add, then adds
; the four lanes together with llvm.vector.reduce.add.
; Expected lowering on all three targets: vmsumubm of the loaded vector against
; a byte splat of 1 with a zero accumulator, then two vadduwm steps for the
; final lane reduction.
; The result lies in [0, 16 * 255 = 4080], inside the declared return range.
define dso_local signext range(i32 -262144, 262137) i32 @tub(ptr noundef readonly captures(none) %a) local_unnamed_addr {
; CHECK-LABEL: tub:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvd2x vs0, 0, r3
; CHECK-NEXT: vspltisb v3, 1
; CHECK-NEXT: xxlxor v4, v4, v4
; CHECK-NEXT: xxswapd v2, vs0
; CHECK-NEXT: vmsumubm v2, v2, v3, v4
; CHECK-NEXT: xxswapd v3, v2
; CHECK-NEXT: vadduwm v2, v2, v3
; CHECK-NEXT: xxspltw v3, v2, 2
; CHECK-NEXT: vadduwm v2, v2, v3
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: extsw r3, r3
; CHECK-NEXT: blr
;
; BE64-LABEL: tub:
; BE64: # %bb.0: # %entry
; BE64-NEXT: lxvw4x v3, 0, r3
; BE64-NEXT: vspltisb v2, 1
; BE64-NEXT: xxlxor v4, v4, v4
; BE64-NEXT: vmsumubm v2, v3, v2, v4
; BE64-NEXT: xxswapd v3, v2
; BE64-NEXT: vadduwm v2, v2, v3
; BE64-NEXT: xxspltw v3, v2, 1
; BE64-NEXT: vadduwm v2, v2, v3
; BE64-NEXT: xxsldwi vs0, v2, v2, 3
; BE64-NEXT: mffprwz r3, f0
; BE64-NEXT: extsw r3, r3
; BE64-NEXT: blr
;
; BE32-LABEL: tub:
; BE32: # %bb.0: # %entry
; BE32-NEXT: lxvw4x v3, 0, r3
; BE32-NEXT: vspltisb v2, 1
; BE32-NEXT: xxlxor v4, v4, v4
; BE32-NEXT: addi r3, r1, -16
; BE32-NEXT: vmsumubm v2, v3, v2, v4
; BE32-NEXT: xxswapd v3, v2
; BE32-NEXT: vadduwm v2, v2, v3
; BE32-NEXT: xxspltw v3, v2, 1
; BE32-NEXT: vadduwm v2, v2, v3
; BE32-NEXT: stxvw4x v2, 0, r3
; BE32-NEXT: lwz r3, -16(r1)
; BE32-NEXT: blr
entry:
  %bytes = load <16 x i8>, ptr %a, align 2
  %widened = zext <16 x i8> %bytes to <16 x i32>
  %partial = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v4i32.v16i32(<4 x i32> zeroinitializer, <16 x i32> %widened)
  %sum = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %partial)
  ret i32 %sum
}
; tub2: dot product of two vectors of sixteen unsigned bytes.
; Loads <16 x i8> from %a and %b, zero-extends both to <16 x i32>, multiplies
; them lane-wise, folds the products into a zeroed <4 x i32> accumulator with
; llvm.vector.partial.reduce.add, then adds the four lanes together with
; llvm.vector.reduce.add.
; Expected lowering on all three targets: a single vmsumubm of the two loaded
; vectors with a zero accumulator, then two vadduwm steps for the final lane
; reduction.
; No return range is declared: the sum can reach 16 * 255 * 255 = 1040400.
define dso_local signext i32 @tub2(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b) local_unnamed_addr {
; CHECK-LABEL: tub2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvd2x vs0, 0, r3
; CHECK-NEXT: xxlxor v4, v4, v4
; CHECK-NEXT: xxswapd v2, vs0
; CHECK-NEXT: lxvd2x vs0, 0, r4
; CHECK-NEXT: xxswapd v3, vs0
; CHECK-NEXT: vmsumubm v2, v2, v3, v4
; CHECK-NEXT: xxswapd v3, v2
; CHECK-NEXT: vadduwm v2, v2, v3
; CHECK-NEXT: xxspltw v3, v2, 2
; CHECK-NEXT: vadduwm v2, v2, v3
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: extsw r3, r3
; CHECK-NEXT: blr
;
; BE64-LABEL: tub2:
; BE64: # %bb.0: # %entry
; BE64-NEXT: lxvw4x v2, 0, r3
; BE64-NEXT: lxvw4x v3, 0, r4
; BE64-NEXT: xxlxor v4, v4, v4
; BE64-NEXT: vmsumubm v2, v2, v3, v4
; BE64-NEXT: xxswapd v3, v2
; BE64-NEXT: vadduwm v2, v2, v3
; BE64-NEXT: xxspltw v3, v2, 1
; BE64-NEXT: vadduwm v2, v2, v3
; BE64-NEXT: xxsldwi vs0, v2, v2, 3
; BE64-NEXT: mffprwz r3, f0
; BE64-NEXT: extsw r3, r3
; BE64-NEXT: blr
;
; BE32-LABEL: tub2:
; BE32: # %bb.0: # %entry
; BE32-NEXT: lxvw4x v2, 0, r3
; BE32-NEXT: lxvw4x v3, 0, r4
; BE32-NEXT: xxlxor v4, v4, v4
; BE32-NEXT: addi r3, r1, -16
; BE32-NEXT: vmsumubm v2, v2, v3, v4
; BE32-NEXT: xxswapd v3, v2
; BE32-NEXT: vadduwm v2, v2, v3
; BE32-NEXT: xxspltw v3, v2, 1
; BE32-NEXT: vadduwm v2, v2, v3
; BE32-NEXT: stxvw4x v2, 0, r3
; BE32-NEXT: lwz r3, -16(r1)
; BE32-NEXT: blr
entry:
  %0 = load <16 x i8>, ptr %a, align 2
  %conv = zext <16 x i8> %0 to <16 x i32>
  %lb = load <16 x i8>, ptr %b, align 2
  %conv2 = zext <16 x i8> %lb to <16 x i32>
  %mul = mul <16 x i32> %conv, %conv2
  %pred = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v4i32.v16i32(<4 x i32> zeroinitializer, <16 x i32> %mul)
  %red = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %pred)
  ret i32 %red
}
; tsb: sum of sixteen signed bytes.
; Loads <16 x i8> from %a, sign-extends it to <16 x i32>, folds it into a
; zeroed <4 x i32> accumulator with llvm.vector.partial.reduce.add, then adds
; the four lanes together with llvm.vector.reduce.add.
; Expected lowering on all three targets: vmsummbm with the loaded vector as
; the first multiplicand and a byte splat of 1 as the second, accumulating
; into zero, then two vadduwm steps for the final lane reduction.
; The result lies in [16 * -128 = -2048, 16 * 127 = 2032], inside the declared
; return range.
define dso_local signext range(i32 -262144, 262137) i32 @tsb(ptr noundef readonly captures(none) %a) local_unnamed_addr {
; CHECK-LABEL: tsb:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvd2x vs0, 0, r3
; CHECK-NEXT: vspltisb v3, 1
; CHECK-NEXT: xxlxor v4, v4, v4
; CHECK-NEXT: xxswapd v2, vs0
; CHECK-NEXT: vmsummbm v2, v2, v3, v4
; CHECK-NEXT: xxswapd v3, v2
; CHECK-NEXT: vadduwm v2, v2, v3
; CHECK-NEXT: xxspltw v3, v2, 2
; CHECK-NEXT: vadduwm v2, v2, v3
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: extsw r3, r3
; CHECK-NEXT: blr
;
; BE64-LABEL: tsb:
; BE64: # %bb.0: # %entry
; BE64-NEXT: lxvw4x v3, 0, r3
; BE64-NEXT: vspltisb v2, 1
; BE64-NEXT: xxlxor v4, v4, v4
; BE64-NEXT: vmsummbm v2, v3, v2, v4
; BE64-NEXT: xxswapd v3, v2
; BE64-NEXT: vadduwm v2, v2, v3
; BE64-NEXT: xxspltw v3, v2, 1
; BE64-NEXT: vadduwm v2, v2, v3
; BE64-NEXT: xxsldwi vs0, v2, v2, 3
; BE64-NEXT: mffprwz r3, f0
; BE64-NEXT: extsw r3, r3
; BE64-NEXT: blr
;
; BE32-LABEL: tsb:
; BE32: # %bb.0: # %entry
; BE32-NEXT: lxvw4x v3, 0, r3
; BE32-NEXT: vspltisb v2, 1
; BE32-NEXT: xxlxor v4, v4, v4
; BE32-NEXT: addi r3, r1, -16
; BE32-NEXT: vmsummbm v2, v3, v2, v4
; BE32-NEXT: xxswapd v3, v2
; BE32-NEXT: vadduwm v2, v2, v3
; BE32-NEXT: xxspltw v3, v2, 1
; BE32-NEXT: vadduwm v2, v2, v3
; BE32-NEXT: stxvw4x v2, 0, r3
; BE32-NEXT: lwz r3, -16(r1)
; BE32-NEXT: blr
entry:
  %bytes = load <16 x i8>, ptr %a, align 2
  %widened = sext <16 x i8> %bytes to <16 x i32>
  %partial = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v4i32.v16i32(<4 x i32> zeroinitializer, <16 x i32> %widened)
  %sum = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %partial)
  ret i32 %sum
}
; tsb2: mixed-sign dot product of sixteen signed bytes by sixteen unsigned bytes.
; Loads <16 x i8> from %a and %b, sign-extends %a's bytes and zero-extends
; %b's bytes to <16 x i32>, multiplies them lane-wise, folds the products into
; a zeroed <4 x i32> accumulator with llvm.vector.partial.reduce.add, then adds
; the four lanes together with llvm.vector.reduce.add.
; The sext/zext pairing is deliberate: it is the signed-by-unsigned form that
; vmsummbm computes, with %a's vector as the first multiplicand and %b's as the
; second. Expected lowering on all three targets is that single vmsummbm with a
; zero accumulator, then two vadduwm steps for the final lane reduction.
; No return range is declared: the sum spans 16 * -128 * 255 = -522240 up to
; 16 * 127 * 255 = 518160.
define dso_local signext i32 @tsb2(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b) local_unnamed_addr {
; CHECK-LABEL: tsb2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvd2x vs0, 0, r3
; CHECK-NEXT: xxlxor v4, v4, v4
; CHECK-NEXT: xxswapd v2, vs0
; CHECK-NEXT: lxvd2x vs0, 0, r4
; CHECK-NEXT: xxswapd v3, vs0
; CHECK-NEXT: vmsummbm v2, v2, v3, v4
; CHECK-NEXT: xxswapd v3, v2
; CHECK-NEXT: vadduwm v2, v2, v3
; CHECK-NEXT: xxspltw v3, v2, 2
; CHECK-NEXT: vadduwm v2, v2, v3
; CHECK-NEXT: xxswapd vs0, v2
; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: extsw r3, r3
; CHECK-NEXT: blr
;
; BE64-LABEL: tsb2:
; BE64: # %bb.0: # %entry
; BE64-NEXT: lxvw4x v2, 0, r3
; BE64-NEXT: lxvw4x v3, 0, r4
; BE64-NEXT: xxlxor v4, v4, v4
; BE64-NEXT: vmsummbm v2, v2, v3, v4
; BE64-NEXT: xxswapd v3, v2
; BE64-NEXT: vadduwm v2, v2, v3
; BE64-NEXT: xxspltw v3, v2, 1
; BE64-NEXT: vadduwm v2, v2, v3
; BE64-NEXT: xxsldwi vs0, v2, v2, 3
; BE64-NEXT: mffprwz r3, f0
; BE64-NEXT: extsw r3, r3
; BE64-NEXT: blr
;
; BE32-LABEL: tsb2:
; BE32: # %bb.0: # %entry
; BE32-NEXT: lxvw4x v2, 0, r3
; BE32-NEXT: lxvw4x v3, 0, r4
; BE32-NEXT: xxlxor v4, v4, v4
; BE32-NEXT: addi r3, r1, -16
; BE32-NEXT: vmsummbm v2, v2, v3, v4
; BE32-NEXT: xxswapd v3, v2
; BE32-NEXT: vadduwm v2, v2, v3
; BE32-NEXT: xxspltw v3, v2, 1
; BE32-NEXT: vadduwm v2, v2, v3
; BE32-NEXT: stxvw4x v2, 0, r3
; BE32-NEXT: lwz r3, -16(r1)
; BE32-NEXT: blr
entry:
  %0 = load <16 x i8>, ptr %a, align 2
  %conv = sext <16 x i8> %0 to <16 x i32>
  %lb = load <16 x i8>, ptr %b, align 2
  %conv2 = zext <16 x i8> %lb to <16 x i32>
  %mul = mul <16 x i32> %conv, %conv2
  %pred = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v4i32.v16i32(<4 x i32> zeroinitializer, <16 x i32> %mul)
  %red = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %pred)
  ret i32 %red
}