| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ |
| ; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names \ |
| ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s |
| ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \ |
| ; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names \ |
| ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck --check-prefix=BE64 %s |
| ; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix \ |
| ; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names \ |
| ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck --check-prefix=BE32 %s |
| |
; tuh: sum of unsigned halfwords.
; Loads <8 x i16> from %a, zero-extends every element to i32, accumulates the
; eight lanes into a <4 x i32> with llvm.vector.partial.reduce.add (all-zero
; accumulator), then collapses those four lanes to a scalar with
; llvm.vector.reduce.add.
; Expected code (all prefixes): one vmsumuhm whose other multiplicand comes
; from "vspltish ..., 1" and whose addend is zeroed by xxlxor, followed by
; xxswapd/vadduwm and xxspltw/vadduwm. CHECK and BE64 then move the result out
; with mffprwz + extsw; BE32 instead stores the vector at -16(r1) and reloads
; the word with lwz.
; NOTE(review): the return range(i32 -262144, 262137) is narrower than a sum of
; eight zero-extended i16 values (up to 8 * 65535). It equals the signed bound
; used by @tsh and looks copied from there -- confirm it is intended.
| ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) uwtable |
| define dso_local signext range(i32 -262144, 262137) i32 @tuh(ptr noundef readonly captures(none) %a) local_unnamed_addr { |
| ; CHECK-LABEL: tuh: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: lxvd2x vs0, 0, r3 |
| ; CHECK-NEXT: vspltish v3, 1 |
| ; CHECK-NEXT: xxlxor v4, v4, v4 |
| ; CHECK-NEXT: xxswapd v2, vs0 |
| ; CHECK-NEXT: vmsumuhm v2, v2, v3, v4 |
| ; CHECK-NEXT: xxswapd v3, v2 |
| ; CHECK-NEXT: vadduwm v2, v2, v3 |
| ; CHECK-NEXT: xxspltw v3, v2, 2 |
| ; CHECK-NEXT: vadduwm v2, v2, v3 |
| ; CHECK-NEXT: xxswapd vs0, v2 |
| ; CHECK-NEXT: mffprwz r3, f0 |
| ; CHECK-NEXT: extsw r3, r3 |
| ; CHECK-NEXT: blr |
| ; |
| ; BE64-LABEL: tuh: |
| ; BE64: # %bb.0: # %entry |
| ; BE64-NEXT: lxvw4x v3, 0, r3 |
| ; BE64-NEXT: vspltish v2, 1 |
| ; BE64-NEXT: xxlxor v4, v4, v4 |
| ; BE64-NEXT: vmsumuhm v2, v3, v2, v4 |
| ; BE64-NEXT: xxswapd v3, v2 |
| ; BE64-NEXT: vadduwm v2, v2, v3 |
| ; BE64-NEXT: xxspltw v3, v2, 1 |
| ; BE64-NEXT: vadduwm v2, v2, v3 |
| ; BE64-NEXT: xxsldwi vs0, v2, v2, 3 |
| ; BE64-NEXT: mffprwz r3, f0 |
| ; BE64-NEXT: extsw r3, r3 |
| ; BE64-NEXT: blr |
| ; |
| ; BE32-LABEL: tuh: |
| ; BE32: # %bb.0: # %entry |
| ; BE32-NEXT: lxvw4x v3, 0, r3 |
| ; BE32-NEXT: vspltish v2, 1 |
| ; BE32-NEXT: xxlxor v4, v4, v4 |
| ; BE32-NEXT: addi r3, r1, -16 |
| ; BE32-NEXT: vmsumuhm v2, v3, v2, v4 |
| ; BE32-NEXT: xxswapd v3, v2 |
| ; BE32-NEXT: vadduwm v2, v2, v3 |
| ; BE32-NEXT: xxspltw v3, v2, 1 |
| ; BE32-NEXT: vadduwm v2, v2, v3 |
| ; BE32-NEXT: stxvw4x v2, 0, r3 |
| ; BE32-NEXT: lwz r3, -16(r1) |
| ; BE32-NEXT: blr |
| entry: |
| %0 = load <8 x i16>, ptr %a, align 2 |
| %conv = zext <8 x i16> %0 to <8 x i32> |
| %pred = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v4i32.v8i32(<4 x i32> <i32 0, i32 0, i32 0, i32 0>, <8 x i32> %conv) |
| %red = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %pred) |
| ret i32 %red |
| } |
| |
; tuh2: dot product of unsigned halfwords.
; Loads <8 x i16> from %a and from %b, zero-extends both to <8 x i32>,
; multiplies them element-wise, accumulates the eight products into a
; <4 x i32> with llvm.vector.partial.reduce.add (all-zero accumulator), then
; collapses the four lanes to a scalar with llvm.vector.reduce.add.
; Expected code (all prefixes): both vectors are loaded (through r3 and r4) and
; fed directly to a single vmsumuhm with an xxlxor-zeroed addend -- no splat
; constant is materialised -- followed by xxswapd/vadduwm and xxspltw/vadduwm.
; CHECK and BE64 finish with mffprwz + extsw; BE32 stores the vector at
; -16(r1) and reloads the word with lwz.
; NOTE(review): the return range(i32 -262144, 262137) does not bound a sum of
; products of zero-extended i16 values; it looks copied from the plain signed
; sum -- confirm it is intended.
| ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) uwtable |
| define dso_local signext range(i32 -262144, 262137) i32 @tuh2(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b) local_unnamed_addr { |
| ; CHECK-LABEL: tuh2: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: lxvd2x vs0, 0, r3 |
| ; CHECK-NEXT: xxlxor v4, v4, v4 |
| ; CHECK-NEXT: xxswapd v2, vs0 |
| ; CHECK-NEXT: lxvd2x vs0, 0, r4 |
| ; CHECK-NEXT: xxswapd v3, vs0 |
| ; CHECK-NEXT: vmsumuhm v2, v2, v3, v4 |
| ; CHECK-NEXT: xxswapd v3, v2 |
| ; CHECK-NEXT: vadduwm v2, v2, v3 |
| ; CHECK-NEXT: xxspltw v3, v2, 2 |
| ; CHECK-NEXT: vadduwm v2, v2, v3 |
| ; CHECK-NEXT: xxswapd vs0, v2 |
| ; CHECK-NEXT: mffprwz r3, f0 |
| ; CHECK-NEXT: extsw r3, r3 |
| ; CHECK-NEXT: blr |
| ; |
| ; BE64-LABEL: tuh2: |
| ; BE64: # %bb.0: # %entry |
| ; BE64-NEXT: lxvw4x v2, 0, r3 |
| ; BE64-NEXT: lxvw4x v3, 0, r4 |
| ; BE64-NEXT: xxlxor v4, v4, v4 |
| ; BE64-NEXT: vmsumuhm v2, v2, v3, v4 |
| ; BE64-NEXT: xxswapd v3, v2 |
| ; BE64-NEXT: vadduwm v2, v2, v3 |
| ; BE64-NEXT: xxspltw v3, v2, 1 |
| ; BE64-NEXT: vadduwm v2, v2, v3 |
| ; BE64-NEXT: xxsldwi vs0, v2, v2, 3 |
| ; BE64-NEXT: mffprwz r3, f0 |
| ; BE64-NEXT: extsw r3, r3 |
| ; BE64-NEXT: blr |
| ; |
| ; BE32-LABEL: tuh2: |
| ; BE32: # %bb.0: # %entry |
| ; BE32-NEXT: lxvw4x v2, 0, r3 |
| ; BE32-NEXT: lxvw4x v3, 0, r4 |
| ; BE32-NEXT: xxlxor v4, v4, v4 |
| ; BE32-NEXT: addi r3, r1, -16 |
| ; BE32-NEXT: vmsumuhm v2, v2, v3, v4 |
| ; BE32-NEXT: xxswapd v3, v2 |
| ; BE32-NEXT: vadduwm v2, v2, v3 |
| ; BE32-NEXT: xxspltw v3, v2, 1 |
| ; BE32-NEXT: vadduwm v2, v2, v3 |
| ; BE32-NEXT: stxvw4x v2, 0, r3 |
| ; BE32-NEXT: lwz r3, -16(r1) |
| ; BE32-NEXT: blr |
| entry: |
| %0 = load <8 x i16>, ptr %a, align 2 |
| %conv = zext <8 x i16> %0 to <8 x i32> |
| %lb = load <8 x i16>, ptr %b, align 2 |
| %conv2 = zext <8 x i16> %lb to <8 x i32> |
| %mul = mul <8 x i32> %conv, %conv2 |
| %pred = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v4i32.v8i32(<4 x i32> <i32 0, i32 0, i32 0, i32 0>, <8 x i32> %mul) |
| %red = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %pred) |
| ret i32 %red |
| } |
| |
; tsh: sum of signed halfwords.
; Loads <8 x i16> from %a, sign-extends every element to i32, accumulates the
; eight lanes into a <4 x i32> with llvm.vector.partial.reduce.add (all-zero
; accumulator), then collapses those four lanes to a scalar with
; llvm.vector.reduce.add.
; Expected code (all prefixes): one vmsumshm whose other multiplicand comes
; from "vspltish ..., 1" and whose addend is zeroed by xxlxor, followed by
; xxswapd/vadduwm and xxspltw/vadduwm. CHECK and BE64 then move the result out
; with mffprwz + extsw; BE32 instead stores the vector at -16(r1) and reloads
; the word with lwz.
; The return range(i32 -262144, 262137) corresponds to 8 * [-32768, 32767].
| ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) uwtable |
| define dso_local signext range(i32 -262144, 262137) i32 @tsh(ptr noundef readonly captures(none) %a) local_unnamed_addr { |
| ; CHECK-LABEL: tsh: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: lxvd2x vs0, 0, r3 |
| ; CHECK-NEXT: vspltish v3, 1 |
| ; CHECK-NEXT: xxlxor v4, v4, v4 |
| ; CHECK-NEXT: xxswapd v2, vs0 |
| ; CHECK-NEXT: vmsumshm v2, v2, v3, v4 |
| ; CHECK-NEXT: xxswapd v3, v2 |
| ; CHECK-NEXT: vadduwm v2, v2, v3 |
| ; CHECK-NEXT: xxspltw v3, v2, 2 |
| ; CHECK-NEXT: vadduwm v2, v2, v3 |
| ; CHECK-NEXT: xxswapd vs0, v2 |
| ; CHECK-NEXT: mffprwz r3, f0 |
| ; CHECK-NEXT: extsw r3, r3 |
| ; CHECK-NEXT: blr |
| ; |
| ; BE64-LABEL: tsh: |
| ; BE64: # %bb.0: # %entry |
| ; BE64-NEXT: lxvw4x v3, 0, r3 |
| ; BE64-NEXT: vspltish v2, 1 |
| ; BE64-NEXT: xxlxor v4, v4, v4 |
| ; BE64-NEXT: vmsumshm v2, v3, v2, v4 |
| ; BE64-NEXT: xxswapd v3, v2 |
| ; BE64-NEXT: vadduwm v2, v2, v3 |
| ; BE64-NEXT: xxspltw v3, v2, 1 |
| ; BE64-NEXT: vadduwm v2, v2, v3 |
| ; BE64-NEXT: xxsldwi vs0, v2, v2, 3 |
| ; BE64-NEXT: mffprwz r3, f0 |
| ; BE64-NEXT: extsw r3, r3 |
| ; BE64-NEXT: blr |
| ; |
| ; BE32-LABEL: tsh: |
| ; BE32: # %bb.0: # %entry |
| ; BE32-NEXT: lxvw4x v3, 0, r3 |
| ; BE32-NEXT: vspltish v2, 1 |
| ; BE32-NEXT: xxlxor v4, v4, v4 |
| ; BE32-NEXT: addi r3, r1, -16 |
| ; BE32-NEXT: vmsumshm v2, v3, v2, v4 |
| ; BE32-NEXT: xxswapd v3, v2 |
| ; BE32-NEXT: vadduwm v2, v2, v3 |
| ; BE32-NEXT: xxspltw v3, v2, 1 |
| ; BE32-NEXT: vadduwm v2, v2, v3 |
| ; BE32-NEXT: stxvw4x v2, 0, r3 |
| ; BE32-NEXT: lwz r3, -16(r1) |
| ; BE32-NEXT: blr |
| entry: |
| %0 = load <8 x i16>, ptr %a, align 2 |
| %conv = sext <8 x i16> %0 to <8 x i32> |
| %pred = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v4i32.v8i32(<4 x i32> <i32 0, i32 0, i32 0, i32 0>, <8 x i32> %conv) |
| %red = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %pred) |
| ret i32 %red |
| } |
| |
; tsh2: dot product of signed halfwords.
; Loads <8 x i16> from %a and from %b, sign-extends both to <8 x i32>,
; multiplies them element-wise, accumulates the eight products into a
; <4 x i32> with llvm.vector.partial.reduce.add (all-zero accumulator), then
; collapses the four lanes to a scalar with llvm.vector.reduce.add.
; Expected code (all prefixes): both vectors are loaded (through r3 and r4) and
; fed directly to a single vmsumshm with an xxlxor-zeroed addend -- no splat
; constant is materialised -- followed by xxswapd/vadduwm and xxspltw/vadduwm.
; CHECK and BE64 finish with mffprwz + extsw; BE32 stores the vector at
; -16(r1) and reloads the word with lwz.
; NOTE(review): the return range(i32 -262144, 262137) does not bound a sum of
; products of sign-extended i16 values; it looks copied from the plain signed
; sum -- confirm it is intended.
| ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) uwtable |
| define dso_local signext range(i32 -262144, 262137) i32 @tsh2(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b) local_unnamed_addr { |
| ; CHECK-LABEL: tsh2: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: lxvd2x vs0, 0, r3 |
| ; CHECK-NEXT: xxlxor v4, v4, v4 |
| ; CHECK-NEXT: xxswapd v2, vs0 |
| ; CHECK-NEXT: lxvd2x vs0, 0, r4 |
| ; CHECK-NEXT: xxswapd v3, vs0 |
| ; CHECK-NEXT: vmsumshm v2, v2, v3, v4 |
| ; CHECK-NEXT: xxswapd v3, v2 |
| ; CHECK-NEXT: vadduwm v2, v2, v3 |
| ; CHECK-NEXT: xxspltw v3, v2, 2 |
| ; CHECK-NEXT: vadduwm v2, v2, v3 |
| ; CHECK-NEXT: xxswapd vs0, v2 |
| ; CHECK-NEXT: mffprwz r3, f0 |
| ; CHECK-NEXT: extsw r3, r3 |
| ; CHECK-NEXT: blr |
| ; |
| ; BE64-LABEL: tsh2: |
| ; BE64: # %bb.0: # %entry |
| ; BE64-NEXT: lxvw4x v2, 0, r3 |
| ; BE64-NEXT: lxvw4x v3, 0, r4 |
| ; BE64-NEXT: xxlxor v4, v4, v4 |
| ; BE64-NEXT: vmsumshm v2, v2, v3, v4 |
| ; BE64-NEXT: xxswapd v3, v2 |
| ; BE64-NEXT: vadduwm v2, v2, v3 |
| ; BE64-NEXT: xxspltw v3, v2, 1 |
| ; BE64-NEXT: vadduwm v2, v2, v3 |
| ; BE64-NEXT: xxsldwi vs0, v2, v2, 3 |
| ; BE64-NEXT: mffprwz r3, f0 |
| ; BE64-NEXT: extsw r3, r3 |
| ; BE64-NEXT: blr |
| ; |
| ; BE32-LABEL: tsh2: |
| ; BE32: # %bb.0: # %entry |
| ; BE32-NEXT: lxvw4x v2, 0, r3 |
| ; BE32-NEXT: lxvw4x v3, 0, r4 |
| ; BE32-NEXT: xxlxor v4, v4, v4 |
| ; BE32-NEXT: addi r3, r1, -16 |
| ; BE32-NEXT: vmsumshm v2, v2, v3, v4 |
| ; BE32-NEXT: xxswapd v3, v2 |
| ; BE32-NEXT: vadduwm v2, v2, v3 |
| ; BE32-NEXT: xxspltw v3, v2, 1 |
| ; BE32-NEXT: vadduwm v2, v2, v3 |
| ; BE32-NEXT: stxvw4x v2, 0, r3 |
| ; BE32-NEXT: lwz r3, -16(r1) |
| ; BE32-NEXT: blr |
| entry: |
| %0 = load <8 x i16>, ptr %a, align 2 |
| %conv = sext <8 x i16> %0 to <8 x i32> |
| %lb = load <8 x i16>, ptr %b, align 2 |
| %conv2 = sext <8 x i16> %lb to <8 x i32> |
| %mul = mul <8 x i32> %conv, %conv2 |
| %pred = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v4i32.v8i32(<4 x i32> <i32 0, i32 0, i32 0, i32 0>, <8 x i32> %mul) |
| %red = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %pred) |
| ret i32 %red |
| } |
| |
; tub: sum of unsigned bytes.
; Loads <16 x i8> from %a, zero-extends every element to i32, accumulates the
; sixteen lanes into a <4 x i32> with llvm.vector.partial.reduce.add (all-zero
; accumulator), then collapses those four lanes to a scalar with
; llvm.vector.reduce.add.
; Expected code (all prefixes): one vmsumubm whose other multiplicand comes
; from "vspltisb ..., 1" and whose addend is zeroed by xxlxor, followed by
; xxswapd/vadduwm and xxspltw/vadduwm. CHECK and BE64 then move the result out
; with mffprwz + extsw; BE32 instead stores the vector at -16(r1) and reloads
; the word with lwz.
| ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) uwtable |
| define dso_local signext range(i32 -262144, 262137) i32 @tub(ptr noundef readonly captures(none) %a) local_unnamed_addr { |
| ; CHECK-LABEL: tub: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: lxvd2x vs0, 0, r3 |
| ; CHECK-NEXT: vspltisb v3, 1 |
| ; CHECK-NEXT: xxlxor v4, v4, v4 |
| ; CHECK-NEXT: xxswapd v2, vs0 |
| ; CHECK-NEXT: vmsumubm v2, v2, v3, v4 |
| ; CHECK-NEXT: xxswapd v3, v2 |
| ; CHECK-NEXT: vadduwm v2, v2, v3 |
| ; CHECK-NEXT: xxspltw v3, v2, 2 |
| ; CHECK-NEXT: vadduwm v2, v2, v3 |
| ; CHECK-NEXT: xxswapd vs0, v2 |
| ; CHECK-NEXT: mffprwz r3, f0 |
| ; CHECK-NEXT: extsw r3, r3 |
| ; CHECK-NEXT: blr |
| ; |
| ; BE64-LABEL: tub: |
| ; BE64: # %bb.0: # %entry |
| ; BE64-NEXT: lxvw4x v3, 0, r3 |
| ; BE64-NEXT: vspltisb v2, 1 |
| ; BE64-NEXT: xxlxor v4, v4, v4 |
| ; BE64-NEXT: vmsumubm v2, v3, v2, v4 |
| ; BE64-NEXT: xxswapd v3, v2 |
| ; BE64-NEXT: vadduwm v2, v2, v3 |
| ; BE64-NEXT: xxspltw v3, v2, 1 |
| ; BE64-NEXT: vadduwm v2, v2, v3 |
| ; BE64-NEXT: xxsldwi vs0, v2, v2, 3 |
| ; BE64-NEXT: mffprwz r3, f0 |
| ; BE64-NEXT: extsw r3, r3 |
| ; BE64-NEXT: blr |
| ; |
| ; BE32-LABEL: tub: |
| ; BE32: # %bb.0: # %entry |
| ; BE32-NEXT: lxvw4x v3, 0, r3 |
| ; BE32-NEXT: vspltisb v2, 1 |
| ; BE32-NEXT: xxlxor v4, v4, v4 |
| ; BE32-NEXT: addi r3, r1, -16 |
| ; BE32-NEXT: vmsumubm v2, v3, v2, v4 |
| ; BE32-NEXT: xxswapd v3, v2 |
| ; BE32-NEXT: vadduwm v2, v2, v3 |
| ; BE32-NEXT: xxspltw v3, v2, 1 |
| ; BE32-NEXT: vadduwm v2, v2, v3 |
| ; BE32-NEXT: stxvw4x v2, 0, r3 |
| ; BE32-NEXT: lwz r3, -16(r1) |
| ; BE32-NEXT: blr |
| entry: |
| %0 = load <16 x i8>, ptr %a, align 2 |
| %conv = zext <16 x i8> %0 to <16 x i32> |
| %pred = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v4i32.v16i32(<4 x i32> <i32 0, i32 0, i32 0, i32 0>, <16 x i32> %conv) |
| %red = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %pred) |
| ret i32 %red |
| } |
| |
; tub2: dot product of unsigned bytes.
; Loads <16 x i8> from %a and from %b, zero-extends both to <16 x i32>,
; multiplies them element-wise, accumulates the sixteen products into a
; <4 x i32> with llvm.vector.partial.reduce.add (all-zero accumulator), then
; collapses the four lanes to a scalar with llvm.vector.reduce.add.
; Expected code (all prefixes): both vectors are loaded (through r3 and r4) and
; fed directly to a single vmsumubm with an xxlxor-zeroed addend -- no splat
; constant is materialised -- followed by xxswapd/vadduwm and xxspltw/vadduwm.
; CHECK and BE64 finish with mffprwz + extsw; BE32 stores the vector at
; -16(r1) and reloads the word with lwz.
; NOTE(review): the return range(i32 -262144, 262137) is narrower than a sum of
; sixteen products of zero-extended i8 values (up to 16 * 255 * 255); it looks
; copied from the signed-halfword sum -- confirm it is intended.
| ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) uwtable |
| define dso_local signext range(i32 -262144, 262137) i32 @tub2(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b) local_unnamed_addr { |
| ; CHECK-LABEL: tub2: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: lxvd2x vs0, 0, r3 |
| ; CHECK-NEXT: xxlxor v4, v4, v4 |
| ; CHECK-NEXT: xxswapd v2, vs0 |
| ; CHECK-NEXT: lxvd2x vs0, 0, r4 |
| ; CHECK-NEXT: xxswapd v3, vs0 |
| ; CHECK-NEXT: vmsumubm v2, v2, v3, v4 |
| ; CHECK-NEXT: xxswapd v3, v2 |
| ; CHECK-NEXT: vadduwm v2, v2, v3 |
| ; CHECK-NEXT: xxspltw v3, v2, 2 |
| ; CHECK-NEXT: vadduwm v2, v2, v3 |
| ; CHECK-NEXT: xxswapd vs0, v2 |
| ; CHECK-NEXT: mffprwz r3, f0 |
| ; CHECK-NEXT: extsw r3, r3 |
| ; CHECK-NEXT: blr |
| ; |
| ; BE64-LABEL: tub2: |
| ; BE64: # %bb.0: # %entry |
| ; BE64-NEXT: lxvw4x v2, 0, r3 |
| ; BE64-NEXT: lxvw4x v3, 0, r4 |
| ; BE64-NEXT: xxlxor v4, v4, v4 |
| ; BE64-NEXT: vmsumubm v2, v2, v3, v4 |
| ; BE64-NEXT: xxswapd v3, v2 |
| ; BE64-NEXT: vadduwm v2, v2, v3 |
| ; BE64-NEXT: xxspltw v3, v2, 1 |
| ; BE64-NEXT: vadduwm v2, v2, v3 |
| ; BE64-NEXT: xxsldwi vs0, v2, v2, 3 |
| ; BE64-NEXT: mffprwz r3, f0 |
| ; BE64-NEXT: extsw r3, r3 |
| ; BE64-NEXT: blr |
| ; |
| ; BE32-LABEL: tub2: |
| ; BE32: # %bb.0: # %entry |
| ; BE32-NEXT: lxvw4x v2, 0, r3 |
| ; BE32-NEXT: lxvw4x v3, 0, r4 |
| ; BE32-NEXT: xxlxor v4, v4, v4 |
| ; BE32-NEXT: addi r3, r1, -16 |
| ; BE32-NEXT: vmsumubm v2, v2, v3, v4 |
| ; BE32-NEXT: xxswapd v3, v2 |
| ; BE32-NEXT: vadduwm v2, v2, v3 |
| ; BE32-NEXT: xxspltw v3, v2, 1 |
| ; BE32-NEXT: vadduwm v2, v2, v3 |
| ; BE32-NEXT: stxvw4x v2, 0, r3 |
| ; BE32-NEXT: lwz r3, -16(r1) |
| ; BE32-NEXT: blr |
| entry: |
| %0 = load <16 x i8>, ptr %a, align 2 |
| %conv = zext <16 x i8> %0 to <16 x i32> |
| %lb = load <16 x i8>, ptr %b, align 2 |
| %conv2 = zext <16 x i8> %lb to <16 x i32> |
| %mul = mul <16 x i32> %conv, %conv2 |
| %pred = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v4i32.v16i32(<4 x i32> <i32 0, i32 0, i32 0, i32 0>, <16 x i32> %mul) |
| %red = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %pred) |
| ret i32 %red |
| } |
| |
; tsb: sum of signed bytes.
; Loads <16 x i8> from %a, sign-extends every element to i32, accumulates the
; sixteen lanes into a <4 x i32> with llvm.vector.partial.reduce.add (all-zero
; accumulator), then collapses those four lanes to a scalar with
; llvm.vector.reduce.add.
; Expected code (all prefixes): one vmsummbm with the loaded vector as its
; first multiplicand and a "vspltisb ..., 1" register as its second, addend
; zeroed by xxlxor, followed by xxswapd/vadduwm and xxspltw/vadduwm. CHECK and
; BE64 then move the result out with mffprwz + extsw; BE32 instead stores the
; vector at -16(r1) and reloads the word with lwz.
| ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) uwtable |
| define dso_local signext range(i32 -262144, 262137) i32 @tsb(ptr noundef readonly captures(none) %a) local_unnamed_addr { |
| ; CHECK-LABEL: tsb: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: lxvd2x vs0, 0, r3 |
| ; CHECK-NEXT: vspltisb v3, 1 |
| ; CHECK-NEXT: xxlxor v4, v4, v4 |
| ; CHECK-NEXT: xxswapd v2, vs0 |
| ; CHECK-NEXT: vmsummbm v2, v2, v3, v4 |
| ; CHECK-NEXT: xxswapd v3, v2 |
| ; CHECK-NEXT: vadduwm v2, v2, v3 |
| ; CHECK-NEXT: xxspltw v3, v2, 2 |
| ; CHECK-NEXT: vadduwm v2, v2, v3 |
| ; CHECK-NEXT: xxswapd vs0, v2 |
| ; CHECK-NEXT: mffprwz r3, f0 |
| ; CHECK-NEXT: extsw r3, r3 |
| ; CHECK-NEXT: blr |
| ; |
| ; BE64-LABEL: tsb: |
| ; BE64: # %bb.0: # %entry |
| ; BE64-NEXT: lxvw4x v3, 0, r3 |
| ; BE64-NEXT: vspltisb v2, 1 |
| ; BE64-NEXT: xxlxor v4, v4, v4 |
| ; BE64-NEXT: vmsummbm v2, v3, v2, v4 |
| ; BE64-NEXT: xxswapd v3, v2 |
| ; BE64-NEXT: vadduwm v2, v2, v3 |
| ; BE64-NEXT: xxspltw v3, v2, 1 |
| ; BE64-NEXT: vadduwm v2, v2, v3 |
| ; BE64-NEXT: xxsldwi vs0, v2, v2, 3 |
| ; BE64-NEXT: mffprwz r3, f0 |
| ; BE64-NEXT: extsw r3, r3 |
| ; BE64-NEXT: blr |
| ; |
| ; BE32-LABEL: tsb: |
| ; BE32: # %bb.0: # %entry |
| ; BE32-NEXT: lxvw4x v3, 0, r3 |
| ; BE32-NEXT: vspltisb v2, 1 |
| ; BE32-NEXT: xxlxor v4, v4, v4 |
| ; BE32-NEXT: addi r3, r1, -16 |
| ; BE32-NEXT: vmsummbm v2, v3, v2, v4 |
| ; BE32-NEXT: xxswapd v3, v2 |
| ; BE32-NEXT: vadduwm v2, v2, v3 |
| ; BE32-NEXT: xxspltw v3, v2, 1 |
| ; BE32-NEXT: vadduwm v2, v2, v3 |
| ; BE32-NEXT: stxvw4x v2, 0, r3 |
| ; BE32-NEXT: lwz r3, -16(r1) |
| ; BE32-NEXT: blr |
| entry: |
| %0 = load <16 x i8>, ptr %a, align 2 |
| %conv = sext <16 x i8> %0 to <16 x i32> |
| %pred = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v4i32.v16i32(<4 x i32> <i32 0, i32 0, i32 0, i32 0>, <16 x i32> %conv) |
| %red = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %pred) |
| ret i32 %red |
| } |
| |
; tsb2: dot product of signed bytes (%a) with unsigned bytes (%b).
; Loads <16 x i8> from %a and from %b, sign-extends %a's elements and
; zero-extends %b's elements to <16 x i32>, multiplies them element-wise,
; accumulates the sixteen products into a <4 x i32> with
; llvm.vector.partial.reduce.add (all-zero accumulator), then collapses the
; four lanes to a scalar with llvm.vector.reduce.add.
; The sext/zext asymmetry is presumably deliberate, to exercise the
; mixed-signedness vmsummbm form -- confirm against the lowering being tested.
; Expected code (all prefixes): the vector loaded through r3 is the first
; multiplicand and the one loaded through r4 the second of a single vmsummbm
; with an xxlxor-zeroed addend, followed by xxswapd/vadduwm and
; xxspltw/vadduwm. CHECK and BE64 finish with mffprwz + extsw; BE32 stores the
; vector at -16(r1) and reloads the word with lwz.
; NOTE(review): the return range(i32 -262144, 262137) is narrower than a sum of
; sixteen signed-i8 * unsigned-i8 products (e.g. 16 * 127 * 255); it looks
; copied from the signed-halfword sum -- confirm it is intended.
| ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) uwtable |
| define dso_local signext range(i32 -262144, 262137) i32 @tsb2(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b) local_unnamed_addr { |
| ; CHECK-LABEL: tsb2: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: lxvd2x vs0, 0, r3 |
| ; CHECK-NEXT: xxlxor v4, v4, v4 |
| ; CHECK-NEXT: xxswapd v2, vs0 |
| ; CHECK-NEXT: lxvd2x vs0, 0, r4 |
| ; CHECK-NEXT: xxswapd v3, vs0 |
| ; CHECK-NEXT: vmsummbm v2, v2, v3, v4 |
| ; CHECK-NEXT: xxswapd v3, v2 |
| ; CHECK-NEXT: vadduwm v2, v2, v3 |
| ; CHECK-NEXT: xxspltw v3, v2, 2 |
| ; CHECK-NEXT: vadduwm v2, v2, v3 |
| ; CHECK-NEXT: xxswapd vs0, v2 |
| ; CHECK-NEXT: mffprwz r3, f0 |
| ; CHECK-NEXT: extsw r3, r3 |
| ; CHECK-NEXT: blr |
| ; |
| ; BE64-LABEL: tsb2: |
| ; BE64: # %bb.0: # %entry |
| ; BE64-NEXT: lxvw4x v2, 0, r3 |
| ; BE64-NEXT: lxvw4x v3, 0, r4 |
| ; BE64-NEXT: xxlxor v4, v4, v4 |
| ; BE64-NEXT: vmsummbm v2, v2, v3, v4 |
| ; BE64-NEXT: xxswapd v3, v2 |
| ; BE64-NEXT: vadduwm v2, v2, v3 |
| ; BE64-NEXT: xxspltw v3, v2, 1 |
| ; BE64-NEXT: vadduwm v2, v2, v3 |
| ; BE64-NEXT: xxsldwi vs0, v2, v2, 3 |
| ; BE64-NEXT: mffprwz r3, f0 |
| ; BE64-NEXT: extsw r3, r3 |
| ; BE64-NEXT: blr |
| ; |
| ; BE32-LABEL: tsb2: |
| ; BE32: # %bb.0: # %entry |
| ; BE32-NEXT: lxvw4x v2, 0, r3 |
| ; BE32-NEXT: lxvw4x v3, 0, r4 |
| ; BE32-NEXT: xxlxor v4, v4, v4 |
| ; BE32-NEXT: addi r3, r1, -16 |
| ; BE32-NEXT: vmsummbm v2, v2, v3, v4 |
| ; BE32-NEXT: xxswapd v3, v2 |
| ; BE32-NEXT: vadduwm v2, v2, v3 |
| ; BE32-NEXT: xxspltw v3, v2, 1 |
| ; BE32-NEXT: vadduwm v2, v2, v3 |
| ; BE32-NEXT: stxvw4x v2, 0, r3 |
| ; BE32-NEXT: lwz r3, -16(r1) |
| ; BE32-NEXT: blr |
| entry: |
| %0 = load <16 x i8>, ptr %a, align 2 |
| %conv = sext <16 x i8> %0 to <16 x i32> |
| %lb = load <16 x i8>, ptr %b, align 2 |
| %conv2 = zext <16 x i8> %lb to <16 x i32> |
| %mul = mul <16 x i32> %conv, %conv2 |
| %pred = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v4i32.v16i32(<4 x i32> <i32 0, i32 0, i32 0, i32 0>, <16 x i32> %mul) |
| %red = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %pred) |
| ret i32 %red |
| } |