| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-PWR9,CHECK-PWR9-LE |
| ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr9 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-PWR9,CHECK-PWR9-BE |
| ; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-PWR78,CHECK-PWR8 -implicit-check-not vabsdu |
| ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,CHECK-PWR78,CHECK-PWR7 -implicit-check-not vmaxsd |
| |
| ; abs(%a) on v4i32, written as vmaxsw(%a, 0 - %a). |
| ; pwr9 is expected to negate with vnegw; pwr7/pwr8 negate with xxlxor + vsubuwm. |
| define <4 x i32> @simple_absv_32(<4 x i32> %a) local_unnamed_addr { |
| ; CHECK-PWR9-LABEL: simple_absv_32: |
| ; CHECK-PWR9: # %bb.0: # %entry |
| ; CHECK-PWR9-NEXT: vnegw v3, v2 |
| ; CHECK-PWR9-NEXT: vmaxsw v2, v2, v3 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR78-LABEL: simple_absv_32: |
| ; CHECK-PWR78: # %bb.0: # %entry |
| ; CHECK-PWR78-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-PWR78-NEXT: vsubuwm v3, v3, v2 |
| ; CHECK-PWR78-NEXT: vmaxsw v2, v2, v3 |
| ; CHECK-PWR78-NEXT: blr |
| entry: |
| %sub.i = sub <4 x i32> zeroinitializer, %a |
| %0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %a, <4 x i32> %sub.i) |
| ret <4 x i32> %0 |
| } |
| |
| ; Same computation as simple_absv_32, but the negated value is the FIRST |
| ; operand of the vmaxsw intrinsic. The pwr7/pwr8 output keeps that swapped |
| ; operand order in vmaxsw; the pwr9 output uses the non-swapped order. |
| define <4 x i32> @simple_absv_32_swap(<4 x i32> %a) local_unnamed_addr { |
| ; CHECK-PWR9-LABEL: simple_absv_32_swap: |
| ; CHECK-PWR9: # %bb.0: # %entry |
| ; CHECK-PWR9-NEXT: vnegw v3, v2 |
| ; CHECK-PWR9-NEXT: vmaxsw v2, v2, v3 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR78-LABEL: simple_absv_32_swap: |
| ; CHECK-PWR78: # %bb.0: # %entry |
| ; CHECK-PWR78-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-PWR78-NEXT: vsubuwm v3, v3, v2 |
| ; CHECK-PWR78-NEXT: vmaxsw v2, v3, v2 |
| ; CHECK-PWR78-NEXT: blr |
| entry: |
| %sub.i = sub <4 x i32> zeroinitializer, %a |
| %0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %sub.i, <4 x i32> %a) |
| ret <4 x i32> %0 |
| } |
| |
| ; abs(%a) on v8i16, written as vmaxsh(%a, 0 - %a). |
| ; All four RUN lines expect the same sequence: xxlxor + vsubuhm + vmaxsh. |
| define <8 x i16> @simple_absv_16(<8 x i16> %a) local_unnamed_addr { |
| ; CHECK-LABEL: simple_absv_16: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-NEXT: vsubuhm v3, v3, v2 |
| ; CHECK-NEXT: vmaxsh v2, v2, v3 |
| ; CHECK-NEXT: blr |
| entry: |
| %sub.i = sub <8 x i16> zeroinitializer, %a |
| %0 = tail call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %a, <8 x i16> %sub.i) |
| ret <8 x i16> %0 |
| } |
| |
| ; abs(%a) on v16i8, written as vmaxsb(%a, 0 - %a). |
| ; All four RUN lines expect the same sequence: xxlxor + vsububm + vmaxsb. |
| define <16 x i8> @simple_absv_8(<16 x i8> %a) local_unnamed_addr { |
| ; CHECK-LABEL: simple_absv_8: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-NEXT: vsububm v3, v3, v2 |
| ; CHECK-NEXT: vmaxsb v2, v2, v3 |
| ; CHECK-NEXT: blr |
| entry: |
| %sub.i = sub <16 x i8> zeroinitializer, %a |
| %0 = tail call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %a, <16 x i8> %sub.i) |
| ret <16 x i8> %0 |
| } |
| |
| ; v2i64 vmax isn't available on pwr7 |
| ; abs(%a - %b) on v2i64, written as icmp sgt -1 + select between the |
| ; difference and its negation. |
| ; pwr9: vsubudm + vnegd + vmaxsd. pwr8: vsubudm, xxlxor + vsubudm, vmaxsd. |
| ; pwr7: the expected sequence stores both vectors to the stack and computes |
| ; each element with scalar sub / sradi / xor / sub before reloading the result. |
| define <2 x i64> @sub_absv_64(<2 x i64> %a, <2 x i64> %b) local_unnamed_addr { |
| ; CHECK-PWR9-LABEL: sub_absv_64: |
| ; CHECK-PWR9: # %bb.0: # %entry |
| ; CHECK-PWR9-NEXT: vsubudm v2, v2, v3 |
| ; CHECK-PWR9-NEXT: vnegd v3, v2 |
| ; CHECK-PWR9-NEXT: vmaxsd v2, v2, v3 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR8-LABEL: sub_absv_64: |
| ; CHECK-PWR8: # %bb.0: # %entry |
| ; CHECK-PWR8-NEXT: vsubudm v2, v2, v3 |
| ; CHECK-PWR8-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-PWR8-NEXT: vsubudm v3, v3, v2 |
| ; CHECK-PWR8-NEXT: vmaxsd v2, v2, v3 |
| ; CHECK-PWR8-NEXT: blr |
| ; |
| ; CHECK-PWR7-LABEL: sub_absv_64: |
| ; CHECK-PWR7: # %bb.0: # %entry |
| ; CHECK-PWR7-NEXT: addi r3, r1, -48 |
| ; CHECK-PWR7-NEXT: stxvd2x v2, 0, r3 |
| ; CHECK-PWR7-NEXT: addi r3, r1, -32 |
| ; CHECK-PWR7-NEXT: stxvd2x v3, 0, r3 |
| ; CHECK-PWR7-NEXT: ld r4, -40(r1) |
| ; CHECK-PWR7-NEXT: ld r5, -24(r1) |
| ; CHECK-PWR7-NEXT: ld r3, -48(r1) |
| ; CHECK-PWR7-NEXT: sub r4, r4, r5 |
| ; CHECK-PWR7-NEXT: sradi r5, r4, 63 |
| ; CHECK-PWR7-NEXT: xor r4, r4, r5 |
| ; CHECK-PWR7-NEXT: sub r4, r4, r5 |
| ; CHECK-PWR7-NEXT: ld r5, -32(r1) |
| ; CHECK-PWR7-NEXT: std r4, -8(r1) |
| ; CHECK-PWR7-NEXT: sub r3, r3, r5 |
| ; CHECK-PWR7-NEXT: sradi r4, r3, 63 |
| ; CHECK-PWR7-NEXT: xor r3, r3, r4 |
| ; CHECK-PWR7-NEXT: sub r3, r3, r4 |
| ; CHECK-PWR7-NEXT: std r3, -16(r1) |
| ; CHECK-PWR7-NEXT: addi r3, r1, -16 |
| ; CHECK-PWR7-NEXT: lxvd2x v2, 0, r3 |
| ; CHECK-PWR7-NEXT: blr |
| entry: |
| %0 = sub nsw <2 x i64> %a, %b |
| %1 = icmp sgt <2 x i64> %0, <i64 -1, i64 -1> |
| %2 = sub <2 x i64> zeroinitializer, %0 |
| %3 = select <2 x i1> %1, <2 x i64> %0, <2 x i64> %2 |
| ret <2 x i64> %3 |
| } |
| |
| ; The select pattern can only be detected for v4i32. |
| ; abs(%a - %b) on v4i32 (sub nsw), written as icmp sgt -1 + select. |
| ; pwr9: both inputs go through xvnegsp and the result comes from vabsduw. |
| ; pwr7/pwr8: vsubuwm, then negate (xxlxor + vsubuwm) and vmaxsw. |
| define <4 x i32> @sub_absv_32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr { |
| ; CHECK-PWR9-LABEL: sub_absv_32: |
| ; CHECK-PWR9: # %bb.0: # %entry |
| ; CHECK-PWR9-NEXT: xvnegsp v3, v3 |
| ; CHECK-PWR9-NEXT: xvnegsp v2, v2 |
| ; CHECK-PWR9-NEXT: vabsduw v2, v2, v3 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR78-LABEL: sub_absv_32: |
| ; CHECK-PWR78: # %bb.0: # %entry |
| ; CHECK-PWR78-NEXT: vsubuwm v2, v2, v3 |
| ; CHECK-PWR78-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-PWR78-NEXT: vsubuwm v3, v3, v2 |
| ; CHECK-PWR78-NEXT: vmaxsw v2, v2, v3 |
| ; CHECK-PWR78-NEXT: blr |
| entry: |
| %0 = sub nsw <4 x i32> %a, %b |
| %1 = icmp sgt <4 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1> |
| %2 = sub <4 x i32> zeroinitializer, %0 |
| %3 = select <4 x i1> %1, <4 x i32> %0, <4 x i32> %2 |
| ret <4 x i32> %3 |
| } |
| |
| ; abs(%a - %b) on v8i16 (sub nsw), written as icmp sgt -1 + select. |
| ; All RUN lines expect vsubuhm, negate (xxlxor + vsubuhm), then vmaxsh. |
| define <8 x i16> @sub_absv_16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr { |
| ; CHECK-LABEL: sub_absv_16: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: vsubuhm v2, v2, v3 |
| ; CHECK-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-NEXT: vsubuhm v3, v3, v2 |
| ; CHECK-NEXT: vmaxsh v2, v2, v3 |
| ; CHECK-NEXT: blr |
| entry: |
| %0 = sub nsw <8 x i16> %a, %b |
| %1 = icmp sgt <8 x i16> %0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> |
| %2 = sub <8 x i16> zeroinitializer, %0 |
| %3 = select <8 x i1> %1, <8 x i16> %0, <8 x i16> %2 |
| ret <8 x i16> %3 |
| } |
| |
| ; abs(%a - %b) on v16i8 (sub nsw), written as icmp sgt -1 + select. |
| ; All RUN lines expect vsububm, negate (xxlxor + vsububm), then vmaxsb. |
| define <16 x i8> @sub_absv_8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr { |
| ; CHECK-LABEL: sub_absv_8: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: vsububm v2, v2, v3 |
| ; CHECK-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-NEXT: vsububm v3, v3, v2 |
| ; CHECK-NEXT: vmaxsb v2, v2, v3 |
| ; CHECK-NEXT: blr |
| entry: |
| %0 = sub nsw <16 x i8> %a, %b |
| %1 = icmp sgt <16 x i8> %0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> |
| %2 = sub <16 x i8> zeroinitializer, %0 |
| %3 = select <16 x i1> %1, <16 x i8> %0, <16 x i8> %2 |
| ret <16 x i8> %3 |
| } |
| |
| ; Signed absolute difference computed in a wider type: both v8i16 inputs are |
| ; sign-extended to i32, subtracted, passed through the select-based abs, and |
| ; truncated back to i16. |
| ; All RUN lines expect the narrow sequence vminsh / vmaxsh / vsubuhm. |
| define <8 x i16> @sub_absv_16_ext(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr { |
| ; CHECK-LABEL: sub_absv_16_ext: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: vminsh v4, v2, v3 |
| ; CHECK-NEXT: vmaxsh v2, v2, v3 |
| ; CHECK-NEXT: vsubuhm v2, v2, v4 |
| ; CHECK-NEXT: blr |
| entry: |
| %0 = sext <8 x i16> %a to <8 x i32> |
| %1 = sext <8 x i16> %b to <8 x i32> |
| %2 = sub nsw <8 x i32> %0, %1 |
| %3 = icmp sgt <8 x i32> %2, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> |
| %4 = sub nsw <8 x i32> zeroinitializer, %2 |
| %5 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> %4 |
| %6 = trunc <8 x i32> %5 to <8 x i16> |
| ret <8 x i16> %6 |
| } |
| |
| ; Unsigned absolute difference computed in a wider type: both v16i8 inputs are |
| ; zero-extended to i32, subtracted, passed to llvm.abs, and truncated to i8. |
| ; pwr9: a single vabsdub. pwr7/pwr8: vminub / vmaxub / vsububm. |
| define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr { |
| ; CHECK-PWR9-LABEL: sub_absv_8_ext: |
| ; CHECK-PWR9: # %bb.0: # %entry |
| ; CHECK-PWR9-NEXT: vabsdub v2, v2, v3 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR78-LABEL: sub_absv_8_ext: |
| ; CHECK-PWR78: # %bb.0: # %entry |
| ; CHECK-PWR78-NEXT: vminub v4, v2, v3 |
| ; CHECK-PWR78-NEXT: vmaxub v2, v2, v3 |
| ; CHECK-PWR78-NEXT: vsububm v2, v2, v4 |
| ; CHECK-PWR78-NEXT: blr |
| entry: |
| %0 = zext <16 x i8> %a to <16 x i32> |
| %1 = zext <16 x i8> %b to <16 x i32> |
| %2 = sub nsw <16 x i32> %0, %1 |
| %3 = tail call <16 x i32> @llvm.abs.v16i32(<16 x i32> %2, i1 true) |
| %4 = trunc <16 x i32> %3 to <16 x i8> |
| ret <16 x i8> %4 |
| } |
| |
| ; abs(%a - %b) on v4i32 (sub nsw), written with the vmaxsw intrinsic: |
| ; vmaxsw(%sub, 0 - %sub). |
| ; pwr9: xvnegsp on both inputs, then vabsduw. |
| ; pwr7/pwr8: vsubuwm, negate (xxlxor + vsubuwm), vmaxsw. |
| define <4 x i32> @sub_absv_vec_32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr { |
| ; CHECK-PWR9-LABEL: sub_absv_vec_32: |
| ; CHECK-PWR9: # %bb.0: # %entry |
| ; CHECK-PWR9-NEXT: xvnegsp v3, v3 |
| ; CHECK-PWR9-NEXT: xvnegsp v2, v2 |
| ; CHECK-PWR9-NEXT: vabsduw v2, v2, v3 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR78-LABEL: sub_absv_vec_32: |
| ; CHECK-PWR78: # %bb.0: # %entry |
| ; CHECK-PWR78-NEXT: vsubuwm v2, v2, v3 |
| ; CHECK-PWR78-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-PWR78-NEXT: vsubuwm v3, v3, v2 |
| ; CHECK-PWR78-NEXT: vmaxsw v2, v2, v3 |
| ; CHECK-PWR78-NEXT: blr |
| entry: |
| %sub = sub nsw <4 x i32> %a, %b |
| %sub.i = sub <4 x i32> zeroinitializer, %sub |
| %0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %sub, <4 x i32> %sub.i) |
| ret <4 x i32> %0 |
| } |
| |
| ; abs(%a - %b) on v8i16 (sub nsw), written as vmaxsh(%sub, 0 - %sub). |
| ; All RUN lines expect vsubuhm, negate (xxlxor + vsubuhm), vmaxsh. |
| define <8 x i16> @sub_absv_vec_16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr { |
| ; CHECK-LABEL: sub_absv_vec_16: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: vsubuhm v2, v2, v3 |
| ; CHECK-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-NEXT: vsubuhm v3, v3, v2 |
| ; CHECK-NEXT: vmaxsh v2, v2, v3 |
| ; CHECK-NEXT: blr |
| entry: |
| %sub = sub nsw <8 x i16> %a, %b |
| %sub.i = sub <8 x i16> zeroinitializer, %sub |
| %0 = tail call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %sub, <8 x i16> %sub.i) |
| ret <8 x i16> %0 |
| } |
| |
| ; abs(%a - %b) on v16i8 (sub nsw), written as vmaxsb(%sub, 0 - %sub). |
| ; All RUN lines expect vsububm, negate (xxlxor + vsububm), vmaxsb. |
| define <16 x i8> @sub_absv_vec_8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr { |
| ; CHECK-LABEL: sub_absv_vec_8: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: vsububm v2, v2, v3 |
| ; CHECK-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-NEXT: vsububm v3, v3, v2 |
| ; CHECK-NEXT: vmaxsb v2, v2, v3 |
| ; CHECK-NEXT: blr |
| entry: |
| %sub = sub nsw <16 x i8> %a, %b |
| %sub.i = sub <16 x i8> zeroinitializer, %sub |
| %0 = tail call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %sub, <16 x i8> %sub.i) |
| ret <16 x i8> %0 |
| } |
| |
| ; Absolute difference of two v4i16 inputs zero-extended to v4i32, using the |
| ; vmaxsw(x, 0 - x) form of abs. |
| ; pwr9: vabsduh on the narrow inputs, then a merge with zero (vmrglh on |
| ; little-endian, vmrghh on big-endian). |
| ; pwr8: inputs are widened with vmrglh against zero, then sub / negate / vmaxsw. |
| ; pwr7: inputs are widened with vperm using a mask loaded from the constant |
| ; pool, then sub / negate / vmaxsw. |
| define <4 x i32> @zext_sub_absd32(<4 x i16>, <4 x i16>) local_unnamed_addr { |
| ; CHECK-PWR9-LE-LABEL: zext_sub_absd32: |
| ; CHECK-PWR9-LE: # %bb.0: |
| ; CHECK-PWR9-LE-NEXT: vabsduh v2, v2, v3 |
| ; CHECK-PWR9-LE-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-PWR9-LE-NEXT: vmrglh v2, v3, v2 |
| ; CHECK-PWR9-LE-NEXT: blr |
| ; |
| ; CHECK-PWR9-BE-LABEL: zext_sub_absd32: |
| ; CHECK-PWR9-BE: # %bb.0: |
| ; CHECK-PWR9-BE-NEXT: vabsduh v2, v2, v3 |
| ; CHECK-PWR9-BE-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-PWR9-BE-NEXT: vmrghh v2, v3, v2 |
| ; CHECK-PWR9-BE-NEXT: blr |
| ; |
| ; CHECK-PWR8-LABEL: zext_sub_absd32: |
| ; CHECK-PWR8: # %bb.0: |
| ; CHECK-PWR8-NEXT: xxlxor v4, v4, v4 |
| ; CHECK-PWR8-NEXT: vmrglh v2, v4, v2 |
| ; CHECK-PWR8-NEXT: vmrglh v3, v4, v3 |
| ; CHECK-PWR8-NEXT: vsubuwm v2, v2, v3 |
| ; CHECK-PWR8-NEXT: vsubuwm v3, v4, v2 |
| ; CHECK-PWR8-NEXT: vmaxsw v2, v2, v3 |
| ; CHECK-PWR8-NEXT: blr |
| ; |
| ; CHECK-PWR7-LABEL: zext_sub_absd32: |
| ; CHECK-PWR7: # %bb.0: |
| ; CHECK-PWR7-NEXT: addis r3, r2, .LCPI13_0@toc@ha |
| ; CHECK-PWR7-NEXT: xxlxor v5, v5, v5 |
| ; CHECK-PWR7-NEXT: addi r3, r3, .LCPI13_0@toc@l |
| ; CHECK-PWR7-NEXT: lxvw4x v4, 0, r3 |
| ; CHECK-PWR7-NEXT: vperm v2, v5, v2, v4 |
| ; CHECK-PWR7-NEXT: vperm v3, v5, v3, v4 |
| ; CHECK-PWR7-NEXT: vsubuwm v2, v2, v3 |
| ; CHECK-PWR7-NEXT: vsubuwm v3, v5, v2 |
| ; CHECK-PWR7-NEXT: vmaxsw v2, v2, v3 |
| ; CHECK-PWR7-NEXT: blr |
| %3 = zext <4 x i16> %0 to <4 x i32> |
| %4 = zext <4 x i16> %1 to <4 x i32> |
| %5 = sub <4 x i32> %3, %4 |
| %6 = sub <4 x i32> zeroinitializer, %5 |
| %7 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %5, <4 x i32> %6) |
| ret <4 x i32> %7 |
| } |
| |
| ; Absolute difference of two v8i8 inputs zero-extended to v8i16, using the |
| ; vmaxsh(x, 0 - x) form of abs. |
| ; pwr9: vabsdub on the narrow inputs, then a merge with zero (vmrglb on |
| ; little-endian, vmrghb on big-endian). |
| ; pwr8: inputs are widened with vmrglb against zero, then sub / negate / vmaxsh. |
| ; pwr7: inputs are widened with vperm using a mask loaded from the constant |
| ; pool, then sub / negate / vmaxsh. |
| define <8 x i16> @zext_sub_absd16(<8 x i8>, <8 x i8>) local_unnamed_addr { |
| ; CHECK-PWR9-LE-LABEL: zext_sub_absd16: |
| ; CHECK-PWR9-LE: # %bb.0: |
| ; CHECK-PWR9-LE-NEXT: vabsdub v2, v2, v3 |
| ; CHECK-PWR9-LE-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-PWR9-LE-NEXT: vmrglb v2, v3, v2 |
| ; CHECK-PWR9-LE-NEXT: blr |
| ; |
| ; CHECK-PWR9-BE-LABEL: zext_sub_absd16: |
| ; CHECK-PWR9-BE: # %bb.0: |
| ; CHECK-PWR9-BE-NEXT: vabsdub v2, v2, v3 |
| ; CHECK-PWR9-BE-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-PWR9-BE-NEXT: vmrghb v2, v3, v2 |
| ; CHECK-PWR9-BE-NEXT: blr |
| ; |
| ; CHECK-PWR8-LABEL: zext_sub_absd16: |
| ; CHECK-PWR8: # %bb.0: |
| ; CHECK-PWR8-NEXT: xxlxor v4, v4, v4 |
| ; CHECK-PWR8-NEXT: vmrglb v2, v4, v2 |
| ; CHECK-PWR8-NEXT: vmrglb v3, v4, v3 |
| ; CHECK-PWR8-NEXT: vsubuhm v2, v2, v3 |
| ; CHECK-PWR8-NEXT: vsubuhm v3, v4, v2 |
| ; CHECK-PWR8-NEXT: vmaxsh v2, v2, v3 |
| ; CHECK-PWR8-NEXT: blr |
| ; |
| ; CHECK-PWR7-LABEL: zext_sub_absd16: |
| ; CHECK-PWR7: # %bb.0: |
| ; CHECK-PWR7-NEXT: addis r3, r2, .LCPI14_0@toc@ha |
| ; CHECK-PWR7-NEXT: xxlxor v5, v5, v5 |
| ; CHECK-PWR7-NEXT: addi r3, r3, .LCPI14_0@toc@l |
| ; CHECK-PWR7-NEXT: lxvw4x v4, 0, r3 |
| ; CHECK-PWR7-NEXT: vperm v2, v5, v2, v4 |
| ; CHECK-PWR7-NEXT: vperm v3, v5, v3, v4 |
| ; CHECK-PWR7-NEXT: vsubuhm v2, v2, v3 |
| ; CHECK-PWR7-NEXT: vsubuhm v3, v5, v2 |
| ; CHECK-PWR7-NEXT: vmaxsh v2, v2, v3 |
| ; CHECK-PWR7-NEXT: blr |
| %3 = zext <8 x i8> %0 to <8 x i16> |
| %4 = zext <8 x i8> %1 to <8 x i16> |
| %5 = sub <8 x i16> %3, %4 |
| %6 = sub <8 x i16> zeroinitializer, %5 |
| %7 = tail call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %5, <8 x i16> %6) |
| ret <8 x i16> %7 |
| } |
| |
| ; Absolute difference of two v16i4 inputs zero-extended to v16i8, using the |
| ; vmaxsb(x, 0 - x) form of abs. |
| ; Every target first masks both inputs with a splat of 15 (xxland). |
| ; pwr9: then a single vabsdub. pwr7/pwr8: then sub / negate / vmaxsb. |
| define <16 x i8> @zext_sub_absd8(<16 x i4>, <16 x i4>) local_unnamed_addr { |
| ; CHECK-PWR9-LABEL: zext_sub_absd8: |
| ; CHECK-PWR9: # %bb.0: |
| ; CHECK-PWR9-NEXT: xxspltib vs0, 15 |
| ; CHECK-PWR9-NEXT: xxland v3, v3, vs0 |
| ; CHECK-PWR9-NEXT: xxland v2, v2, vs0 |
| ; CHECK-PWR9-NEXT: vabsdub v2, v2, v3 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR78-LABEL: zext_sub_absd8: |
| ; CHECK-PWR78: # %bb.0: |
| ; CHECK-PWR78-NEXT: vspltisb v4, 15 |
| ; CHECK-PWR78-NEXT: xxland v2, v2, v4 |
| ; CHECK-PWR78-NEXT: xxland v3, v3, v4 |
| ; CHECK-PWR78-NEXT: vsububm v2, v2, v3 |
| ; CHECK-PWR78-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-PWR78-NEXT: vsububm v3, v3, v2 |
| ; CHECK-PWR78-NEXT: vmaxsb v2, v2, v3 |
| ; CHECK-PWR78-NEXT: blr |
| %3 = zext <16 x i4> %0 to <16 x i8> |
| %4 = zext <16 x i4> %1 to <16 x i8> |
| %5 = sub <16 x i8> %3, %4 |
| %6 = sub <16 x i8> zeroinitializer, %5 |
| %7 = tail call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %5, <16 x i8> %6) |
| ret <16 x i8> %7 |
| } |
| |
| ; Absolute difference of two v4i16 inputs sign-extended to v4i32, using the |
| ; vmaxsw(x, 0 - x) form of abs. |
| ; pwr9: vminsh / vmaxsh / vsubuhm on the narrow inputs, then a merge with zero |
| ; (vmrglh on little-endian, vmrghh on big-endian). |
| ; pwr7/pwr8: each input is sign-extended with a self-merge followed by |
| ; vslw + vsraw, then sub / negate / vmaxsw in the wide type. |
| define <4 x i32> @sext_sub_absd32(<4 x i16>, <4 x i16>) local_unnamed_addr { |
| ; CHECK-PWR9-LE-LABEL: sext_sub_absd32: |
| ; CHECK-PWR9-LE: # %bb.0: |
| ; CHECK-PWR9-LE-NEXT: vminsh v4, v2, v3 |
| ; CHECK-PWR9-LE-NEXT: vmaxsh v2, v2, v3 |
| ; CHECK-PWR9-LE-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-PWR9-LE-NEXT: vsubuhm v2, v2, v4 |
| ; CHECK-PWR9-LE-NEXT: vmrglh v2, v3, v2 |
| ; CHECK-PWR9-LE-NEXT: blr |
| ; |
| ; CHECK-PWR9-BE-LABEL: sext_sub_absd32: |
| ; CHECK-PWR9-BE: # %bb.0: |
| ; CHECK-PWR9-BE-NEXT: vminsh v4, v2, v3 |
| ; CHECK-PWR9-BE-NEXT: vmaxsh v2, v2, v3 |
| ; CHECK-PWR9-BE-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-PWR9-BE-NEXT: vsubuhm v2, v2, v4 |
| ; CHECK-PWR9-BE-NEXT: vmrghh v2, v3, v2 |
| ; CHECK-PWR9-BE-NEXT: blr |
| ; |
| ; CHECK-PWR8-LABEL: sext_sub_absd32: |
| ; CHECK-PWR8: # %bb.0: |
| ; CHECK-PWR8-NEXT: vspltisw v4, 8 |
| ; CHECK-PWR8-NEXT: vmrglh v2, v2, v2 |
| ; CHECK-PWR8-NEXT: vadduwm v4, v4, v4 |
| ; CHECK-PWR8-NEXT: vmrglh v3, v3, v3 |
| ; CHECK-PWR8-NEXT: vslw v2, v2, v4 |
| ; CHECK-PWR8-NEXT: vslw v3, v3, v4 |
| ; CHECK-PWR8-NEXT: vsraw v2, v2, v4 |
| ; CHECK-PWR8-NEXT: vsraw v3, v3, v4 |
| ; CHECK-PWR8-NEXT: vsubuwm v2, v2, v3 |
| ; CHECK-PWR8-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-PWR8-NEXT: vsubuwm v3, v3, v2 |
| ; CHECK-PWR8-NEXT: vmaxsw v2, v2, v3 |
| ; CHECK-PWR8-NEXT: blr |
| ; |
| ; CHECK-PWR7-LABEL: sext_sub_absd32: |
| ; CHECK-PWR7: # %bb.0: |
| ; CHECK-PWR7-NEXT: vspltisw v4, 8 |
| ; CHECK-PWR7-NEXT: vmrghh v2, v2, v2 |
| ; CHECK-PWR7-NEXT: vmrghh v3, v3, v3 |
| ; CHECK-PWR7-NEXT: vadduwm v4, v4, v4 |
| ; CHECK-PWR7-NEXT: vslw v2, v2, v4 |
| ; CHECK-PWR7-NEXT: vslw v3, v3, v4 |
| ; CHECK-PWR7-NEXT: vsraw v2, v2, v4 |
| ; CHECK-PWR7-NEXT: vsraw v3, v3, v4 |
| ; CHECK-PWR7-NEXT: vsubuwm v2, v2, v3 |
| ; CHECK-PWR7-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-PWR7-NEXT: vsubuwm v3, v3, v2 |
| ; CHECK-PWR7-NEXT: vmaxsw v2, v2, v3 |
| ; CHECK-PWR7-NEXT: blr |
| %3 = sext <4 x i16> %0 to <4 x i32> |
| %4 = sext <4 x i16> %1 to <4 x i32> |
| %5 = sub <4 x i32> %3, %4 |
| %6 = sub <4 x i32> zeroinitializer, %5 |
| %7 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %5, <4 x i32> %6) |
| ret <4 x i32> %7 |
| } |
| |
| ; Absolute difference of two v8i8 inputs sign-extended to v8i16, using the |
| ; vmaxsh(x, 0 - x) form of abs. |
| ; pwr9: vminsb / vmaxsb / vsububm on the narrow inputs, then a merge with zero |
| ; (vmrglb on little-endian, vmrghb on big-endian). |
| ; pwr7/pwr8: each input is sign-extended with a self-merge followed by |
| ; vslh + vsrah by 8, then sub / negate / vmaxsh in the wide type. |
| define <8 x i16> @sext_sub_absd16(<8 x i8>, <8 x i8>) local_unnamed_addr { |
| ; CHECK-PWR9-LE-LABEL: sext_sub_absd16: |
| ; CHECK-PWR9-LE: # %bb.0: |
| ; CHECK-PWR9-LE-NEXT: vminsb v4, v2, v3 |
| ; CHECK-PWR9-LE-NEXT: vmaxsb v2, v2, v3 |
| ; CHECK-PWR9-LE-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-PWR9-LE-NEXT: vsububm v2, v2, v4 |
| ; CHECK-PWR9-LE-NEXT: vmrglb v2, v3, v2 |
| ; CHECK-PWR9-LE-NEXT: blr |
| ; |
| ; CHECK-PWR9-BE-LABEL: sext_sub_absd16: |
| ; CHECK-PWR9-BE: # %bb.0: |
| ; CHECK-PWR9-BE-NEXT: vminsb v4, v2, v3 |
| ; CHECK-PWR9-BE-NEXT: vmaxsb v2, v2, v3 |
| ; CHECK-PWR9-BE-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-PWR9-BE-NEXT: vsububm v2, v2, v4 |
| ; CHECK-PWR9-BE-NEXT: vmrghb v2, v3, v2 |
| ; CHECK-PWR9-BE-NEXT: blr |
| ; |
| ; CHECK-PWR8-LABEL: sext_sub_absd16: |
| ; CHECK-PWR8: # %bb.0: |
| ; CHECK-PWR8-NEXT: vmrglb v2, v2, v2 |
| ; CHECK-PWR8-NEXT: vspltish v4, 8 |
| ; CHECK-PWR8-NEXT: vslh v2, v2, v4 |
| ; CHECK-PWR8-NEXT: vmrglb v3, v3, v3 |
| ; CHECK-PWR8-NEXT: vslh v3, v3, v4 |
| ; CHECK-PWR8-NEXT: vsrah v2, v2, v4 |
| ; CHECK-PWR8-NEXT: vsrah v3, v3, v4 |
| ; CHECK-PWR8-NEXT: vsubuhm v2, v2, v3 |
| ; CHECK-PWR8-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-PWR8-NEXT: vsubuhm v3, v3, v2 |
| ; CHECK-PWR8-NEXT: vmaxsh v2, v2, v3 |
| ; CHECK-PWR8-NEXT: blr |
| ; |
| ; CHECK-PWR7-LABEL: sext_sub_absd16: |
| ; CHECK-PWR7: # %bb.0: |
| ; CHECK-PWR7-NEXT: vspltish v4, 8 |
| ; CHECK-PWR7-NEXT: vmrghb v2, v2, v2 |
| ; CHECK-PWR7-NEXT: vmrghb v3, v3, v3 |
| ; CHECK-PWR7-NEXT: vslh v2, v2, v4 |
| ; CHECK-PWR7-NEXT: vslh v3, v3, v4 |
| ; CHECK-PWR7-NEXT: vsrah v2, v2, v4 |
| ; CHECK-PWR7-NEXT: vsrah v3, v3, v4 |
| ; CHECK-PWR7-NEXT: vsubuhm v2, v2, v3 |
| ; CHECK-PWR7-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-PWR7-NEXT: vsubuhm v3, v3, v2 |
| ; CHECK-PWR7-NEXT: vmaxsh v2, v2, v3 |
| ; CHECK-PWR7-NEXT: blr |
| %3 = sext <8 x i8> %0 to <8 x i16> |
| %4 = sext <8 x i8> %1 to <8 x i16> |
| %5 = sub <8 x i16> %3, %4 |
| %6 = sub <8 x i16> zeroinitializer, %5 |
| %7 = tail call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %5, <8 x i16> %6) |
| ret <8 x i16> %7 |
| } |
| |
| ; Absolute difference of two v16i4 inputs sign-extended to v16i8, using the |
| ; vmaxsb(x, 0 - x) form of abs. |
| ; Every target first sign-extends both inputs in place with vslb + vsrab by 4. |
| ; pwr9: then vminsb / vmaxsb / vsububm. pwr7/pwr8: then sub / negate / vmaxsb. |
| define <16 x i8> @sext_sub_absd8(<16 x i4>, <16 x i4>) local_unnamed_addr { |
| ; CHECK-PWR9-LABEL: sext_sub_absd8: |
| ; CHECK-PWR9: # %bb.0: |
| ; CHECK-PWR9-NEXT: xxspltib v4, 4 |
| ; CHECK-PWR9-NEXT: vslb v3, v3, v4 |
| ; CHECK-PWR9-NEXT: vslb v2, v2, v4 |
| ; CHECK-PWR9-NEXT: vsrab v3, v3, v4 |
| ; CHECK-PWR9-NEXT: vsrab v2, v2, v4 |
| ; CHECK-PWR9-NEXT: vminsb v4, v2, v3 |
| ; CHECK-PWR9-NEXT: vmaxsb v2, v2, v3 |
| ; CHECK-PWR9-NEXT: vsububm v2, v2, v4 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR78-LABEL: sext_sub_absd8: |
| ; CHECK-PWR78: # %bb.0: |
| ; CHECK-PWR78-NEXT: vspltisb v4, 4 |
| ; CHECK-PWR78-NEXT: vslb v2, v2, v4 |
| ; CHECK-PWR78-NEXT: vslb v3, v3, v4 |
| ; CHECK-PWR78-NEXT: vsrab v2, v2, v4 |
| ; CHECK-PWR78-NEXT: vsrab v3, v3, v4 |
| ; CHECK-PWR78-NEXT: vsububm v2, v2, v3 |
| ; CHECK-PWR78-NEXT: xxlxor v3, v3, v3 |
| ; CHECK-PWR78-NEXT: vsububm v3, v3, v2 |
| ; CHECK-PWR78-NEXT: vmaxsb v2, v2, v3 |
| ; CHECK-PWR78-NEXT: blr |
| %3 = sext <16 x i4> %0 to <16 x i8> |
| %4 = sext <16 x i4> %1 to <16 x i8> |
| %5 = sub <16 x i8> %3, %4 |
| %6 = sub <16 x i8> zeroinitializer, %5 |
| %7 = tail call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %5, <16 x i8> %6) |
| ret <16 x i8> %7 |
| } |
| |
| ; To verify vabsdu* exploitation for ucmp + sub + select sequence |
| |
| ; Unsigned absolute difference on v4i32: select(%0 ugt %1, %0 - %1, %1 - %0). |
| ; pwr9: a single vabsduw. pwr7/pwr8: vminuw / vmaxuw / vsubuwm. |
| define <4 x i32> @absd_int32_ugt(<4 x i32>, <4 x i32>) { |
| ; CHECK-PWR9-LABEL: absd_int32_ugt: |
| ; CHECK-PWR9: # %bb.0: |
| ; CHECK-PWR9-NEXT: vabsduw v2, v2, v3 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR78-LABEL: absd_int32_ugt: |
| ; CHECK-PWR78: # %bb.0: |
| ; CHECK-PWR78-NEXT: vminuw v4, v2, v3 |
| ; CHECK-PWR78-NEXT: vmaxuw v2, v2, v3 |
| ; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4 |
| ; CHECK-PWR78-NEXT: blr |
| %3 = icmp ugt <4 x i32> %0, %1 |
| %4 = sub <4 x i32> %0, %1 |
| %5 = sub <4 x i32> %1, %0 |
| %6 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %5 |
| ret <4 x i32> %6 |
| } |
| |
| ; Unsigned absolute difference on v4i32: select(%0 uge %1, %0 - %1, %1 - %0). |
| ; pwr9: a single vabsduw. pwr7/pwr8: vminuw / vmaxuw / vsubuwm. |
| define <4 x i32> @absd_int32_uge(<4 x i32>, <4 x i32>) { |
| ; CHECK-PWR9-LABEL: absd_int32_uge: |
| ; CHECK-PWR9: # %bb.0: |
| ; CHECK-PWR9-NEXT: vabsduw v2, v2, v3 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR78-LABEL: absd_int32_uge: |
| ; CHECK-PWR78: # %bb.0: |
| ; CHECK-PWR78-NEXT: vminuw v4, v2, v3 |
| ; CHECK-PWR78-NEXT: vmaxuw v2, v2, v3 |
| ; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4 |
| ; CHECK-PWR78-NEXT: blr |
| %3 = icmp uge <4 x i32> %0, %1 |
| %4 = sub <4 x i32> %0, %1 |
| %5 = sub <4 x i32> %1, %0 |
| %6 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %5 |
| ret <4 x i32> %6 |
| } |
| |
| ; Unsigned absolute difference on v4i32 with the inverted predicate: |
| ; select(%0 ult %1, %1 - %0, %0 - %1). |
| ; pwr9: a single vabsduw. pwr7/pwr8: vminuw / vmaxuw / vsubuwm. |
| define <4 x i32> @absd_int32_ult(<4 x i32>, <4 x i32>) { |
| ; CHECK-PWR9-LABEL: absd_int32_ult: |
| ; CHECK-PWR9: # %bb.0: |
| ; CHECK-PWR9-NEXT: vabsduw v2, v2, v3 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR78-LABEL: absd_int32_ult: |
| ; CHECK-PWR78: # %bb.0: |
| ; CHECK-PWR78-NEXT: vminuw v4, v2, v3 |
| ; CHECK-PWR78-NEXT: vmaxuw v2, v2, v3 |
| ; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4 |
| ; CHECK-PWR78-NEXT: blr |
| %3 = icmp ult <4 x i32> %0, %1 |
| %4 = sub <4 x i32> %0, %1 |
| %5 = sub <4 x i32> %1, %0 |
| %6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4 |
| ret <4 x i32> %6 |
| } |
| |
| ; Unsigned absolute difference on v4i32 with the inverted predicate: |
| ; select(%0 ule %1, %1 - %0, %0 - %1). |
| ; pwr9: a single vabsduw. pwr7/pwr8: vminuw / vmaxuw / vsubuwm. |
| define <4 x i32> @absd_int32_ule(<4 x i32>, <4 x i32>) { |
| ; CHECK-PWR9-LABEL: absd_int32_ule: |
| ; CHECK-PWR9: # %bb.0: |
| ; CHECK-PWR9-NEXT: vabsduw v2, v2, v3 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR78-LABEL: absd_int32_ule: |
| ; CHECK-PWR78: # %bb.0: |
| ; CHECK-PWR78-NEXT: vminuw v4, v2, v3 |
| ; CHECK-PWR78-NEXT: vmaxuw v2, v2, v3 |
| ; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4 |
| ; CHECK-PWR78-NEXT: blr |
| %3 = icmp ule <4 x i32> %0, %1 |
| %4 = sub <4 x i32> %0, %1 |
| %5 = sub <4 x i32> %1, %0 |
| %6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4 |
| ret <4 x i32> %6 |
| } |
| |
| ; Unsigned absolute difference on v8i16: select(%0 ugt %1, %0 - %1, %1 - %0). |
| ; pwr9: a single vabsduh. pwr7/pwr8: vminuh / vmaxuh / vsubuhm. |
| define <8 x i16> @absd_int16_ugt(<8 x i16>, <8 x i16>) { |
| ; CHECK-PWR9-LABEL: absd_int16_ugt: |
| ; CHECK-PWR9: # %bb.0: |
| ; CHECK-PWR9-NEXT: vabsduh v2, v2, v3 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR78-LABEL: absd_int16_ugt: |
| ; CHECK-PWR78: # %bb.0: |
| ; CHECK-PWR78-NEXT: vminuh v4, v2, v3 |
| ; CHECK-PWR78-NEXT: vmaxuh v2, v2, v3 |
| ; CHECK-PWR78-NEXT: vsubuhm v2, v2, v4 |
| ; CHECK-PWR78-NEXT: blr |
| %3 = icmp ugt <8 x i16> %0, %1 |
| %4 = sub <8 x i16> %0, %1 |
| %5 = sub <8 x i16> %1, %0 |
| %6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5 |
| ret <8 x i16> %6 |
| } |
| |
| ; Unsigned absolute difference on v8i16: select(%0 uge %1, %0 - %1, %1 - %0). |
| ; pwr9: a single vabsduh. pwr7/pwr8: vminuh / vmaxuh / vsubuhm. |
| define <8 x i16> @absd_int16_uge(<8 x i16>, <8 x i16>) { |
| ; CHECK-PWR9-LABEL: absd_int16_uge: |
| ; CHECK-PWR9: # %bb.0: |
| ; CHECK-PWR9-NEXT: vabsduh v2, v2, v3 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR78-LABEL: absd_int16_uge: |
| ; CHECK-PWR78: # %bb.0: |
| ; CHECK-PWR78-NEXT: vminuh v4, v2, v3 |
| ; CHECK-PWR78-NEXT: vmaxuh v2, v2, v3 |
| ; CHECK-PWR78-NEXT: vsubuhm v2, v2, v4 |
| ; CHECK-PWR78-NEXT: blr |
| %3 = icmp uge <8 x i16> %0, %1 |
| %4 = sub <8 x i16> %0, %1 |
| %5 = sub <8 x i16> %1, %0 |
| %6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5 |
| ret <8 x i16> %6 |
| } |
| |
| ; Unsigned absolute difference on v8i16 with the inverted predicate: |
| ; select(%0 ult %1, %1 - %0, %0 - %1). |
| ; pwr9: a single vabsduh. pwr7/pwr8: vminuh / vmaxuh / vsubuhm. |
| define <8 x i16> @absd_int16_ult(<8 x i16>, <8 x i16>) { |
| ; CHECK-PWR9-LABEL: absd_int16_ult: |
| ; CHECK-PWR9: # %bb.0: |
| ; CHECK-PWR9-NEXT: vabsduh v2, v2, v3 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR78-LABEL: absd_int16_ult: |
| ; CHECK-PWR78: # %bb.0: |
| ; CHECK-PWR78-NEXT: vminuh v4, v2, v3 |
| ; CHECK-PWR78-NEXT: vmaxuh v2, v2, v3 |
| ; CHECK-PWR78-NEXT: vsubuhm v2, v2, v4 |
| ; CHECK-PWR78-NEXT: blr |
| %3 = icmp ult <8 x i16> %0, %1 |
| %4 = sub <8 x i16> %0, %1 |
| %5 = sub <8 x i16> %1, %0 |
| %6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4 |
| ret <8 x i16> %6 |
| } |
| |
| ; Unsigned absolute difference on v8i16 with the inverted predicate: |
| ; select(%0 ule %1, %1 - %0, %0 - %1). |
| ; pwr9: a single vabsduh. pwr7/pwr8: vminuh / vmaxuh / vsubuhm. |
| define <8 x i16> @absd_int16_ule(<8 x i16>, <8 x i16>) { |
| ; CHECK-PWR9-LABEL: absd_int16_ule: |
| ; CHECK-PWR9: # %bb.0: |
| ; CHECK-PWR9-NEXT: vabsduh v2, v2, v3 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR78-LABEL: absd_int16_ule: |
| ; CHECK-PWR78: # %bb.0: |
| ; CHECK-PWR78-NEXT: vminuh v4, v2, v3 |
| ; CHECK-PWR78-NEXT: vmaxuh v2, v2, v3 |
| ; CHECK-PWR78-NEXT: vsubuhm v2, v2, v4 |
| ; CHECK-PWR78-NEXT: blr |
| %3 = icmp ule <8 x i16> %0, %1 |
| %4 = sub <8 x i16> %0, %1 |
| %5 = sub <8 x i16> %1, %0 |
| %6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4 |
| ret <8 x i16> %6 |
| } |
| |
| ; Unsigned absolute difference on v16i8: select(%0 ugt %1, %0 - %1, %1 - %0). |
| ; pwr9: a single vabsdub. pwr7/pwr8: vminub / vmaxub / vsububm. |
| define <16 x i8> @absd_int8_ugt(<16 x i8>, <16 x i8>) { |
| ; CHECK-PWR9-LABEL: absd_int8_ugt: |
| ; CHECK-PWR9: # %bb.0: |
| ; CHECK-PWR9-NEXT: vabsdub v2, v2, v3 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR78-LABEL: absd_int8_ugt: |
| ; CHECK-PWR78: # %bb.0: |
| ; CHECK-PWR78-NEXT: vminub v4, v2, v3 |
| ; CHECK-PWR78-NEXT: vmaxub v2, v2, v3 |
| ; CHECK-PWR78-NEXT: vsububm v2, v2, v4 |
| ; CHECK-PWR78-NEXT: blr |
| %3 = icmp ugt <16 x i8> %0, %1 |
| %4 = sub <16 x i8> %0, %1 |
| %5 = sub <16 x i8> %1, %0 |
| %6 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %5 |
| ret <16 x i8> %6 |
| } |
| |
| ; Unsigned absolute difference on v16i8: select(%0 uge %1, %0 - %1, %1 - %0). |
| ; pwr9: a single vabsdub. pwr7/pwr8: vminub / vmaxub / vsububm. |
| define <16 x i8> @absd_int8_uge(<16 x i8>, <16 x i8>) { |
| ; CHECK-PWR9-LABEL: absd_int8_uge: |
| ; CHECK-PWR9: # %bb.0: |
| ; CHECK-PWR9-NEXT: vabsdub v2, v2, v3 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR78-LABEL: absd_int8_uge: |
| ; CHECK-PWR78: # %bb.0: |
| ; CHECK-PWR78-NEXT: vminub v4, v2, v3 |
| ; CHECK-PWR78-NEXT: vmaxub v2, v2, v3 |
| ; CHECK-PWR78-NEXT: vsububm v2, v2, v4 |
| ; CHECK-PWR78-NEXT: blr |
| %3 = icmp uge <16 x i8> %0, %1 |
| %4 = sub <16 x i8> %0, %1 |
| %5 = sub <16 x i8> %1, %0 |
| %6 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %5 |
| ret <16 x i8> %6 |
| } |
| |
| ; Unsigned absolute difference on v16i8 with the inverted predicate: |
| ; select(%0 ult %1, %1 - %0, %0 - %1). |
| ; pwr9: a single vabsdub. pwr7/pwr8: vminub / vmaxub / vsububm. |
| define <16 x i8> @absd_int8_ult(<16 x i8>, <16 x i8>) { |
| ; CHECK-PWR9-LABEL: absd_int8_ult: |
| ; CHECK-PWR9: # %bb.0: |
| ; CHECK-PWR9-NEXT: vabsdub v2, v2, v3 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR78-LABEL: absd_int8_ult: |
| ; CHECK-PWR78: # %bb.0: |
| ; CHECK-PWR78-NEXT: vminub v4, v2, v3 |
| ; CHECK-PWR78-NEXT: vmaxub v2, v2, v3 |
| ; CHECK-PWR78-NEXT: vsububm v2, v2, v4 |
| ; CHECK-PWR78-NEXT: blr |
| %3 = icmp ult <16 x i8> %0, %1 |
| %4 = sub <16 x i8> %0, %1 |
| %5 = sub <16 x i8> %1, %0 |
| %6 = select <16 x i1> %3, <16 x i8> %5, <16 x i8> %4 |
| ret <16 x i8> %6 |
| } |
| |
| ; Unsigned absolute difference on v16i8 with the inverted predicate: |
| ; select(%0 ule %1, %1 - %0, %0 - %1). |
| ; pwr9: a single vabsdub. pwr7/pwr8: vminub / vmaxub / vsububm. |
| define <16 x i8> @absd_int8_ule(<16 x i8>, <16 x i8>) { |
| ; CHECK-PWR9-LABEL: absd_int8_ule: |
| ; CHECK-PWR9: # %bb.0: |
| ; CHECK-PWR9-NEXT: vabsdub v2, v2, v3 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR78-LABEL: absd_int8_ule: |
| ; CHECK-PWR78: # %bb.0: |
| ; CHECK-PWR78-NEXT: vminub v4, v2, v3 |
| ; CHECK-PWR78-NEXT: vmaxub v2, v2, v3 |
| ; CHECK-PWR78-NEXT: vsububm v2, v2, v4 |
| ; CHECK-PWR78-NEXT: blr |
| %3 = icmp ule <16 x i8> %0, %1 |
| %4 = sub <16 x i8> %0, %1 |
| %5 = sub <16 x i8> %1, %0 |
| %6 = select <16 x i1> %3, <16 x i8> %5, <16 x i8> %4 |
| ret <16 x i8> %6 |
| } |
| |
| ; Tests for ABDS icmp + sub + select sequence |
| |
| ; Signed absolute difference on v4i32: select(%0 sgt %1, %0 - %1, %1 - %0). |
| ; pwr9: xvnegsp on both inputs, then vabsduw. |
| ; pwr7/pwr8: vminsw / vmaxsw / vsubuwm. |
| define <4 x i32> @absd_int32_sgt(<4 x i32>, <4 x i32>) { |
| ; CHECK-PWR9-LABEL: absd_int32_sgt: |
| ; CHECK-PWR9: # %bb.0: |
| ; CHECK-PWR9-NEXT: xvnegsp v3, v3 |
| ; CHECK-PWR9-NEXT: xvnegsp v2, v2 |
| ; CHECK-PWR9-NEXT: vabsduw v2, v2, v3 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR78-LABEL: absd_int32_sgt: |
| ; CHECK-PWR78: # %bb.0: |
| ; CHECK-PWR78-NEXT: vminsw v4, v2, v3 |
| ; CHECK-PWR78-NEXT: vmaxsw v2, v2, v3 |
| ; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4 |
| ; CHECK-PWR78-NEXT: blr |
| %3 = icmp sgt <4 x i32> %0, %1 |
| %4 = sub <4 x i32> %0, %1 |
| %5 = sub <4 x i32> %1, %0 |
| %6 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %5 |
| ret <4 x i32> %6 |
| } |
| |
| ; Signed absolute difference on v4i32: select(%0 sge %1, %0 - %1, %1 - %0). |
| ; pwr9: xvnegsp on both inputs, then vabsduw. |
| ; pwr7/pwr8: vminsw / vmaxsw / vsubuwm. |
| define <4 x i32> @absd_int32_sge(<4 x i32>, <4 x i32>) { |
| ; CHECK-PWR9-LABEL: absd_int32_sge: |
| ; CHECK-PWR9: # %bb.0: |
| ; CHECK-PWR9-NEXT: xvnegsp v3, v3 |
| ; CHECK-PWR9-NEXT: xvnegsp v2, v2 |
| ; CHECK-PWR9-NEXT: vabsduw v2, v2, v3 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR78-LABEL: absd_int32_sge: |
| ; CHECK-PWR78: # %bb.0: |
| ; CHECK-PWR78-NEXT: vminsw v4, v2, v3 |
| ; CHECK-PWR78-NEXT: vmaxsw v2, v2, v3 |
| ; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4 |
| ; CHECK-PWR78-NEXT: blr |
| %3 = icmp sge <4 x i32> %0, %1 |
| %4 = sub <4 x i32> %0, %1 |
| %5 = sub <4 x i32> %1, %0 |
| %6 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %5 |
| ret <4 x i32> %6 |
| } |
| |
| ; Signed absolute difference on v4i32 with the inverted predicate: |
| ; select(%0 slt %1, %1 - %0, %0 - %1). |
| ; pwr9: xvnegsp on both inputs, then vabsduw. |
| ; pwr7/pwr8: vminsw / vmaxsw / vsubuwm. |
| define <4 x i32> @absd_int32_slt(<4 x i32>, <4 x i32>) { |
| ; CHECK-PWR9-LABEL: absd_int32_slt: |
| ; CHECK-PWR9: # %bb.0: |
| ; CHECK-PWR9-NEXT: xvnegsp v3, v3 |
| ; CHECK-PWR9-NEXT: xvnegsp v2, v2 |
| ; CHECK-PWR9-NEXT: vabsduw v2, v2, v3 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR78-LABEL: absd_int32_slt: |
| ; CHECK-PWR78: # %bb.0: |
| ; CHECK-PWR78-NEXT: vminsw v4, v2, v3 |
| ; CHECK-PWR78-NEXT: vmaxsw v2, v2, v3 |
| ; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4 |
| ; CHECK-PWR78-NEXT: blr |
| %3 = icmp slt <4 x i32> %0, %1 |
| %4 = sub <4 x i32> %0, %1 |
| %5 = sub <4 x i32> %1, %0 |
| %6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4 |
| ret <4 x i32> %6 |
| } |
| |
| ; Signed absolute difference on v4i32 with the inverted predicate: |
| ; select(%0 sle %1, %1 - %0, %0 - %1). |
| ; pwr9: xvnegsp on both inputs, then vabsduw. |
| ; pwr7/pwr8: vminsw / vmaxsw / vsubuwm. |
| define <4 x i32> @absd_int32_sle(<4 x i32>, <4 x i32>) { |
| ; CHECK-PWR9-LABEL: absd_int32_sle: |
| ; CHECK-PWR9: # %bb.0: |
| ; CHECK-PWR9-NEXT: xvnegsp v3, v3 |
| ; CHECK-PWR9-NEXT: xvnegsp v2, v2 |
| ; CHECK-PWR9-NEXT: vabsduw v2, v2, v3 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR78-LABEL: absd_int32_sle: |
| ; CHECK-PWR78: # %bb.0: |
| ; CHECK-PWR78-NEXT: vminsw v4, v2, v3 |
| ; CHECK-PWR78-NEXT: vmaxsw v2, v2, v3 |
| ; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4 |
| ; CHECK-PWR78-NEXT: blr |
| %3 = icmp sle <4 x i32> %0, %1 |
| %4 = sub <4 x i32> %0, %1 |
| %5 = sub <4 x i32> %1, %0 |
| %6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4 |
| ret <4 x i32> %6 |
| } |
| |
| ; Signed absolute difference on v8i16: select(%0 sgt %1, %0 - %1, %1 - %0). |
| ; All RUN lines expect vminsh / vmaxsh / vsubuhm. |
| define <8 x i16> @absd_int16_sgt(<8 x i16>, <8 x i16>) { |
| ; CHECK-LABEL: absd_int16_sgt: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vminsh v4, v2, v3 |
| ; CHECK-NEXT: vmaxsh v2, v2, v3 |
| ; CHECK-NEXT: vsubuhm v2, v2, v4 |
| ; CHECK-NEXT: blr |
| %3 = icmp sgt <8 x i16> %0, %1 |
| %4 = sub <8 x i16> %0, %1 |
| %5 = sub <8 x i16> %1, %0 |
| %6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5 |
| ret <8 x i16> %6 |
| } |
| |
| ; Signed absolute difference on v8i16: select(%0 sge %1, %0 - %1, %1 - %0). |
| ; All RUN lines expect vminsh / vmaxsh / vsubuhm. |
| define <8 x i16> @absd_int16_sge(<8 x i16>, <8 x i16>) { |
| ; CHECK-LABEL: absd_int16_sge: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vminsh v4, v2, v3 |
| ; CHECK-NEXT: vmaxsh v2, v2, v3 |
| ; CHECK-NEXT: vsubuhm v2, v2, v4 |
| ; CHECK-NEXT: blr |
| %3 = icmp sge <8 x i16> %0, %1 |
| %4 = sub <8 x i16> %0, %1 |
| %5 = sub <8 x i16> %1, %0 |
| %6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5 |
| ret <8 x i16> %6 |
| } |
| |
| ; Signed absolute difference on v8i16 with the inverted predicate: |
| ; select(%0 slt %1, %1 - %0, %0 - %1). |
| ; All RUN lines expect vminsh / vmaxsh / vsubuhm. |
| define <8 x i16> @absd_int16_slt(<8 x i16>, <8 x i16>) { |
| ; CHECK-LABEL: absd_int16_slt: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vminsh v4, v2, v3 |
| ; CHECK-NEXT: vmaxsh v2, v2, v3 |
| ; CHECK-NEXT: vsubuhm v2, v2, v4 |
| ; CHECK-NEXT: blr |
| %3 = icmp slt <8 x i16> %0, %1 |
| %4 = sub <8 x i16> %0, %1 |
| %5 = sub <8 x i16> %1, %0 |
| %6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4 |
| ret <8 x i16> %6 |
| } |
| |
| ; Signed absolute difference on v8i16 with the inverted predicate: |
| ; select(%0 sle %1, %1 - %0, %0 - %1). |
| ; All RUN lines expect vminsh / vmaxsh / vsubuhm. |
| define <8 x i16> @absd_int16_sle(<8 x i16>, <8 x i16>) { |
| ; CHECK-LABEL: absd_int16_sle: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vminsh v4, v2, v3 |
| ; CHECK-NEXT: vmaxsh v2, v2, v3 |
| ; CHECK-NEXT: vsubuhm v2, v2, v4 |
| ; CHECK-NEXT: blr |
| %3 = icmp sle <8 x i16> %0, %1 |
| %4 = sub <8 x i16> %0, %1 |
| %5 = sub <8 x i16> %1, %0 |
| %6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4 |
| ret <8 x i16> %6 |
| } |
| |
| ; Signed absolute difference of <16 x i8>, written as select(a > b, a - b, b - a). |
| ; All RUN configurations share the plain CHECK prefix here and are expected to |
| ; emit max(a, b) - min(a, b) using vminsb / vmaxsb / vsububm. |
| define <16 x i8> @absd_int8_sgt(<16 x i8>, <16 x i8>) { |
| ; CHECK-LABEL: absd_int8_sgt: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vminsb v4, v2, v3 |
| ; CHECK-NEXT: vmaxsb v2, v2, v3 |
| ; CHECK-NEXT: vsububm v2, v2, v4 |
| ; CHECK-NEXT: blr |
| %3 = icmp sgt <16 x i8> %0, %1 |
| %4 = sub <16 x i8> %0, %1 |
| %5 = sub <16 x i8> %1, %0 |
| %6 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %5 |
| ret <16 x i8> %6 |
| } |
| |
| ; Signed absolute difference of <16 x i8>, written as select(a >= b, a - b, b - a). |
| ; Same expected output as the strict-compare variant: vminsb / vmaxsb / vsububm |
| ; under the CHECK prefix shared by all RUN lines. |
| define <16 x i8> @absd_int8_sge(<16 x i8>, <16 x i8>) { |
| ; CHECK-LABEL: absd_int8_sge: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vminsb v4, v2, v3 |
| ; CHECK-NEXT: vmaxsb v2, v2, v3 |
| ; CHECK-NEXT: vsububm v2, v2, v4 |
| ; CHECK-NEXT: blr |
| %3 = icmp sge <16 x i8> %0, %1 |
| %4 = sub <16 x i8> %0, %1 |
| %5 = sub <16 x i8> %1, %0 |
| %6 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %5 |
| ret <16 x i8> %6 |
| } |
| |
| ; Signed absolute difference of <16 x i8>, written as select(a < b, b - a, a - b). |
| ; Note that the select arms are swapped relative to the sgt/sge forms so the |
| ; result is still non-negated; expected output is vminsb / vmaxsb / vsububm. |
| define <16 x i8> @absd_int8_slt(<16 x i8>, <16 x i8>) { |
| ; CHECK-LABEL: absd_int8_slt: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vminsb v4, v2, v3 |
| ; CHECK-NEXT: vmaxsb v2, v2, v3 |
| ; CHECK-NEXT: vsububm v2, v2, v4 |
| ; CHECK-NEXT: blr |
| %3 = icmp slt <16 x i8> %0, %1 |
| %4 = sub <16 x i8> %0, %1 |
| %5 = sub <16 x i8> %1, %0 |
| %6 = select <16 x i1> %3, <16 x i8> %5, <16 x i8> %4 |
| ret <16 x i8> %6 |
| } |
| |
| ; Signed absolute difference of <16 x i8>, written as select(a <= b, b - a, a - b). |
| ; Expected output for every RUN configuration (shared CHECK prefix): |
| ; vminsb / vmaxsb / vsububm. |
| define <16 x i8> @absd_int8_sle(<16 x i8>, <16 x i8>) { |
| ; CHECK-LABEL: absd_int8_sle: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vminsb v4, v2, v3 |
| ; CHECK-NEXT: vmaxsb v2, v2, v3 |
| ; CHECK-NEXT: vsububm v2, v2, v4 |
| ; CHECK-NEXT: blr |
| %3 = icmp sle <16 x i8> %0, %1 |
| %4 = sub <16 x i8> %0, %1 |
| %5 = sub <16 x i8> %1, %0 |
| %6 = select <16 x i1> %3, <16 x i8> %5, <16 x i8> %4 |
| ret <16 x i8> %6 |
| } |
| |
| ; The following are cases that we are unable to optimize; |
| ; check that the transformation does not go beyond its intended scope. |
| ; "Opposite" form of the unsigned pattern on <4 x i32>: select(a >u b, b - a, a - b). |
| ; The arms are swapped relative to a plain absolute difference, so the result |
| ; is the negated unsigned absolute difference. |
| ; Expected output: the pwr9 runs emit vabsduw followed by vnegw; the pwr7/pwr8 |
| ; runs keep the compare, both subtractions and a vector select (xxsel). |
| define <4 x i32> @absd_int32_ugt_opp(<4 x i32>, <4 x i32>) { |
| ; CHECK-PWR9-LABEL: absd_int32_ugt_opp: |
| ; CHECK-PWR9: # %bb.0: |
| ; CHECK-PWR9-NEXT: vabsduw v2, v2, v3 |
| ; CHECK-PWR9-NEXT: vnegw v2, v2 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR78-LABEL: absd_int32_ugt_opp: |
| ; CHECK-PWR78: # %bb.0: |
| ; CHECK-PWR78-NEXT: vcmpgtuw v4, v2, v3 |
| ; CHECK-PWR78-NEXT: vsubuwm v5, v2, v3 |
| ; CHECK-PWR78-NEXT: vsubuwm v2, v3, v2 |
| ; CHECK-PWR78-NEXT: xxsel v2, v5, v2, v4 |
| ; CHECK-PWR78-NEXT: blr |
| %3 = icmp ugt <4 x i32> %0, %1 |
| %4 = sub <4 x i32> %0, %1 |
| %5 = sub <4 x i32> %1, %0 |
| %6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4 |
| ret <4 x i32> %6 |
| } |
| |
| ; Unsigned absolute difference of <2 x i64>, written as select(a >u b, a - b, b - a). |
| ; Expected output: the pwr9 and pwr8 runs emit max(a, b) - min(a, b) using |
| ; vminud / vmaxud / vsubudm. The pwr7 run is expected to spill both operands to |
| ; stack slots and compute each element with scalar GPR instructions |
| ; (ld / sub / cmpld / iselgt) before reloading the result vector. |
| define <2 x i64> @absd_int64_ugt(<2 x i64>, <2 x i64>) { |
| ; CHECK-PWR9-LABEL: absd_int64_ugt: |
| ; CHECK-PWR9: # %bb.0: |
| ; CHECK-PWR9-NEXT: vminud v4, v2, v3 |
| ; CHECK-PWR9-NEXT: vmaxud v2, v2, v3 |
| ; CHECK-PWR9-NEXT: vsubudm v2, v2, v4 |
| ; CHECK-PWR9-NEXT: blr |
| ; |
| ; CHECK-PWR8-LABEL: absd_int64_ugt: |
| ; CHECK-PWR8: # %bb.0: |
| ; CHECK-PWR8-NEXT: vminud v4, v2, v3 |
| ; CHECK-PWR8-NEXT: vmaxud v2, v2, v3 |
| ; CHECK-PWR8-NEXT: vsubudm v2, v2, v4 |
| ; CHECK-PWR8-NEXT: blr |
| ; |
| ; CHECK-PWR7-LABEL: absd_int64_ugt: |
| ; CHECK-PWR7: # %bb.0: |
| ; CHECK-PWR7-NEXT: addi r3, r1, -96 |
| ; CHECK-PWR7-NEXT: stxvd2x v2, 0, r3 |
| ; CHECK-PWR7-NEXT: addi r3, r1, -80 |
| ; CHECK-PWR7-NEXT: stxvd2x v3, 0, r3 |
| ; CHECK-PWR7-NEXT: ld r3, -88(r1) |
| ; CHECK-PWR7-NEXT: ld r4, -72(r1) |
| ; CHECK-PWR7-NEXT: ld r6, -80(r1) |
| ; CHECK-PWR7-NEXT: sub r5, r3, r4 |
| ; CHECK-PWR7-NEXT: cmpld r3, r4 |
| ; CHECK-PWR7-NEXT: li r3, 0 |
| ; CHECK-PWR7-NEXT: li r4, -1 |
| ; CHECK-PWR7-NEXT: std r5, -56(r1) |
| ; CHECK-PWR7-NEXT: ld r5, -96(r1) |
| ; CHECK-PWR7-NEXT: sub r7, r5, r6 |
| ; CHECK-PWR7-NEXT: std r7, -64(r1) |
| ; CHECK-PWR7-NEXT: iselgt r7, r4, r3 |
| ; CHECK-PWR7-NEXT: cmpld r5, r6 |
| ; CHECK-PWR7-NEXT: std r7, -40(r1) |
| ; CHECK-PWR7-NEXT: iselgt r3, r4, r3 |
| ; CHECK-PWR7-NEXT: addi r4, r1, -64 |
| ; CHECK-PWR7-NEXT: std r3, -48(r1) |
| ; CHECK-PWR7-NEXT: lxvw4x vs0, 0, r4 |
| ; CHECK-PWR7-NEXT: addi r4, r1, -48 |
| ; CHECK-PWR7-NEXT: lxvw4x vs1, 0, r4 |
| ; CHECK-PWR7-NEXT: addi r4, r1, -32 |
| ; CHECK-PWR7-NEXT: xxlxor vs0, vs0, vs1 |
| ; CHECK-PWR7-NEXT: stxvw4x vs0, 0, r4 |
| ; CHECK-PWR7-NEXT: ld r4, -24(r1) |
| ; CHECK-PWR7-NEXT: sub r4, r7, r4 |
| ; CHECK-PWR7-NEXT: std r4, -8(r1) |
| ; CHECK-PWR7-NEXT: ld r4, -32(r1) |
| ; CHECK-PWR7-NEXT: sub r3, r3, r4 |
| ; CHECK-PWR7-NEXT: std r3, -16(r1) |
| ; CHECK-PWR7-NEXT: addi r3, r1, -16 |
| ; CHECK-PWR7-NEXT: lxvd2x v2, 0, r3 |
| ; CHECK-PWR7-NEXT: blr |
| %3 = icmp ugt <2 x i64> %0, %1 |
| %4 = sub <2 x i64> %0, %1 |
| %5 = sub <2 x i64> %1, %0 |
| %6 = select <2 x i1> %3, <2 x i64> %4, <2 x i64> %5 |
| ret <2 x i64> %6 |
| } |
| |
| ; Declarations of the PowerPC AltiVec vmaxs{w,h,b} intrinsics (word, halfword |
| ; and byte element types). The word variant is called by simple_absv_32 at the |
| ; top of this file; the halfword and byte variants are presumably called by |
| ; the analogous tests elsewhere in the file. |
| declare <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32>, <4 x i32>) |
| |
| declare <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16>, <8 x i16>) |
| |
| declare <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8>, <16 x i8>) |
| |