; blob: eaab932c41df7eeb70095c07d3c330a8b7770171 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-PWR9,CHECK-PWR9-LE
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr9 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-PWR9,CHECK-PWR9-BE
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-PWR78,CHECK-PWR8 -implicit-check-not vabsdu
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,CHECK-PWR78,CHECK-PWR7 -implicit-check-not vmaxsd
; Absolute value of a <4 x i32> vector written as vmaxsw(a, 0 - a).
; The pwr9 assertions expect vnegw + vmaxsw; the pwr7/pwr8 assertions expect
; the negation to be built from a zeroed register (xxlxor) and vsubuwm before
; the vmaxsw.
define <4 x i32> @simple_absv_32(<4 x i32> %a) local_unnamed_addr {
; CHECK-PWR9-LABEL: simple_absv_32:
; CHECK-PWR9: # %bb.0: # %entry
; CHECK-PWR9-NEXT: vnegw v3, v2
; CHECK-PWR9-NEXT: vmaxsw v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: simple_absv_32:
; CHECK-PWR78: # %bb.0: # %entry
; CHECK-PWR78-NEXT: xxlxor v3, v3, v3
; CHECK-PWR78-NEXT: vsubuwm v3, v3, v2
; CHECK-PWR78-NEXT: vmaxsw v2, v2, v3
; CHECK-PWR78-NEXT: blr
entry:
%sub.i = sub <4 x i32> zeroinitializer, %a
%0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %a, <4 x i32> %sub.i)
ret <4 x i32> %0
}
; Same idiom as a plain vmaxsw-based absolute value, but with the intrinsic
; operands swapped: vmaxsw(0 - a, a).
; The pwr9 assertions expect vnegw + vmaxsw v2, v2, v3; the pwr7/pwr8
; assertions keep the swapped operand order (vmaxsw v2, v3, v2).
define <4 x i32> @simple_absv_32_swap(<4 x i32> %a) local_unnamed_addr {
; CHECK-PWR9-LABEL: simple_absv_32_swap:
; CHECK-PWR9: # %bb.0: # %entry
; CHECK-PWR9-NEXT: vnegw v3, v2
; CHECK-PWR9-NEXT: vmaxsw v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: simple_absv_32_swap:
; CHECK-PWR78: # %bb.0: # %entry
; CHECK-PWR78-NEXT: xxlxor v3, v3, v3
; CHECK-PWR78-NEXT: vsubuwm v3, v3, v2
; CHECK-PWR78-NEXT: vmaxsw v2, v3, v2
; CHECK-PWR78-NEXT: blr
entry:
%sub.i = sub <4 x i32> zeroinitializer, %a
%0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %sub.i, <4 x i32> %a)
ret <4 x i32> %0
}
; Absolute value of a <8 x i16> vector written as vmaxsh(a, 0 - a).
; All four run configurations share one expectation: xxlxor (zero),
; vsubuhm (negate), vmaxsh.
define <8 x i16> @simple_absv_16(<8 x i16> %a) local_unnamed_addr {
; CHECK-LABEL: simple_absv_16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v3, v3, v3
; CHECK-NEXT: vsubuhm v3, v3, v2
; CHECK-NEXT: vmaxsh v2, v2, v3
; CHECK-NEXT: blr
entry:
%sub.i = sub <8 x i16> zeroinitializer, %a
%0 = tail call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %a, <8 x i16> %sub.i)
ret <8 x i16> %0
}
; Absolute value of a <16 x i8> vector written as vmaxsb(a, 0 - a).
; All four run configurations share one expectation: xxlxor (zero),
; vsububm (negate), vmaxsb.
define <16 x i8> @simple_absv_8(<16 x i8> %a) local_unnamed_addr {
; CHECK-LABEL: simple_absv_8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v3, v3, v3
; CHECK-NEXT: vsububm v3, v3, v2
; CHECK-NEXT: vmaxsb v2, v2, v3
; CHECK-NEXT: blr
entry:
%sub.i = sub <16 x i8> zeroinitializer, %a
%0 = tail call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %a, <16 x i8> %sub.i)
ret <16 x i8> %0
}
; v2i64 vmax isn't available on pwr7
; |a - b| on <2 x i64>, written as select((a - b) > -1, a - b, 0 - (a - b))
; with an nsw subtraction.
; Expected codegen per the assertions below:
;   pwr9: vsubudm, vnegd, vmaxsd.
;   pwr8: vsubudm, xxlxor + vsubudm (negate), vmaxsd.
;   pwr7: no vector max is used; each element is processed in GPRs through
;         the stack (stxvd2x / ld / sub, sradi, xor, sub / std / lxvd2x).
define <2 x i64> @sub_absv_64(<2 x i64> %a, <2 x i64> %b) local_unnamed_addr {
; CHECK-PWR9-LABEL: sub_absv_64:
; CHECK-PWR9: # %bb.0: # %entry
; CHECK-PWR9-NEXT: vsubudm v2, v2, v3
; CHECK-PWR9-NEXT: vnegd v3, v2
; CHECK-PWR9-NEXT: vmaxsd v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR8-LABEL: sub_absv_64:
; CHECK-PWR8: # %bb.0: # %entry
; CHECK-PWR8-NEXT: vsubudm v2, v2, v3
; CHECK-PWR8-NEXT: xxlxor v3, v3, v3
; CHECK-PWR8-NEXT: vsubudm v3, v3, v2
; CHECK-PWR8-NEXT: vmaxsd v2, v2, v3
; CHECK-PWR8-NEXT: blr
;
; CHECK-PWR7-LABEL: sub_absv_64:
; CHECK-PWR7: # %bb.0: # %entry
; CHECK-PWR7-NEXT: addi r3, r1, -48
; CHECK-PWR7-NEXT: stxvd2x v2, 0, r3
; CHECK-PWR7-NEXT: addi r3, r1, -32
; CHECK-PWR7-NEXT: stxvd2x v3, 0, r3
; CHECK-PWR7-NEXT: ld r4, -40(r1)
; CHECK-PWR7-NEXT: ld r5, -24(r1)
; CHECK-PWR7-NEXT: ld r3, -48(r1)
; CHECK-PWR7-NEXT: sub r4, r4, r5
; CHECK-PWR7-NEXT: sradi r5, r4, 63
; CHECK-PWR7-NEXT: xor r4, r4, r5
; CHECK-PWR7-NEXT: sub r4, r4, r5
; CHECK-PWR7-NEXT: ld r5, -32(r1)
; CHECK-PWR7-NEXT: std r4, -8(r1)
; CHECK-PWR7-NEXT: sub r3, r3, r5
; CHECK-PWR7-NEXT: sradi r4, r3, 63
; CHECK-PWR7-NEXT: xor r3, r3, r4
; CHECK-PWR7-NEXT: sub r3, r3, r4
; CHECK-PWR7-NEXT: std r3, -16(r1)
; CHECK-PWR7-NEXT: addi r3, r1, -16
; CHECK-PWR7-NEXT: lxvd2x v2, 0, r3
; CHECK-PWR7-NEXT: blr
entry:
%0 = sub nsw <2 x i64> %a, %b
%1 = icmp sgt <2 x i64> %0, <i64 -1, i64 -1>
%2 = sub <2 x i64> zeroinitializer, %0
%3 = select <2 x i1> %1, <2 x i64> %0, <2 x i64> %2
ret <2 x i64> %3
}
; The select pattern can only be detected for v4i32.
; |a - b| on <4 x i32>, written as select((a - b) > -1, a - b, 0 - (a - b))
; with an nsw subtraction.
; The pwr9 assertions expect xvnegsp applied to both inputs followed by
; vabsduw; the pwr7/pwr8 assertions expect vsubuwm, a negate
; (xxlxor + vsubuwm) and vmaxsw.
define <4 x i32> @sub_absv_32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr {
; CHECK-PWR9-LABEL: sub_absv_32:
; CHECK-PWR9: # %bb.0: # %entry
; CHECK-PWR9-NEXT: xvnegsp v3, v3
; CHECK-PWR9-NEXT: xvnegsp v2, v2
; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: sub_absv_32:
; CHECK-PWR78: # %bb.0: # %entry
; CHECK-PWR78-NEXT: vsubuwm v2, v2, v3
; CHECK-PWR78-NEXT: xxlxor v3, v3, v3
; CHECK-PWR78-NEXT: vsubuwm v3, v3, v2
; CHECK-PWR78-NEXT: vmaxsw v2, v2, v3
; CHECK-PWR78-NEXT: blr
entry:
%0 = sub nsw <4 x i32> %a, %b
%1 = icmp sgt <4 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1>
%2 = sub <4 x i32> zeroinitializer, %0
%3 = select <4 x i1> %1, <4 x i32> %0, <4 x i32> %2
ret <4 x i32> %3
}
; |a - b| on <8 x i16>, written as select((a - b) > -1, a - b, 0 - (a - b))
; with an nsw subtraction.
; All run configurations share one expectation: vsubuhm, a negate
; (xxlxor + vsubuhm) and vmaxsh.
define <8 x i16> @sub_absv_16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr {
; CHECK-LABEL: sub_absv_16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsubuhm v2, v2, v3
; CHECK-NEXT: xxlxor v3, v3, v3
; CHECK-NEXT: vsubuhm v3, v3, v2
; CHECK-NEXT: vmaxsh v2, v2, v3
; CHECK-NEXT: blr
entry:
%0 = sub nsw <8 x i16> %a, %b
%1 = icmp sgt <8 x i16> %0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
%2 = sub <8 x i16> zeroinitializer, %0
%3 = select <8 x i1> %1, <8 x i16> %0, <8 x i16> %2
ret <8 x i16> %3
}
; |a - b| on <16 x i8>, written as select((a - b) > -1, a - b, 0 - (a - b))
; with an nsw subtraction.
; All run configurations share one expectation: vsububm, a negate
; (xxlxor + vsububm) and vmaxsb.
define <16 x i8> @sub_absv_8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr {
; CHECK-LABEL: sub_absv_8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsububm v2, v2, v3
; CHECK-NEXT: xxlxor v3, v3, v3
; CHECK-NEXT: vsububm v3, v3, v2
; CHECK-NEXT: vmaxsb v2, v2, v3
; CHECK-NEXT: blr
entry:
%0 = sub nsw <16 x i8> %a, %b
%1 = icmp sgt <16 x i8> %0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%2 = sub <16 x i8> zeroinitializer, %0
%3 = select <16 x i1> %1, <16 x i8> %0, <16 x i8> %2
ret <16 x i8> %3
}
; Signed absolute difference computed in a wider type: both <8 x i16> inputs
; are sign-extended to i32, subtracted (nsw), made non-negative with an
; icmp/select pair, and truncated back to i16.
; All run configurations share one expectation, performed at i16 width:
; vminsh, vmaxsh, vsubuhm (max - min).
define <8 x i16> @sub_absv_16_ext(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr {
; CHECK-LABEL: sub_absv_16_ext:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vminsh v4, v2, v3
; CHECK-NEXT: vmaxsh v2, v2, v3
; CHECK-NEXT: vsubuhm v2, v2, v4
; CHECK-NEXT: blr
entry:
%0 = sext <8 x i16> %a to <8 x i32>
%1 = sext <8 x i16> %b to <8 x i32>
%2 = sub nsw <8 x i32> %0, %1
%3 = icmp sgt <8 x i32> %2, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
%4 = sub nsw <8 x i32> zeroinitializer, %2
%5 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> %4
%6 = trunc <8 x i32> %5 to <8 x i16>
ret <8 x i16> %6
}
; Unsigned absolute difference computed in a wider type: both <16 x i8>
; inputs are zero-extended to i32, subtracted (nsw), passed through
; llvm.abs.v16i32 (called with i1 true), and truncated back to i8.
; The pwr9 assertions expect a single vabsdub; the pwr7/pwr8 assertions
; expect vminub, vmaxub, vsububm (max - min).
; NOTE(review): @llvm.abs.v16i32 has no matching declare in the part of the
; file visible here; presumably the IR parser's implicit intrinsic
; declaration is relied upon - confirm against the LLVM version in use.
define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr {
; CHECK-PWR9-LABEL: sub_absv_8_ext:
; CHECK-PWR9: # %bb.0: # %entry
; CHECK-PWR9-NEXT: vabsdub v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: sub_absv_8_ext:
; CHECK-PWR78: # %bb.0: # %entry
; CHECK-PWR78-NEXT: vminub v4, v2, v3
; CHECK-PWR78-NEXT: vmaxub v2, v2, v3
; CHECK-PWR78-NEXT: vsububm v2, v2, v4
; CHECK-PWR78-NEXT: blr
entry:
%0 = zext <16 x i8> %a to <16 x i32>
%1 = zext <16 x i8> %b to <16 x i32>
%2 = sub nsw <16 x i32> %0, %1
%3 = tail call <16 x i32> @llvm.abs.v16i32(<16 x i32> %2, i1 true)
%4 = trunc <16 x i32> %3 to <16 x i8>
ret <16 x i8> %4
}
; |a - b| on <4 x i32>, written with the intrinsic instead of a select:
; vmaxsw(a - b, 0 - (a - b)), where a - b is an nsw subtraction.
; The pwr9 assertions expect xvnegsp applied to both inputs followed by
; vabsduw; the pwr7/pwr8 assertions expect vsubuwm, a negate
; (xxlxor + vsubuwm) and vmaxsw.
define <4 x i32> @sub_absv_vec_32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr {
; CHECK-PWR9-LABEL: sub_absv_vec_32:
; CHECK-PWR9: # %bb.0: # %entry
; CHECK-PWR9-NEXT: xvnegsp v3, v3
; CHECK-PWR9-NEXT: xvnegsp v2, v2
; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: sub_absv_vec_32:
; CHECK-PWR78: # %bb.0: # %entry
; CHECK-PWR78-NEXT: vsubuwm v2, v2, v3
; CHECK-PWR78-NEXT: xxlxor v3, v3, v3
; CHECK-PWR78-NEXT: vsubuwm v3, v3, v2
; CHECK-PWR78-NEXT: vmaxsw v2, v2, v3
; CHECK-PWR78-NEXT: blr
entry:
%sub = sub nsw <4 x i32> %a, %b
%sub.i = sub <4 x i32> zeroinitializer, %sub
%0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %sub, <4 x i32> %sub.i)
ret <4 x i32> %0
}
; |a - b| on <8 x i16>, written as vmaxsh(a - b, 0 - (a - b)) with an nsw
; subtraction.
; All run configurations share one expectation: vsubuhm, a negate
; (xxlxor + vsubuhm) and vmaxsh.
define <8 x i16> @sub_absv_vec_16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr {
; CHECK-LABEL: sub_absv_vec_16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsubuhm v2, v2, v3
; CHECK-NEXT: xxlxor v3, v3, v3
; CHECK-NEXT: vsubuhm v3, v3, v2
; CHECK-NEXT: vmaxsh v2, v2, v3
; CHECK-NEXT: blr
entry:
%sub = sub nsw <8 x i16> %a, %b
%sub.i = sub <8 x i16> zeroinitializer, %sub
%0 = tail call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %sub, <8 x i16> %sub.i)
ret <8 x i16> %0
}
; |a - b| on <16 x i8>, written as vmaxsb(a - b, 0 - (a - b)) with an nsw
; subtraction.
; All run configurations share one expectation: vsububm, a negate
; (xxlxor + vsububm) and vmaxsb.
define <16 x i8> @sub_absv_vec_8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr {
; CHECK-LABEL: sub_absv_vec_8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsububm v2, v2, v3
; CHECK-NEXT: xxlxor v3, v3, v3
; CHECK-NEXT: vsububm v3, v3, v2
; CHECK-NEXT: vmaxsb v2, v2, v3
; CHECK-NEXT: blr
entry:
%sub = sub nsw <16 x i8> %a, %b
%sub.i = sub <16 x i8> zeroinitializer, %sub
%0 = tail call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %sub, <16 x i8> %sub.i)
ret <16 x i8> %0
}
; Absolute difference of two <4 x i16> inputs zero-extended to <4 x i32>:
; vmaxsw(d, 0 - d) with d = zext(%0) - zext(%1).
; Expected codegen per the assertions below:
;   pwr9: vabsduh on the narrow inputs, then a merge with a zeroed register
;         (vmrglh on little-endian, vmrghh on big-endian).
;   pwr8: merge each input with zero (vmrglh), then vsubuwm, negate, vmaxsw.
;   pwr7: like pwr8, but the inputs are widened with vperm using a mask
;         loaded from the constant pool.
define <4 x i32> @zext_sub_absd32(<4 x i16>, <4 x i16>) local_unnamed_addr {
; CHECK-PWR9-LE-LABEL: zext_sub_absd32:
; CHECK-PWR9-LE: # %bb.0:
; CHECK-PWR9-LE-NEXT: vabsduh v2, v2, v3
; CHECK-PWR9-LE-NEXT: xxlxor v3, v3, v3
; CHECK-PWR9-LE-NEXT: vmrglh v2, v3, v2
; CHECK-PWR9-LE-NEXT: blr
;
; CHECK-PWR9-BE-LABEL: zext_sub_absd32:
; CHECK-PWR9-BE: # %bb.0:
; CHECK-PWR9-BE-NEXT: vabsduh v2, v2, v3
; CHECK-PWR9-BE-NEXT: xxlxor v3, v3, v3
; CHECK-PWR9-BE-NEXT: vmrghh v2, v3, v2
; CHECK-PWR9-BE-NEXT: blr
;
; CHECK-PWR8-LABEL: zext_sub_absd32:
; CHECK-PWR8: # %bb.0:
; CHECK-PWR8-NEXT: xxlxor v4, v4, v4
; CHECK-PWR8-NEXT: vmrglh v2, v4, v2
; CHECK-PWR8-NEXT: vmrglh v3, v4, v3
; CHECK-PWR8-NEXT: vsubuwm v2, v2, v3
; CHECK-PWR8-NEXT: vsubuwm v3, v4, v2
; CHECK-PWR8-NEXT: vmaxsw v2, v2, v3
; CHECK-PWR8-NEXT: blr
;
; CHECK-PWR7-LABEL: zext_sub_absd32:
; CHECK-PWR7: # %bb.0:
; CHECK-PWR7-NEXT: addis r3, r2, .LCPI13_0@toc@ha
; CHECK-PWR7-NEXT: xxlxor v5, v5, v5
; CHECK-PWR7-NEXT: addi r3, r3, .LCPI13_0@toc@l
; CHECK-PWR7-NEXT: lxvw4x v4, 0, r3
; CHECK-PWR7-NEXT: vperm v2, v5, v2, v4
; CHECK-PWR7-NEXT: vperm v3, v5, v3, v4
; CHECK-PWR7-NEXT: vsubuwm v2, v2, v3
; CHECK-PWR7-NEXT: vsubuwm v3, v5, v2
; CHECK-PWR7-NEXT: vmaxsw v2, v2, v3
; CHECK-PWR7-NEXT: blr
%3 = zext <4 x i16> %0 to <4 x i32>
%4 = zext <4 x i16> %1 to <4 x i32>
%5 = sub <4 x i32> %3, %4
%6 = sub <4 x i32> zeroinitializer, %5
%7 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %5, <4 x i32> %6)
ret <4 x i32> %7
}
; Absolute difference of two <8 x i8> inputs zero-extended to <8 x i16>:
; vmaxsh(d, 0 - d) with d = zext(%0) - zext(%1).
; Expected codegen per the assertions below:
;   pwr9: vabsdub on the narrow inputs, then a merge with a zeroed register
;         (vmrglb on little-endian, vmrghb on big-endian).
;   pwr8: merge each input with zero (vmrglb), then vsubuhm, negate, vmaxsh.
;   pwr7: like pwr8, but the inputs are widened with vperm using a mask
;         loaded from the constant pool.
define <8 x i16> @zext_sub_absd16(<8 x i8>, <8 x i8>) local_unnamed_addr {
; CHECK-PWR9-LE-LABEL: zext_sub_absd16:
; CHECK-PWR9-LE: # %bb.0:
; CHECK-PWR9-LE-NEXT: vabsdub v2, v2, v3
; CHECK-PWR9-LE-NEXT: xxlxor v3, v3, v3
; CHECK-PWR9-LE-NEXT: vmrglb v2, v3, v2
; CHECK-PWR9-LE-NEXT: blr
;
; CHECK-PWR9-BE-LABEL: zext_sub_absd16:
; CHECK-PWR9-BE: # %bb.0:
; CHECK-PWR9-BE-NEXT: vabsdub v2, v2, v3
; CHECK-PWR9-BE-NEXT: xxlxor v3, v3, v3
; CHECK-PWR9-BE-NEXT: vmrghb v2, v3, v2
; CHECK-PWR9-BE-NEXT: blr
;
; CHECK-PWR8-LABEL: zext_sub_absd16:
; CHECK-PWR8: # %bb.0:
; CHECK-PWR8-NEXT: xxlxor v4, v4, v4
; CHECK-PWR8-NEXT: vmrglb v2, v4, v2
; CHECK-PWR8-NEXT: vmrglb v3, v4, v3
; CHECK-PWR8-NEXT: vsubuhm v2, v2, v3
; CHECK-PWR8-NEXT: vsubuhm v3, v4, v2
; CHECK-PWR8-NEXT: vmaxsh v2, v2, v3
; CHECK-PWR8-NEXT: blr
;
; CHECK-PWR7-LABEL: zext_sub_absd16:
; CHECK-PWR7: # %bb.0:
; CHECK-PWR7-NEXT: addis r3, r2, .LCPI14_0@toc@ha
; CHECK-PWR7-NEXT: xxlxor v5, v5, v5
; CHECK-PWR7-NEXT: addi r3, r3, .LCPI14_0@toc@l
; CHECK-PWR7-NEXT: lxvw4x v4, 0, r3
; CHECK-PWR7-NEXT: vperm v2, v5, v2, v4
; CHECK-PWR7-NEXT: vperm v3, v5, v3, v4
; CHECK-PWR7-NEXT: vsubuhm v2, v2, v3
; CHECK-PWR7-NEXT: vsubuhm v3, v5, v2
; CHECK-PWR7-NEXT: vmaxsh v2, v2, v3
; CHECK-PWR7-NEXT: blr
%3 = zext <8 x i8> %0 to <8 x i16>
%4 = zext <8 x i8> %1 to <8 x i16>
%5 = sub <8 x i16> %3, %4
%6 = sub <8 x i16> zeroinitializer, %5
%7 = tail call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %5, <8 x i16> %6)
ret <8 x i16> %7
}
; Absolute difference of two <16 x i4> inputs zero-extended to <16 x i8>:
; vmaxsb(d, 0 - d) with d = zext(%0) - zext(%1).
; In both expectations the zero-extension appears as an AND with a splat
; of 15. The pwr9 assertions then expect a single vabsdub; the pwr7/pwr8
; assertions expect vsububm, a negate (xxlxor + vsububm) and vmaxsb.
define <16 x i8> @zext_sub_absd8(<16 x i4>, <16 x i4>) local_unnamed_addr {
; CHECK-PWR9-LABEL: zext_sub_absd8:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: xxspltib vs0, 15
; CHECK-PWR9-NEXT: xxland v3, v3, vs0
; CHECK-PWR9-NEXT: xxland v2, v2, vs0
; CHECK-PWR9-NEXT: vabsdub v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: zext_sub_absd8:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vspltisb v4, 15
; CHECK-PWR78-NEXT: xxland v2, v2, v4
; CHECK-PWR78-NEXT: xxland v3, v3, v4
; CHECK-PWR78-NEXT: vsububm v2, v2, v3
; CHECK-PWR78-NEXT: xxlxor v3, v3, v3
; CHECK-PWR78-NEXT: vsububm v3, v3, v2
; CHECK-PWR78-NEXT: vmaxsb v2, v2, v3
; CHECK-PWR78-NEXT: blr
%3 = zext <16 x i4> %0 to <16 x i8>
%4 = zext <16 x i4> %1 to <16 x i8>
%5 = sub <16 x i8> %3, %4
%6 = sub <16 x i8> zeroinitializer, %5
%7 = tail call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %5, <16 x i8> %6)
ret <16 x i8> %7
}
; Absolute difference of two <4 x i16> inputs sign-extended to <4 x i32>:
; vmaxsw(d, 0 - d) with d = sext(%0) - sext(%1).
; Expected codegen per the assertions below:
;   pwr9: vminsh / vmaxsh / vsubuhm on the narrow inputs, then a merge with
;         a zeroed register (vmrglh on little-endian, vmrghh on big-endian).
;   pwr8, pwr7: widen each input with a merge (vmrglh / vmrghh) and a
;         vslw + vsraw shift pair, then vsubuwm, negate and vmaxsw. The
;         shift amount is built from vspltisw 8 doubled by vadduwm.
define <4 x i32> @sext_sub_absd32(<4 x i16>, <4 x i16>) local_unnamed_addr {
; CHECK-PWR9-LE-LABEL: sext_sub_absd32:
; CHECK-PWR9-LE: # %bb.0:
; CHECK-PWR9-LE-NEXT: vminsh v4, v2, v3
; CHECK-PWR9-LE-NEXT: vmaxsh v2, v2, v3
; CHECK-PWR9-LE-NEXT: xxlxor v3, v3, v3
; CHECK-PWR9-LE-NEXT: vsubuhm v2, v2, v4
; CHECK-PWR9-LE-NEXT: vmrglh v2, v3, v2
; CHECK-PWR9-LE-NEXT: blr
;
; CHECK-PWR9-BE-LABEL: sext_sub_absd32:
; CHECK-PWR9-BE: # %bb.0:
; CHECK-PWR9-BE-NEXT: vminsh v4, v2, v3
; CHECK-PWR9-BE-NEXT: vmaxsh v2, v2, v3
; CHECK-PWR9-BE-NEXT: xxlxor v3, v3, v3
; CHECK-PWR9-BE-NEXT: vsubuhm v2, v2, v4
; CHECK-PWR9-BE-NEXT: vmrghh v2, v3, v2
; CHECK-PWR9-BE-NEXT: blr
;
; CHECK-PWR8-LABEL: sext_sub_absd32:
; CHECK-PWR8: # %bb.0:
; CHECK-PWR8-NEXT: vspltisw v4, 8
; CHECK-PWR8-NEXT: vmrglh v2, v2, v2
; CHECK-PWR8-NEXT: vadduwm v4, v4, v4
; CHECK-PWR8-NEXT: vmrglh v3, v3, v3
; CHECK-PWR8-NEXT: vslw v2, v2, v4
; CHECK-PWR8-NEXT: vslw v3, v3, v4
; CHECK-PWR8-NEXT: vsraw v2, v2, v4
; CHECK-PWR8-NEXT: vsraw v3, v3, v4
; CHECK-PWR8-NEXT: vsubuwm v2, v2, v3
; CHECK-PWR8-NEXT: xxlxor v3, v3, v3
; CHECK-PWR8-NEXT: vsubuwm v3, v3, v2
; CHECK-PWR8-NEXT: vmaxsw v2, v2, v3
; CHECK-PWR8-NEXT: blr
;
; CHECK-PWR7-LABEL: sext_sub_absd32:
; CHECK-PWR7: # %bb.0:
; CHECK-PWR7-NEXT: vspltisw v4, 8
; CHECK-PWR7-NEXT: vmrghh v2, v2, v2
; CHECK-PWR7-NEXT: vmrghh v3, v3, v3
; CHECK-PWR7-NEXT: vadduwm v4, v4, v4
; CHECK-PWR7-NEXT: vslw v2, v2, v4
; CHECK-PWR7-NEXT: vslw v3, v3, v4
; CHECK-PWR7-NEXT: vsraw v2, v2, v4
; CHECK-PWR7-NEXT: vsraw v3, v3, v4
; CHECK-PWR7-NEXT: vsubuwm v2, v2, v3
; CHECK-PWR7-NEXT: xxlxor v3, v3, v3
; CHECK-PWR7-NEXT: vsubuwm v3, v3, v2
; CHECK-PWR7-NEXT: vmaxsw v2, v2, v3
; CHECK-PWR7-NEXT: blr
%3 = sext <4 x i16> %0 to <4 x i32>
%4 = sext <4 x i16> %1 to <4 x i32>
%5 = sub <4 x i32> %3, %4
%6 = sub <4 x i32> zeroinitializer, %5
%7 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %5, <4 x i32> %6)
ret <4 x i32> %7
}
; Absolute difference of two <8 x i8> inputs sign-extended to <8 x i16>:
; vmaxsh(d, 0 - d) with d = sext(%0) - sext(%1).
; Expected codegen per the assertions below:
;   pwr9: vminsb / vmaxsb / vsububm on the narrow inputs, then a merge with
;         a zeroed register (vmrglb on little-endian, vmrghb on big-endian).
;   pwr8, pwr7: widen each input with a merge (vmrglb / vmrghb) and a
;         vslh + vsrah shift pair by a splat of 8, then vsubuhm, negate and
;         vmaxsh.
define <8 x i16> @sext_sub_absd16(<8 x i8>, <8 x i8>) local_unnamed_addr {
; CHECK-PWR9-LE-LABEL: sext_sub_absd16:
; CHECK-PWR9-LE: # %bb.0:
; CHECK-PWR9-LE-NEXT: vminsb v4, v2, v3
; CHECK-PWR9-LE-NEXT: vmaxsb v2, v2, v3
; CHECK-PWR9-LE-NEXT: xxlxor v3, v3, v3
; CHECK-PWR9-LE-NEXT: vsububm v2, v2, v4
; CHECK-PWR9-LE-NEXT: vmrglb v2, v3, v2
; CHECK-PWR9-LE-NEXT: blr
;
; CHECK-PWR9-BE-LABEL: sext_sub_absd16:
; CHECK-PWR9-BE: # %bb.0:
; CHECK-PWR9-BE-NEXT: vminsb v4, v2, v3
; CHECK-PWR9-BE-NEXT: vmaxsb v2, v2, v3
; CHECK-PWR9-BE-NEXT: xxlxor v3, v3, v3
; CHECK-PWR9-BE-NEXT: vsububm v2, v2, v4
; CHECK-PWR9-BE-NEXT: vmrghb v2, v3, v2
; CHECK-PWR9-BE-NEXT: blr
;
; CHECK-PWR8-LABEL: sext_sub_absd16:
; CHECK-PWR8: # %bb.0:
; CHECK-PWR8-NEXT: vmrglb v2, v2, v2
; CHECK-PWR8-NEXT: vspltish v4, 8
; CHECK-PWR8-NEXT: vslh v2, v2, v4
; CHECK-PWR8-NEXT: vmrglb v3, v3, v3
; CHECK-PWR8-NEXT: vslh v3, v3, v4
; CHECK-PWR8-NEXT: vsrah v2, v2, v4
; CHECK-PWR8-NEXT: vsrah v3, v3, v4
; CHECK-PWR8-NEXT: vsubuhm v2, v2, v3
; CHECK-PWR8-NEXT: xxlxor v3, v3, v3
; CHECK-PWR8-NEXT: vsubuhm v3, v3, v2
; CHECK-PWR8-NEXT: vmaxsh v2, v2, v3
; CHECK-PWR8-NEXT: blr
;
; CHECK-PWR7-LABEL: sext_sub_absd16:
; CHECK-PWR7: # %bb.0:
; CHECK-PWR7-NEXT: vspltish v4, 8
; CHECK-PWR7-NEXT: vmrghb v2, v2, v2
; CHECK-PWR7-NEXT: vmrghb v3, v3, v3
; CHECK-PWR7-NEXT: vslh v2, v2, v4
; CHECK-PWR7-NEXT: vslh v3, v3, v4
; CHECK-PWR7-NEXT: vsrah v2, v2, v4
; CHECK-PWR7-NEXT: vsrah v3, v3, v4
; CHECK-PWR7-NEXT: vsubuhm v2, v2, v3
; CHECK-PWR7-NEXT: xxlxor v3, v3, v3
; CHECK-PWR7-NEXT: vsubuhm v3, v3, v2
; CHECK-PWR7-NEXT: vmaxsh v2, v2, v3
; CHECK-PWR7-NEXT: blr
%3 = sext <8 x i8> %0 to <8 x i16>
%4 = sext <8 x i8> %1 to <8 x i16>
%5 = sub <8 x i16> %3, %4
%6 = sub <8 x i16> zeroinitializer, %5
%7 = tail call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %5, <8 x i16> %6)
ret <8 x i16> %7
}
; Absolute difference of two <16 x i4> inputs sign-extended to <16 x i8>:
; vmaxsb(d, 0 - d) with d = sext(%0) - sext(%1).
; In both expectations the sign-extension appears as a vslb + vsrab pair
; by a splat of 4. The pwr9 assertions then expect vminsb / vmaxsb /
; vsububm; the pwr7/pwr8 assertions expect vsububm, a negate
; (xxlxor + vsububm) and vmaxsb.
define <16 x i8> @sext_sub_absd8(<16 x i4>, <16 x i4>) local_unnamed_addr {
; CHECK-PWR9-LABEL: sext_sub_absd8:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: xxspltib v4, 4
; CHECK-PWR9-NEXT: vslb v3, v3, v4
; CHECK-PWR9-NEXT: vslb v2, v2, v4
; CHECK-PWR9-NEXT: vsrab v3, v3, v4
; CHECK-PWR9-NEXT: vsrab v2, v2, v4
; CHECK-PWR9-NEXT: vminsb v4, v2, v3
; CHECK-PWR9-NEXT: vmaxsb v2, v2, v3
; CHECK-PWR9-NEXT: vsububm v2, v2, v4
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: sext_sub_absd8:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vspltisb v4, 4
; CHECK-PWR78-NEXT: vslb v2, v2, v4
; CHECK-PWR78-NEXT: vslb v3, v3, v4
; CHECK-PWR78-NEXT: vsrab v2, v2, v4
; CHECK-PWR78-NEXT: vsrab v3, v3, v4
; CHECK-PWR78-NEXT: vsububm v2, v2, v3
; CHECK-PWR78-NEXT: xxlxor v3, v3, v3
; CHECK-PWR78-NEXT: vsububm v3, v3, v2
; CHECK-PWR78-NEXT: vmaxsb v2, v2, v3
; CHECK-PWR78-NEXT: blr
%3 = sext <16 x i4> %0 to <16 x i8>
%4 = sext <16 x i4> %1 to <16 x i8>
%5 = sub <16 x i8> %3, %4
%6 = sub <16 x i8> zeroinitializer, %5
%7 = tail call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %5, <16 x i8> %6)
ret <16 x i8> %7
}
; To verify vabsdu* exploitation for ucmp + sub + select sequence
; Unsigned absolute difference on <4 x i32>:
; select(%0 ugt %1, %0 - %1, %1 - %0).
; The pwr9 assertions expect a single vabsduw; the pwr7/pwr8 assertions
; expect vminuw, vmaxuw, vsubuwm (max - min).
define <4 x i32> @absd_int32_ugt(<4 x i32>, <4 x i32>) {
; CHECK-PWR9-LABEL: absd_int32_ugt:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: absd_int32_ugt:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vminuw v4, v2, v3
; CHECK-PWR78-NEXT: vmaxuw v2, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp ugt <4 x i32> %0, %1
%4 = sub <4 x i32> %0, %1
%5 = sub <4 x i32> %1, %0
%6 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %5
ret <4 x i32> %6
}
; Unsigned absolute difference on <4 x i32>:
; select(%0 uge %1, %0 - %1, %1 - %0).
; The pwr9 assertions expect a single vabsduw; the pwr7/pwr8 assertions
; expect vminuw, vmaxuw, vsubuwm (max - min).
define <4 x i32> @absd_int32_uge(<4 x i32>, <4 x i32>) {
; CHECK-PWR9-LABEL: absd_int32_uge:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: absd_int32_uge:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vminuw v4, v2, v3
; CHECK-PWR78-NEXT: vmaxuw v2, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp uge <4 x i32> %0, %1
%4 = sub <4 x i32> %0, %1
%5 = sub <4 x i32> %1, %0
%6 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %5
ret <4 x i32> %6
}
; Unsigned absolute difference on <4 x i32>, with the comparison inverted
; and the select arms exchanged to match:
; select(%0 ult %1, %1 - %0, %0 - %1).
; The pwr9 assertions expect a single vabsduw; the pwr7/pwr8 assertions
; expect vminuw, vmaxuw, vsubuwm (max - min).
define <4 x i32> @absd_int32_ult(<4 x i32>, <4 x i32>) {
; CHECK-PWR9-LABEL: absd_int32_ult:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: absd_int32_ult:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vminuw v4, v2, v3
; CHECK-PWR78-NEXT: vmaxuw v2, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp ult <4 x i32> %0, %1
%4 = sub <4 x i32> %0, %1
%5 = sub <4 x i32> %1, %0
%6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4
ret <4 x i32> %6
}
; Unsigned absolute difference on <4 x i32>:
; select(%0 ule %1, %1 - %0, %0 - %1).
; The pwr9 assertions expect a single vabsduw; the pwr7/pwr8 assertions
; expect vminuw, vmaxuw, vsubuwm (max - min).
define <4 x i32> @absd_int32_ule(<4 x i32>, <4 x i32>) {
; CHECK-PWR9-LABEL: absd_int32_ule:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: absd_int32_ule:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vminuw v4, v2, v3
; CHECK-PWR78-NEXT: vmaxuw v2, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp ule <4 x i32> %0, %1
%4 = sub <4 x i32> %0, %1
%5 = sub <4 x i32> %1, %0
%6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4
ret <4 x i32> %6
}
; Unsigned absolute difference on <8 x i16>:
; select(%0 ugt %1, %0 - %1, %1 - %0).
; The pwr9 assertions expect a single vabsduh; the pwr7/pwr8 assertions
; expect vminuh, vmaxuh, vsubuhm (max - min).
define <8 x i16> @absd_int16_ugt(<8 x i16>, <8 x i16>) {
; CHECK-PWR9-LABEL: absd_int16_ugt:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: vabsduh v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: absd_int16_ugt:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vminuh v4, v2, v3
; CHECK-PWR78-NEXT: vmaxuh v2, v2, v3
; CHECK-PWR78-NEXT: vsubuhm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp ugt <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
%5 = sub <8 x i16> %1, %0
%6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5
ret <8 x i16> %6
}
; Unsigned absolute difference on <8 x i16>:
; select(%0 uge %1, %0 - %1, %1 - %0).
; The pwr9 assertions expect a single vabsduh; the pwr7/pwr8 assertions
; expect vminuh, vmaxuh, vsubuhm (max - min).
define <8 x i16> @absd_int16_uge(<8 x i16>, <8 x i16>) {
; CHECK-PWR9-LABEL: absd_int16_uge:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: vabsduh v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: absd_int16_uge:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vminuh v4, v2, v3
; CHECK-PWR78-NEXT: vmaxuh v2, v2, v3
; CHECK-PWR78-NEXT: vsubuhm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp uge <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
%5 = sub <8 x i16> %1, %0
%6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5
ret <8 x i16> %6
}
; Unsigned absolute difference on <8 x i16>:
; select(%0 ult %1, %1 - %0, %0 - %1).
; The pwr9 assertions expect a single vabsduh; the pwr7/pwr8 assertions
; expect vminuh, vmaxuh, vsubuhm (max - min).
define <8 x i16> @absd_int16_ult(<8 x i16>, <8 x i16>) {
; CHECK-PWR9-LABEL: absd_int16_ult:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: vabsduh v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: absd_int16_ult:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vminuh v4, v2, v3
; CHECK-PWR78-NEXT: vmaxuh v2, v2, v3
; CHECK-PWR78-NEXT: vsubuhm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp ult <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
%5 = sub <8 x i16> %1, %0
%6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4
ret <8 x i16> %6
}
; Unsigned absolute difference on <8 x i16>:
; select(%0 ule %1, %1 - %0, %0 - %1).
; The pwr9 assertions expect a single vabsduh; the pwr7/pwr8 assertions
; expect vminuh, vmaxuh, vsubuhm (max - min).
define <8 x i16> @absd_int16_ule(<8 x i16>, <8 x i16>) {
; CHECK-PWR9-LABEL: absd_int16_ule:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: vabsduh v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: absd_int16_ule:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vminuh v4, v2, v3
; CHECK-PWR78-NEXT: vmaxuh v2, v2, v3
; CHECK-PWR78-NEXT: vsubuhm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp ule <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
%5 = sub <8 x i16> %1, %0
%6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4
ret <8 x i16> %6
}
; Unsigned absolute difference on <16 x i8>:
; select(%0 ugt %1, %0 - %1, %1 - %0).
; The pwr9 assertions expect a single vabsdub; the pwr7/pwr8 assertions
; expect vminub, vmaxub, vsububm (max - min).
define <16 x i8> @absd_int8_ugt(<16 x i8>, <16 x i8>) {
; CHECK-PWR9-LABEL: absd_int8_ugt:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: vabsdub v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: absd_int8_ugt:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vminub v4, v2, v3
; CHECK-PWR78-NEXT: vmaxub v2, v2, v3
; CHECK-PWR78-NEXT: vsububm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp ugt <16 x i8> %0, %1
%4 = sub <16 x i8> %0, %1
%5 = sub <16 x i8> %1, %0
%6 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %5
ret <16 x i8> %6
}
; Unsigned absolute difference on <16 x i8>:
; select(%0 uge %1, %0 - %1, %1 - %0).
; The pwr9 assertions expect a single vabsdub; the pwr7/pwr8 assertions
; expect vminub, vmaxub, vsububm (max - min).
define <16 x i8> @absd_int8_uge(<16 x i8>, <16 x i8>) {
; CHECK-PWR9-LABEL: absd_int8_uge:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: vabsdub v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: absd_int8_uge:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vminub v4, v2, v3
; CHECK-PWR78-NEXT: vmaxub v2, v2, v3
; CHECK-PWR78-NEXT: vsububm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp uge <16 x i8> %0, %1
%4 = sub <16 x i8> %0, %1
%5 = sub <16 x i8> %1, %0
%6 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %5
ret <16 x i8> %6
}
; Unsigned absolute difference on <16 x i8>:
; select(%0 ult %1, %1 - %0, %0 - %1).
; The pwr9 assertions expect a single vabsdub; the pwr7/pwr8 assertions
; expect vminub, vmaxub, vsububm (max - min).
define <16 x i8> @absd_int8_ult(<16 x i8>, <16 x i8>) {
; CHECK-PWR9-LABEL: absd_int8_ult:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: vabsdub v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: absd_int8_ult:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vminub v4, v2, v3
; CHECK-PWR78-NEXT: vmaxub v2, v2, v3
; CHECK-PWR78-NEXT: vsububm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp ult <16 x i8> %0, %1
%4 = sub <16 x i8> %0, %1
%5 = sub <16 x i8> %1, %0
%6 = select <16 x i1> %3, <16 x i8> %5, <16 x i8> %4
ret <16 x i8> %6
}
; Unsigned absolute difference on <16 x i8>:
; select(%0 ule %1, %1 - %0, %0 - %1).
; The pwr9 assertions expect a single vabsdub; the pwr7/pwr8 assertions
; expect vminub, vmaxub, vsububm (max - min).
define <16 x i8> @absd_int8_ule(<16 x i8>, <16 x i8>) {
; CHECK-PWR9-LABEL: absd_int8_ule:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: vabsdub v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: absd_int8_ule:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vminub v4, v2, v3
; CHECK-PWR78-NEXT: vmaxub v2, v2, v3
; CHECK-PWR78-NEXT: vsububm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp ule <16 x i8> %0, %1
%4 = sub <16 x i8> %0, %1
%5 = sub <16 x i8> %1, %0
%6 = select <16 x i1> %3, <16 x i8> %5, <16 x i8> %4
ret <16 x i8> %6
}
; Tests for ABDS icmp + sub + select sequence
; Signed absolute difference on <4 x i32>:
; select(%0 sgt %1, %0 - %1, %1 - %0).
; The pwr9 assertions expect xvnegsp applied to both inputs followed by
; vabsduw; the pwr7/pwr8 assertions expect vminsw, vmaxsw, vsubuwm
; (max - min).
define <4 x i32> @absd_int32_sgt(<4 x i32>, <4 x i32>) {
; CHECK-PWR9-LABEL: absd_int32_sgt:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: xvnegsp v3, v3
; CHECK-PWR9-NEXT: xvnegsp v2, v2
; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: absd_int32_sgt:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vminsw v4, v2, v3
; CHECK-PWR78-NEXT: vmaxsw v2, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp sgt <4 x i32> %0, %1
%4 = sub <4 x i32> %0, %1
%5 = sub <4 x i32> %1, %0
%6 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %5
ret <4 x i32> %6
}
; Signed absolute difference on <4 x i32>:
; select(%0 sge %1, %0 - %1, %1 - %0).
; The pwr9 assertions expect xvnegsp applied to both inputs followed by
; vabsduw; the pwr7/pwr8 assertions expect vminsw, vmaxsw, vsubuwm
; (max - min).
define <4 x i32> @absd_int32_sge(<4 x i32>, <4 x i32>) {
; CHECK-PWR9-LABEL: absd_int32_sge:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: xvnegsp v3, v3
; CHECK-PWR9-NEXT: xvnegsp v2, v2
; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: absd_int32_sge:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vminsw v4, v2, v3
; CHECK-PWR78-NEXT: vmaxsw v2, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp sge <4 x i32> %0, %1
%4 = sub <4 x i32> %0, %1
%5 = sub <4 x i32> %1, %0
%6 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %5
ret <4 x i32> %6
}
; Signed absolute difference on <4 x i32>:
; select(%0 slt %1, %1 - %0, %0 - %1).
; The pwr9 assertions expect xvnegsp applied to both inputs followed by
; vabsduw; the pwr7/pwr8 assertions expect vminsw, vmaxsw, vsubuwm
; (max - min).
define <4 x i32> @absd_int32_slt(<4 x i32>, <4 x i32>) {
; CHECK-PWR9-LABEL: absd_int32_slt:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: xvnegsp v3, v3
; CHECK-PWR9-NEXT: xvnegsp v2, v2
; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: absd_int32_slt:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vminsw v4, v2, v3
; CHECK-PWR78-NEXT: vmaxsw v2, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp slt <4 x i32> %0, %1
%4 = sub <4 x i32> %0, %1
%5 = sub <4 x i32> %1, %0
%6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4
ret <4 x i32> %6
}
; Signed absolute difference on <4 x i32>:
; select(%0 sle %1, %1 - %0, %0 - %1).
; The pwr9 assertions expect xvnegsp applied to both inputs followed by
; vabsduw; the pwr7/pwr8 assertions expect vminsw, vmaxsw, vsubuwm
; (max - min).
define <4 x i32> @absd_int32_sle(<4 x i32>, <4 x i32>) {
; CHECK-PWR9-LABEL: absd_int32_sle:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: xvnegsp v3, v3
; CHECK-PWR9-NEXT: xvnegsp v2, v2
; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: absd_int32_sle:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vminsw v4, v2, v3
; CHECK-PWR78-NEXT: vmaxsw v2, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp sle <4 x i32> %0, %1
%4 = sub <4 x i32> %0, %1
%5 = sub <4 x i32> %1, %0
%6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4
ret <4 x i32> %6
}
; Signed absolute difference on <8 x i16>:
; select(%0 sgt %1, %0 - %1, %1 - %0).
; All run configurations share one expectation: vminsh, vmaxsh, vsubuhm
; (max - min).
define <8 x i16> @absd_int16_sgt(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: absd_int16_sgt:
; CHECK: # %bb.0:
; CHECK-NEXT: vminsh v4, v2, v3
; CHECK-NEXT: vmaxsh v2, v2, v3
; CHECK-NEXT: vsubuhm v2, v2, v4
; CHECK-NEXT: blr
%3 = icmp sgt <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
%5 = sub <8 x i16> %1, %0
%6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5
ret <8 x i16> %6
}
; Signed absolute difference on <8 x i16>:
; select(%0 sge %1, %0 - %1, %1 - %0).
; All run configurations share one expectation: vminsh, vmaxsh, vsubuhm
; (max - min).
define <8 x i16> @absd_int16_sge(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: absd_int16_sge:
; CHECK: # %bb.0:
; CHECK-NEXT: vminsh v4, v2, v3
; CHECK-NEXT: vmaxsh v2, v2, v3
; CHECK-NEXT: vsubuhm v2, v2, v4
; CHECK-NEXT: blr
%3 = icmp sge <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
%5 = sub <8 x i16> %1, %0
%6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5
ret <8 x i16> %6
}
; Signed absolute difference on <8 x i16>:
; select(%0 slt %1, %1 - %0, %0 - %1).
; All run configurations share one expectation: vminsh, vmaxsh, vsubuhm
; (max - min).
define <8 x i16> @absd_int16_slt(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: absd_int16_slt:
; CHECK: # %bb.0:
; CHECK-NEXT: vminsh v4, v2, v3
; CHECK-NEXT: vmaxsh v2, v2, v3
; CHECK-NEXT: vsubuhm v2, v2, v4
; CHECK-NEXT: blr
%3 = icmp slt <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
%5 = sub <8 x i16> %1, %0
%6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4
ret <8 x i16> %6
}
; Signed absolute difference on <8 x i16>:
; select(%0 sle %1, %1 - %0, %0 - %1).
; All run configurations share one expectation: vminsh, vmaxsh, vsubuhm
; (max - min).
define <8 x i16> @absd_int16_sle(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: absd_int16_sle:
; CHECK: # %bb.0:
; CHECK-NEXT: vminsh v4, v2, v3
; CHECK-NEXT: vmaxsh v2, v2, v3
; CHECK-NEXT: vsubuhm v2, v2, v4
; CHECK-NEXT: blr
%3 = icmp sle <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
%5 = sub <8 x i16> %1, %0
%6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4
ret <8 x i16> %6
}
; Signed absolute difference on <16 x i8>:
; select(%0 sgt %1, %0 - %1, %1 - %0).
; All run configurations share one expectation: vminsb, vmaxsb, vsububm
; (max - min).
define <16 x i8> @absd_int8_sgt(<16 x i8>, <16 x i8>) {
; CHECK-LABEL: absd_int8_sgt:
; CHECK: # %bb.0:
; CHECK-NEXT: vminsb v4, v2, v3
; CHECK-NEXT: vmaxsb v2, v2, v3
; CHECK-NEXT: vsububm v2, v2, v4
; CHECK-NEXT: blr
%3 = icmp sgt <16 x i8> %0, %1
%4 = sub <16 x i8> %0, %1
%5 = sub <16 x i8> %1, %0
%6 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %5
ret <16 x i8> %6
}
; Signed absolute difference on <16 x i8>:
; select(%0 sge %1, %0 - %1, %1 - %0).
; All run configurations share one expectation: vminsb, vmaxsb, vsububm
; (max - min).
define <16 x i8> @absd_int8_sge(<16 x i8>, <16 x i8>) {
; CHECK-LABEL: absd_int8_sge:
; CHECK: # %bb.0:
; CHECK-NEXT: vminsb v4, v2, v3
; CHECK-NEXT: vmaxsb v2, v2, v3
; CHECK-NEXT: vsububm v2, v2, v4
; CHECK-NEXT: blr
%3 = icmp sge <16 x i8> %0, %1
%4 = sub <16 x i8> %0, %1
%5 = sub <16 x i8> %1, %0
%6 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %5
ret <16 x i8> %6
}
; Signed absolute difference on <16 x i8>:
; select(%0 slt %1, %1 - %0, %0 - %1).
; All run configurations share one expectation: vminsb, vmaxsb, vsububm
; (max - min).
define <16 x i8> @absd_int8_slt(<16 x i8>, <16 x i8>) {
; CHECK-LABEL: absd_int8_slt:
; CHECK: # %bb.0:
; CHECK-NEXT: vminsb v4, v2, v3
; CHECK-NEXT: vmaxsb v2, v2, v3
; CHECK-NEXT: vsububm v2, v2, v4
; CHECK-NEXT: blr
%3 = icmp slt <16 x i8> %0, %1
%4 = sub <16 x i8> %0, %1
%5 = sub <16 x i8> %1, %0
%6 = select <16 x i1> %3, <16 x i8> %5, <16 x i8> %4
ret <16 x i8> %6
}
; Signed absolute difference on <16 x i8>:
; select(%0 sle %1, %1 - %0, %0 - %1).
; All run configurations share one expectation: vminsb, vmaxsb, vsububm
; (max - min).
define <16 x i8> @absd_int8_sle(<16 x i8>, <16 x i8>) {
; CHECK-LABEL: absd_int8_sle:
; CHECK: # %bb.0:
; CHECK-NEXT: vminsb v4, v2, v3
; CHECK-NEXT: vmaxsb v2, v2, v3
; CHECK-NEXT: vsububm v2, v2, v4
; CHECK-NEXT: blr
%3 = icmp sle <16 x i8> %0, %1
%4 = sub <16 x i8> %0, %1
%5 = sub <16 x i8> %1, %0
%6 = select <16 x i1> %3, <16 x i8> %5, <16 x i8> %4
ret <16 x i8> %6
}
; Some cases that we are unable to optimize.
; Check that the transformation does not go beyond its intended scope.
; Negated unsigned absolute difference on <4 x i32>: the comparison is ugt
; but the select picks %1 - %0 when it holds and %0 - %1 otherwise, so each
; lane yields -|%0 - %1| (modulo 2^32).
; The pwr9 assertions expect vabsduw followed by vnegw; the pwr7/pwr8
; assertions expect the literal compare / two subtracts / xxsel sequence.
define <4 x i32> @absd_int32_ugt_opp(<4 x i32>, <4 x i32>) {
; CHECK-PWR9-LABEL: absd_int32_ugt_opp:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-NEXT: vnegw v2, v2
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: absd_int32_ugt_opp:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vcmpgtuw v4, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v5, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v3, v2
; CHECK-PWR78-NEXT: xxsel v2, v5, v2, v4
; CHECK-PWR78-NEXT: blr
%3 = icmp ugt <4 x i32> %0, %1
%4 = sub <4 x i32> %0, %1
%5 = sub <4 x i32> %1, %0
%6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4
ret <4 x i32> %6
}
; Unsigned absolute difference on <2 x i64>:
; select(%0 ugt %1, %0 - %1, %1 - %0).
; No vabsdu* instruction is expected on any target here.
; Expected codegen per the assertions below:
;   pwr9, pwr8: vminud, vmaxud, vsubudm (max - min).
;   pwr7: the operation is carried out in GPRs through stack slots
;         (stxvd2x / ld / sub / cmpld / iselgt / std ...), with the result
;         reloaded by lxvd2x.
define <2 x i64> @absd_int64_ugt(<2 x i64>, <2 x i64>) {
; CHECK-PWR9-LABEL: absd_int64_ugt:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: vminud v4, v2, v3
; CHECK-PWR9-NEXT: vmaxud v2, v2, v3
; CHECK-PWR9-NEXT: vsubudm v2, v2, v4
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR8-LABEL: absd_int64_ugt:
; CHECK-PWR8: # %bb.0:
; CHECK-PWR8-NEXT: vminud v4, v2, v3
; CHECK-PWR8-NEXT: vmaxud v2, v2, v3
; CHECK-PWR8-NEXT: vsubudm v2, v2, v4
; CHECK-PWR8-NEXT: blr
;
; CHECK-PWR7-LABEL: absd_int64_ugt:
; CHECK-PWR7: # %bb.0:
; CHECK-PWR7-NEXT: addi r3, r1, -96
; CHECK-PWR7-NEXT: stxvd2x v2, 0, r3
; CHECK-PWR7-NEXT: addi r3, r1, -80
; CHECK-PWR7-NEXT: stxvd2x v3, 0, r3
; CHECK-PWR7-NEXT: ld r3, -88(r1)
; CHECK-PWR7-NEXT: ld r4, -72(r1)
; CHECK-PWR7-NEXT: ld r6, -80(r1)
; CHECK-PWR7-NEXT: sub r5, r3, r4
; CHECK-PWR7-NEXT: cmpld r3, r4
; CHECK-PWR7-NEXT: li r3, 0
; CHECK-PWR7-NEXT: li r4, -1
; CHECK-PWR7-NEXT: std r5, -56(r1)
; CHECK-PWR7-NEXT: ld r5, -96(r1)
; CHECK-PWR7-NEXT: sub r7, r5, r6
; CHECK-PWR7-NEXT: std r7, -64(r1)
; CHECK-PWR7-NEXT: iselgt r7, r4, r3
; CHECK-PWR7-NEXT: cmpld r5, r6
; CHECK-PWR7-NEXT: std r7, -40(r1)
; CHECK-PWR7-NEXT: iselgt r3, r4, r3
; CHECK-PWR7-NEXT: addi r4, r1, -64
; CHECK-PWR7-NEXT: std r3, -48(r1)
; CHECK-PWR7-NEXT: lxvw4x vs0, 0, r4
; CHECK-PWR7-NEXT: addi r4, r1, -48
; CHECK-PWR7-NEXT: lxvw4x vs1, 0, r4
; CHECK-PWR7-NEXT: addi r4, r1, -32
; CHECK-PWR7-NEXT: xxlxor vs0, vs0, vs1
; CHECK-PWR7-NEXT: stxvw4x vs0, 0, r4
; CHECK-PWR7-NEXT: ld r4, -24(r1)
; CHECK-PWR7-NEXT: sub r4, r7, r4
; CHECK-PWR7-NEXT: std r4, -8(r1)
; CHECK-PWR7-NEXT: ld r4, -32(r1)
; CHECK-PWR7-NEXT: sub r3, r3, r4
; CHECK-PWR7-NEXT: std r3, -16(r1)
; CHECK-PWR7-NEXT: addi r3, r1, -16
; CHECK-PWR7-NEXT: lxvd2x v2, 0, r3
; CHECK-PWR7-NEXT: blr
%3 = icmp ugt <2 x i64> %0, %1
%4 = sub <2 x i64> %0, %1
%5 = sub <2 x i64> %1, %0
%6 = select <2 x i1> %3, <2 x i64> %4, <2 x i64> %5
ret <2 x i64> %6
}
; Declarations of the PowerPC AltiVec vmaxs{w,h,b} intrinsics called by the
; test functions in this file (v4i32, v8i16 and v16i8 variants).
declare <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32>, <4 x i32>)
declare <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16>, <8 x i16>)
declare <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8>, <16 x i8>)