; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"
;
; Immediate Compares
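;
; The compare 0 >= A is equivalent to A <= 0, so it is emitted as a CMPLE
; against immediate #0, and the PTEST_ANY over an all-true predicate is folded
; into the flag-setting compare, leaving only a CSET of the resulting flags.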
;
define i32 @cmple_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: cmple_imm_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: cmple p0.b, p0/z, z0.b, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %a)
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
%conv = zext i1 %3 to i32
ret i32 %conv
}
;
; Wide Compares
;
define i32 @cmple_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmple_wide_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: cmple p0.b, p0/z, z0.b, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmple.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
%2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
%conv = zext i1 %2 to i32
ret i32 %conv
}
define i32 @cmple_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmple_wide_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: cmple p0.h, p0/z, z0.h, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
%2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmple.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
%4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
%conv = zext i1 %4 to i32
ret i32 %conv
}
define i32 @cmple_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmple_wide_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: cmple p0.s, p0/z, z0.s, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmple.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
%4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
%conv = zext i1 %4 to i32
ret i32 %conv
}
; ==============================================================================
; PTEST_OP(PG, CMP(PG, ...))
; ==============================================================================
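;
; In this section the PTEST mask is the same value as the compare's governing
; predicate PG, so the NZCV flags set implicitly by the compare are the flags
; the PTEST would compute. The PTEST is redundant when it also uses the
; compare's element size; when the sizes differ (PTEST.B over a .s compare),
; only the any condition can be folded.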
;
; PTEST_FIRST(PG, CMP8(PG, A, B)). PTEST is redundant.
;
define i1 @cmp8_ptest_first_px(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_first_px:
; CHECK: // %bb.0:
; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, mi
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
%2 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
ret i1 %2
}
;
; PTEST_LAST(PG, CMP8(PG, A, B)). PTEST is redundant.
;
define i1 @cmp8_ptest_last_px(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_last_px:
; CHECK: // %bb.0:
; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
%2 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
ret i1 %2
}
;
; PTEST_ANY(PG, CMP8(PG, A, B)). PTEST is redundant.
;
define i1 @cmp8_ptest_any_px(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_any_px:
; CHECK: // %bb.0:
; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
%2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
ret i1 %2
}
;
; Same as above except PG = incorrectly sized PTRUE
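;
; The PTEST can still be removed for the any condition: its mask is the same
; predicate value as the compare's governing predicate, so the compare's Z
; flag already answers "is any active element true", regardless of the element
; size the PTRUE was created with.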
;
define i1 @cmp8_ptest_any_px_bad_ptrue(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_any_px_bad_ptrue:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %2, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
%4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %2, <vscale x 16 x i1> %3)
ret i1 %4
}
;
; PTEST_FIRST(PG, CMP32(PG, A, B)). Can't remove PTEST since PTEST.B vs CMP.S.
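;
; For example, if the first active bit of PG (viewed as a .b predicate) does
; not sit at a 32-bit element boundary, the explicit PTEST.B reads that bit
; from the converted compare result, where it is always false, whereas the
; implicit PTEST of the .s compare reads the result of PG's first active .s
; element, which may be true. The same reasoning applies to last active.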
;
define i1 @cmp32_ptest_first_px(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_first_px:
; CHECK: // %bb.0:
; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ptest p0, p1.b
; CHECK-NEXT: cset w0, mi
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
%4 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
ret i1 %4
}
;
; PTEST_LAST(PG, CMP32(PG, A, B)). Can't remove PTEST since PTEST.B vs CMP.S.
;
define i1 @cmp32_ptest_last_px(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_last_px:
; CHECK: // %bb.0:
; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ptest p0, p1.b
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
%4 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
ret i1 %4
}
;
; PTEST_ANY(PG, CMP32(PG, A, B)). PTEST is redundant.
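;
; The element size mismatch does not matter for any: the converted compare
; result is false in every bit that is not the start of a .s element, so
; "any active element true" is the same question for the .b and .s views,
; and both PTESTs use the same predicate register as their mask.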
;
define i1 @cmp32_ptest_any_px(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_any_px:
; CHECK: // %bb.0:
; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
%4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
ret i1 %4
}
;
; Same as above except PG = incorrectly sized PTRUE
;
define i1 @cmp32_ptest_any_px_bad_ptrue(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_any_px_bad_ptrue:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
%3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %2, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
%4 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %3)
%5 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %4)
ret i1 %5
}
; ==============================================================================
; PTEST_OP(X=CMP(PG, ...), X)
; ==============================================================================
;
; PTEST_FIRST(X=CMP8(PG, A, B), X). PTEST is redundant if condition is changed
; to any.
;
; Can't remove PTEST and keep the same condition (first), since the mask for
; the implicit PTEST (PG) performed by the compare differs from the mask
; specified to the explicit PTEST and could have a different result.
;
; For example, consider
;
; PG=<1, 1, x, x>
; Z0=<1, 2, x, x>
; Z1=<2, 1, x, x>
;
; X=CMPGE(PG, Z0, Z1)
; =<0, 1, x, x>, NZCV=0xxx
; PTEST(X, X), NZCV=1xxx
;
; where the first active flag (bit 'N' in NZCV) is set by the explicit PTEST,
; but not by the implicit PTEST as part of the compare. However, given the
; PTEST mask and source are the same, first is equivalent to any. The same
; applies to last active.
;
define i1 @cmp8_ptest_first_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_first_xx:
; CHECK: // %bb.0:
; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
%2 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
ret i1 %2
}
;
; PTEST_LAST(X=CMP8(PG, A, B), X). PTEST is redundant if condition is changed
; to any.
;
define i1 @cmp8_ptest_last_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_last_xx:
; CHECK: // %bb.0:
; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
%2 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
ret i1 %2
}
;
; PTEST_ANY(X=CMP8(PG, A, B), X). PTEST is redundant.
;
define i1 @cmp8_ptest_any_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_any_xx:
; CHECK: // %bb.0:
; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
%2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
ret i1 %2
}
;
; PTEST_FIRST(X=CMP32(PG, A, B), X). PTEST is redundant if condition is changed
; to any.
;
define i1 @cmp32_ptest_first_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_first_xx:
; CHECK: // %bb.0:
; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
%3 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1> %2, <vscale x 4 x i1> %2)
ret i1 %3
}
;
; PTEST_LAST(X=CMP32(PG, A, B), X). PTEST is redundant if condition is changed
; to any.
;
define i1 @cmp32_ptest_last_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_last_xx:
; CHECK: // %bb.0:
; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
%3 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv4i1(<vscale x 4 x i1> %2, <vscale x 4 x i1> %2)
ret i1 %3
}
;
; PTEST_ANY(X=CMP32(PG, A, B), X). PTEST is redundant.
;
define i1 @cmp32_ptest_any_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_any_xx:
; CHECK: // %bb.0:
; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
%3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1> %2, <vscale x 4 x i1> %2)
ret i1 %3
}
; ==============================================================================
; PTEST_OP(PTRUE_ALL, CMP(PG, ...))
; ==============================================================================
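;
; Here the PTEST mask (an all-true predicate) differs from the compare's
; governing predicate PG. Elements that are inactive in PG are false in the
; zeroing compare result but active under PTRUE_ALL, so first and last can
; read a different element than the compare's implicit PTEST did; only the
; any condition is unaffected and allows the PTEST to be removed.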
;
; PTEST_FIRST(PTRUE_ALL, CMP8(PG, A, B)). Can't remove PTEST since mask is
; different.
;
define i1 @cmp8_ptest_first_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_first_ax:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, mi
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%3 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
ret i1 %3
}
;
; PTEST_LAST(PTRUE_ALL, CMP8(PG, A, B)). Can't remove PTEST since mask is
; different.
;
define i1 @cmp8_ptest_last_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_last_ax:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%3 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
ret i1 %3
}
;
; PTEST_ANY(PTRUE_ALL, CMP8(PG, A, B)). PTEST is redundant.
;
define i1 @cmp8_ptest_any_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_any_ax:
; CHECK: // %bb.0:
; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
ret i1 %3
}
;
; PTEST_FIRST(PTRUE_ALL, CMP32(PG, A, B)). Can't remove PTEST since mask is
; different.
;
define i1 @cmp32_ptest_first_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_first_ax:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, mi
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
%3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%4 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1> %3, <vscale x 4 x i1> %2)
ret i1 %4
}
;
; PTEST_LAST(PTRUE_ALL, CMP32(PG, A, B)). Can't remove PTEST since mask is
; different.
;
define i1 @cmp32_ptest_last_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_last_ax:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
%3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%4 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv4i1(<vscale x 4 x i1> %3, <vscale x 4 x i1> %2)
ret i1 %4
}
;
; PTEST_ANY(PTRUE_ALL, CMP32(PG, A, B)). PTEST is redundant.
;
define i1 @cmp32_ptest_any_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_any_ax:
; CHECK: // %bb.0:
; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
%3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1> %3, <vscale x 4 x i1> %2)
ret i1 %4
}
; ==============================================================================
; PTEST_OP(PTRUE_ALL, CMP(PTRUE_ALL, ...))
; ==============================================================================
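;
; Here the PTEST mask is the very same PTRUE_ALL that governs the compare and
; both use the compare's element size, so first, last and any are all answered
; by the compare's own NZCV flags and the PTEST is removed in every case.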
;
; PTEST_FIRST(PTRUE_ALL, CMP8(PTRUE_ALL, A, B)). PTEST is redundant.
;
define i1 @cmp8_ptest_first_aa(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_first_aa:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, mi
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
%3 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %2)
ret i1 %3
}
;
; PTEST_LAST(PTRUE_ALL, CMP8(PTRUE_ALL, A, B)). PTEST is redundant.
;
define i1 @cmp8_ptest_last_aa(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_last_aa:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
%3 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %2)
ret i1 %3
}
;
; PTEST_ANY(PTRUE_ALL, CMP8(PTRUE_ALL, A, B)). PTEST is redundant.
;
define i1 @cmp8_ptest_any_aa(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_any_aa:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
%3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %2)
ret i1 %3
}
;
; PTEST_FIRST(PTRUE_ALL, CMP32(PTRUE_ALL, A, B)). PTEST is redundant.
;
define i1 @cmp32_ptest_first_aa(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_first_aa:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, mi
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
%3 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %2)
ret i1 %3
}
;
; PTEST_LAST(PTRUE_ALL, CMP32(PTRUE_ALL, A, B)). PTEST is redundant.
;
define i1 @cmp32_ptest_last_aa(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_last_aa:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
%3 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %2)
ret i1 %3
}
;
; PTEST_ANY(PTRUE_ALL, CMP32(PTRUE_ALL, A, B)). PTEST is redundant.
;
define i1 @cmp32_ptest_any_aa(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_any_aa:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
%3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %2)
ret i1 %3
}
declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.cmple.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.cmple.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.cmple.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
declare i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare i1 @llvm.aarch64.sve.ptest.last.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)