; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

;
; Immediate Compares
;
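; The compare below takes a splat of zero as its first operand, so it is
; matched to the immediate form of the instruction (cmple ..., #0), and the
; PTEST against an all-true PTRUE folds into the compare's implicit flag
; setting, as the checks show.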

define i32 @cmple_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: cmple_imm_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmple p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %a)
  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
  %conv = zext i1 %3 to i32
  ret i32 %conv
}

;
; Wide Compares
;

define i32 @cmple_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmple_wide_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmple p0.b, p0/z, z0.b, z1.d
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmple.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
  %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
  %conv = zext i1 %2 to i32
  ret i32 %conv
}

define i32 @cmple_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmple_wide_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmple p0.h, p0/z, z0.h, z1.d
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmple.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
  %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
  %conv = zext i1 %4 to i32
  ret i32 %conv
}

define i32 @cmple_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmple_wide_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmple p0.s, p0/z, z0.s, z1.d
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmple.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
  %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
  %conv = zext i1 %4 to i32
  ret i32 %conv
}

; ==============================================================================
; PTEST_OP(PG, CMP(PG, ...))
; ==============================================================================

;
; PTEST_FIRST(PG, CMP8(PG, A, B)). PTEST is redundant.
;
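; SVE predicated compares set NZCV as if a PTEST of the result with the
; compare's governing predicate had been performed, so an explicit PTEST
; using the same mask and element size adds no information and can be folded
; away.
;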
define i1 @cmp8_ptest_first_px(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_first_px:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    cset w0, mi
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %2 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
  ret i1 %2
}

;
; PTEST_LAST(PG, CMP8(PG, A, B)). PTEST is redundant.
;
define i1 @cmp8_ptest_last_px(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_last_px:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %2 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
  ret i1 %2
}

;
; PTEST_ANY(PG, CMP8(PG, A, B)). PTEST is redundant.
;
define i1 @cmp8_ptest_any_px(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_any_px:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
  ret i1 %2
}

;
; Same as above, except PG is an incorrectly sized PTRUE (.s rather than .b).
;
define i1 @cmp8_ptest_any_px_bad_ptrue(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_any_px_bad_ptrue:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %2, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %2, <vscale x 16 x i1> %3)
  ret i1 %4
}

;
; PTEST_FIRST(PG, CMP32(PG, A, B)). Can't remove PTEST since PTEST.B vs CMP.S.
;
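; The explicit PTEST interprets the mask PG at byte granularity, whereas the
; compare's implicit PTEST uses PG at word (.s) granularity, so the FIRST and
; LAST flags can be taken from different elements and may differ.
;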
define i1 @cmp32_ptest_first_px(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_first_px:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    ptest p0, p1.b
; CHECK-NEXT:    cset w0, mi
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
  %4 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
  ret i1 %4
}

;
; PTEST_LAST(PG, CMP32(PG, A, B)). Can't remove PTEST since PTEST.B vs CMP.S.
;
define i1 @cmp32_ptest_last_px(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_last_px:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    ptest p0, p1.b
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
  %4 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
  ret i1 %4
}

;
; PTEST_ANY(PG, CMP32(PG, A, B)). PTEST is redundant.
;
define i1 @cmp32_ptest_any_px(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_any_px:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
  %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
  ret i1 %4
}

;
; Same as above, except PG is an incorrectly sized PTRUE (.b rather than .s).
;
define i1 @cmp32_ptest_any_px_bad_ptrue(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_any_px_bad_ptrue:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
  %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %2, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %4 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %3)
  %5 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %4)
  ret i1 %5
}

; ==============================================================================
; PTEST_OP(X=CMP(PG, ...), X)
; ==============================================================================

;
; PTEST_FIRST(X=CMP8(PG, A, B), X). PTEST is redundant if condition is changed
; to any.
;
; Can't remove PTEST and keep the same condition (first), since the mask for
; the implicit PTEST (PG) performed by the compare differs from the mask
; specified to the explicit PTEST and could have a different result.
;
; For example, consider
;
;   PG=<1, 1, x, x>
;   Z0=<1, 2, x, x>
;   Z1=<2, 1, x, x>
;
;   X=CMPGE(PG, Z0, Z1)
;    =<0, 1, x, x>, NZCV=0xxx
;   PTEST(X, X), NZCV=1xxx
;
; where the first active flag (bit 'N' in NZCV) is set by the explicit PTEST,
; but not by the implicit PTEST as part of the compare. However, given the
; PTEST mask and source are the same, first is equivalent to any. The same
; applies to last active.
;
define i1 @cmp8_ptest_first_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_first_xx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %2 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
  ret i1 %2
}

;
; PTEST_LAST(X=CMP8(PG, A, B), X). PTEST is redundant if condition is changed
; to any.
;
define i1 @cmp8_ptest_last_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_last_xx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %2 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
  ret i1 %2
}

;
; PTEST_ANY(X=CMP8(PG, A, B), X). PTEST is redundant.
;
define i1 @cmp8_ptest_any_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_any_xx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
  ret i1 %2
}

;
; PTEST_FIRST(X=CMP32(PG, A, B), X). PTEST is redundant if condition is changed
; to any.
;
define i1 @cmp32_ptest_first_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_first_xx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1> %2, <vscale x 4 x i1> %2)
  ret i1 %3
}

;
; PTEST_LAST(X=CMP32(PG, A, B), X). PTEST is redundant if condition is changed
; to any.
;
define i1 @cmp32_ptest_last_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_last_xx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv4i1(<vscale x 4 x i1> %2, <vscale x 4 x i1> %2)
  ret i1 %3
}

;
; PTEST_ANY(X=CMP32(PG, A, B), X). PTEST is redundant.
;
define i1 @cmp32_ptest_any_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_any_xx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1> %2, <vscale x 4 x i1> %2)
  ret i1 %3
}

; ==============================================================================
; PTEST_OP(PTRUE_ALL, CMP(PG, ...))
; ==============================================================================

;
; PTEST_FIRST(PTRUE_ALL, CMP8(PG, A, B)). Can't remove PTEST since mask is
; different.
;
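; For example, with
;
;   PG=<0, 1, x, x>
;   X=CMP8(PG, A, B)=<0, 1, x, x>
;
; the compare's implicit PTEST(PG, X) takes N from lane 1, the first active
; lane of PG, giving NZCV=1xxx, whereas the explicit PTEST(PTRUE_ALL, X)
; takes N from lane 0, giving NZCV=0xxx.
;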
define i1 @cmp8_ptest_first_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_first_ax:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.b
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    ptest p1, p0.b
; CHECK-NEXT:    cset w0, mi
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
  ret i1 %3
}

;
; PTEST_LAST(PTRUE_ALL, CMP8(PG, A, B)). Can't remove PTEST since mask is
; different.
;
define i1 @cmp8_ptest_last_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_last_ax:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.b
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    ptest p1, p0.b
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
  ret i1 %3
}

;
; PTEST_ANY(PTRUE_ALL, CMP8(PG, A, B)). PTEST is redundant.
;
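; The compare zeroes all lanes that are inactive in PG, so ANY(PTRUE_ALL, X)
; equals ANY(PG, X), which the compare's implicit PTEST already computes;
; unlike FIRST and LAST, ANY is insensitive to the wider mask.
;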
define i1 @cmp8_ptest_any_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_any_ax:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
  ret i1 %3
}

;
; PTEST_FIRST(PTRUE_ALL, CMP32(PG, A, B)). Can't remove PTEST since mask is
; different.
;
define i1 @cmp32_ptest_first_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_first_ax:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.s
; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    ptest p1, p0.b
; CHECK-NEXT:    cset w0, mi
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %4 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1> %3, <vscale x 4 x i1> %2)
  ret i1 %4
}

;
; PTEST_LAST(PTRUE_ALL, CMP32(PG, A, B)). Can't remove PTEST since mask is
; different.
;
define i1 @cmp32_ptest_last_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_last_ax:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.s
; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    ptest p1, p0.b
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %4 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv4i1(<vscale x 4 x i1> %3, <vscale x 4 x i1> %2)
  ret i1 %4
}

;
; PTEST_ANY(PTRUE_ALL, CMP32(PG, A, B)). PTEST is redundant.
;
define i1 @cmp32_ptest_any_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_any_ax:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1> %3, <vscale x 4 x i1> %2)
  ret i1 %4
}

; ==============================================================================
; PTEST_OP(PTRUE_ALL, CMP(PTRUE_ALL, ...))
; ==============================================================================

;
; PTEST_FIRST(PTRUE_ALL, CMP8(PTRUE_ALL, A, B)). PTEST is redundant.
;
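; Here the PTEST mask is the same all-true predicate that governs the
; compare, so the implicit and explicit tests see identical mask and source,
; and FIRST and LAST are removable as well, not just ANY.
;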
define i1 @cmp8_ptest_first_aa(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_first_aa:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    cset w0, mi
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %2)
  ret i1 %3
}

;
; PTEST_LAST(PTRUE_ALL, CMP8(PTRUE_ALL, A, B)). PTEST is redundant.
;
define i1 @cmp8_ptest_last_aa(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_last_aa:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %2)
  ret i1 %3
}

;
; PTEST_ANY(PTRUE_ALL, CMP8(PTRUE_ALL, A, B)). PTEST is redundant.
;
define i1 @cmp8_ptest_any_aa(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_any_aa:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %2)
  ret i1 %3
}

;
; PTEST_FIRST(PTRUE_ALL, CMP32(PTRUE_ALL, A, B)). PTEST is redundant.
;
define i1 @cmp32_ptest_first_aa(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_first_aa:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    cset w0, mi
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %2)
  ret i1 %3
}

;
; PTEST_LAST(PTRUE_ALL, CMP32(PTRUE_ALL, A, B)). PTEST is redundant.
;
define i1 @cmp32_ptest_last_aa(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_last_aa:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %2)
  ret i1 %3
}

;
; PTEST_ANY(PTRUE_ALL, CMP32(PTRUE_ALL, A, B)). PTEST is redundant.
;
define i1 @cmp32_ptest_any_aa(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_any_aa:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %2)
  ret i1 %3
}

declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.cmple.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.cmple.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.cmple.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)

declare i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)

declare i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare i1 @llvm.aarch64.sve.ptest.last.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)

declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)

declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)