; Codegen tests for the @llvm.experimental.vector.match intrinsic on AArch64 with the +sve2 target feature.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=aarch64 < %s -o - | FileCheck %s
; Scalable <vscale x 16 x i8> %op1 searched against a single-element <1 x i8> %op2.
; Expected output: the one %op2 byte is broadcast to every lane (mov z1.b, b1)
; and a single cmpeq, governed by the incoming predicate p0, yields the result.
; No MATCH instruction is expected for this shape.
define <vscale x 16 x i1> @match_nxv16i8_v1i8(<vscale x 16 x i8> %op1, <1 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv16i8_v1i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: mov z1.b, b1
; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: ret
%r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <1 x i8> %op2, <vscale x 16 x i1> %mask)
ret <vscale x 16 x i1> %r
}
; Scalable <vscale x 16 x i8> %op1 searched against a two-element <2 x i8> %op2.
; Expected output: both %op2 elements are moved to general-purpose registers
; (from v1.s[1] and s1), splatted to z registers and compared with cmpeq under
; an all-true predicate (ptrue p1.b). The two compare results are merged with
; sel (p1 ? p1 : p2, i.e. a logical OR) and finally ANDed with p0.
define <vscale x 16 x i1> @match_nxv16i8_v2i8(<vscale x 16 x i8> %op1, <2 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv16i8_v2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: mov w8, v1.s[1]
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: mov z1.b, w8
; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z1.b
; CHECK-NEXT: cmpeq p1.b, p1/z, z0.b, z2.b
; CHECK-NEXT: sel p1.b, p1, p1.b, p2.b
; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b
; CHECK-NEXT: ret
%r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <2 x i8> %op2, <vscale x 16 x i1> %mask)
ret <vscale x 16 x i1> %r
}
; Scalable <vscale x 16 x i8> %op1 searched against a four-element <4 x i8> %op2.
; Expected output: the four %op2 elements are read with umov from halfword
; lanes v1.h[0..3], splatted, and compared one by one with cmpeq under an
; all-true predicate. The four results are merged pairwise (predicate mov/sel)
; and ANDed with p0. The sequence uses p4 as a temporary, so the expected code
; also spills and reloads p4 on an SVE stack slot (addvl/str/ldr) and emits the
; matching CFI directives.
define <vscale x 16 x i1> @match_nxv16i8_v4i8(<vscale x 16 x i8> %op1, <4 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv16i8_v4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: umov w8, v1.h[1]
; CHECK-NEXT: umov w9, v1.h[0]
; CHECK-NEXT: umov w10, v1.h[2]
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: umov w8, v1.h[3]
; CHECK-NEXT: mov z1.b, w10
; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z2.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z1.b
; CHECK-NEXT: cmpeq p1.b, p1/z, z0.b, z2.b
; CHECK-NEXT: mov p2.b, p3/m, p3.b
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: mov p1.b, p2/m, p2.b
; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <4 x i8> %op2, <vscale x 16 x i1> %mask)
ret <vscale x 16 x i1> %r
}
; Scalable <vscale x 16 x i8> %op1 searched against an eight-element <8 x i8> %op2.
; Expected output: the 64-bit %op2 value is replicated across z1 as doubleword
; elements (mov z1.d, d1) and a single SVE2 match instruction, governed by p0,
; produces the result.
define <vscale x 16 x i1> @match_nxv16i8_v8i8(<vscale x 16 x i8> %op1, <8 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv16i8_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: mov z1.d, d1
; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: ret
%r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <8 x i8> %op2, <vscale x 16 x i1> %mask)
ret <vscale x 16 x i1> %r
}
; Scalable <vscale x 16 x i8> %op1 searched against a sixteen-element <16 x i8> %op2.
; Expected output: the 128-bit %op2 value is replicated across z1 as quadword
; elements (mov z1.q, q1) and a single SVE2 match instruction, governed by p0,
; produces the result.
define <vscale x 16 x i1> @match_nxv16i8_v16i8(<vscale x 16 x i8> %op1, <16 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv16i8_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: mov z1.q, q1
; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: ret
%r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <16 x i8> %op2, <vscale x 16 x i1> %mask)
ret <vscale x 16 x i1> %r
}
; Fixed-width <16 x i8> %op1 searched against a single-element <1 x i8> %op2.
; Expected output is NEON only: byte lane 0 of v1 is duplicated to all lanes,
; compared with cmeq, and the result is ANDed with v2 (the third vector
; argument register, i.e. %mask).
define <16 x i1> @match_v16i8_v1i8(<16 x i8> %op1, <1 x i8> %op2, <16 x i1> %mask) #0 {
; CHECK-LABEL: match_v16i8_v1i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: dup v1.16b, v1.b[0]
; CHECK-NEXT: cmeq v0.16b, v0.16b, v1.16b
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
%r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <1 x i8> %op2, <16 x i1> %mask)
ret <16 x i1> %r
}
; Fixed-width <16 x i8> %op1 searched against a two-element <2 x i8> %op2.
; Expected output is NEON only: the two %op2 elements are duplicated from byte
; lanes 0 and 4 of v1, each is compared with cmeq, the two results are ORed
; together and then ANDed with v2 (%mask).
define <16 x i1> @match_v16i8_v2i8(<16 x i8> %op1, <2 x i8> %op2, <16 x i1> %mask) #0 {
; CHECK-LABEL: match_v16i8_v2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: dup v3.16b, v1.b[4]
; CHECK-NEXT: dup v1.16b, v1.b[0]
; CHECK-NEXT: cmeq v3.16b, v0.16b, v3.16b
; CHECK-NEXT: cmeq v0.16b, v0.16b, v1.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
%r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <2 x i8> %op2, <16 x i1> %mask)
ret <16 x i1> %r
}
; Fixed-width <16 x i8> %op1 searched against a four-element <4 x i8> %op2.
; Expected output is NEON only: the four %op2 elements are duplicated from byte
; lanes 0, 2, 4 and 6 of v1, each is compared with cmeq, the four results are
; combined with a tree of orr instructions and then ANDed with v2 (%mask).
define <16 x i1> @match_v16i8_v4i8(<16 x i8> %op1, <4 x i8> %op2, <16 x i1> %mask) #0 {
; CHECK-LABEL: match_v16i8_v4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: dup v3.16b, v1.b[2]
; CHECK-NEXT: dup v4.16b, v1.b[0]
; CHECK-NEXT: dup v5.16b, v1.b[4]
; CHECK-NEXT: dup v1.16b, v1.b[6]
; CHECK-NEXT: cmeq v3.16b, v0.16b, v3.16b
; CHECK-NEXT: cmeq v4.16b, v0.16b, v4.16b
; CHECK-NEXT: cmeq v5.16b, v0.16b, v5.16b
; CHECK-NEXT: cmeq v0.16b, v0.16b, v1.16b
; CHECK-NEXT: orr v1.16b, v4.16b, v3.16b
; CHECK-NEXT: orr v0.16b, v5.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
%r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <4 x i8> %op2, <16 x i1> %mask)
ret <16 x i1> %r
}
; Fixed-width <16 x i8> %op1 searched against an eight-element <8 x i8> %op2,
; expected to be lowered through the SVE2 match instruction.
; Expected output: the vector in v2 (%mask) is turned into a predicate
; (shl #7 + cmlt #0, then cmpne under ptrue p0.b, vl16), the 64-bit %op2 value is
; replicated with mov z1.d, d1, match is executed, and the resulting predicate
; is expanded back into a byte vector with mov z0.b, p0/z, #-1.
define <16 x i1> @match_v16i8_v8i8(<16 x i8> %op1, <8 x i8> %op2, <16 x i1> %mask) #0 {
; CHECK-LABEL: match_v16i8_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v2.16b, v2.16b, #7
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: mov z1.d, d1
; CHECK-NEXT: cmlt v2.16b, v2.16b, #0
; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0
; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <8 x i8> %op2, <16 x i1> %mask)
ret <16 x i1> %r
}
; Fixed-width <16 x i8> %op1 searched against a sixteen-element <16 x i8> %op2,
; expected to be lowered through the SVE2 match instruction.
; Expected output: the vector in v2 (%mask) is turned into a predicate
; (shl #7 + cmlt #0, then cmpne under ptrue p0.b, vl16), match is executed on
; z0/z1 without any replication of %op2, and the resulting predicate is
; expanded back into a byte vector with mov z0.b, p0/z, #-1.
define <16 x i1> @match_v16i8_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask) #0 {
; CHECK-LABEL: match_v16i8_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v2.16b, v2.16b, #7
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: cmlt v2.16b, v2.16b, #0
; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0
; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask)
ret <16 x i1> %r
}
; Fixed-width 64-bit case: <8 x i8> %op1 searched against <8 x i8> %op2,
; expected to be lowered through the SVE2 match instruction.
; Expected output: the vector in v2 (%mask) is turned into a predicate
; (shl #7 + cmlt #0 on .8b, then cmpne under ptrue p0.b, vl8), %op2 is
; replicated with mov z1.d, d1, match is executed, and the predicate is
; expanded back into a byte vector with mov z0.b, p0/z, #-1.
define <8 x i1> @match_v8i8_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) #0 {
; CHECK-LABEL: match_v8i8_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v2.8b, v2.8b, #7
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: mov z1.d, d1
; CHECK-NEXT: cmlt v2.8b, v2.8b, #0
; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0
; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%r = tail call <8 x i1> @llvm.experimental.vector.match(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask)
ret <8 x i1> %r
}
; Halfword variant: scalable <vscale x 8 x i16> %op1 searched against <8 x i16> %op2.
; Expected output: the 128-bit %op2 value is replicated across z1 as quadword
; elements (mov z1.q, q1) and a single match on .h elements, governed by p0,
; produces the result.
define <vscale x 8 x i1> @match_nxv8i16_v8i16(<vscale x 8 x i16> %op1, <8 x i16> %op2, <vscale x 8 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv8i16_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: mov z1.q, q1
; CHECK-NEXT: match p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: ret
%r = tail call <vscale x 8 x i1> @llvm.experimental.vector.match(<vscale x 8 x i16> %op1, <8 x i16> %op2, <vscale x 8 x i1> %mask)
ret <vscale x 8 x i1> %r
}
; Fixed-width halfword variant: <8 x i16> %op1 searched against <8 x i16> %op2.
; Expected output: the byte-sized vector in v2 (%mask) is widened to halfwords
; (ushll), turned into a predicate (shl #15 + cmlt #0, then cmpne under
; ptrue p0.h, vl8), match is executed on .h elements, the predicate is expanded
; with mov z0.h, p0/z, #-1, and the result is narrowed back to bytes with xtn.
define <8 x i1> @match_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) #0 {
; CHECK-LABEL: match_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ushll v2.8h, v2.8b, #0
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: shl v2.8h, v2.8h, #15
; CHECK-NEXT: cmlt v2.8h, v2.8h, #0
; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, #0
; CHECK-NEXT: match p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
%r = tail call <8 x i1> @llvm.experimental.vector.match(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask)
ret <8 x i1> %r
}
; Cases where op2 has more elements than op1.
; <8 x i8> %op1 searched against a larger <16 x i8> %op2.
; Expected output: the vector in v2 (%mask) is turned into a predicate
; (shl #7 + cmlt #0 on .8b, then cmpne under ptrue p0.b, vl8), match is executed
; with the full 128-bit %op2 in z1 (no replication step), and the predicate is
; expanded back into a byte vector with mov z0.b, p0/z, #-1.
define <8 x i1> @match_v8i8_v16i8(<8 x i8> %op1, <16 x i8> %op2, <8 x i1> %mask) #0 {
; CHECK-LABEL: match_v8i8_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v2.8b, v2.8b, #7
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: cmlt v2.8b, v2.8b, #0
; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0
; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%r = tail call <8 x i1> @llvm.experimental.vector.match(<8 x i8> %op1, <16 x i8> %op2, <8 x i1> %mask)
ret <8 x i1> %r
}
; Scalable <vscale x 16 x i8> %op1 searched against a 32-element <32 x i8> %op2.
; No match instruction is expected for this shape. Instead the expected output
; broadcasts each of the 32 %op2 bytes in turn (lanes 0..15 of z1, then lanes
; 0..15 of z2), compares it against z0 with cmpeq under an all-true predicate,
; and accumulates the results into p2 with predicate mov/sel. The accumulated
; predicate is finally ANDed with p0. The sequence uses p4 as a temporary, so
; the expected code also spills and reloads p4 on an SVE stack slot and emits
; the matching CFI directives.
define <vscale x 16 x i1> @match_nxv16i8_v32i8(<vscale x 16 x i8> %op1, <32 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv16i8_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: mov z3.b, z1.b[1]
; CHECK-NEXT: mov z4.b, b1
; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: mov z5.b, z1.b[2]
; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z4.b
; CHECK-NEXT: mov z3.b, z1.b[3]
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z5.b
; CHECK-NEXT: mov z4.b, z1.b[4]
; CHECK-NEXT: mov p2.b, p3/m, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, z1.b[5]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z4.b
; CHECK-NEXT: mov z4.b, z1.b[6]
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, z1.b[7]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z4.b
; CHECK-NEXT: mov z4.b, z1.b[8]
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, z1.b[9]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z4.b
; CHECK-NEXT: mov z4.b, z1.b[10]
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, z1.b[11]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z4.b
; CHECK-NEXT: mov z4.b, z1.b[12]
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, z1.b[13]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z4.b
; CHECK-NEXT: mov z4.b, z1.b[14]
; CHECK-NEXT: mov z1.b, z1.b[15]
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, b2
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z4.b
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b
; CHECK-NEXT: mov z1.b, z2.b[1]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, z2.b[2]
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b
; CHECK-NEXT: mov z1.b, z2.b[3]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, z2.b[4]
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b
; CHECK-NEXT: mov z1.b, z2.b[5]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, z2.b[6]
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b
; CHECK-NEXT: mov z1.b, z2.b[7]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, z2.b[8]
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b
; CHECK-NEXT: mov z1.b, z2.b[9]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, z2.b[10]
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b
; CHECK-NEXT: mov z1.b, z2.b[11]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, z2.b[12]
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b
; CHECK-NEXT: mov z1.b, z2.b[13]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b
; CHECK-NEXT: mov z3.b, z2.b[14]
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b
; CHECK-NEXT: mov z1.b, z2.b[15]
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b
; CHECK-NEXT: cmpeq p1.b, p1/z, z0.b, z1.b
; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: mov p1.b, p2/m, p2.b
; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <32 x i8> %op2, <vscale x 16 x i1> %mask)
ret <vscale x 16 x i1> %r
}
; Fixed-width <16 x i8> %op1 searched against a 32-element <32 x i8> %op2.
; No match instruction is expected for this shape. The expected output is NEON
; only: each of the 32 %op2 bytes (lanes 0..15 of v1 and lanes 0..15 of v2) is
; duplicated across a vector and compared against v0 with cmeq, the compare
; results are combined with orr, and the final value is ANDed with v3.
; NOTE(review): v3 is presumably where %mask arrives, since %op2 occupies v1
; and v2 here - confirm against the calling convention.
define <16 x i1> @match_v16i8_v32i8(<16 x i8> %op1, <32 x i8> %op2, <16 x i1> %mask) #0 {
; CHECK-LABEL: match_v16i8_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: dup v4.16b, v1.b[1]
; CHECK-NEXT: dup v5.16b, v1.b[0]
; CHECK-NEXT: dup v6.16b, v1.b[2]
; CHECK-NEXT: dup v7.16b, v1.b[3]
; CHECK-NEXT: dup v16.16b, v1.b[4]
; CHECK-NEXT: dup v17.16b, v1.b[5]
; CHECK-NEXT: dup v18.16b, v1.b[6]
; CHECK-NEXT: dup v19.16b, v1.b[7]
; CHECK-NEXT: dup v20.16b, v1.b[8]
; CHECK-NEXT: cmeq v4.16b, v0.16b, v4.16b
; CHECK-NEXT: cmeq v5.16b, v0.16b, v5.16b
; CHECK-NEXT: cmeq v6.16b, v0.16b, v6.16b
; CHECK-NEXT: cmeq v7.16b, v0.16b, v7.16b
; CHECK-NEXT: cmeq v16.16b, v0.16b, v16.16b
; CHECK-NEXT: cmeq v17.16b, v0.16b, v17.16b
; CHECK-NEXT: dup v21.16b, v2.b[7]
; CHECK-NEXT: dup v22.16b, v1.b[10]
; CHECK-NEXT: orr v4.16b, v5.16b, v4.16b
; CHECK-NEXT: orr v5.16b, v6.16b, v7.16b
; CHECK-NEXT: orr v6.16b, v16.16b, v17.16b
; CHECK-NEXT: cmeq v7.16b, v0.16b, v18.16b
; CHECK-NEXT: cmeq v16.16b, v0.16b, v19.16b
; CHECK-NEXT: cmeq v17.16b, v0.16b, v20.16b
; CHECK-NEXT: dup v18.16b, v1.b[9]
; CHECK-NEXT: dup v19.16b, v1.b[11]
; CHECK-NEXT: dup v20.16b, v1.b[12]
; CHECK-NEXT: cmeq v22.16b, v0.16b, v22.16b
; CHECK-NEXT: orr v4.16b, v4.16b, v5.16b
; CHECK-NEXT: orr v5.16b, v6.16b, v7.16b
; CHECK-NEXT: orr v6.16b, v16.16b, v17.16b
; CHECK-NEXT: cmeq v7.16b, v0.16b, v18.16b
; CHECK-NEXT: dup v18.16b, v1.b[13]
; CHECK-NEXT: cmeq v16.16b, v0.16b, v19.16b
; CHECK-NEXT: cmeq v17.16b, v0.16b, v20.16b
; CHECK-NEXT: dup v19.16b, v2.b[0]
; CHECK-NEXT: dup v20.16b, v2.b[1]
; CHECK-NEXT: orr v4.16b, v4.16b, v5.16b
; CHECK-NEXT: dup v5.16b, v2.b[6]
; CHECK-NEXT: orr v6.16b, v6.16b, v7.16b
; CHECK-NEXT: orr v7.16b, v16.16b, v17.16b
; CHECK-NEXT: cmeq v16.16b, v0.16b, v18.16b
; CHECK-NEXT: cmeq v17.16b, v0.16b, v19.16b
; CHECK-NEXT: cmeq v18.16b, v0.16b, v20.16b
; CHECK-NEXT: dup v19.16b, v2.b[2]
; CHECK-NEXT: cmeq v5.16b, v0.16b, v5.16b
; CHECK-NEXT: cmeq v20.16b, v0.16b, v21.16b
; CHECK-NEXT: dup v21.16b, v2.b[8]
; CHECK-NEXT: orr v6.16b, v6.16b, v22.16b
; CHECK-NEXT: orr v7.16b, v7.16b, v16.16b
; CHECK-NEXT: dup v16.16b, v1.b[14]
; CHECK-NEXT: dup v1.16b, v1.b[15]
; CHECK-NEXT: orr v17.16b, v17.16b, v18.16b
; CHECK-NEXT: cmeq v18.16b, v0.16b, v19.16b
; CHECK-NEXT: dup v19.16b, v2.b[3]
; CHECK-NEXT: orr v5.16b, v5.16b, v20.16b
; CHECK-NEXT: cmeq v20.16b, v0.16b, v21.16b
; CHECK-NEXT: dup v21.16b, v2.b[9]
; CHECK-NEXT: cmeq v16.16b, v0.16b, v16.16b
; CHECK-NEXT: cmeq v1.16b, v0.16b, v1.16b
; CHECK-NEXT: orr v4.16b, v4.16b, v6.16b
; CHECK-NEXT: orr v17.16b, v17.16b, v18.16b
; CHECK-NEXT: cmeq v18.16b, v0.16b, v19.16b
; CHECK-NEXT: dup v19.16b, v2.b[4]
; CHECK-NEXT: orr v5.16b, v5.16b, v20.16b
; CHECK-NEXT: cmeq v20.16b, v0.16b, v21.16b
; CHECK-NEXT: dup v21.16b, v2.b[10]
; CHECK-NEXT: orr v7.16b, v7.16b, v16.16b
; CHECK-NEXT: orr v16.16b, v17.16b, v18.16b
; CHECK-NEXT: cmeq v17.16b, v0.16b, v19.16b
; CHECK-NEXT: dup v18.16b, v2.b[5]
; CHECK-NEXT: orr v5.16b, v5.16b, v20.16b
; CHECK-NEXT: cmeq v19.16b, v0.16b, v21.16b
; CHECK-NEXT: dup v20.16b, v2.b[11]
; CHECK-NEXT: orr v1.16b, v7.16b, v1.16b
; CHECK-NEXT: orr v6.16b, v16.16b, v17.16b
; CHECK-NEXT: cmeq v7.16b, v0.16b, v18.16b
; CHECK-NEXT: dup v17.16b, v2.b[12]
; CHECK-NEXT: orr v5.16b, v5.16b, v19.16b
; CHECK-NEXT: cmeq v16.16b, v0.16b, v20.16b
; CHECK-NEXT: dup v18.16b, v2.b[13]
; CHECK-NEXT: dup v19.16b, v2.b[14]
; CHECK-NEXT: orr v1.16b, v4.16b, v1.16b
; CHECK-NEXT: dup v2.16b, v2.b[15]
; CHECK-NEXT: orr v4.16b, v6.16b, v7.16b
; CHECK-NEXT: cmeq v6.16b, v0.16b, v17.16b
; CHECK-NEXT: orr v5.16b, v5.16b, v16.16b
; CHECK-NEXT: cmeq v7.16b, v0.16b, v18.16b
; CHECK-NEXT: cmeq v16.16b, v0.16b, v19.16b
; CHECK-NEXT: cmeq v0.16b, v0.16b, v2.16b
; CHECK-NEXT: orr v1.16b, v1.16b, v4.16b
; CHECK-NEXT: orr v4.16b, v5.16b, v6.16b
; CHECK-NEXT: orr v5.16b, v7.16b, v16.16b
; CHECK-NEXT: orr v1.16b, v1.16b, v4.16b
; CHECK-NEXT: orr v0.16b, v5.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <32 x i8> %op2, <16 x i1> %mask)
ret <16 x i1> %r
}
; Data types not supported by MATCH.
; Note: The cases for SVE could be made tighter.
; 32-bit elements: scalable <vscale x 4 x i32> %op1 searched against <4 x i32> %op2.
; No match instruction is expected. The expected output broadcasts each of the
; four %op2 words (z1.s[0..3]), compares it against z0 with cmpeq on .s elements
; under an all-true predicate (ptrue p1.s), merges the four results with
; predicate mov/sel, and ANDs the outcome with p0. p4 is used as a temporary, so
; the expected code also spills and reloads it and emits CFI directives.
define <vscale x 4 x i1> @match_nxv4xi32_v4i32(<vscale x 4 x i32> %op1, <4 x i32> %op2, <vscale x 4 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv4xi32_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: mov z2.s, z1.s[1]
; CHECK-NEXT: mov z3.s, s1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z4.s, z1.s[2]
; CHECK-NEXT: mov z1.s, z1.s[3]
; CHECK-NEXT: cmpeq p2.s, p1/z, z0.s, z2.s
; CHECK-NEXT: cmpeq p3.s, p1/z, z0.s, z3.s
; CHECK-NEXT: cmpeq p4.s, p1/z, z0.s, z4.s
; CHECK-NEXT: cmpeq p1.s, p1/z, z0.s, z1.s
; CHECK-NEXT: mov p2.b, p3/m, p3.b
; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: mov p1.b, p2/m, p2.b
; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%r = tail call <vscale x 4 x i1> @llvm.experimental.vector.match(<vscale x 4 x i32> %op1, <4 x i32> %op2, <vscale x 4 x i1> %mask)
ret <vscale x 4 x i1> %r
}
; 64-bit elements: scalable <vscale x 2 x i64> %op1 searched against <2 x i64> %op2.
; No match instruction is expected. The expected output broadcasts each of the
; two %op2 doublewords (z1.d[1] and d1), compares it against z0 with cmpeq on .d
; elements under an all-true predicate (ptrue p1.d), merges the two results
; with sel, and ANDs the outcome with p0.
define <vscale x 2 x i1> @match_nxv2xi64_v2i64(<vscale x 2 x i64> %op1, <2 x i64> %op2, <vscale x 2 x i1> %mask) #0 {
; CHECK-LABEL: match_nxv2xi64_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: mov z2.d, z1.d[1]
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z1.d, d1
; CHECK-NEXT: cmpeq p2.d, p1/z, z0.d, z2.d
; CHECK-NEXT: cmpeq p1.d, p1/z, z0.d, z1.d
; CHECK-NEXT: sel p1.b, p1, p1.b, p2.b
; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b
; CHECK-NEXT: ret
%r = tail call <vscale x 2 x i1> @llvm.experimental.vector.match(<vscale x 2 x i64> %op1, <2 x i64> %op2, <vscale x 2 x i1> %mask)
ret <vscale x 2 x i1> %r
}
; Fixed-width 32-bit elements: <4 x i32> %op1 searched against <4 x i32> %op2.
; Expected output is NEON only: each of the four %op2 words (v1.s[0..3]) is
; duplicated and compared with cmeq on .4s, the results are combined with a
; tree of orr instructions, narrowed to halfwords with xtn, and ANDed with v2
; (%mask) as an 8-byte vector.
define <4 x i1> @match_v4xi32_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) #0 {
; CHECK-LABEL: match_v4xi32_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: dup v3.4s, v1.s[1]
; CHECK-NEXT: dup v4.4s, v1.s[0]
; CHECK-NEXT: dup v5.4s, v1.s[2]
; CHECK-NEXT: dup v1.4s, v1.s[3]
; CHECK-NEXT: cmeq v3.4s, v0.4s, v3.4s
; CHECK-NEXT: cmeq v4.4s, v0.4s, v4.4s
; CHECK-NEXT: cmeq v5.4s, v0.4s, v5.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: orr v1.16b, v4.16b, v3.16b
; CHECK-NEXT: orr v0.16b, v5.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-NEXT: ret
%r = tail call <4 x i1> @llvm.experimental.vector.match(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask)
ret <4 x i1> %r
}
; Fixed-width 64-bit elements: <2 x i64> %op1 searched against <2 x i64> %op2.
; Expected output is NEON only: each of the two %op2 doublewords (v1.d[1] and
; v1.d[0]) is duplicated and compared with cmeq on .2d, the two results are
; ORed, narrowed to words with xtn, and ANDed with v2 (%mask) as an 8-byte
; vector.
define <2 x i1> @match_v2xi64_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) #0 {
; CHECK-LABEL: match_v2xi64_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: dup v3.2d, v1.d[1]
; CHECK-NEXT: dup v1.2d, v1.d[0]
; CHECK-NEXT: cmeq v3.2d, v0.2d, v3.2d
; CHECK-NEXT: cmeq v0.2d, v0.2d, v1.2d
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-NEXT: ret
%r = tail call <2 x i1> @llvm.experimental.vector.match(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask)
ret <2 x i1> %r
}
; Attribute group #0, referenced by the function definitions above: it turns on
; the SVE2 target feature for them.
attributes #0 = { "target-features"="+sve2" }