| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 |
| ; RUN: llc -mtriple=aarch64 < %s -o - | FileCheck %s |
| |
| ; Single needle element: splat b1 across z1 and emit one CMPEQ governed |
| ; directly by the incoming mask predicate; SVE2 MATCH is not needed. |
| define <vscale x 16 x i1> @match_nxv16i8_v1i8(<vscale x 16 x i8> %op1, <1 x i8> %op2, <vscale x 16 x i1> %mask) #0 { |
| ; CHECK-LABEL: match_nxv16i8_v1i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 |
| ; CHECK-NEXT: mov z1.b, b1 |
| ; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, z1.b |
| ; CHECK-NEXT: ret |
| %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <1 x i8> %op2, <vscale x 16 x i1> %mask) |
| ret <vscale x 16 x i1> %r |
| } |
| |
| ; Two needle elements (i8 held in 32-bit lanes, so s-lane extracts): each |
| ; element is moved to a GPR, splatted, compared under an all-true predicate, |
| ; then the per-element results are ORed (sel) and ANDed with the mask. |
| define <vscale x 16 x i1> @match_nxv16i8_v2i8(<vscale x 16 x i8> %op1, <2 x i8> %op2, <vscale x 16 x i1> %mask) #0 { |
| ; CHECK-LABEL: match_nxv16i8_v2i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: mov w8, v1.s[1] |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: ptrue p1.b |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: mov z1.b, w8 |
| ; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z1.b |
| ; CHECK-NEXT: cmpeq p1.b, p1/z, z0.b, z2.b |
| ; CHECK-NEXT: sel p1.b, p1, p1.b, p2.b |
| ; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b |
| ; CHECK-NEXT: ret |
| %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <2 x i8> %op2, <vscale x 16 x i1> %mask) |
| ret <vscale x 16 x i1> %r |
| } |
| |
| ; Four needle elements (i8 held in 16-bit lanes, so umov h-lane extracts): |
| ; per-element splat + CMPEQ chain merged with sel/mov. Needs four live |
| ; predicates, so p4 is spilled/reloaded (hence the addvl frame and CFI). |
| define <vscale x 16 x i1> @match_nxv16i8_v4i8(<vscale x 16 x i8> %op1, <4 x i8> %op2, <vscale x 16 x i1> %mask) #0 { |
| ; CHECK-LABEL: match_nxv16i8_v4i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: addvl sp, sp, #-1 |
| ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: umov w8, v1.h[1] |
| ; CHECK-NEXT: umov w9, v1.h[0] |
| ; CHECK-NEXT: umov w10, v1.h[2] |
| ; CHECK-NEXT: ptrue p1.b |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: umov w8, v1.h[3] |
| ; CHECK-NEXT: mov z1.b, w10 |
| ; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z2.b |
| ; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z3.b |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z1.b |
| ; CHECK-NEXT: cmpeq p1.b, p1/z, z0.b, z2.b |
| ; CHECK-NEXT: mov p2.b, p3/m, p3.b |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b |
| ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: mov p1.b, p2/m, p2.b |
| ; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b |
| ; CHECK-NEXT: addvl sp, sp, #1 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: ret |
| %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <4 x i8> %op2, <vscale x 16 x i1> %mask) |
| ret <vscale x 16 x i1> %r |
| } |
| |
| ; 8-byte needle: dup the 64-bit value across every doubleword of z1 and use |
| ; the SVE2 MATCH instruction directly with the incoming mask. |
| define <vscale x 16 x i1> @match_nxv16i8_v8i8(<vscale x 16 x i8> %op1, <8 x i8> %op2, <vscale x 16 x i1> %mask) #0 { |
| ; CHECK-LABEL: match_nxv16i8_v8i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 |
| ; CHECK-NEXT: mov z1.d, d1 |
| ; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b |
| ; CHECK-NEXT: ret |
| %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <8 x i8> %op2, <vscale x 16 x i1> %mask) |
| ret <vscale x 16 x i1> %r |
| } |
| |
| ; 16-byte needle: broadcast the full 128-bit segment (mov z1.q, q1) and use |
| ; SVE2 MATCH directly — one needle segment per vector segment. |
| define <vscale x 16 x i1> @match_nxv16i8_v16i8(<vscale x 16 x i8> %op1, <16 x i8> %op2, <vscale x 16 x i1> %mask) #0 { |
| ; CHECK-LABEL: match_nxv16i8_v16i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 |
| ; CHECK-NEXT: mov z1.q, q1 |
| ; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b |
| ; CHECK-NEXT: ret |
| %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <16 x i8> %op2, <vscale x 16 x i1> %mask) |
| ret <vscale x 16 x i1> %r |
| } |
| |
| ; Fixed-width haystack, single needle element: plain NEON dup + CMEQ, then |
| ; AND with the (v2-held) mask vector. |
| define <16 x i1> @match_v16i8_v1i8(<16 x i8> %op1, <1 x i8> %op2, <16 x i1> %mask) #0 { |
| ; CHECK-LABEL: match_v16i8_v1i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: dup v1.16b, v1.b[0] |
| ; CHECK-NEXT: cmeq v0.16b, v0.16b, v1.16b |
| ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b |
| ; CHECK-NEXT: ret |
| %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <1 x i8> %op2, <16 x i1> %mask) |
| ret <16 x i1> %r |
| } |
| |
| ; Two needle elements; the <2 x i8> lives in 32-bit lanes, so the byte dups |
| ; read b[0] and b[4]. Two dup+CMEQ results are ORed, then masked. |
| define <16 x i1> @match_v16i8_v2i8(<16 x i8> %op1, <2 x i8> %op2, <16 x i1> %mask) #0 { |
| ; CHECK-LABEL: match_v16i8_v2i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: dup v3.16b, v1.b[4] |
| ; CHECK-NEXT: dup v1.16b, v1.b[0] |
| ; CHECK-NEXT: cmeq v3.16b, v0.16b, v3.16b |
| ; CHECK-NEXT: cmeq v0.16b, v0.16b, v1.16b |
| ; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b |
| ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b |
| ; CHECK-NEXT: ret |
| %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <2 x i8> %op2, <16 x i1> %mask) |
| ret <16 x i1> %r |
| } |
| |
| ; Four needle elements; the <4 x i8> lives in 16-bit lanes, so the byte dups |
| ; read b[0]/b[2]/b[4]/b[6]. Four dup+CMEQ results are OR-reduced, then masked. |
| define <16 x i1> @match_v16i8_v4i8(<16 x i8> %op1, <4 x i8> %op2, <16 x i1> %mask) #0 { |
| ; CHECK-LABEL: match_v16i8_v4i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: dup v3.16b, v1.b[2] |
| ; CHECK-NEXT: dup v4.16b, v1.b[0] |
| ; CHECK-NEXT: dup v5.16b, v1.b[4] |
| ; CHECK-NEXT: dup v1.16b, v1.b[6] |
| ; CHECK-NEXT: cmeq v3.16b, v0.16b, v3.16b |
| ; CHECK-NEXT: cmeq v4.16b, v0.16b, v4.16b |
| ; CHECK-NEXT: cmeq v5.16b, v0.16b, v5.16b |
| ; CHECK-NEXT: cmeq v0.16b, v0.16b, v1.16b |
| ; CHECK-NEXT: orr v1.16b, v4.16b, v3.16b |
| ; CHECK-NEXT: orr v0.16b, v5.16b, v0.16b |
| ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b |
| ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b |
| ; CHECK-NEXT: ret |
| %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <4 x i8> %op2, <16 x i1> %mask) |
| ret <16 x i1> %r |
| } |
| |
| ; Fixed 16-byte haystack, 8-byte needle: the i1 mask is converted to an SVE |
| ; predicate (shl #7 + cmlt + cmpne), the needle is dup'd per doubleword, and |
| ; SVE2 MATCH runs over the fixed vector; the result is rematerialized as i1s. |
| define <16 x i1> @match_v16i8_v8i8(<16 x i8> %op1, <8 x i8> %op2, <16 x i1> %mask) #0 { |
| ; CHECK-LABEL: match_v16i8_v8i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: shl v2.16b, v2.16b, #7 |
| ; CHECK-NEXT: ptrue p0.b, vl16 |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 |
| ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 |
| ; CHECK-NEXT: mov z1.d, d1 |
| ; CHECK-NEXT: cmlt v2.16b, v2.16b, #0 |
| ; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0 |
| ; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b |
| ; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff |
| ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 |
| ; CHECK-NEXT: ret |
| %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <8 x i8> %op2, <16 x i1> %mask) |
| ret <16 x i1> %r |
| } |
| |
| ; Fixed 16-byte haystack and needle: same mask-to-predicate conversion as |
| ; above, but the needle already fills a 128-bit segment so no dup is needed. |
| define <16 x i1> @match_v16i8_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask) #0 { |
| ; CHECK-LABEL: match_v16i8_v16i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: shl v2.16b, v2.16b, #7 |
| ; CHECK-NEXT: ptrue p0.b, vl16 |
| ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 |
| ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 |
| ; CHECK-NEXT: cmlt v2.16b, v2.16b, #0 |
| ; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0 |
| ; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b |
| ; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff |
| ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 |
| ; CHECK-NEXT: ret |
| %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask) |
| ret <16 x i1> %r |
| } |
| |
| ; 8-byte haystack and needle: MATCH over an 8-lane predicate (ptrue vl8), |
| ; same mask conversion and i1 rematerialization pattern as the v16i8 cases. |
| define <8 x i1> @match_v8i8_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) #0 { |
| ; CHECK-LABEL: match_v8i8_v8i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: shl v2.8b, v2.8b, #7 |
| ; CHECK-NEXT: ptrue p0.b, vl8 |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 |
| ; CHECK-NEXT: mov z1.d, d1 |
| ; CHECK-NEXT: cmlt v2.8b, v2.8b, #0 |
| ; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0 |
| ; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b |
| ; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 |
| ; CHECK-NEXT: ret |
| %r = tail call <8 x i1> @llvm.experimental.vector.match(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) |
| ret <8 x i1> %r |
| } |
| |
| ; Halfword elements: an 8 x i16 needle fills one 128-bit segment, so it is |
| ; broadcast with mov z1.q, q1 and MATCH runs on the .h element size. |
| define <vscale x 8 x i1> @match_nxv8i16_v8i16(<vscale x 8 x i16> %op1, <8 x i16> %op2, <vscale x 8 x i1> %mask) #0 { |
| ; CHECK-LABEL: match_nxv8i16_v8i16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 |
| ; CHECK-NEXT: mov z1.q, q1 |
| ; CHECK-NEXT: match p0.h, p0/z, z0.h, z1.h |
| ; CHECK-NEXT: ret |
| %r = tail call <vscale x 8 x i1> @llvm.experimental.vector.match(<vscale x 8 x i16> %op1, <8 x i16> %op2, <vscale x 8 x i1> %mask) |
| ret <vscale x 8 x i1> %r |
| } |
| |
| ; Fixed-width halfword MATCH: the i1 mask is widened (ushll) and converted |
| ; to a .h predicate, MATCH runs on .h, and the result is narrowed with xtn. |
| define <8 x i1> @match_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) #0 { |
| ; CHECK-LABEL: match_v8i16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ushll v2.8h, v2.8b, #0 |
| ; CHECK-NEXT: ptrue p0.h, vl8 |
| ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 |
| ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 |
| ; CHECK-NEXT: shl v2.8h, v2.8h, #15 |
| ; CHECK-NEXT: cmlt v2.8h, v2.8h, #0 |
| ; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, #0 |
| ; CHECK-NEXT: match p0.h, p0/z, z0.h, z1.h |
| ; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff |
| ; CHECK-NEXT: xtn v0.8b, v0.8h |
| ; CHECK-NEXT: ret |
| %r = tail call <8 x i1> @llvm.experimental.vector.match(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) |
| ret <8 x i1> %r |
| } |
| |
| ; Cases where op2 has more elements than op1. |
| |
| ; Needle wider than the haystack (16 vs 8 elements): still a single MATCH, |
| ; since the 16-byte needle occupies exactly one 128-bit segment; only the |
| ; 8-lane mask predicate (ptrue vl8) limits the searched lanes. |
| define <8 x i1> @match_v8i8_v16i8(<8 x i8> %op1, <16 x i8> %op2, <8 x i1> %mask) #0 { |
| ; CHECK-LABEL: match_v8i8_v16i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: shl v2.8b, v2.8b, #7 |
| ; CHECK-NEXT: ptrue p0.b, vl8 |
| ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 |
| ; CHECK-NEXT: cmlt v2.8b, v2.8b, #0 |
| ; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0 |
| ; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b |
| ; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 |
| ; CHECK-NEXT: ret |
| %r = tail call <8 x i1> @llvm.experimental.vector.match(<8 x i8> %op1, <16 x i8> %op2, <8 x i1> %mask) |
| ret <8 x i1> %r |
| } |
| |
| ; 32 needle elements exceed a single 128-bit MATCH segment, so the lowering |
| ; fully scalarizes: each of the 32 lanes of q1/q2 is broadcast and compared |
| ; (CMPEQ), with the predicates OR-merged via sel/mov; p4 is spilled around it. |
| define <vscale x 16 x i1> @match_nxv16i8_v32i8(<vscale x 16 x i8> %op1, <32 x i8> %op2, <vscale x 16 x i1> %mask) #0 { |
| ; CHECK-LABEL: match_nxv16i8_v32i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: addvl sp, sp, #-1 |
| ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 |
| ; CHECK-NEXT: mov z3.b, z1.b[1] |
| ; CHECK-NEXT: mov z4.b, b1 |
| ; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2 |
| ; CHECK-NEXT: ptrue p1.b |
| ; CHECK-NEXT: mov z5.b, z1.b[2] |
| ; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z3.b |
| ; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z4.b |
| ; CHECK-NEXT: mov z3.b, z1.b[3] |
| ; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z5.b |
| ; CHECK-NEXT: mov z4.b, z1.b[4] |
| ; CHECK-NEXT: mov p2.b, p3/m, p3.b |
| ; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z3.b |
| ; CHECK-NEXT: mov z3.b, z1.b[5] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b |
| ; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z4.b |
| ; CHECK-NEXT: mov z4.b, z1.b[6] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b |
| ; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z3.b |
| ; CHECK-NEXT: mov z3.b, z1.b[7] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b |
| ; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z4.b |
| ; CHECK-NEXT: mov z4.b, z1.b[8] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b |
| ; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z3.b |
| ; CHECK-NEXT: mov z3.b, z1.b[9] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b |
| ; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z4.b |
| ; CHECK-NEXT: mov z4.b, z1.b[10] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b |
| ; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z3.b |
| ; CHECK-NEXT: mov z3.b, z1.b[11] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b |
| ; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z4.b |
| ; CHECK-NEXT: mov z4.b, z1.b[12] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b |
| ; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z3.b |
| ; CHECK-NEXT: mov z3.b, z1.b[13] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b |
| ; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z4.b |
| ; CHECK-NEXT: mov z4.b, z1.b[14] |
| ; CHECK-NEXT: mov z1.b, z1.b[15] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b |
| ; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z3.b |
| ; CHECK-NEXT: mov z3.b, b2 |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b |
| ; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z4.b |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b |
| ; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b |
| ; CHECK-NEXT: mov z1.b, z2.b[1] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b |
| ; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b |
| ; CHECK-NEXT: mov z3.b, z2.b[2] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b |
| ; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b |
| ; CHECK-NEXT: mov z1.b, z2.b[3] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b |
| ; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b |
| ; CHECK-NEXT: mov z3.b, z2.b[4] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b |
| ; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b |
| ; CHECK-NEXT: mov z1.b, z2.b[5] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b |
| ; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b |
| ; CHECK-NEXT: mov z3.b, z2.b[6] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b |
| ; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b |
| ; CHECK-NEXT: mov z1.b, z2.b[7] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b |
| ; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b |
| ; CHECK-NEXT: mov z3.b, z2.b[8] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b |
| ; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b |
| ; CHECK-NEXT: mov z1.b, z2.b[9] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b |
| ; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b |
| ; CHECK-NEXT: mov z3.b, z2.b[10] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b |
| ; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b |
| ; CHECK-NEXT: mov z1.b, z2.b[11] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b |
| ; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b |
| ; CHECK-NEXT: mov z3.b, z2.b[12] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b |
| ; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b |
| ; CHECK-NEXT: mov z1.b, z2.b[13] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b |
| ; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b |
| ; CHECK-NEXT: mov z3.b, z2.b[14] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b |
| ; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b |
| ; CHECK-NEXT: mov z1.b, z2.b[15] |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b |
| ; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b |
| ; CHECK-NEXT: cmpeq p1.b, p1/z, z0.b, z1.b |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b |
| ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: mov p1.b, p2/m, p2.b |
| ; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b |
| ; CHECK-NEXT: addvl sp, sp, #1 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: ret |
| %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <32 x i8> %op2, <vscale x 16 x i1> %mask) |
| ret <vscale x 16 x i1> %r |
| } |
| |
| ; Fixed-width 32-element needle: fully scalarized NEON lowering — one |
| ; dup + CMEQ per needle lane of v1 and v2, OR-reduced as a tree, then ANDed |
| ; with the mask held in v3. |
| define <16 x i1> @match_v16i8_v32i8(<16 x i8> %op1, <32 x i8> %op2, <16 x i1> %mask) #0 { |
| ; CHECK-LABEL: match_v16i8_v32i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: dup v4.16b, v1.b[1] |
| ; CHECK-NEXT: dup v5.16b, v1.b[0] |
| ; CHECK-NEXT: dup v6.16b, v1.b[2] |
| ; CHECK-NEXT: dup v7.16b, v1.b[3] |
| ; CHECK-NEXT: dup v16.16b, v1.b[4] |
| ; CHECK-NEXT: dup v17.16b, v1.b[5] |
| ; CHECK-NEXT: dup v18.16b, v1.b[6] |
| ; CHECK-NEXT: dup v19.16b, v1.b[7] |
| ; CHECK-NEXT: dup v20.16b, v1.b[8] |
| ; CHECK-NEXT: cmeq v4.16b, v0.16b, v4.16b |
| ; CHECK-NEXT: cmeq v5.16b, v0.16b, v5.16b |
| ; CHECK-NEXT: cmeq v6.16b, v0.16b, v6.16b |
| ; CHECK-NEXT: cmeq v7.16b, v0.16b, v7.16b |
| ; CHECK-NEXT: cmeq v16.16b, v0.16b, v16.16b |
| ; CHECK-NEXT: cmeq v17.16b, v0.16b, v17.16b |
| ; CHECK-NEXT: dup v21.16b, v2.b[7] |
| ; CHECK-NEXT: dup v22.16b, v1.b[10] |
| ; CHECK-NEXT: orr v4.16b, v5.16b, v4.16b |
| ; CHECK-NEXT: orr v5.16b, v6.16b, v7.16b |
| ; CHECK-NEXT: orr v6.16b, v16.16b, v17.16b |
| ; CHECK-NEXT: cmeq v7.16b, v0.16b, v18.16b |
| ; CHECK-NEXT: cmeq v16.16b, v0.16b, v19.16b |
| ; CHECK-NEXT: cmeq v17.16b, v0.16b, v20.16b |
| ; CHECK-NEXT: dup v18.16b, v1.b[9] |
| ; CHECK-NEXT: dup v19.16b, v1.b[11] |
| ; CHECK-NEXT: dup v20.16b, v1.b[12] |
| ; CHECK-NEXT: cmeq v22.16b, v0.16b, v22.16b |
| ; CHECK-NEXT: orr v4.16b, v4.16b, v5.16b |
| ; CHECK-NEXT: orr v5.16b, v6.16b, v7.16b |
| ; CHECK-NEXT: orr v6.16b, v16.16b, v17.16b |
| ; CHECK-NEXT: cmeq v7.16b, v0.16b, v18.16b |
| ; CHECK-NEXT: dup v18.16b, v1.b[13] |
| ; CHECK-NEXT: cmeq v16.16b, v0.16b, v19.16b |
| ; CHECK-NEXT: cmeq v17.16b, v0.16b, v20.16b |
| ; CHECK-NEXT: dup v19.16b, v2.b[0] |
| ; CHECK-NEXT: dup v20.16b, v2.b[1] |
| ; CHECK-NEXT: orr v4.16b, v4.16b, v5.16b |
| ; CHECK-NEXT: dup v5.16b, v2.b[6] |
| ; CHECK-NEXT: orr v6.16b, v6.16b, v7.16b |
| ; CHECK-NEXT: orr v7.16b, v16.16b, v17.16b |
| ; CHECK-NEXT: cmeq v16.16b, v0.16b, v18.16b |
| ; CHECK-NEXT: cmeq v17.16b, v0.16b, v19.16b |
| ; CHECK-NEXT: cmeq v18.16b, v0.16b, v20.16b |
| ; CHECK-NEXT: dup v19.16b, v2.b[2] |
| ; CHECK-NEXT: cmeq v5.16b, v0.16b, v5.16b |
| ; CHECK-NEXT: cmeq v20.16b, v0.16b, v21.16b |
| ; CHECK-NEXT: dup v21.16b, v2.b[8] |
| ; CHECK-NEXT: orr v6.16b, v6.16b, v22.16b |
| ; CHECK-NEXT: orr v7.16b, v7.16b, v16.16b |
| ; CHECK-NEXT: dup v16.16b, v1.b[14] |
| ; CHECK-NEXT: dup v1.16b, v1.b[15] |
| ; CHECK-NEXT: orr v17.16b, v17.16b, v18.16b |
| ; CHECK-NEXT: cmeq v18.16b, v0.16b, v19.16b |
| ; CHECK-NEXT: dup v19.16b, v2.b[3] |
| ; CHECK-NEXT: orr v5.16b, v5.16b, v20.16b |
| ; CHECK-NEXT: cmeq v20.16b, v0.16b, v21.16b |
| ; CHECK-NEXT: dup v21.16b, v2.b[9] |
| ; CHECK-NEXT: cmeq v16.16b, v0.16b, v16.16b |
| ; CHECK-NEXT: cmeq v1.16b, v0.16b, v1.16b |
| ; CHECK-NEXT: orr v4.16b, v4.16b, v6.16b |
| ; CHECK-NEXT: orr v17.16b, v17.16b, v18.16b |
| ; CHECK-NEXT: cmeq v18.16b, v0.16b, v19.16b |
| ; CHECK-NEXT: dup v19.16b, v2.b[4] |
| ; CHECK-NEXT: orr v5.16b, v5.16b, v20.16b |
| ; CHECK-NEXT: cmeq v20.16b, v0.16b, v21.16b |
| ; CHECK-NEXT: dup v21.16b, v2.b[10] |
| ; CHECK-NEXT: orr v7.16b, v7.16b, v16.16b |
| ; CHECK-NEXT: orr v16.16b, v17.16b, v18.16b |
| ; CHECK-NEXT: cmeq v17.16b, v0.16b, v19.16b |
| ; CHECK-NEXT: dup v18.16b, v2.b[5] |
| ; CHECK-NEXT: orr v5.16b, v5.16b, v20.16b |
| ; CHECK-NEXT: cmeq v19.16b, v0.16b, v21.16b |
| ; CHECK-NEXT: dup v20.16b, v2.b[11] |
| ; CHECK-NEXT: orr v1.16b, v7.16b, v1.16b |
| ; CHECK-NEXT: orr v6.16b, v16.16b, v17.16b |
| ; CHECK-NEXT: cmeq v7.16b, v0.16b, v18.16b |
| ; CHECK-NEXT: dup v17.16b, v2.b[12] |
| ; CHECK-NEXT: orr v5.16b, v5.16b, v19.16b |
| ; CHECK-NEXT: cmeq v16.16b, v0.16b, v20.16b |
| ; CHECK-NEXT: dup v18.16b, v2.b[13] |
| ; CHECK-NEXT: dup v19.16b, v2.b[14] |
| ; CHECK-NEXT: orr v1.16b, v4.16b, v1.16b |
| ; CHECK-NEXT: dup v2.16b, v2.b[15] |
| ; CHECK-NEXT: orr v4.16b, v6.16b, v7.16b |
| ; CHECK-NEXT: cmeq v6.16b, v0.16b, v17.16b |
| ; CHECK-NEXT: orr v5.16b, v5.16b, v16.16b |
| ; CHECK-NEXT: cmeq v7.16b, v0.16b, v18.16b |
| ; CHECK-NEXT: cmeq v16.16b, v0.16b, v19.16b |
| ; CHECK-NEXT: cmeq v0.16b, v0.16b, v2.16b |
| ; CHECK-NEXT: orr v1.16b, v1.16b, v4.16b |
| ; CHECK-NEXT: orr v4.16b, v5.16b, v6.16b |
| ; CHECK-NEXT: orr v5.16b, v7.16b, v16.16b |
| ; CHECK-NEXT: orr v1.16b, v1.16b, v4.16b |
| ; CHECK-NEXT: orr v0.16b, v5.16b, v0.16b |
| ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b |
| ; CHECK-NEXT: and v0.16b, v0.16b, v3.16b |
| ; CHECK-NEXT: ret |
| %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <32 x i8> %op2, <16 x i1> %mask) |
| ret <16 x i1> %r |
| } |
| |
| ; Data types not supported by MATCH. |
| ; Note: The cases for SVE could be made tighter. |
| |
| ; i32 elements are not supported by MATCH (see section comment above): the |
| ; needle is scalarized into four splat + CMPEQ.s compares whose predicates |
| ; are OR-merged via sel/mov; p4 is spilled/reloaded around the sequence. |
| define <vscale x 4 x i1> @match_nxv4xi32_v4i32(<vscale x 4 x i32> %op1, <4 x i32> %op2, <vscale x 4 x i1> %mask) #0 { |
| ; CHECK-LABEL: match_nxv4xi32_v4i32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: addvl sp, sp, #-1 |
| ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 |
| ; CHECK-NEXT: mov z2.s, z1.s[1] |
| ; CHECK-NEXT: mov z3.s, s1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z4.s, z1.s[2] |
| ; CHECK-NEXT: mov z1.s, z1.s[3] |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z0.s, z2.s |
| ; CHECK-NEXT: cmpeq p3.s, p1/z, z0.s, z3.s |
| ; CHECK-NEXT: cmpeq p4.s, p1/z, z0.s, z4.s |
| ; CHECK-NEXT: cmpeq p1.s, p1/z, z0.s, z1.s |
| ; CHECK-NEXT: mov p2.b, p3/m, p3.b |
| ; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b |
| ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: mov p1.b, p2/m, p2.b |
| ; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b |
| ; CHECK-NEXT: addvl sp, sp, #1 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: ret |
| %r = tail call <vscale x 4 x i1> @llvm.experimental.vector.match(<vscale x 4 x i32> %op1, <4 x i32> %op2, <vscale x 4 x i1> %mask) |
| ret <vscale x 4 x i1> %r |
| } |
| |
| ; i64 elements are not supported by MATCH: two splat + CMPEQ.d compares, |
| ; OR-merged with sel, then ANDed with the mask; few enough predicates that |
| ; no spill is required. |
| define <vscale x 2 x i1> @match_nxv2xi64_v2i64(<vscale x 2 x i64> %op1, <2 x i64> %op2, <vscale x 2 x i1> %mask) #0 { |
| ; CHECK-LABEL: match_nxv2xi64_v2i64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 |
| ; CHECK-NEXT: mov z2.d, z1.d[1] |
| ; CHECK-NEXT: ptrue p1.d |
| ; CHECK-NEXT: mov z1.d, d1 |
| ; CHECK-NEXT: cmpeq p2.d, p1/z, z0.d, z2.d |
| ; CHECK-NEXT: cmpeq p1.d, p1/z, z0.d, z1.d |
| ; CHECK-NEXT: sel p1.b, p1, p1.b, p2.b |
| ; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b |
| ; CHECK-NEXT: ret |
| %r = tail call <vscale x 2 x i1> @llvm.experimental.vector.match(<vscale x 2 x i64> %op1, <2 x i64> %op2, <vscale x 2 x i1> %mask) |
| ret <vscale x 2 x i1> %r |
| } |
| |
| ; Fixed-width i32 case (MATCH unsupported): four NEON dup + CMEQ.4s, |
| ; OR-reduced, narrowed to 16-bit lanes with xtn, then ANDed with the mask. |
| define <4 x i1> @match_v4xi32_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) #0 { |
| ; CHECK-LABEL: match_v4xi32_v4i32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: dup v3.4s, v1.s[1] |
| ; CHECK-NEXT: dup v4.4s, v1.s[0] |
| ; CHECK-NEXT: dup v5.4s, v1.s[2] |
| ; CHECK-NEXT: dup v1.4s, v1.s[3] |
| ; CHECK-NEXT: cmeq v3.4s, v0.4s, v3.4s |
| ; CHECK-NEXT: cmeq v4.4s, v0.4s, v4.4s |
| ; CHECK-NEXT: cmeq v5.4s, v0.4s, v5.4s |
| ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s |
| ; CHECK-NEXT: orr v1.16b, v4.16b, v3.16b |
| ; CHECK-NEXT: orr v0.16b, v5.16b, v0.16b |
| ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b |
| ; CHECK-NEXT: xtn v0.4h, v0.4s |
| ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b |
| ; CHECK-NEXT: ret |
| %r = tail call <4 x i1> @llvm.experimental.vector.match(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) |
| ret <4 x i1> %r |
| } |
| |
| ; Fixed-width i64 case (MATCH unsupported): two NEON dup + CMEQ.2d, ORed, |
| ; narrowed to 32-bit lanes with xtn, then ANDed with the mask. |
| define <2 x i1> @match_v2xi64_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) #0 { |
| ; CHECK-LABEL: match_v2xi64_v2i64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: dup v3.2d, v1.d[1] |
| ; CHECK-NEXT: dup v1.2d, v1.d[0] |
| ; CHECK-NEXT: cmeq v3.2d, v0.2d, v3.2d |
| ; CHECK-NEXT: cmeq v0.2d, v0.2d, v1.2d |
| ; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b |
| ; CHECK-NEXT: xtn v0.2s, v0.2d |
| ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b |
| ; CHECK-NEXT: ret |
| %r = tail call <2 x i1> @llvm.experimental.vector.match(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) |
| ret <2 x i1> %r |
| } |
| |
| attributes #0 = { "target-features"="+sve2" } |