| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE |
| ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STREAMING |
| |
| ; == Scalable == |
| |
; Legal type: nxv16i1 with i32 operands selects a single WHILELO on the 32-bit
; w registers, identically for SVE and streaming mode.
define <vscale x 16 x i1> @lane_mask_nxv16i1_i32(i32 %index, i32 %TC) {
; CHECK-LABEL: lane_mask_nxv16i1_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.b, w0, w1
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 %index, i32 %TC)
ret <vscale x 16 x i1> %active.lane.mask
}
| |
; Legal type: nxv8i1/i32 -> single WHILELO with a halfword (.h) predicate.
define <vscale x 8 x i1> @lane_mask_nxv8i1_i32(i32 %index, i32 %TC) {
; CHECK-LABEL: lane_mask_nxv8i1_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.h, w0, w1
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 %index, i32 %TC)
ret <vscale x 8 x i1> %active.lane.mask
}
| |
; Legal type: nxv4i1/i32 -> single WHILELO with a word (.s) predicate.
define <vscale x 4 x i1> @lane_mask_nxv4i1_i32(i32 %index, i32 %TC) {
; CHECK-LABEL: lane_mask_nxv4i1_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.s, w0, w1
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 %index, i32 %TC)
ret <vscale x 4 x i1> %active.lane.mask
}
| |
; Legal type: nxv2i1/i32 -> single WHILELO with a doubleword (.d) predicate.
define <vscale x 2 x i1> @lane_mask_nxv2i1_i32(i32 %index, i32 %TC) {
; CHECK-LABEL: lane_mask_nxv2i1_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.d, w0, w1
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 %index, i32 %TC)
ret <vscale x 2 x i1> %active.lane.mask
}
| |
; Legal type: i64 operands use the 64-bit x-register form of WHILELO.
define <vscale x 16 x i1> @lane_mask_nxv16i1_i64(i64 %index, i64 %TC) {
; CHECK-LABEL: lane_mask_nxv16i1_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.b, x0, x1
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 %index, i64 %TC)
ret <vscale x 16 x i1> %active.lane.mask
}
| |
; Legal type: nxv8i1/i64 -> single WHILELO (.h predicate, x registers).
define <vscale x 8 x i1> @lane_mask_nxv8i1_i64(i64 %index, i64 %TC) {
; CHECK-LABEL: lane_mask_nxv8i1_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.h, x0, x1
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 %index, i64 %TC)
ret <vscale x 8 x i1> %active.lane.mask
}
| |
; Legal type: nxv4i1/i64 -> single WHILELO (.s predicate, x registers).
define <vscale x 4 x i1> @lane_mask_nxv4i1_i64(i64 %index, i64 %TC) {
; CHECK-LABEL: lane_mask_nxv4i1_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.s, x0, x1
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %index, i64 %TC)
ret <vscale x 4 x i1> %active.lane.mask
}
| |
; Legal type: nxv2i1/i64 -> single WHILELO (.d predicate, x registers).
define <vscale x 2 x i1> @lane_mask_nxv2i1_i64(i64 %index, i64 %TC) {
; CHECK-LABEL: lane_mask_nxv2i1_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.d, x0, x1
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 %index, i64 %TC)
ret <vscale x 2 x i1> %active.lane.mask
}
| |
; i8 operands are first zero-extended (and #0xff) in scalar registers, then the
; widened values feed a 32-bit WHILELO.
define <vscale x 16 x i1> @lane_mask_nxv16i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_nxv16i1_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w1, #0xff
; CHECK-NEXT: and w9, w0, #0xff
; CHECK-NEXT: whilelo p0.b, w9, w8
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i8(i8 %index, i8 %TC)
ret <vscale x 16 x i1> %active.lane.mask
}
| |
; As above: i8 operands zero-extended, then a single WHILELO (.h predicate).
define <vscale x 8 x i1> @lane_mask_nxv8i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_nxv8i1_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w1, #0xff
; CHECK-NEXT: and w9, w0, #0xff
; CHECK-NEXT: whilelo p0.h, w9, w8
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i8(i8 %index, i8 %TC)
ret <vscale x 8 x i1> %active.lane.mask
}
| |
; As above: i8 operands zero-extended, then a single WHILELO (.s predicate).
define <vscale x 4 x i1> @lane_mask_nxv4i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_nxv4i1_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w1, #0xff
; CHECK-NEXT: and w9, w0, #0xff
; CHECK-NEXT: whilelo p0.s, w9, w8
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i8(i8 %index, i8 %TC)
ret <vscale x 4 x i1> %active.lane.mask
}
| |
; As above: i8 operands zero-extended, then a single WHILELO (.d predicate).
define <vscale x 2 x i1> @lane_mask_nxv2i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_nxv2i1_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w1, #0xff
; CHECK-NEXT: and w9, w0, #0xff
; CHECK-NEXT: whilelo p0.d, w9, w8
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i8(i8 %index, i8 %TC)
ret <vscale x 2 x i1> %active.lane.mask
}
| |
| |
; == Illegal types ==
| |
; nxv32i1 is wider than one predicate register, so the result is split into
; two WHILELOs (p0, p1). The second half's start index is %index plus one
; vector length (rdvl #1), with a saturating add (adds + csinv selects all-ones
; on unsigned overflow) so the second WHILELO produces an empty mask rather
; than wrapping.
define <vscale x 32 x i1> @lane_mask_nxv32i1_i32(i32 %index, i32 %TC) {
; CHECK-LABEL: lane_mask_nxv32i1_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: adds w8, w0, w8
; CHECK-NEXT: csinv w8, w8, wzr, lo
; CHECK-NEXT: whilelo p0.b, w0, w1
; CHECK-NEXT: whilelo p1.b, w8, w1
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i32(i32 %index, i32 %TC)
ret <vscale x 32 x i1> %active.lane.mask
}
| |
; Same split-into-two-predicates lowering as the i32 version, using 64-bit
; x registers for the saturating index increment and the compares.
define <vscale x 32 x i1> @lane_mask_nxv32i1_i64(i64 %index, i64 %TC) {
; CHECK-LABEL: lane_mask_nxv32i1_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: adds x8, x0, x8
; CHECK-NEXT: csinv x8, x8, xzr, lo
; CHECK-NEXT: whilelo p0.b, x0, x1
; CHECK-NEXT: whilelo p1.b, x8, x1
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 %index, i64 %TC)
ret <vscale x 32 x i1> %active.lane.mask
}
| |
; Split lowering with i8 operands: both operands are zero-extended, and the
; incremented index for the second half is clamped to 255 (cmp #255 + csel)
; to emulate i8 unsigned saturation before the two WHILELOs.
define <vscale x 32 x i1> @lane_mask_nxv32i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_nxv32i1_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: and w9, w0, #0xff
; CHECK-NEXT: mov w10, #255 // =0xff
; CHECK-NEXT: add w8, w9, w8, uxtb
; CHECK-NEXT: and w11, w1, #0xff
; CHECK-NEXT: cmp w8, #255
; CHECK-NEXT: csel w8, w8, w10, lo
; CHECK-NEXT: whilelo p0.b, w9, w11
; CHECK-NEXT: whilelo p1.b, w8, w11
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i8(i8 %index, i8 %TC)
ret <vscale x 32 x i1> %active.lane.mask
}
| |
; Odd element count: nxv7i1 is widened to the next legal predicate type
; (nxv8i1), so a single .h WHILELO suffices.
define <vscale x 7 x i1> @lane_mask_nxv7i1_i64(i64 %index, i64 %TC) {
; CHECK-LABEL: lane_mask_nxv7i1_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.h, x0, x1
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 7 x i1> @llvm.get.active.lane.mask.nxv7i1.i64(i64 %index, i64 %TC)
ret <vscale x 7 x i1> %active.lane.mask
}
| |
; nxv1i1 has no WHILE form: the mask is computed generically - a saturating
; index vector (index + uqadd) compared against the splatted trip count
; (cmphi) - and the nxv4i1 result is narrowed with two punpklo steps.
define <vscale x 1 x i1> @lane_mask_nxv1i1_i32(i32 %index, i32 %TC) {
; CHECK-LABEL: lane_mask_nxv1i1_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.s, #0, #1
; CHECK-NEXT: mov z1.s, w0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uqadd z0.s, z0.s, z1.s
; CHECK-NEXT: mov z1.s, w1
; CHECK-NEXT: cmphi p0.s, p0/z, z1.s, z0.s
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32 %index, i32 %TC)
ret <vscale x 1 x i1> %active.lane.mask
}
| |
| ; UTC_ARGS: --disable |
| ; This test exists to protect against a compiler crash caused by an attempt to |
| ; convert (via changeVectorElementType) an MVT into an EVT, which is impossible. |
| ; The test's output is large and not relevant so check lines have been disabled. |
; Compile-only regression test (see the note above); only the label is checked.
define <vscale x 64 x i1> @lane_mask_nxv64i1_i64(i64 %index, i64 %TC) {
; CHECK-LABEL: lane_mask_nxv64i1_i64:
%active.lane.mask = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 %index, i64 %TC)
ret <vscale x 64 x i1> %active.lane.mask
}
| ; UTC_ARGS: --enable |
| |
| ; == Fixed width == |
| |
; Fixed-width masks still use WHILELO: the predicate result is expanded to a
; byte vector of all-ones lanes with a predicated mov, then the low 128 bits
; of the z register are returned as q0.
define <16 x i1> @lane_mask_v16i1_i32(i32 %index, i32 %TC) {
; CHECK-LABEL: lane_mask_v16i1_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.b, w0, w1
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC)
ret <16 x i1> %active.lane.mask
}
| |
; As v16i1 above, but only the low 64 bits (d0) of the expanded mask are used.
define <8 x i1> @lane_mask_v8i1_i32(i32 %index, i32 %TC) {
; CHECK-LABEL: lane_mask_v8i1_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.b, w0, w1
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC)
ret <8 x i1> %active.lane.mask
}
| |
; v4i1 promotes to i16 mask elements, hence the .h WHILELO and expansion.
define <4 x i1> @lane_mask_v4i1_i32(i32 %index, i32 %TC) {
; CHECK-LABEL: lane_mask_v4i1_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.h, w0, w1
; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC)
ret <4 x i1> %active.lane.mask
}
| |
; v2i1 promotes to i32 mask elements, hence the .s WHILELO and expansion.
define <2 x i1> @lane_mask_v2i1_i32(i32 %index, i32 %TC) {
; CHECK-LABEL: lane_mask_v2i1_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.s, w0, w1
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 %index, i32 %TC)
ret <2 x i1> %active.lane.mask
}
| |
; Fixed-width mask with i64 operands: x-register WHILELO, then expansion to q0.
define <16 x i1> @lane_mask_v16i1_i64(i64 %index, i64 %TC) {
; CHECK-LABEL: lane_mask_v16i1_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.b, x0, x1
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 %index, i64 %TC)
ret <16 x i1> %active.lane.mask
}
| |
; As v16i1/i64 above, returning the low 64 bits (d0) of the expanded mask.
define <8 x i1> @lane_mask_v8i1_i64(i64 %index, i64 %TC) {
; CHECK-LABEL: lane_mask_v8i1_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.b, x0, x1
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 %index, i64 %TC)
ret <8 x i1> %active.lane.mask
}
| |
; v4i1/i64: .h-element WHILELO on x registers, then expansion to d0.
define <4 x i1> @lane_mask_v4i1_i64(i64 %index, i64 %TC) {
; CHECK-LABEL: lane_mask_v4i1_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.h, x0, x1
; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %index, i64 %TC)
ret <4 x i1> %active.lane.mask
}
| |
; v2i1/i64: .s-element WHILELO on x registers, then expansion to d0.
define <2 x i1> @lane_mask_v2i1_i64(i64 %index, i64 %TC) {
; CHECK-LABEL: lane_mask_v2i1_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.s, x0, x1
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 %index, i64 %TC)
ret <2 x i1> %active.lane.mask
}
| |
; i8 fixed-width masks diverge between the run lines: the SVE lowering uses
; Neon saturating-add/compare (uqadd/cmhi), while the streaming-mode lowering
; avoids those fixed-width Neon instructions and stays on SVE z/p registers,
; emulating the saturating add with a compare + or-with-all-ones sequence.
define <16 x i1> @lane_mask_v16i1_i8(i8 %index, i8 %TC) {
; CHECK-SVE-LABEL: lane_mask_v16i1_i8:
; CHECK-SVE: // %bb.0:
; CHECK-SVE-NEXT: index z0.b, #0, #1
; CHECK-SVE-NEXT: dup v1.16b, w0
; CHECK-SVE-NEXT: uqadd v0.16b, v1.16b, v0.16b
; CHECK-SVE-NEXT: dup v1.16b, w1
; CHECK-SVE-NEXT: cmhi v0.16b, v1.16b, v0.16b
; CHECK-SVE-NEXT: ret
;
; CHECK-STREAMING-LABEL: lane_mask_v16i1_i8:
; CHECK-STREAMING: // %bb.0:
; CHECK-STREAMING-NEXT: index z0.b, w0, #1
; CHECK-STREAMING-NEXT: mov z1.b, w0
; CHECK-STREAMING-NEXT: ptrue p0.b, vl16
; CHECK-STREAMING-NEXT: cmphi p1.b, p0/z, z1.b, z0.b
; CHECK-STREAMING-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-STREAMING-NEXT: orr z0.d, z0.d, z1.d
; CHECK-STREAMING-NEXT: mov z1.b, w1
; CHECK-STREAMING-NEXT: cmphi p0.b, p0/z, z1.b, z0.b
; CHECK-STREAMING-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-STREAMING-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-STREAMING-NEXT: ret
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i8(i8 %index, i8 %TC)
ret <16 x i1> %active.lane.mask
}
| |
; Same SVE (Neon uqadd/cmhi) vs streaming (SVE-register) split as v16i1_i8,
; for an 8-lane mask (64-bit d-register result).
define <8 x i1> @lane_mask_v8i1_i8(i8 %index, i8 %TC) {
; CHECK-SVE-LABEL: lane_mask_v8i1_i8:
; CHECK-SVE: // %bb.0:
; CHECK-SVE-NEXT: index z0.b, #0, #1
; CHECK-SVE-NEXT: dup v1.8b, w0
; CHECK-SVE-NEXT: uqadd v0.8b, v1.8b, v0.8b
; CHECK-SVE-NEXT: dup v1.8b, w1
; CHECK-SVE-NEXT: cmhi v0.8b, v1.8b, v0.8b
; CHECK-SVE-NEXT: ret
;
; CHECK-STREAMING-LABEL: lane_mask_v8i1_i8:
; CHECK-STREAMING: // %bb.0:
; CHECK-STREAMING-NEXT: index z0.b, w0, #1
; CHECK-STREAMING-NEXT: mov z1.b, w0
; CHECK-STREAMING-NEXT: ptrue p0.b, vl8
; CHECK-STREAMING-NEXT: cmphi p1.b, p0/z, z1.b, z0.b
; CHECK-STREAMING-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-STREAMING-NEXT: orr z0.d, z0.d, z1.d
; CHECK-STREAMING-NEXT: mov z1.b, w1
; CHECK-STREAMING-NEXT: cmphi p0.b, p0/z, z1.b, z0.b
; CHECK-STREAMING-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-STREAMING-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-STREAMING-NEXT: ret
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i8(i8 %index, i8 %TC)
ret <8 x i1> %active.lane.mask
}
| |
; v4i1 with i8 operands is computed in 16-bit lanes: both operands are masked
; to 8 bits, the index vector is added, and the sum is clamped to 255 (umin)
; before the unsigned-higher compare. SVE uses Neon vectors; streaming mode
; performs the same steps on SVE z registers.
define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) {
; CHECK-SVE-LABEL: lane_mask_v4i1_i8:
; CHECK-SVE: // %bb.0:
; CHECK-SVE-NEXT: dup v0.4h, w0
; CHECK-SVE-NEXT: index z1.h, #0, #1
; CHECK-SVE-NEXT: movi d2, #0xff00ff00ff00ff
; CHECK-SVE-NEXT: dup v3.4h, w1
; CHECK-SVE-NEXT: bic v0.4h, #255, lsl #8
; CHECK-SVE-NEXT: bic v3.4h, #255, lsl #8
; CHECK-SVE-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-SVE-NEXT: umin v0.4h, v0.4h, v2.4h
; CHECK-SVE-NEXT: cmhi v0.4h, v3.4h, v0.4h
; CHECK-SVE-NEXT: ret
;
; CHECK-STREAMING-LABEL: lane_mask_v4i1_i8:
; CHECK-STREAMING: // %bb.0:
; CHECK-STREAMING-NEXT: mov z1.h, w0
; CHECK-STREAMING-NEXT: index z0.h, #0, #1
; CHECK-STREAMING-NEXT: ptrue p0.h, vl4
; CHECK-STREAMING-NEXT: and z1.h, z1.h, #0xff
; CHECK-STREAMING-NEXT: add z0.h, z1.h, z0.h
; CHECK-STREAMING-NEXT: mov z1.h, w1
; CHECK-STREAMING-NEXT: umin z0.h, z0.h, #255
; CHECK-STREAMING-NEXT: and z1.h, z1.h, #0xff
; CHECK-STREAMING-NEXT: cmphi p0.h, p0/z, z1.h, z0.h
; CHECK-STREAMING-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-STREAMING-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-STREAMING-NEXT: ret
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i8(i8 %index, i8 %TC)
ret <4 x i1> %active.lane.mask
}
| |
; v2i1 with i8 operands, computed in 32-bit lanes: mask to 8 bits, add the
; index vector, clamp to 255, then unsigned-higher compare. SVE uses Neon;
; streaming mode does the 0xff masking in scalar registers and the rest on
; SVE z registers.
define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) {
; CHECK-SVE-LABEL: lane_mask_v2i1_i8:
; CHECK-SVE: // %bb.0:
; CHECK-SVE-NEXT: movi d0, #0x0000ff000000ff
; CHECK-SVE-NEXT: dup v1.2s, w0
; CHECK-SVE-NEXT: index z2.s, #0, #1
; CHECK-SVE-NEXT: dup v3.2s, w1
; CHECK-SVE-NEXT: and v1.8b, v1.8b, v0.8b
; CHECK-SVE-NEXT: add v1.2s, v1.2s, v2.2s
; CHECK-SVE-NEXT: and v2.8b, v3.8b, v0.8b
; CHECK-SVE-NEXT: umin v0.2s, v1.2s, v0.2s
; CHECK-SVE-NEXT: cmhi v0.2s, v2.2s, v0.2s
; CHECK-SVE-NEXT: ret
;
; CHECK-STREAMING-LABEL: lane_mask_v2i1_i8:
; CHECK-STREAMING: // %bb.0:
; CHECK-STREAMING-NEXT: and w8, w0, #0xff
; CHECK-STREAMING-NEXT: ptrue p0.s, vl2
; CHECK-STREAMING-NEXT: index z0.s, w8, #1
; CHECK-STREAMING-NEXT: and w8, w1, #0xff
; CHECK-STREAMING-NEXT: mov z1.s, w8
; CHECK-STREAMING-NEXT: umin z0.s, z0.s, #255
; CHECK-STREAMING-NEXT: cmphi p0.s, p0/z, z1.s, z0.s
; CHECK-STREAMING-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-STREAMING-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-STREAMING-NEXT: ret
%active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i8(i8 %index, i8 %TC)
ret <2 x i1> %active.lane.mask
}
| |
; Constant range [0, 3): element count 3 matches a PTRUE pattern, so the mask
; folds to ptrue vl3 with no WHILELO.
define <vscale x 4 x i1> @lane_mask_nxv4i1_imm3() {
; CHECK-LABEL: lane_mask_nxv4i1_imm3:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s, vl3
; CHECK-NEXT: ret
entry:
%active.lane.mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 3)
ret <vscale x 4 x i1> %active.lane.mask
}
| |
; Constant range [0, 5): 5 is not a valid PTRUE vl pattern, so the trip count
; is materialised and a WHILELO from zero is used instead.
define <vscale x 4 x i1> @lane_mask_nxv4i1_imm5() {
; CHECK-LABEL: lane_mask_nxv4i1_imm5:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #5 // =0x5
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: ret
entry:
%active.lane.mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 5)
ret <vscale x 4 x i1> %active.lane.mask
}
| |
; Non-zero constant base: [10, 14) still has a constant element count of 4,
; which folds to ptrue vl4.
define <vscale x 4 x i1> @lane_mask_nxv4i1_imm4() {
; CHECK-LABEL: lane_mask_nxv4i1_imm4:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: ret
entry:
%active.lane.mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 10, i64 14)
ret <vscale x 4 x i1> %active.lane.mask
}
| |
; Constant range [0, 10): 10 is not a PTRUE vl pattern, so WHILELO is kept.
define <vscale x 16 x i1> @lane_mask_nxv16i1_imm10() {
; CHECK-LABEL: lane_mask_nxv16i1_imm10:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #10 // =0xa
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: ret
entry:
%active.lane.mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 10)
ret <vscale x 16 x i1> %active.lane.mask
}
| |
; With vscale_range(16,16) the vector length is known exactly (256 bytes), so
; an element count of 256 folds to ptrue vl256.
define <vscale x 16 x i1> @lane_mask_nxv16i1_imm256() vscale_range(16, 16) {
; CHECK-LABEL: lane_mask_nxv16i1_imm256:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: ret
entry:
%active.lane.mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 256)
ret <vscale x 16 x i1> %active.lane.mask
}
| |
; Fixed-width constant mask: the PTRUE vl3 fold also applies, followed by the
; usual predicate-to-vector expansion.
define <8 x i1> @lane_mask_v8i1_imm3() {
; CHECK-LABEL: lane_mask_v8i1_imm3:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b, vl3
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
entry:
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 0, i64 3)
ret <8 x i1> %active.lane.mask
}
| |
; Declarations for every get.active.lane.mask variant exercised above.
; Fix: @lane_mask_nxv7i1_i64 calls the nxv7i1.i64 variant, but the old list
; declared only the never-used nxv7i1.i8 variant; declare the one actually
; called (and keep it in the i64 group).
declare <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i32(i32, i32)
declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32, i32)
declare <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32, i32)
declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32, i32)
declare <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32, i32)
declare <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32, i32)

declare <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64, i64)
declare <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64, i64)
declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64, i64)
declare <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64, i64)
declare <vscale x 7 x i1> @llvm.get.active.lane.mask.nxv7i1.i64(i64, i64)
declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64, i64)
declare <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64, i64)

declare <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i8(i8, i8)
declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i8(i8, i8)
declare <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i8(i8, i8)
declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i8(i8, i8)
declare <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i8(i8, i8)

declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
declare <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32, i32)

declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64, i64)
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64, i64)
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64, i64)
declare <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64, i64)

declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i8(i8, i8)
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i8(i8, i8)
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i8(i8, i8)
declare <2 x i1> @llvm.get.active.lane.mask.v2i1.i8(i8, i8)