; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64 -mattr=+sve2 %s -o - | FileCheck %s
| |
; WAR dependence mask with element size 1 lowers directly to SVE2 WHILEWR .b.
define <vscale x 16 x i1> @whilewr_8(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    whilewr p0.b, x0, x1
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 16 x i1> @llvm.loop.dependence.war.mask.nxv16i1(ptr %a, ptr %b, i64 1)
  ret <vscale x 16 x i1> %0
}
| |
; WAR dependence mask with element size 2 lowers directly to SVE2 WHILEWR .h.
define <vscale x 8 x i1> @whilewr_16(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    whilewr p0.h, x0, x1
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 8 x i1> @llvm.loop.dependence.war.mask.nxv8i1(ptr %a, ptr %b, i64 2)
  ret <vscale x 8 x i1> %0
}
| |
; WAR dependence mask with element size 4 lowers directly to SVE2 WHILEWR .s.
define <vscale x 4 x i1> @whilewr_32(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    whilewr p0.s, x0, x1
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 4 x i1> @llvm.loop.dependence.war.mask.nxv4i1(ptr %a, ptr %b, i64 4)
  ret <vscale x 4 x i1> %0
}
| |
; WAR dependence mask with element size 8 lowers directly to SVE2 WHILEWR .d.
define <vscale x 2 x i1> @whilewr_64(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    whilewr p0.d, x0, x1
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 2 x i1> @llvm.loop.dependence.war.mask.nxv2i1(ptr %a, ptr %b, i64 8)
  ret <vscale x 2 x i1> %0
}
| |
; RAW dependence mask with element size 1 lowers directly to SVE2 WHILERW .b.
define <vscale x 16 x i1> @whilerw_8(ptr %a, ptr %b) {
; CHECK-LABEL: whilerw_8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    whilerw p0.b, x0, x1
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 16 x i1> @llvm.loop.dependence.raw.mask.nxv16i1(ptr %a, ptr %b, i64 1)
  ret <vscale x 16 x i1> %0
}
| |
; RAW dependence mask with element size 2 lowers directly to SVE2 WHILERW .h.
define <vscale x 8 x i1> @whilerw_16(ptr %a, ptr %b) {
; CHECK-LABEL: whilerw_16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    whilerw p0.h, x0, x1
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 8 x i1> @llvm.loop.dependence.raw.mask.nxv8i1(ptr %a, ptr %b, i64 2)
  ret <vscale x 8 x i1> %0
}
| |
; RAW dependence mask with element size 4 lowers directly to SVE2 WHILERW .s.
define <vscale x 4 x i1> @whilerw_32(ptr %a, ptr %b) {
; CHECK-LABEL: whilerw_32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    whilerw p0.s, x0, x1
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 4 x i1> @llvm.loop.dependence.raw.mask.nxv4i1(ptr %a, ptr %b, i64 4)
  ret <vscale x 4 x i1> %0
}
| |
; RAW dependence mask with element size 8 lowers directly to SVE2 WHILERW .d.
define <vscale x 2 x i1> @whilerw_64(ptr %a, ptr %b) {
; CHECK-LABEL: whilerw_64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    whilerw p0.d, x0, x1
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 2 x i1> @llvm.loop.dependence.raw.mask.nxv2i1(ptr %a, ptr %b, i64 8)
  ret <vscale x 2 x i1> %0
}
| |
; An nxv32i1 result is wider than one predicate register: the low half comes
; from WHILEWR and the high half is materialized with WHILELO on the clamped
; pointer difference.
define <vscale x 32 x i1> @whilewr_8_split(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_8_split:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub x9, x1, x0
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    cmp x9, #1
; CHECK-NEXT:    csinv x9, x9, xzr, ge
; CHECK-NEXT:    whilewr p0.b, x0, x1
; CHECK-NEXT:    whilelo p1.b, x8, x9
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 32 x i1> @llvm.loop.dependence.war.mask.nxv32i1(ptr %a, ptr %b, i64 1)
  ret <vscale x 32 x i1> %0
}
| |
; An nxv64i1 result spans four predicate registers: WHILEWR for the first and
; three WHILELOs at successive vector-length offsets (rdvl #1..#3) for the rest.
define <vscale x 64 x i1> @whilewr_8_split2(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_8_split2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub x9, x1, x0
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    rdvl x10, #2
; CHECK-NEXT:    cmp x9, #1
; CHECK-NEXT:    csinv x9, x9, xzr, ge
; CHECK-NEXT:    whilewr p0.b, x0, x1
; CHECK-NEXT:    whilelo p1.b, x8, x9
; CHECK-NEXT:    rdvl x8, #3
; CHECK-NEXT:    whilelo p2.b, x10, x9
; CHECK-NEXT:    whilelo p3.b, x8, x9
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 64 x i1> @llvm.loop.dependence.war.mask.nxv64i1(ptr %a, ptr %b, i64 1)
  ret <vscale x 64 x i1> %0
}
| |
; Mask element count (16) does not match the i16 element size, so no WHILEWR
; form fits; the mask is expanded as WHILELO over the clamped, rounded pointer
; difference divided by 2.
define <vscale x 16 x i1> @whilewr_16_expand(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_16_expand:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub x8, x1, x0
; CHECK-NEXT:    add x8, x8, x8, lsr #63
; CHECK-NEXT:    asr x8, x8, #1
; CHECK-NEXT:    cmp x8, #1
; CHECK-NEXT:    csinv x8, x8, xzr, ge
; CHECK-NEXT:    whilelo p0.b, xzr, x8
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 16 x i1> @llvm.loop.dependence.war.mask.nxv16i1(ptr %a, ptr %b, i64 2)
  ret <vscale x 16 x i1> %0
}
| |
; Expanded form (element size 2, nxv32i1) that also needs splitting: two
; WHILELOs share one clamped element count, the second offset by rdvl #1.
define <vscale x 32 x i1> @whilewr_16_expand2(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_16_expand2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub x9, x1, x0
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    add x9, x9, x9, lsr #63
; CHECK-NEXT:    asr x9, x9, #1
; CHECK-NEXT:    cmp x9, #1
; CHECK-NEXT:    csinv x9, x9, xzr, ge
; CHECK-NEXT:    whilelo p0.b, xzr, x9
; CHECK-NEXT:    whilelo p1.b, x8, x9
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 32 x i1> @llvm.loop.dependence.war.mask.nxv32i1(ptr %a, ptr %b, i64 2)
  ret <vscale x 32 x i1> %0
}
| |
; nxv8i1 with element size 4 has no matching WHILEWR form; expand via a signed
; divide-by-4 of the pointer difference (add #3 on negative) and WHILELO .h.
define <vscale x 8 x i1> @whilewr_32_expand(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_32_expand:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    subs x8, x1, x0
; CHECK-NEXT:    add x9, x8, #3
; CHECK-NEXT:    csel x8, x9, x8, mi
; CHECK-NEXT:    asr x8, x8, #2
; CHECK-NEXT:    cmp x8, #1
; CHECK-NEXT:    csinv x8, x8, xzr, ge
; CHECK-NEXT:    whilelo p0.h, xzr, x8
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 8 x i1> @llvm.loop.dependence.war.mask.nxv8i1(ptr %a, ptr %b, i64 4)
  ret <vscale x 8 x i1> %0
}
| |
; Same divide-by-4 expansion as above, but the nxv16i1 result uses WHILELO .b.
define <vscale x 16 x i1> @whilewr_32_expand2(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_32_expand2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    subs x8, x1, x0
; CHECK-NEXT:    add x9, x8, #3
; CHECK-NEXT:    csel x8, x9, x8, mi
; CHECK-NEXT:    asr x8, x8, #2
; CHECK-NEXT:    cmp x8, #1
; CHECK-NEXT:    csinv x8, x8, xzr, ge
; CHECK-NEXT:    whilelo p0.b, xzr, x8
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 16 x i1> @llvm.loop.dependence.war.mask.nxv16i1(ptr %a, ptr %b, i64 4)
  ret <vscale x 16 x i1> %0
}
| |
; Divide-by-4 expansion combined with splitting for nxv32i1: two WHILELO .b
; predicates from one clamped count, the second offset by rdvl #1.
define <vscale x 32 x i1> @whilewr_32_expand3(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_32_expand3:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    subs x9, x1, x0
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    add x10, x9, #3
; CHECK-NEXT:    csel x9, x10, x9, mi
; CHECK-NEXT:    asr x9, x9, #2
; CHECK-NEXT:    cmp x9, #1
; CHECK-NEXT:    csinv x9, x9, xzr, ge
; CHECK-NEXT:    whilelo p0.b, xzr, x9
; CHECK-NEXT:    whilelo p1.b, x8, x9
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 32 x i1> @llvm.loop.dependence.war.mask.nxv32i1(ptr %a, ptr %b, i64 4)
  ret <vscale x 32 x i1> %0
}
| |
; nxv4i1 with element size 8: expand via signed divide-by-8 of the pointer
; difference (add #7 on negative) and WHILELO .s.
define <vscale x 4 x i1> @whilewr_64_expand(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_64_expand:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    subs x8, x1, x0
; CHECK-NEXT:    add x9, x8, #7
; CHECK-NEXT:    csel x8, x9, x8, mi
; CHECK-NEXT:    asr x8, x8, #3
; CHECK-NEXT:    cmp x8, #1
; CHECK-NEXT:    csinv x8, x8, xzr, ge
; CHECK-NEXT:    whilelo p0.s, xzr, x8
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 4 x i1> @llvm.loop.dependence.war.mask.nxv4i1(ptr %a, ptr %b, i64 8)
  ret <vscale x 4 x i1> %0
}
| |
; Same divide-by-8 expansion, nxv8i1 result via WHILELO .h.
define <vscale x 8 x i1> @whilewr_64_expand2(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_64_expand2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    subs x8, x1, x0
; CHECK-NEXT:    add x9, x8, #7
; CHECK-NEXT:    csel x8, x9, x8, mi
; CHECK-NEXT:    asr x8, x8, #3
; CHECK-NEXT:    cmp x8, #1
; CHECK-NEXT:    csinv x8, x8, xzr, ge
; CHECK-NEXT:    whilelo p0.h, xzr, x8
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 8 x i1> @llvm.loop.dependence.war.mask.nxv8i1(ptr %a, ptr %b, i64 8)
  ret <vscale x 8 x i1> %0
}
| |
; Same divide-by-8 expansion, nxv16i1 result via WHILELO .b.
define <vscale x 16 x i1> @whilewr_64_expand3(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_64_expand3:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    subs x8, x1, x0
; CHECK-NEXT:    add x9, x8, #7
; CHECK-NEXT:    csel x8, x9, x8, mi
; CHECK-NEXT:    asr x8, x8, #3
; CHECK-NEXT:    cmp x8, #1
; CHECK-NEXT:    csinv x8, x8, xzr, ge
; CHECK-NEXT:    whilelo p0.b, xzr, x8
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 16 x i1> @llvm.loop.dependence.war.mask.nxv16i1(ptr %a, ptr %b, i64 8)
  ret <vscale x 16 x i1> %0
}
| |
; Divide-by-8 expansion combined with splitting for nxv32i1: two WHILELO .b
; predicates, the second offset by rdvl #1.
define <vscale x 32 x i1> @whilewr_64_expand4(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_64_expand4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    subs x9, x1, x0
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    add x10, x9, #7
; CHECK-NEXT:    csel x9, x10, x9, mi
; CHECK-NEXT:    asr x9, x9, #3
; CHECK-NEXT:    cmp x9, #1
; CHECK-NEXT:    csinv x9, x9, xzr, ge
; CHECK-NEXT:    whilelo p0.b, xzr, x9
; CHECK-NEXT:    whilelo p1.b, x8, x9
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 32 x i1> @llvm.loop.dependence.war.mask.nxv32i1(ptr %a, ptr %b, i64 8)
  ret <vscale x 32 x i1> %0
}
| |
; Odd element count nxv9i1 is widened to the containing legal predicate type,
; still emitting a single WHILEWR .b.
define <vscale x 9 x i1> @whilewr_8_widen(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_8_widen:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    whilewr p0.b, x0, x1
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 9 x i1> @llvm.loop.dependence.war.mask.nxv9i1(ptr %a, ptr %b, i64 1)
  ret <vscale x 9 x i1> %0
}
| |
; Odd element count nxv7i1 (element size 2) widens and still selects WHILEWR .h.
define <vscale x 7 x i1> @whilewr_16_widen(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_16_widen:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    whilewr p0.h, x0, x1
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 7 x i1> @llvm.loop.dependence.war.mask.nxv7i1(ptr %a, ptr %b, i64 2)
  ret <vscale x 7 x i1> %0
}
| |
; Odd element count nxv3i1 (element size 4) widens and still selects WHILEWR .s.
define <vscale x 3 x i1> @whilewr_32_widen(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_32_widen:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    whilewr p0.s, x0, x1
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 3 x i1> @llvm.loop.dependence.war.mask.nxv3i1(ptr %a, ptr %b, i64 4)
  ret <vscale x 3 x i1> %0
}
| |
; Element size 3 has no WHILEWR encoding: the mask is expanded generically,
; dividing the pointer difference by 3 via multiply-high, then WHILELO.
define <vscale x 16 x i1> @whilewr_badimm(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_badimm:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov x8, #6148914691236517205 // =0x5555555555555555
; CHECK-NEXT:    sub x9, x1, x0
; CHECK-NEXT:    movk x8, #21846
; CHECK-NEXT:    smulh x8, x9, x8
; CHECK-NEXT:    add x8, x8, x8, lsr #63
; CHECK-NEXT:    cmp x8, #1
; CHECK-NEXT:    csinv x8, x8, xzr, ge
; CHECK-NEXT:    whilelo p0.b, xzr, x8
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 16 x i1> @llvm.loop.dependence.war.mask.nxv16i1(ptr %a, ptr %b, i64 3)
  ret <vscale x 16 x i1> %0
}
| |
; nxv8i1 with element size 1: compute the full .b mask with WHILEWR, then
; extract the low half with one punpklo.
define <vscale x 8 x i1> @whilewr_extract_nxv8i1(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_extract_nxv8i1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    whilewr p0.b, x0, x1
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 8 x i1> @llvm.loop.dependence.war.mask.nxv8i1(ptr %a, ptr %b, i64 1)
  ret <vscale x 8 x i1> %0
}
| |
; nxv4i1 with element size 1: full .b WHILEWR mask, then two punpklo steps to
; extract the low quarter.
define <vscale x 4 x i1> @whilewr_extract_nxv4i1(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_extract_nxv4i1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    whilewr p0.b, x0, x1
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 4 x i1> @llvm.loop.dependence.war.mask.nxv4i1(ptr %a, ptr %b, i64 1)
  ret <vscale x 4 x i1> %0
}
| |
| |
; nxv2i1 with element size 4: compute the .s mask with WHILEWR, then extract
; the low half with punpklo.
define <vscale x 2 x i1> @whilewr_extract_nxv2i1(ptr %a, ptr %b) {
; CHECK-LABEL: whilewr_extract_nxv2i1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    whilewr p0.s, x0, x1
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 2 x i1> @llvm.loop.dependence.war.mask.nxv2i1(ptr %a, ptr %b, i64 4)
  ret <vscale x 2 x i1> %0
}