| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve2p2 < %s | FileCheck %s |
| ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -mattr=+sme2p2 < %s | FileCheck %s |
| ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -mattr=+sme2p2 -force-streaming < %s | FileCheck %s |
| |
| ; Baseline case: llvm.masked.expandload on <vscale x 2 x i64> with a live %passthru. |
| ; The checks below expect: cntp on p0 into x8, whilelo from xzr to x8 to form the load |
| ; predicate p1, an ld1d under p1, an expand under p0, and a final predicated mov into z0. |
| ; NOTE(review): the trailing mov presumably merges the expanded lanes into %passthru |
| ; (assumed to arrive in z0) -- confirm against the calling convention. |
| define <vscale x 2 x i64> @test_expandload(ptr %base, <vscale x 2 x i64> %passthru, <vscale x 2 x i1> %pred) { |
| ; CHECK-LABEL: test_expandload: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: cntp x8, p0, p0.d |
| ; CHECK-NEXT: whilelo p1.d, xzr, x8 |
| ; CHECK-NEXT: ld1d { z1.d }, p1/z, [x0] |
| ; CHECK-NEXT: expand z1.d, p0, z1.d |
| ; CHECK-NEXT: mov z0.d, p0/m, z1.d |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 2 x i64> @llvm.masked.expandload(ptr align 1 %base, <vscale x 2 x i1> %pred, <vscale x 2 x i64> %passthru) |
| ret <vscale x 2 x i64> %res |
| } |
| |
| ; Splitting case: llvm.masked.expandload on <vscale x 8 x float>, which the checks show |
| ; being handled as two .s halves (results in z0 and z1). |
| ; The predicate is unpacked with punpklo/punpkhi, and each half gets its own |
| ; cntp / whilelo / ld1w / expand / mov. The second ld1w addresses [x0, x8, lsl #2], where |
| ; x8 is the cntp result for the low half, so the high half reads from past the elements |
| ; loaded for the low half. |
| define <vscale x 8 x float> @test_split_expandload(ptr %base, <vscale x 8 x float> %passthru, <vscale x 8 x i1> %pred) { |
| ; CHECK-LABEL: test_split_expandload: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: punpklo p1.h, p0.b |
| ; CHECK-NEXT: punpkhi p0.h, p0.b |
| ; CHECK-NEXT: cntp x8, p1, p1.s |
| ; CHECK-NEXT: cntp x9, p0, p0.s |
| ; CHECK-NEXT: whilelo p2.s, xzr, x8 |
| ; CHECK-NEXT: whilelo p3.s, xzr, x9 |
| ; CHECK-NEXT: ld1w { z2.s }, p2/z, [x0] |
| ; CHECK-NEXT: ld1w { z3.s }, p3/z, [x0, x8, lsl #2] |
| ; CHECK-NEXT: expand z2.s, p1, z2.s |
| ; CHECK-NEXT: expand z3.s, p0, z3.s |
| ; CHECK-NEXT: mov z0.s, p1/m, z2.s |
| ; CHECK-NEXT: mov z1.s, p0/m, z3.s |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 8 x float> @llvm.masked.expandload(ptr align 1 %base, <vscale x 8 x i1> %pred, <vscale x 8 x float> %passthru) |
| ret <vscale x 8 x float> %res |
| } |
| |
| ; Widening case: llvm.masked.expandload on <vscale x 1 x i64>. |
| ; The checks expect the predicate to be combined with an all-false predicate first |
| ; (pfalse p1.b, then uzp1 p0.d, p0.d, p1.d), followed by the same |
| ; cntp / whilelo / ld1d / expand / mov sequence on .d elements as test_expandload. |
| ; NOTE(review): the pfalse + uzp1 pair presumably widens the nxv1i1 mask to nxv2i1 with |
| ; the extra lanes inactive -- confirm against the type legalizer. |
| define <vscale x 1 x i64> @test_widen_expandload(ptr %base, <vscale x 1 x i64> %passthru, <vscale x 1 x i1> %pred) { |
| ; CHECK-LABEL: test_widen_expandload: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: pfalse p1.b |
| ; CHECK-NEXT: uzp1 p0.d, p0.d, p1.d |
| ; CHECK-NEXT: cntp x8, p0, p0.d |
| ; CHECK-NEXT: whilelo p1.d, xzr, x8 |
| ; CHECK-NEXT: ld1d { z1.d }, p1/z, [x0] |
| ; CHECK-NEXT: expand z1.d, p0, z1.d |
| ; CHECK-NEXT: mov z0.d, p0/m, z1.d |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 1 x i64> @llvm.masked.expandload(ptr align 1 %base, <vscale x 1 x i1> %pred, <vscale x 1 x i64> %passthru) |
| ret <vscale x 1 x i64> %res |
| } |
| |
| ; Promotion case: llvm.masked.expandload on <vscale x 4 x i8>. |
| ; The checks expect the i8 elements to be loaded with ld1b into .s-sized lanes, with |
| ; cntp / whilelo / expand / mov all operating on .s elements. |
| ; Note that this signature lists %pred before %passthru, unlike the other tests in this |
| ; file; the intrinsic call still passes them in (pred, passthru) order. |
| define <vscale x 4 x i8> @test_promote_expandload(ptr %base, <vscale x 4 x i1> %pred, <vscale x 4 x i8> %passthru) { |
| ; CHECK-LABEL: test_promote_expandload: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: cntp x8, p0, p0.s |
| ; CHECK-NEXT: whilelo p1.s, xzr, x8 |
| ; CHECK-NEXT: ld1b { z1.s }, p1/z, [x0] |
| ; CHECK-NEXT: expand z1.s, p0, z1.s |
| ; CHECK-NEXT: mov z0.s, p0/m, z1.s |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 4 x i8> @llvm.masked.expandload(ptr align 1 %base, <vscale x 4 x i1> %pred, <vscale x 4 x i8> %passthru) |
| ret <vscale x 4 x i8> %res |
| } |
| |
| ; Zero passthru case: llvm.masked.expandload on <vscale x 8 x bfloat> with a |
| ; zeroinitializer passthru operand. |
| ; The checks expect cntp / whilelo / ld1h / expand on .h elements writing z0 directly, |
| ; with no predicated mov after the expand (compare test_expandload, which has one). |
| define <vscale x 8 x bfloat> @test_expandload_zero_passthru(ptr %base, <vscale x 8 x i1> %pred) { |
| ; CHECK-LABEL: test_expandload_zero_passthru: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: cntp x8, p0, p0.h |
| ; CHECK-NEXT: whilelo p1.h, xzr, x8 |
| ; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0] |
| ; CHECK-NEXT: expand z0.h, p0, z0.h |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 8 x bfloat> @llvm.masked.expandload(ptr align 1 %base, <vscale x 8 x i1> %pred, <vscale x 8 x bfloat> zeroinitializer) |
| ret <vscale x 8 x bfloat> %res |
| } |
| |
| ; Poison passthru case: llvm.masked.expandload on <vscale x 16 x i8> with a poison |
| ; passthru operand. |
| ; The checks expect cntp / whilelo / ld1b / expand on .b elements writing z0 directly, |
| ; with no predicated mov after the expand. |
| define <vscale x 16 x i8> @test_expandload_poison_passthru(ptr %base, <vscale x 16 x i1> %pred) { |
| ; CHECK-LABEL: test_expandload_poison_passthru: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: cntp x8, p0, p0.b |
| ; CHECK-NEXT: whilelo p1.b, xzr, x8 |
| ; CHECK-NEXT: ld1b { z0.b }, p1/z, [x0] |
| ; CHECK-NEXT: expand z0.b, p0, z0.b |
| ; CHECK-NEXT: ret |
| %res = call <vscale x 16 x i8> @llvm.masked.expandload(ptr align 1 %base, <vscale x 16 x i1> %pred, <vscale x 16 x i8> poison) |
| ret <vscale x 16 x i8> %res |
| } |