| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s |
| |
| ; Unpredicated byte gather (i32 8 operand) from %base indexed by %offset, |
| ; returned as <8 x i16>. The final i32 0 pairs with the expected vldrb.s16. |
| define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_s16(i8* %base, <8 x i16> %offset) { |
| ; CHECK-LABEL: test_vldrbq_gather_offset_s16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrb.s16 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 0) |
| ret <8 x i16> %0 |
| } |
| |
| declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8*, <8 x i16>, i32, i32, i32) |
| |
| ; Unpredicated byte gather (i32 8 operand) from %base indexed by %offset, |
| ; returned as <4 x i32>. The final i32 0 pairs with the expected vldrb.s32. |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_s32(i8* %base, <4 x i32> %offset) { |
| ; CHECK-LABEL: test_vldrbq_gather_offset_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrb.s32 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 0) |
| ret <4 x i32> %0 |
| } |
| |
| declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8*, <4 x i32>, i32, i32, i32) |
| |
| ; Unpredicated byte gather into <16 x i8>. The final operand is i32 0, yet the |
| ; expected instruction is vldrb.u8 (presumably because 8-bit lanes need no |
| ; widening; confirm against the instruction selection patterns). |
| define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_s8(i8* %base, <16 x i8> %offset) { |
| ; CHECK-LABEL: test_vldrbq_gather_offset_s8: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrb.u8 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 0) |
| ret <16 x i8> %0 |
| } |
| |
| declare <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8*, <16 x i8>, i32, i32, i32) |
| |
| ; Unpredicated byte gather (i32 8 operand) from %base indexed by %offset, |
| ; returned as <8 x i16>. The final i32 1 pairs with the expected vldrb.u16. |
| define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_u16(i8* %base, <8 x i16> %offset) { |
| ; CHECK-LABEL: test_vldrbq_gather_offset_u16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrb.u16 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 1) |
| ret <8 x i16> %0 |
| } |
| |
| ; Unpredicated byte gather (i32 8 operand) from %base indexed by %offset, |
| ; returned as <4 x i32>. The final i32 1 pairs with the expected vldrb.u32. |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_u32(i8* %base, <4 x i32> %offset) { |
| ; CHECK-LABEL: test_vldrbq_gather_offset_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrb.u32 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 1) |
| ret <4 x i32> %0 |
| } |
| |
| ; Unpredicated byte gather into <16 x i8> with the final operand set to i32 1; |
| ; the expected instruction is vldrb.u8. |
| define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_u8(i8* %base, <16 x i8> %offset) { |
| ; CHECK-LABEL: test_vldrbq_gather_offset_u8: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrb.u8 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 1) |
| ret <16 x i8> %0 |
| } |
| |
| ; Predicated byte gather into <8 x i16>: %p is zero-extended, converted to |
| ; <8 x i1> by pred.i2v and passed as the last operand. The expected code moves |
| ; r1 into p0 and issues vpst followed by vldrbt.s16 (final flag operand i32 0). |
| define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_z_s16(i8* %base, <8 x i16> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrbq_gather_offset_z_s16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrbt.s16 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = zext i16 %p to i32 |
| %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) |
| %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 0, <8 x i1> %1) |
| ret <8 x i16> %2 |
| } |
| |
| declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) |
| |
| declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8*, <8 x i16>, i32, i32, i32, <8 x i1>) |
| |
| ; Predicated byte gather into <4 x i32>: %p is zero-extended, converted to |
| ; <4 x i1> by pred.i2v and passed as the last operand. The expected code moves |
| ; r1 into p0 and issues vpst followed by vldrbt.s32 (final flag operand i32 0). |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_z_s32(i8* %base, <4 x i32> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrbq_gather_offset_z_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrbt.s32 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = zext i16 %p to i32 |
| %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) |
| %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 0, <4 x i1> %1) |
| ret <4 x i32> %2 |
| } |
| |
| declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) |
| |
| declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8*, <4 x i32>, i32, i32, i32, <4 x i1>) |
| |
| ; Predicated byte gather into <16 x i8> using a <16 x i1> predicate built from |
| ; %p. The flag operand is i32 0, yet the expected predicated load is vldrbt.u8 |
| ; (presumably because 8-bit lanes need no widening; confirm). |
| define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_z_s8(i8* %base, <16 x i8> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrbq_gather_offset_z_s8: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrbt.u8 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = zext i16 %p to i32 |
| %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) |
| %2 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 0, <16 x i1> %1) |
| ret <16 x i8> %2 |
| } |
| |
| declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) |
| |
| declare <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8*, <16 x i8>, i32, i32, i32, <16 x i1>) |
| |
| ; Predicated byte gather into <8 x i16>: %p is zero-extended, converted to |
| ; <8 x i1> by pred.i2v and passed as the last operand. The expected code moves |
| ; r1 into p0 and issues vpst followed by vldrbt.u16 (final flag operand i32 1). |
| define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_z_u16(i8* %base, <8 x i16> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrbq_gather_offset_z_u16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrbt.u16 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = zext i16 %p to i32 |
| %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) |
| %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 1, <8 x i1> %1) |
| ret <8 x i16> %2 |
| } |
| |
| ; Predicated byte gather into <4 x i32>: %p is zero-extended, converted to |
| ; <4 x i1> by pred.i2v and passed as the last operand. The expected code moves |
| ; r1 into p0 and issues vpst followed by vldrbt.u32 (final flag operand i32 1). |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_z_u32(i8* %base, <4 x i32> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrbq_gather_offset_z_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrbt.u32 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = zext i16 %p to i32 |
| %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) |
| %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 1, <4 x i1> %1) |
| ret <4 x i32> %2 |
| } |
| |
| ; Predicated byte gather into <16 x i8> using a <16 x i1> predicate built from |
| ; %p; flag operand i32 1. The expected code is vmsr p0, vpst, then vldrbt.u8. |
| define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_z_u8(i8* %base, <16 x i8> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrbq_gather_offset_z_u8: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrbt.u8 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = zext i16 %p to i32 |
| %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) |
| %2 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 1, <16 x i1> %1) |
| ret <16 x i8> %2 |
| } |
| |
| ; Vector-base gather of <2 x i64>: addresses come from the %addr vector (q0) |
| ; plus the immediate 616, which must appear as #616 in the expected vldrd.u64. |
| define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_s64(<2 x i64> %addr) { |
| ; CHECK-LABEL: test_vldrdq_gather_base_s64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrd.u64 q1, [q0, #616] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> %addr, i32 616) |
| ret <2 x i64> %0 |
| } |
| |
| declare <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64>, i32) |
| |
| ; Vector-base gather of <2 x i64> with a negative immediate: the i32 -336 |
| ; operand must appear as #-336 in the expected vldrd.u64. |
| define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_u64(<2 x i64> %addr) { |
| ; CHECK-LABEL: test_vldrdq_gather_base_u64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrd.u64 q1, [q0, #-336] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> %addr, i32 -336) |
| ret <2 x i64> %0 |
| } |
| |
| ; Write-back vector-base gather: the address vector is loaded from %addr, the |
| ; intrinsic returns a pair, element 1 is stored back to %addr and element 0 is |
| ; returned. The expected asm uses the "!" form with #576 and re-stores q1. |
| define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_s64(<2 x i64>* %addr) { |
| ; CHECK-LABEL: test_vldrdq_gather_base_wb_s64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrw.u32 q1, [r0] |
| ; CHECK-NEXT: vldrd.u64 q0, [q1, #576]! |
| ; CHECK-NEXT: vstrw.32 q1, [r0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = load <2 x i64>, <2 x i64>* %addr, align 8 |
| %1 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> %0, i32 576) |
| %2 = extractvalue { <2 x i64>, <2 x i64> } %1, 1 |
| store <2 x i64> %2, <2 x i64>* %addr, align 8 |
| %3 = extractvalue { <2 x i64>, <2 x i64> } %1, 0 |
| ret <2 x i64> %3 |
| } |
| |
| declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64>, i32) |
| |
| ; Write-back vector-base gather with a negative immediate (-328): element 1 of |
| ; the returned pair is stored back to %addr and element 0 is returned. The |
| ; expected asm uses the "!" form with #-328 and re-stores q1. |
| define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_u64(<2 x i64>* %addr) { |
| ; CHECK-LABEL: test_vldrdq_gather_base_wb_u64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrw.u32 q1, [r0] |
| ; CHECK-NEXT: vldrd.u64 q0, [q1, #-328]! |
| ; CHECK-NEXT: vstrw.32 q1, [r0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = load <2 x i64>, <2 x i64>* %addr, align 8 |
| %1 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> %0, i32 -328) |
| %2 = extractvalue { <2 x i64>, <2 x i64> } %1, 1 |
| store <2 x i64> %2, <2 x i64>* %addr, align 8 |
| %3 = extractvalue { <2 x i64>, <2 x i64> } %1, 0 |
| ret <2 x i64> %3 |
| } |
| |
| ; Predicated write-back vector-base gather of <2 x i64> (immediate 664). Note |
| ; the predicate is a <4 x i1> built from %p even though the data has two lanes. |
| ; The expected asm sets p0 from r1 and wraps vldrdt.u64 ... ! in a vpst. |
| define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_s64(<2 x i64>* %addr, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrdq_gather_base_wb_z_s64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vldrw.u32 q1, [r0] |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrdt.u64 q0, [q1, #664]! |
| ; CHECK-NEXT: vstrw.32 q1, [r0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = load <2 x i64>, <2 x i64>* %addr, align 8 |
| %1 = zext i16 %p to i32 |
| %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) |
| %3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 664, <4 x i1> %2) |
| %4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1 |
| store <2 x i64> %4, <2 x i64>* %addr, align 8 |
| %5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0 |
| ret <2 x i64> %5 |
| } |
| |
| declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>) |
| |
| ; Predicated write-back vector-base gather of <2 x i64> (immediate 656) with a |
| ; <4 x i1> predicate built from %p. Element 1 of the result is stored back to |
| ; %addr; element 0 is returned. Expected: vmsr p0, vpst, vldrdt.u64 ... #656]!. |
| define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_u64(<2 x i64>* %addr, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrdq_gather_base_wb_z_u64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vldrw.u32 q1, [r0] |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrdt.u64 q0, [q1, #656]! |
| ; CHECK-NEXT: vstrw.32 q1, [r0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = load <2 x i64>, <2 x i64>* %addr, align 8 |
| %1 = zext i16 %p to i32 |
| %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) |
| %3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 656, <4 x i1> %2) |
| %4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1 |
| store <2 x i64> %4, <2 x i64>* %addr, align 8 |
| %5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0 |
| ret <2 x i64> %5 |
| } |
| |
| ; Predicated vector-base gather of <2 x i64> with immediate 888. %p arrives in |
| ; r0 here (the vector argument takes q0), so the expected code is vmsr p0, r0 |
| ; followed by vpst and vldrdt.u64 with #888. |
| define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_s64(<2 x i64> %addr, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrdq_gather_base_z_s64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrdt.u64 q1, [q0, #888] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = zext i16 %p to i32 |
| %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) |
| %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 888, <4 x i1> %1) |
| ret <2 x i64> %2 |
| } |
| |
| declare <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>) |
| |
| ; Predicated vector-base gather of <2 x i64> with a negative immediate (-1000). |
| ; The predicate is read from r0; the expected load is vldrdt.u64 with #-1000 |
| ; inside a vpst. |
| define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_u64(<2 x i64> %addr, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrdq_gather_base_z_u64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrdt.u64 q1, [q0, #-1000] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = zext i16 %p to i32 |
| %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) |
| %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 -1000, <4 x i1> %1) |
| ret <2 x i64> %2 |
| } |
| |
| ; Unpredicated doubleword gather (i32 64 operand) from %base indexed by %offset |
| ; with shift operand 0. The flag operand is i32 0; expected insn is vldrd.u64. |
| define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_s64(i64* %base, <2 x i64> %offset) { |
| ; CHECK-LABEL: test_vldrdq_gather_offset_s64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrd.u64 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 0) |
| ret <2 x i64> %0 |
| } |
| |
| declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64*, <2 x i64>, i32, i32, i32) |
| |
| ; Unpredicated doubleword gather (i32 64 operand) from %base indexed by %offset |
| ; with shift operand 0. The flag operand is i32 1; expected insn is vldrd.u64. |
| define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_u64(i64* %base, <2 x i64> %offset) { |
| ; CHECK-LABEL: test_vldrdq_gather_offset_u64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrd.u64 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 1) |
| ret <2 x i64> %0 |
| } |
| |
| ; Predicated doubleword gather from %base indexed by %offset (shift 0, flag 0). |
| ; The predicate is a <4 x i1> built from %p; the expected code is vmsr p0, r1, |
| ; vpst, then vldrdt.u64. |
| define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_s64(i64* %base, <2 x i64> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrdq_gather_offset_z_s64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrdt.u64 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = zext i16 %p to i32 |
| %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) |
| %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 0, <4 x i1> %1) |
| ret <2 x i64> %2 |
| } |
| |
| declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64*, <2 x i64>, i32, i32, i32, <4 x i1>) |
| |
| ; Predicated doubleword gather from %base indexed by %offset (shift 0, flag 1). |
| ; The predicate is a <4 x i1> built from %p; the expected code is vmsr p0, r1, |
| ; vpst, then vldrdt.u64. |
| define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_u64(i64* %base, <2 x i64> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrdq_gather_offset_z_u64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrdt.u64 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = zext i16 %p to i32 |
| %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) |
| %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 1, <4 x i1> %1) |
| ret <2 x i64> %2 |
| } |
| |
| ; Unpredicated doubleword gather with shifted offsets: the i32 3 shift operand |
| ; shows up as "uxtw #3" in the expected vldrd.u64 addressing mode (flag 0). |
| define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_s64(i64* %base, <2 x i64> %offset) { |
| ; CHECK-LABEL: test_vldrdq_gather_shifted_offset_s64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrd.u64 q1, [r0, q0, uxtw #3] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 0) |
| ret <2 x i64> %0 |
| } |
| |
| ; Unpredicated doubleword gather with shifted offsets: the i32 3 shift operand |
| ; shows up as "uxtw #3" in the expected vldrd.u64 addressing mode (flag 1). |
| define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_u64(i64* %base, <2 x i64> %offset) { |
| ; CHECK-LABEL: test_vldrdq_gather_shifted_offset_u64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrd.u64 q1, [r0, q0, uxtw #3] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 1) |
| ret <2 x i64> %0 |
| } |
| |
| ; Predicated doubleword gather with shifted offsets (shift 3, flag 0) and a |
| ; <4 x i1> predicate built from %p. Expected: vmsr p0, r1, vpst, then |
| ; vldrdt.u64 with "uxtw #3". |
| define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_z_s64(i64* %base, <2 x i64> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrdq_gather_shifted_offset_z_s64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrdt.u64 q1, [r0, q0, uxtw #3] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = zext i16 %p to i32 |
| %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) |
| %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 0, <4 x i1> %1) |
| ret <2 x i64> %2 |
| } |
| |
| ; Predicated doubleword gather with shifted offsets (shift 3, flag 1) and a |
| ; <4 x i1> predicate built from %p. Expected: vmsr p0, r1, vpst, then |
| ; vldrdt.u64 with "uxtw #3". |
| define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_z_u64(i64* %base, <2 x i64> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrdq_gather_shifted_offset_z_u64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrdt.u64 q1, [r0, q0, uxtw #3] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = zext i16 %p to i32 |
| %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) |
| %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 1, <4 x i1> %1) |
| ret <2 x i64> %2 |
| } |
| |
| ; Unpredicated halfword gather (i32 16 operand) returning <8 x half>; the |
| ; floating-point result is expected to use the integer-typed vldrh.u16. |
| define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_offset_f16(half* %base, <8 x i16> %offset) { |
| ; CHECK-LABEL: test_vldrhq_gather_offset_f16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrh.u16 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half* %base, <8 x i16> %offset, i32 16, i32 0, i32 0) |
| ret <8 x half> %0 |
| } |
| |
| declare <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half*, <8 x i16>, i32, i32, i32) |
| |
| ; Unpredicated halfword gather into <8 x i16>. The flag operand is i32 0, yet |
| ; the expected instruction is vldrh.u16 (presumably because the lane width |
| ; equals the memory width, so no widening happens; confirm). |
| define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_s16(i16* %base, <8 x i16> %offset) { |
| ; CHECK-LABEL: test_vldrhq_gather_offset_s16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrh.u16 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 0) |
| ret <8 x i16> %0 |
| } |
| |
| declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16*, <8 x i16>, i32, i32, i32) |
| |
| ; Unpredicated halfword gather (i32 16 operand) returned as <4 x i32>. The |
| ; final i32 0 pairs with the expected vldrh.s32. |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_s32(i16* %base, <4 x i32> %offset) { |
| ; CHECK-LABEL: test_vldrhq_gather_offset_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrh.s32 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 0) |
| ret <4 x i32> %0 |
| } |
| |
| declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16*, <4 x i32>, i32, i32, i32) |
| |
| ; Unpredicated halfword gather into <8 x i16> with the flag operand set to |
| ; i32 1; the expected instruction is vldrh.u16. |
| define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_u16(i16* %base, <8 x i16> %offset) { |
| ; CHECK-LABEL: test_vldrhq_gather_offset_u16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrh.u16 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 1) |
| ret <8 x i16> %0 |
| } |
| |
| ; Unpredicated halfword gather (i32 16 operand) returned as <4 x i32>. The |
| ; final i32 1 pairs with the expected vldrh.u32. |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_u32(i16* %base, <4 x i32> %offset) { |
| ; CHECK-LABEL: test_vldrhq_gather_offset_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrh.u32 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 1) |
| ret <4 x i32> %0 |
| } |
| |
| ; Predicated halfword gather returning <8 x half>, with a <8 x i1> predicate |
| ; built from %p. Expected: vmsr p0, r1, vpst, then vldrht.u16. |
| define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_offset_z_f16(half* %base, <8 x i16> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrhq_gather_offset_z_f16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrht.u16 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = zext i16 %p to i32 |
| %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) |
| %2 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half* %base, <8 x i16> %offset, i32 16, i32 0, i32 0, <8 x i1> %1) |
| ret <8 x half> %2 |
| } |
| |
| declare <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half*, <8 x i16>, i32, i32, i32, <8 x i1>) |
| |
| ; Predicated halfword gather into <8 x i16> (flag operand i32 0) with a |
| ; <8 x i1> predicate built from %p. Expected: vmsr p0, r1, vpst, vldrht.u16. |
| define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_z_s16(i16* %base, <8 x i16> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrhq_gather_offset_z_s16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrht.u16 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = zext i16 %p to i32 |
| %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) |
| %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 0, <8 x i1> %1) |
| ret <8 x i16> %2 |
| } |
| |
| declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16*, <8 x i16>, i32, i32, i32, <8 x i1>) |
| |
| ; Predicated halfword gather into <4 x i32> (flag operand i32 0) with a |
| ; <4 x i1> predicate built from %p. Expected: vmsr p0, r1, vpst, vldrht.s32. |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_z_s32(i16* %base, <4 x i32> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrhq_gather_offset_z_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrht.s32 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = zext i16 %p to i32 |
| %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) |
| %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 0, <4 x i1> %1) |
| ret <4 x i32> %2 |
| } |
| |
| declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16*, <4 x i32>, i32, i32, i32, <4 x i1>) |
| |
| ; Predicated halfword gather into <8 x i16> (flag operand i32 1) with a |
| ; <8 x i1> predicate built from %p. Expected: vmsr p0, r1, vpst, vldrht.u16. |
| define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_z_u16(i16* %base, <8 x i16> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrhq_gather_offset_z_u16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrht.u16 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = zext i16 %p to i32 |
| %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) |
| %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 1, <8 x i1> %1) |
| ret <8 x i16> %2 |
| } |
| |
| ; Predicated halfword gather into <4 x i32> (flag operand i32 1) with a |
| ; <4 x i1> predicate built from %p. Expected: vmsr p0, r1, vpst, vldrht.u32. |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_z_u32(i16* %base, <4 x i32> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrhq_gather_offset_z_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrht.u32 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = zext i16 %p to i32 |
| %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) |
| %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 1, <4 x i1> %1) |
| ret <4 x i32> %2 |
| } |
| |
| ; Unpredicated halfword gather returning <8 x half> with shifted offsets: the |
| ; i32 1 shift operand shows up as "uxtw #1" in the expected vldrh.u16. |
| define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_shifted_offset_f16(half* %base, <8 x i16> %offset) { |
| ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_f16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrh.u16 q1, [r0, q0, uxtw #1] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half* %base, <8 x i16> %offset, i32 16, i32 1, i32 0) |
| ret <8 x half> %0 |
| } |
| |
| ; Unpredicated halfword gather into <8 x i16> with shifted offsets (shift 1, |
| ; flag 0). The expected instruction is vldrh.u16 with "uxtw #1". |
| define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_s16(i16* %base, <8 x i16> %offset) { |
| ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_s16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrh.u16 q1, [r0, q0, uxtw #1] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 0) |
| ret <8 x i16> %0 |
| } |
| |
| ; Unpredicated halfword gather into <4 x i32> with shifted offsets (shift 1, |
| ; flag 0). The expected instruction is vldrh.s32 with "uxtw #1". |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_s32(i16* %base, <4 x i32> %offset) { |
| ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrh.s32 q1, [r0, q0, uxtw #1] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 0) |
| ret <4 x i32> %0 |
| } |
| |
| ; Unpredicated halfword gather into <8 x i16> with shifted offsets (shift 1, |
| ; flag 1). The expected instruction is vldrh.u16 with "uxtw #1". |
| define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_u16(i16* %base, <8 x i16> %offset) { |
| ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_u16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrh.u16 q1, [r0, q0, uxtw #1] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 1) |
| ret <8 x i16> %0 |
| } |
| |
| ; Unpredicated halfword gather into <4 x i32> with shifted offsets (shift 1, |
| ; flag 1). The expected instruction is vldrh.u32 with "uxtw #1". |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_u32(i16* %base, <4 x i32> %offset) { |
| ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrh.u32 q1, [r0, q0, uxtw #1] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 1) |
| ret <4 x i32> %0 |
| } |
| |
| ; Predicated halfword gather returning <8 x half> with shifted offsets |
| ; (shift 1) and a <8 x i1> predicate built from %p. Expected: vmsr p0, r1, |
| ; vpst, then vldrht.u16 with "uxtw #1". |
| define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_shifted_offset_z_f16(half* %base, <8 x i16> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_f16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrht.u16 q1, [r0, q0, uxtw #1] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = zext i16 %p to i32 |
| %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) |
| %2 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half* %base, <8 x i16> %offset, i32 16, i32 1, i32 0, <8 x i1> %1) |
| ret <8 x half> %2 |
| } |
| |
| ; Predicated halfword gather into <8 x i16> with shifted offsets (shift 1, |
| ; flag 0) and a <8 x i1> predicate built from %p. Expected: vmsr p0, r1, vpst, |
| ; then vldrht.u16 with "uxtw #1". |
| define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_z_s16(i16* %base, <8 x i16> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_s16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrht.u16 q1, [r0, q0, uxtw #1] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = zext i16 %p to i32 |
| %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) |
| %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 0, <8 x i1> %1) |
| ret <8 x i16> %2 |
| } |
| |
| ; Predicated halfword gather into <4 x i32> with shifted offsets (shift 1, |
| ; flag 0) and a <4 x i1> predicate built from %p. Expected: vmsr p0, r1, vpst, |
| ; then vldrht.s32 with "uxtw #1". |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_z_s32(i16* %base, <4 x i32> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrht.s32 q1, [r0, q0, uxtw #1] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = zext i16 %p to i32 |
| %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) |
| %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 0, <4 x i1> %1) |
| ret <4 x i32> %2 |
| } |
| |
| ; Predicated halfword gather into <8 x i16> with shifted offsets (shift 1, |
| ; flag 1) and a <8 x i1> predicate built from %p. Expected: vmsr p0, r1, vpst, |
| ; then vldrht.u16 with "uxtw #1". |
| define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_z_u16(i16* %base, <8 x i16> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_u16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrht.u16 q1, [r0, q0, uxtw #1] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = zext i16 %p to i32 |
| %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) |
| %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 1, <8 x i1> %1) |
| ret <8 x i16> %2 |
| } |
| |
| ; Predicated halfword gather into <4 x i32> with shifted offsets (shift 1, |
| ; flag 1) and a <4 x i1> predicate built from %p. Expected: vmsr p0, r1, vpst, |
| ; then vldrht.u32 with "uxtw #1". |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_z_u32(i16* %base, <4 x i32> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrht.u32 q1, [r0, q0, uxtw #1] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = zext i16 %p to i32 |
| %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) |
| %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 1, <4 x i1> %1) |
| ret <4 x i32> %2 |
| } |
| |
| ; Vector-base word gather returning <4 x float>: addresses come from the |
| ; <4 x i32> %addr vector (q0) plus immediate 12; expected vldrw.u32 with #12. |
| define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_f32(<4 x i32> %addr) { |
| ; CHECK-LABEL: test_vldrwq_gather_base_f32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrw.u32 q1, [q0, #12] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32> %addr, i32 12) |
| ret <4 x float> %0 |
| } |
| |
| declare <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32>, i32) |
| |
| ; Vector-base word gather returning <4 x i32>: addresses come from the %addr |
| ; vector (q0) plus immediate 400; expected vldrw.u32 with #400. |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_s32(<4 x i32> %addr) { |
| ; CHECK-LABEL: test_vldrwq_gather_base_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrw.u32 q1, [q0, #400] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> %addr, i32 400) |
| ret <4 x i32> %0 |
| } |
| |
| declare <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32>, i32) |
| |
| ; Vector-base word gather returning <4 x i32>: addresses come from the %addr |
| ; vector (q0) plus immediate 284; expected vldrw.u32 with #284. |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_u32(<4 x i32> %addr) { |
| ; CHECK-LABEL: test_vldrwq_gather_base_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrw.u32 q1, [q0, #284] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> %addr, i32 284) |
| ret <4 x i32> %0 |
| } |
| |
| ; Write-back vector-base word gather returning <4 x float> (immediate -64). |
| ; Element 1 of the result pair (a <4 x i32>) is stored back to %addr; element 0 |
| ; is returned. The expected asm uses the "!" form with #-64 and re-stores q1. |
| define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_f32(<4 x i32>* %addr) { |
| ; CHECK-LABEL: test_vldrwq_gather_base_wb_f32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrw.u32 q1, [r0] |
| ; CHECK-NEXT: vldrw.u32 q0, [q1, #-64]! |
| ; CHECK-NEXT: vstrw.32 q1, [r0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = load <4 x i32>, <4 x i32>* %addr, align 8 |
| %1 = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32> %0, i32 -64) |
| %2 = extractvalue { <4 x float>, <4 x i32> } %1, 1 |
| store <4 x i32> %2, <4 x i32>* %addr, align 8 |
| %3 = extractvalue { <4 x float>, <4 x i32> } %1, 0 |
| ret <4 x float> %3 |
| } |
| |
| declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32>, i32) |
| |
| ; Write-back vector-base word gather returning <4 x i32> (immediate 80). |
| ; Element 1 of the result pair is stored back to %addr; element 0 is returned. |
| ; The expected asm uses the "!" form with #80 and re-stores q1. |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_s32(<4 x i32>* %addr) { |
| ; CHECK-LABEL: test_vldrwq_gather_base_wb_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrw.u32 q1, [r0] |
| ; CHECK-NEXT: vldrw.u32 q0, [q1, #80]! |
| ; CHECK-NEXT: vstrw.32 q1, [r0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = load <4 x i32>, <4 x i32>* %addr, align 8 |
| %1 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> %0, i32 80) |
| %2 = extractvalue { <4 x i32>, <4 x i32> } %1, 1 |
| store <4 x i32> %2, <4 x i32>* %addr, align 8 |
| %3 = extractvalue { <4 x i32>, <4 x i32> } %1, 0 |
| ret <4 x i32> %3 |
| } |
| |
| declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32>, i32) |
| |
| ; Gather-base load with writeback: loads a <4 x i32> through %addr, calls the |
| ; v4i32 gather.base.wb intrinsic with immediate 480, stores result field 1 |
| ; back through %addr and returns result field 0. The assertions expect the |
| ; pre-indexed form of vldrw.u32 (#480 with '!') between a load and a store. |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_u32(<4 x i32>* %addr) { |
| ; CHECK-LABEL: test_vldrwq_gather_base_wb_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrw.u32 q1, [r0] |
| ; CHECK-NEXT: vldrw.u32 q0, [q1, #480]! |
| ; CHECK-NEXT: vstrw.32 q1, [r0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %bases = load <4 x i32>, <4 x i32>* %addr, align 8 |
| %pair = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> %bases, i32 480) |
| %bases.next = extractvalue { <4 x i32>, <4 x i32> } %pair, 1 |
| store <4 x i32> %bases.next, <4 x i32>* %addr, align 8 |
| %loaded = extractvalue { <4 x i32>, <4 x i32> } %pair, 0 |
| ret <4 x i32> %loaded |
| } |
| |
| ; Predicated gather-base load with writeback: loads a <4 x i32> through %addr, |
| ; widens the i16 %p to i32 and converts it to a <4 x i1> mask via pred.i2v, |
| ; then calls the v4f32 gather.base.wb.predicated intrinsic with immediate |
| ; -352. Result field 1 is stored back through %addr; field 0 is returned. |
| ; The assertions expect vmsr/vpst around a pre-indexed vldrwt.u32. |
| define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_z_f32(<4 x i32>* %addr, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrwq_gather_base_wb_z_f32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vldrw.u32 q1, [r0] |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrwt.u32 q0, [q1, #-352]! |
| ; CHECK-NEXT: vstrw.32 q1, [r0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %bases = load <4 x i32>, <4 x i32>* %addr, align 8 |
| %p.wide = zext i16 %p to i32 |
| %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide) |
| %pair = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> %bases, i32 -352, <4 x i1> %mask) |
| %bases.next = extractvalue { <4 x float>, <4 x i32> } %pair, 1 |
| store <4 x i32> %bases.next, <4 x i32>* %addr, align 8 |
| %loaded = extractvalue { <4 x float>, <4 x i32> } %pair, 0 |
| ret <4 x float> %loaded |
| } |
| |
| declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>) |
| |
| ; Predicated gather-base load with writeback: loads a <4 x i32> through %addr, |
| ; widens the i16 %p to i32 and converts it to a <4 x i1> mask via pred.i2v, |
| ; then calls the v4i32 gather.base.wb.predicated intrinsic with immediate |
| ; 276. Result field 1 is stored back through %addr; field 0 is returned. |
| ; The assertions expect vmsr/vpst around a pre-indexed vldrwt.u32. |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_z_s32(<4 x i32>* %addr, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrwq_gather_base_wb_z_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vldrw.u32 q1, [r0] |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrwt.u32 q0, [q1, #276]! |
| ; CHECK-NEXT: vstrw.32 q1, [r0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %bases = load <4 x i32>, <4 x i32>* %addr, align 8 |
| %p.wide = zext i16 %p to i32 |
| %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide) |
| %pair = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %bases, i32 276, <4 x i1> %mask) |
| %bases.next = extractvalue { <4 x i32>, <4 x i32> } %pair, 1 |
| store <4 x i32> %bases.next, <4 x i32>* %addr, align 8 |
| %loaded = extractvalue { <4 x i32>, <4 x i32> } %pair, 0 |
| ret <4 x i32> %loaded |
| } |
| |
| declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>) |
| |
| ; Predicated gather-base load with writeback: loads a <4 x i32> through %addr, |
| ; widens the i16 %p to i32 and converts it to a <4 x i1> mask via pred.i2v, |
| ; then calls the v4i32 gather.base.wb.predicated intrinsic with immediate |
| ; 88. Result field 1 is stored back through %addr; field 0 is returned. |
| ; The assertions expect vmsr/vpst around a pre-indexed vldrwt.u32. |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_z_u32(<4 x i32>* %addr, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrwq_gather_base_wb_z_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vldrw.u32 q1, [r0] |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrwt.u32 q0, [q1, #88]! |
| ; CHECK-NEXT: vstrw.32 q1, [r0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %bases = load <4 x i32>, <4 x i32>* %addr, align 8 |
| %p.wide = zext i16 %p to i32 |
| %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide) |
| %pair = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %bases, i32 88, <4 x i1> %mask) |
| %bases.next = extractvalue { <4 x i32>, <4 x i32> } %pair, 1 |
| store <4 x i32> %bases.next, <4 x i32>* %addr, align 8 |
| %loaded = extractvalue { <4 x i32>, <4 x i32> } %pair, 0 |
| ret <4 x i32> %loaded |
| } |
| |
| ; Predicated gather-base load: widens the i16 %p to i32, converts it to a |
| ; <4 x i1> mask via pred.i2v, and calls the v4f32 gather.base.predicated |
| ; intrinsic on %addr with immediate -300. The assertions expect vmsr/vpst |
| ; followed by vldrwt.u32 with #-300. |
| define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_z_f32(<4 x i32> %addr, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrwq_gather_base_z_f32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrwt.u32 q1, [q0, #-300] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %p.wide = zext i16 %p to i32 |
| %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide) |
| %gathered = call <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32> %addr, i32 -300, <4 x i1> %mask) |
| ret <4 x float> %gathered |
| } |
| |
| declare <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>) |
| |
| ; Predicated gather-base load: widens the i16 %p to i32, converts it to a |
| ; <4 x i1> mask via pred.i2v, and calls the v4i32 gather.base.predicated |
| ; intrinsic on %addr with immediate 440. The assertions expect vmsr/vpst |
| ; followed by vldrwt.u32 with #440. |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_z_s32(<4 x i32> %addr, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrwq_gather_base_z_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrwt.u32 q1, [q0, #440] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %p.wide = zext i16 %p to i32 |
| %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide) |
| %gathered = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 440, <4 x i1> %mask) |
| ret <4 x i32> %gathered |
| } |
| |
| declare <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>) |
| |
| ; Predicated gather-base load: widens the i16 %p to i32, converts it to a |
| ; <4 x i1> mask via pred.i2v, and calls the v4i32 gather.base.predicated |
| ; intrinsic on %addr with immediate 300. The assertions expect vmsr/vpst |
| ; followed by vldrwt.u32 with #300. |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_z_u32(<4 x i32> %addr, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrwq_gather_base_z_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrwt.u32 q1, [q0, #300] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %p.wide = zext i16 %p to i32 |
| %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide) |
| %gathered = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 300, <4 x i1> %mask) |
| ret <4 x i32> %gathered |
| } |
| |
| ; Unpredicated gather-offset load from a float* base: calls the v4f32 |
| ; gather.offset intrinsic with trailing constants (32, 0, 0) and returns the |
| ; result. The assertions expect vldrw.u32 with a plain [r0, q0] operand. |
| define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_offset_f32(float* %base, <4 x i32> %offset) { |
| ; CHECK-LABEL: test_vldrwq_gather_offset_f32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrw.u32 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %gathered = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float* %base, <4 x i32> %offset, i32 32, i32 0, i32 0) |
| ret <4 x float> %gathered |
| } |
| |
| declare <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float*, <4 x i32>, i32, i32, i32) |
| |
| ; Unpredicated gather-offset load from an i32* base: calls the v4i32 |
| ; gather.offset intrinsic with trailing constants (32, 0, 0) and returns the |
| ; result. The assertions expect vldrw.u32 with a plain [r0, q0] operand. |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_s32(i32* %base, <4 x i32> %offset) { |
| ; CHECK-LABEL: test_vldrwq_gather_offset_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrw.u32 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %gathered = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 0) |
| ret <4 x i32> %gathered |
| } |
| |
| declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32*, <4 x i32>, i32, i32, i32) |
| |
| ; Unpredicated gather-offset load from an i32* base: calls the v4i32 |
| ; gather.offset intrinsic with trailing constants (32, 0, 1) and returns the |
| ; result. The assertions expect vldrw.u32 with a plain [r0, q0] operand. |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_u32(i32* %base, <4 x i32> %offset) { |
| ; CHECK-LABEL: test_vldrwq_gather_offset_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrw.u32 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %gathered = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 1) |
| ret <4 x i32> %gathered |
| } |
| |
| ; Predicated gather-offset load from a float* base: widens the i16 %p to i32, |
| ; converts it to a <4 x i1> mask via pred.i2v, and calls the v4f32 |
| ; gather.offset.predicated intrinsic with constants (32, 0, 0). The |
| ; assertions expect vmsr/vpst followed by vldrwt.u32 [r0, q0]. |
| define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_offset_z_f32(float* %base, <4 x i32> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrwq_gather_offset_z_f32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrwt.u32 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %p.wide = zext i16 %p to i32 |
| %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide) |
| %gathered = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float* %base, <4 x i32> %offset, i32 32, i32 0, i32 0, <4 x i1> %mask) |
| ret <4 x float> %gathered |
| } |
| |
| declare <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float*, <4 x i32>, i32, i32, i32, <4 x i1>) |
| |
| ; Predicated gather-offset load from an i32* base: widens the i16 %p to i32, |
| ; converts it to a <4 x i1> mask via pred.i2v, and calls the v4i32 |
| ; gather.offset.predicated intrinsic with constants (32, 0, 0). The |
| ; assertions expect vmsr/vpst followed by vldrwt.u32 [r0, q0]. |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_z_s32(i32* %base, <4 x i32> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrwq_gather_offset_z_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrwt.u32 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %p.wide = zext i16 %p to i32 |
| %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide) |
| %gathered = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 0, <4 x i1> %mask) |
| ret <4 x i32> %gathered |
| } |
| |
| declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32*, <4 x i32>, i32, i32, i32, <4 x i1>) |
| |
| ; Predicated gather-offset load from an i32* base: widens the i16 %p to i32, |
| ; converts it to a <4 x i1> mask via pred.i2v, and calls the v4i32 |
| ; gather.offset.predicated intrinsic with constants (32, 0, 1). The |
| ; assertions expect vmsr/vpst followed by vldrwt.u32 [r0, q0]. |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_z_u32(i32* %base, <4 x i32> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrwq_gather_offset_z_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrwt.u32 q1, [r0, q0] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %p.wide = zext i16 %p to i32 |
| %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide) |
| %gathered = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 1, <4 x i1> %mask) |
| ret <4 x i32> %gathered |
| } |
| |
| ; Unpredicated shifted gather-offset load from a float* base: calls the v4f32 |
| ; gather.offset intrinsic with trailing constants (32, 2, 0) and returns the |
| ; result. The assertions expect vldrw.u32 with a "uxtw #2" offset operand. |
| define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_shifted_offset_f32(float* %base, <4 x i32> %offset) { |
| ; CHECK-LABEL: test_vldrwq_gather_shifted_offset_f32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %gathered = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float* %base, <4 x i32> %offset, i32 32, i32 2, i32 0) |
| ret <4 x float> %gathered |
| } |
| |
| ; Unpredicated shifted gather-offset load from an i32* base: calls the v4i32 |
| ; gather.offset intrinsic with trailing constants (32, 2, 0) and returns the |
| ; result. The assertions expect vldrw.u32 with a "uxtw #2" offset operand. |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_s32(i32* %base, <4 x i32> %offset) { |
| ; CHECK-LABEL: test_vldrwq_gather_shifted_offset_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %gathered = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 0) |
| ret <4 x i32> %gathered |
| } |
| |
| ; Unpredicated shifted gather-offset load from an i32* base: calls the v4i32 |
| ; gather.offset intrinsic with trailing constants (32, 2, 1) and returns the |
| ; result. The assertions expect vldrw.u32 with a "uxtw #2" offset operand. |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_u32(i32* %base, <4 x i32> %offset) { |
| ; CHECK-LABEL: test_vldrwq_gather_shifted_offset_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %gathered = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 1) |
| ret <4 x i32> %gathered |
| } |
| |
| ; Predicated shifted gather-offset load from a float* base: widens the i16 %p |
| ; to i32, converts it to a <4 x i1> mask via pred.i2v, and calls the v4f32 |
| ; gather.offset.predicated intrinsic with constants (32, 2, 0). The |
| ; assertions expect vmsr/vpst followed by vldrwt.u32 with "uxtw #2". |
| define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_shifted_offset_z_f32(float* %base, <4 x i32> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_f32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrwt.u32 q1, [r0, q0, uxtw #2] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %p.wide = zext i16 %p to i32 |
| %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide) |
| %gathered = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float* %base, <4 x i32> %offset, i32 32, i32 2, i32 0, <4 x i1> %mask) |
| ret <4 x float> %gathered |
| } |
| |
| ; Predicated shifted gather-offset load from an i32* base: widens the i16 %p |
| ; to i32, converts it to a <4 x i1> mask via pred.i2v, and calls the v4i32 |
| ; gather.offset.predicated intrinsic with constants (32, 2, 0). The |
| ; assertions expect vmsr/vpst followed by vldrwt.u32 with "uxtw #2". |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_z_s32(i32* %base, <4 x i32> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrwt.u32 q1, [r0, q0, uxtw #2] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %p.wide = zext i16 %p to i32 |
| %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide) |
| %gathered = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 0, <4 x i1> %mask) |
| ret <4 x i32> %gathered |
| } |
| |
| ; Predicated shifted gather-offset load from an i32* base: widens the i16 %p |
| ; to i32, converts it to a <4 x i1> mask via pred.i2v, and calls the v4i32 |
| ; gather.offset.predicated intrinsic with constants (32, 2, 1). The |
| ; assertions expect vmsr/vpst followed by vldrwt.u32 with "uxtw #2". |
| define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_z_u32(i32* %base, <4 x i32> %offset, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vldrwt.u32 q1, [r0, q0, uxtw #2] |
| ; CHECK-NEXT: vmov q0, q1 |
| ; CHECK-NEXT: bx lr |
| entry: |
| %p.wide = zext i16 %p to i32 |
| %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide) |
| %gathered = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 1, <4 x i1> %mask) |
| ret <4 x i32> %gathered |
| } |
| |
| ; Predicated scatter-offset store through an i8* base with <8 x i16> offsets |
| ; and values: widens the i16 %p to i32, converts it to an <8 x i1> mask via |
| ; pred.i2v, and calls the scatter.offset.predicated intrinsic with constants |
| ; (8, 0). The assertions expect vmsr/vpst followed by vstrbt.16. |
| define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrbq_scatter_offset_p_s16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrbt.16 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %p.wide = zext i16 %p to i32 |
| %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0, <8 x i1> %mask) |
| ret void |
| } |
| |
| declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8*, <8 x i16>, <8 x i16>, i32, i32, <8 x i1>) |
| |
| ; Predicated scatter-offset store through an i8* base with <4 x i32> offsets |
| ; and values: widens the i16 %p to i32, converts it to a <4 x i1> mask via |
| ; pred.i2v, and calls the scatter.offset.predicated intrinsic with constants |
| ; (8, 0). The assertions expect vmsr/vpst followed by vstrbt.32. |
| define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrbq_scatter_offset_p_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrbt.32 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %p.wide = zext i16 %p to i32 |
| %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0, <4 x i1> %mask) |
| ret void |
| } |
| |
| declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8*, <4 x i32>, <4 x i32>, i32, i32, <4 x i1>) |
| |
| ; Predicated scatter-offset store through an i8* base with <16 x i8> offsets |
| ; and values: widens the i16 %p to i32, converts it to a <16 x i1> mask via |
| ; pred.i2v, and calls the scatter.offset.predicated intrinsic with constants |
| ; (8, 0). The assertions expect vmsr/vpst followed by vstrbt.8. |
| define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrbq_scatter_offset_p_s8: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrbt.8 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %p.wide = zext i16 %p to i32 |
| %mask = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0, <16 x i1> %mask) |
| ret void |
| } |
| |
| declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8*, <16 x i8>, <16 x i8>, i32, i32, <16 x i1>) |
| |
| ; Predicated scatter-offset store through an i8* base with <8 x i16> offsets |
| ; and values: widens the i16 %p to i32, converts it to an <8 x i1> mask via |
| ; pred.i2v, and calls the scatter.offset.predicated intrinsic with constants |
| ; (8, 0). The assertions expect vmsr/vpst followed by vstrbt.16. |
| define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrbq_scatter_offset_p_u16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrbt.16 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %p.wide = zext i16 %p to i32 |
| %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0, <8 x i1> %mask) |
| ret void |
| } |
| |
| ; Predicated scatter-offset store through an i8* base with <4 x i32> offsets |
| ; and values: widens the i16 %p to i32, converts it to a <4 x i1> mask via |
| ; pred.i2v, and calls the scatter.offset.predicated intrinsic with constants |
| ; (8, 0). The assertions expect vmsr/vpst followed by vstrbt.32. |
| define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrbq_scatter_offset_p_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrbt.32 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %p.wide = zext i16 %p to i32 |
| %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0, <4 x i1> %mask) |
| ret void |
| } |
| |
| ; Predicated scatter-offset store through an i8* base with <16 x i8> offsets |
| ; and values: widens the i16 %p to i32, converts it to a <16 x i1> mask via |
| ; pred.i2v, and calls the scatter.offset.predicated intrinsic with constants |
| ; (8, 0). The assertions expect vmsr/vpst followed by vstrbt.8. |
| define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrbq_scatter_offset_p_u8: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrbt.8 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %p.wide = zext i16 %p to i32 |
| %mask = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0, <16 x i1> %mask) |
| ret void |
| } |
| |
| ; Unpredicated scatter-offset store through an i8* base with <8 x i16> |
| ; offsets and values; the intrinsic is called with constants (8, 0). The |
| ; assertions expect a single vstrb.16 with a [r0, q0] operand. |
| define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s16(i8* %base, <8 x i16> %offset, <8 x i16> %value) { |
| ; CHECK-LABEL: test_vstrbq_scatter_offset_s16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrb.16 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0) |
| ret void |
| } |
| |
| declare void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) |
| |
| ; Unpredicated scatter-offset store through an i8* base with <4 x i32> |
| ; offsets and values; the intrinsic is called with constants (8, 0). The |
| ; assertions expect a single vstrb.32 with a [r0, q0] operand. |
| define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s32(i8* %base, <4 x i32> %offset, <4 x i32> %value) { |
| ; CHECK-LABEL: test_vstrbq_scatter_offset_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrb.32 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0) |
| ret void |
| } |
| |
| declare void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) |
| |
| ; Unpredicated scatter-offset store through an i8* base with <16 x i8> |
| ; offsets and values; the intrinsic is called with constants (8, 0). The |
| ; assertions expect a single vstrb.8 with a [r0, q0] operand. |
| define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s8(i8* %base, <16 x i8> %offset, <16 x i8> %value) { |
| ; CHECK-LABEL: test_vstrbq_scatter_offset_s8: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrb.8 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0) |
| ret void |
| } |
| |
| declare void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32) |
| |
| ; Unpredicated scatter-offset store through an i8* base with <8 x i16> |
| ; offsets and values; the intrinsic is called with constants (8, 0). The |
| ; assertions expect a single vstrb.16 with a [r0, q0] operand. |
| define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u16(i8* %base, <8 x i16> %offset, <8 x i16> %value) { |
| ; CHECK-LABEL: test_vstrbq_scatter_offset_u16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrb.16 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0) |
| ret void |
| } |
| |
| ; Unpredicated scatter-offset store through an i8* base with <4 x i32> |
| ; offsets and values; the intrinsic is called with constants (8, 0). The |
| ; assertions expect a single vstrb.32 with a [r0, q0] operand. |
| define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u32(i8* %base, <4 x i32> %offset, <4 x i32> %value) { |
| ; CHECK-LABEL: test_vstrbq_scatter_offset_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrb.32 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0) |
| ret void |
| } |
| |
| ; Unpredicated scatter-offset store through an i8* base with <16 x i8> |
| ; offsets and values; the intrinsic is called with constants (8, 0). The |
| ; assertions expect a single vstrb.8 with a [r0, q0] operand. |
| define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u8(i8* %base, <16 x i8> %offset, <16 x i8> %value) { |
| ; CHECK-LABEL: test_vstrbq_scatter_offset_u8: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrb.8 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0) |
| ret void |
| } |
| |
| ; Predicated scatter-base store of <2 x i64> %value: widens the i16 %p to i32, |
| ; converts it to a <4 x i1> mask via pred.i2v, and calls the |
| ; scatter.base.predicated intrinsic on %addr with immediate 888. The |
| ; assertions expect vmsr/vpst followed by vstrdt.64 with #888. |
| define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_s64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrdq_scatter_base_p_s64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrdt.64 q1, [q0, #888] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %p.wide = zext i16 %p to i32 |
| %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide) |
| call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 888, <2 x i64> %value, <4 x i1> %mask) |
| ret void |
| } |
| |
| declare void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>) |
| |
| ; Predicated scatter-base store of <2 x i64> %value: widens the i16 %p to i32, |
| ; converts it to a <4 x i1> mask via pred.i2v, and calls the |
| ; scatter.base.predicated intrinsic on %addr with immediate 264. The |
| ; assertions expect vmsr/vpst followed by vstrdt.64 with #264. |
| define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_u64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrdq_scatter_base_p_u64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrdt.64 q1, [q0, #264] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %p.wide = zext i16 %p to i32 |
| %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide) |
| call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 264, <2 x i64> %value, <4 x i1> %mask) |
| ret void |
| } |
| |
| ; Unpredicated scatter-base store of <2 x i64> %value using %addr and the |
| ; immediate 408. The assertions expect a single vstrd.64 with #408. |
| define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_s64(<2 x i64> %addr, <2 x i64> %value) { |
| ; CHECK-LABEL: test_vstrdq_scatter_base_s64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrd.64 q1, [q0, #408] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> %addr, i32 408, <2 x i64> %value) |
| ret void |
| } |
| |
| declare void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64>, i32, <2 x i64>) |
| |
| ; Unpredicated scatter-base store of <2 x i64> %value using %addr and the |
| ; immediate -472. The assertions expect a single vstrd.64 with #-472. |
| define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_u64(<2 x i64> %addr, <2 x i64> %value) { |
| ; CHECK-LABEL: test_vstrdq_scatter_base_u64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrd.64 q1, [q0, #-472] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> %addr, i32 -472, <2 x i64> %value) |
| ret void |
| } |
| |
| ; Predicated scatter-base store with writeback: loads a <2 x i64> through |
| ; %addr, builds a <4 x i1> mask from %p (zext then pred.i2v), calls the |
| ; scatter.base.wb.predicated intrinsic with immediate 248, and stores the |
| ; intrinsic's <2 x i64> result back through %addr. The assertions expect |
| ; vmsr/vpst around a pre-indexed vstrdt.64 (#248 with '!'). |
| define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_s64(<2 x i64>* %addr, <2 x i64> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrdq_scatter_base_wb_p_s64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrw.u32 q1, [r0] |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrdt.64 q0, [q1, #248]! |
| ; CHECK-NEXT: vstrw.32 q1, [r0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %bases = load <2 x i64>, <2 x i64>* %addr, align 8 |
| %p.wide = zext i16 %p to i32 |
| %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide) |
| %bases.next = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %bases, i32 248, <2 x i64> %value, <4 x i1> %mask) |
| store <2 x i64> %bases.next, <2 x i64>* %addr, align 8 |
| ret void |
| } |
| |
| declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>) |
| |
| ; Predicated scatter-base store with writeback: loads a <2 x i64> through |
| ; %addr, builds a <4 x i1> mask from %p (zext then pred.i2v), calls the |
| ; scatter.base.wb.predicated intrinsic with immediate 136, and stores the |
| ; intrinsic's <2 x i64> result back through %addr. The assertions expect |
| ; vmsr/vpst around a pre-indexed vstrdt.64 (#136 with '!'). |
| define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_u64(<2 x i64>* %addr, <2 x i64> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrdq_scatter_base_wb_p_u64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrw.u32 q1, [r0] |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrdt.64 q0, [q1, #136]! |
| ; CHECK-NEXT: vstrw.32 q1, [r0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %bases = load <2 x i64>, <2 x i64>* %addr, align 8 |
| %p.wide = zext i16 %p to i32 |
| %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide) |
| %bases.next = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %bases, i32 136, <2 x i64> %value, <4 x i1> %mask) |
| store <2 x i64> %bases.next, <2 x i64>* %addr, align 8 |
| ret void |
| } |
| |
| ; Scatter-base store with writeback: loads a <2 x i64> through %addr, calls |
| ; the scatter.base.wb intrinsic with immediate 208, and stores the |
| ; intrinsic's <2 x i64> result back through %addr. The assertions expect a |
| ; pre-indexed vstrd.64 (#208 with '!') between a load and a store. |
| define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_s64(<2 x i64>* %addr, <2 x i64> %value) { |
| ; CHECK-LABEL: test_vstrdq_scatter_base_wb_s64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrw.u32 q1, [r0] |
| ; CHECK-NEXT: vstrd.64 q0, [q1, #208]! |
| ; CHECK-NEXT: vstrw.32 q1, [r0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %bases = load <2 x i64>, <2 x i64>* %addr, align 8 |
| %bases.next = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> %bases, i32 208, <2 x i64> %value) |
| store <2 x i64> %bases.next, <2 x i64>* %addr, align 8 |
| ret void |
| } |
| |
| declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64>, i32, <2 x i64>) |
| |
| ; Scatter-base store with writeback: loads a <2 x i64> through %addr, calls |
| ; the scatter.base.wb intrinsic with immediate -168, and stores the |
| ; intrinsic's <2 x i64> result back through %addr. The assertions expect a |
| ; pre-indexed vstrd.64 (#-168 with '!') between a load and a store. |
| define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_u64(<2 x i64>* %addr, <2 x i64> %value) { |
| ; CHECK-LABEL: test_vstrdq_scatter_base_wb_u64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrw.u32 q1, [r0] |
| ; CHECK-NEXT: vstrd.64 q0, [q1, #-168]! |
| ; CHECK-NEXT: vstrw.32 q1, [r0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %bases = load <2 x i64>, <2 x i64>* %addr, align 8 |
| %bases.next = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> %bases, i32 -168, <2 x i64> %value) |
| store <2 x i64> %bases.next, <2 x i64>* %addr, align 8 |
| ret void |
| } |
| |
| ; Predicated scatter-offset store through an i64* base with <2 x i64> offsets |
| ; and values: widens the i16 %p to i32, converts it to a <4 x i1> mask via |
| ; pred.i2v, and calls the scatter.offset.predicated intrinsic with constants |
| ; (64, 0). The assertions expect vmsr/vpst followed by vstrdt.64 [r0, q0]. |
| define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrdq_scatter_offset_p_s64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrdt.64 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %p.wide = zext i16 %p to i32 |
| %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <4 x i1> %mask) |
| ret void |
| } |
| |
| declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64*, <2 x i64>, <2 x i64>, i32, i32, <4 x i1>) |
| |
| ; Predicated scatter-offset store through an i64* base with <2 x i64> offsets |
| ; and values: widens the i16 %p to i32, converts it to a <4 x i1> mask via |
| ; pred.i2v, and calls the scatter.offset.predicated intrinsic with constants |
| ; (64, 0). The assertions expect vmsr/vpst followed by vstrdt.64 [r0, q0]. |
| define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrdq_scatter_offset_p_u64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrdt.64 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %p.wide = zext i16 %p to i32 |
| %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <4 x i1> %mask) |
| ret void |
| } |
| |
| ; Unpredicated scatter-offset store through an i64* base with <2 x i64> |
| ; offsets and values; the intrinsic is called with constants (64, 0). The |
| ; assertions expect a single vstrd.64 with a [r0, q0] operand. |
| define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value) { |
| ; CHECK-LABEL: test_vstrdq_scatter_offset_s64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrd.64 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0) |
| ret void |
| } |
| |
| declare void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64*, <2 x i64>, <2 x i64>, i32, i32) |
| |
| ; Unpredicated scatter-offset store through an i64* base with <2 x i64> |
| ; offsets and values; the intrinsic is called with constants (64, 0). The |
| ; assertions expect a single vstrd.64 with a [r0, q0] operand. |
| define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value) { |
| ; CHECK-LABEL: test_vstrdq_scatter_offset_u64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrd.64 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0) |
| ret void |
| } |
| |
| ; Predicated shifted scatter-offset store through an i64* base: widens the |
| ; i16 %p to i32, converts it to a <4 x i1> mask via pred.i2v, and calls the |
| ; scatter.offset.predicated intrinsic with constants (64, 3). The assertions |
| ; expect vmsr/vpst followed by vstrdt.64 with a "uxtw #3" offset operand. |
| define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_p_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_p_s64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrdt.64 q1, [r0, q0, uxtw #3] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %p.wide = zext i16 %p to i32 |
| %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3, <4 x i1> %mask) |
| ret void |
| } |
| |
| ; Predicated shifted scatter-offset store through an i64* base: widens the |
| ; i16 %p to i32, converts it to a <4 x i1> mask via pred.i2v, and calls the |
| ; scatter.offset.predicated intrinsic with constants (64, 3). The assertions |
| ; expect vmsr/vpst followed by vstrdt.64 with a "uxtw #3" offset operand. |
| define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_p_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_p_u64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrdt.64 q1, [r0, q0, uxtw #3] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %p.wide = zext i16 %p to i32 |
| %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3, <4 x i1> %mask) |
| ret void |
| } |
| |
| ; Unpredicated shifted scatter-offset store through an i64* base with |
| ; <2 x i64> offsets and values; the intrinsic is called with constants |
| ; (64, 3). The assertions expect a single vstrd.64 with "uxtw #3". |
| define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value) { |
| ; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_s64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrd.64 q1, [r0, q0, uxtw #3] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3) |
| ret void |
| } |
| |
| ; Unpredicated shifted scatter-offset store through an i64* base with |
| ; <2 x i64> offsets and values; the intrinsic is called with constants |
| ; (64, 3). The assertions expect a single vstrd.64 with "uxtw #3". |
| define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value) { |
| ; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_u64: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrd.64 q1, [r0, q0, uxtw #3] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3) |
| ret void |
| } |
| |
| ; Unpredicated scatter-offset store of <8 x half> %value through a half* base |
| ; with <8 x i16> offsets; the intrinsic is called with constants (16, 0). |
| ; The assertions expect a single vstrh.16 with a [r0, q0] operand. |
| define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_f16(half* %base, <8 x i16> %offset, <8 x half> %value) { |
| ; CHECK-LABEL: test_vstrhq_scatter_offset_f16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrh.16 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 0) |
| ret void |
| } |
| |
| declare void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half*, <8 x i16>, <8 x half>, i32, i32) |
| |
| ; Predicated scatter-offset store of <8 x half> %value through a half* base: |
| ; widens the i16 %p to i32, converts it to an <8 x i1> mask via pred.i2v, and |
| ; calls the scatter.offset.predicated intrinsic with constants (16, 0). The |
| ; assertions expect vmsr/vpst followed by vstrht.16 [r0, q0]. |
| define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_f16(half* %base, <8 x i16> %offset, <8 x half> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrhq_scatter_offset_p_f16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrht.16 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %p.wide = zext i16 %p to i32 |
| %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 0, <8 x i1> %mask) |
| ret void |
| } |
| |
| declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half*, <8 x i16>, <8 x half>, i32, i32, <8 x i1>) |
| |
| ; Predicated scatter-offset store through an i16* base with <8 x i16> offsets |
| ; and values: widens the i16 %p to i32, converts it to an <8 x i1> mask via |
| ; pred.i2v, and calls the scatter.offset.predicated intrinsic with constants |
| ; (16, 0). The assertions expect vmsr/vpst followed by vstrht.16 [r0, q0]. |
| define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrhq_scatter_offset_p_s16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrht.16 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %p.wide = zext i16 %p to i32 |
| %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0, <8 x i1> %mask) |
| ret void |
| } |
| |
| declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16*, <8 x i16>, <8 x i16>, i32, i32, <8 x i1>) |
| |
| ; Predicated scatter-offset store through an i16* base with <4 x i32> offsets |
| ; and values: widens the i16 %p to i32, converts it to a <4 x i1> mask via |
| ; pred.i2v, and calls the scatter.offset.predicated intrinsic with constants |
| ; (16, 0). The assertions expect vmsr/vpst followed by vstrht.32 [r0, q0]. |
| define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrhq_scatter_offset_p_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrht.32 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %p.wide = zext i16 %p to i32 |
| %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0, <4 x i1> %mask) |
| ret void |
| } |
| |
| declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16*, <4 x i32>, <4 x i32>, i32, i32, <4 x i1>) |
| |
| ; Unsigned-named counterpart of the predicated <8 x i16> offset scatter test. |
| ; It calls the same predicated intrinsic with `i32 16, i32 0` and expects |
| ; vmsr/vpst followed by a single `vstrht.16 q1, [r0, q0]`. |
| define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrhq_scatter_offset_p_u16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrht.16 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| ; Widen the i16 predicate to i32 and convert it into an <8 x i1> mask. |
| %0 = zext i16 %p to i32 |
| %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0, <8 x i1> %1) |
| ret void |
| } |
| |
| ; Unsigned-named counterpart of the predicated <4 x i32> offset scatter test |
| ; through an i16* base. It calls the same predicated intrinsic with |
| ; `i32 16, i32 0` and expects vmsr/vpst followed by a single |
| ; `vstrht.32 q1, [r0, q0]`. |
| define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrhq_scatter_offset_p_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrht.32 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| ; Widen the i16 predicate to i32 and convert it into a <4 x i1> mask. |
| %0 = zext i16 %p to i32 |
| %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0, <4 x i1> %1) |
| ret void |
| } |
| |
| ; Unpredicated scatter store of <8 x i16> %value via the offset intrinsic, |
| ; called with trailing operands `i32 16, i32 0`. Expected output is a single |
| ; `vstrh.16 q1, [r0, q0]` before the return. |
| define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value) { |
| ; CHECK-LABEL: test_vstrhq_scatter_offset_s16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrh.16 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0) |
| ret void |
| } |
| |
| declare void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16*, <8 x i16>, <8 x i16>, i32, i32) |
| |
| ; Unpredicated scatter store with <4 x i32> offsets and values through an |
| ; i16* base, called with trailing operands `i32 16, i32 0`. Expected output |
| ; is a single `vstrh.32 q1, [r0, q0]` before the return. |
| define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value) { |
| ; CHECK-LABEL: test_vstrhq_scatter_offset_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrh.32 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0) |
| ret void |
| } |
| |
| declare void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16*, <4 x i32>, <4 x i32>, i32, i32) |
| |
| ; Unsigned-named counterpart of the unpredicated <8 x i16> offset scatter |
| ; test. It calls the same intrinsic with `i32 16, i32 0` and expects a single |
| ; `vstrh.16 q1, [r0, q0]`. |
| define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value) { |
| ; CHECK-LABEL: test_vstrhq_scatter_offset_u16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrh.16 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0) |
| ret void |
| } |
| |
| ; Unsigned-named counterpart of the unpredicated <4 x i32> offset scatter |
| ; test through an i16* base. It calls the same intrinsic with |
| ; `i32 16, i32 0` and expects a single `vstrh.32 q1, [r0, q0]`. |
| define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value) { |
| ; CHECK-LABEL: test_vstrhq_scatter_offset_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrh.32 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0) |
| ret void |
| } |
| |
| ; Unpredicated scatter store of <8 x half> %value via the offset intrinsic |
| ; with a final operand of `i32 1`, which pairs with the `uxtw #1` in the |
| ; expected address operand: `vstrh.16 q1, [r0, q0, uxtw #1]`. |
| define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_f16(half* %base, <8 x i16> %offset, <8 x half> %value) { |
| ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_f16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrh.16 q1, [r0, q0, uxtw #1] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 1) |
| ret void |
| } |
| |
| ; Predicated scatter store of <8 x half> %value via the offset intrinsic with |
| ; shift operand `i32 1`. Expected output: vmsr/vpst followed by a single |
| ; `vstrht.16 q1, [r0, q0, uxtw #1]`. |
| define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_f16(half* %base, <8 x i16> %offset, <8 x half> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_f16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrht.16 q1, [r0, q0, uxtw #1] |
| ; CHECK-NEXT: bx lr |
| entry: |
| ; Widen the i16 predicate to i32 and convert it into an <8 x i1> mask. |
| %0 = zext i16 %p to i32 |
| %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 1, <8 x i1> %1) |
| ret void |
| } |
| |
| ; Predicated scatter store of <8 x i16> %value via the offset intrinsic with |
| ; shift operand `i32 1`. Expected output: vmsr/vpst followed by a single |
| ; `vstrht.16 q1, [r0, q0, uxtw #1]`. |
| define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_s16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrht.16 q1, [r0, q0, uxtw #1] |
| ; CHECK-NEXT: bx lr |
| entry: |
| ; Widen the i16 predicate to i32 and convert it into an <8 x i1> mask. |
| %0 = zext i16 %p to i32 |
| %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1, <8 x i1> %1) |
| ret void |
| } |
| |
| ; Predicated scatter store with <4 x i32> offsets and values through an i16* |
| ; base, with shift operand `i32 1`. Expected output: vmsr/vpst followed by a |
| ; single `vstrht.32 q1, [r0, q0, uxtw #1]`. |
| define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrht.32 q1, [r0, q0, uxtw #1] |
| ; CHECK-NEXT: bx lr |
| entry: |
| ; Widen the i16 predicate to i32 and convert it into a <4 x i1> mask. |
| %0 = zext i16 %p to i32 |
| %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1, <4 x i1> %1) |
| ret void |
| } |
| |
| ; Unsigned-named counterpart of the predicated, shifted <8 x i16> scatter |
| ; test. It calls the same predicated intrinsic with `i32 16, i32 1` and |
| ; expects vmsr/vpst followed by `vstrht.16 q1, [r0, q0, uxtw #1]`. |
| define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_u16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrht.16 q1, [r0, q0, uxtw #1] |
| ; CHECK-NEXT: bx lr |
| entry: |
| ; Widen the i16 predicate to i32 and convert it into an <8 x i1> mask. |
| %0 = zext i16 %p to i32 |
| %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1, <8 x i1> %1) |
| ret void |
| } |
| |
| ; Unsigned-named counterpart of the predicated, shifted <4 x i32> scatter |
| ; test through an i16* base. It calls the same predicated intrinsic with |
| ; `i32 16, i32 1` and expects vmsr/vpst followed by |
| ; `vstrht.32 q1, [r0, q0, uxtw #1]`. |
| define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrht.32 q1, [r0, q0, uxtw #1] |
| ; CHECK-NEXT: bx lr |
| entry: |
| ; Widen the i16 predicate to i32 and convert it into a <4 x i1> mask. |
| %0 = zext i16 %p to i32 |
| %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1, <4 x i1> %1) |
| ret void |
| } |
| |
| ; Unpredicated scatter store of <8 x i16> %value via the offset intrinsic |
| ; with shift operand `i32 1`. Expected output is a single |
| ; `vstrh.16 q1, [r0, q0, uxtw #1]`. |
| define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value) { |
| ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_s16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrh.16 q1, [r0, q0, uxtw #1] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1) |
| ret void |
| } |
| |
| ; Unpredicated scatter store with <4 x i32> offsets and values through an |
| ; i16* base, with shift operand `i32 1`. Expected output is a single |
| ; `vstrh.32 q1, [r0, q0, uxtw #1]`. |
| define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value) { |
| ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrh.32 q1, [r0, q0, uxtw #1] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1) |
| ret void |
| } |
| |
| ; Unsigned-named counterpart of the unpredicated, shifted <8 x i16> scatter |
| ; test. It calls the same intrinsic with `i32 16, i32 1` and expects a single |
| ; `vstrh.16 q1, [r0, q0, uxtw #1]`. |
| define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value) { |
| ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_u16: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrh.16 q1, [r0, q0, uxtw #1] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1) |
| ret void |
| } |
| |
| ; Unsigned-named counterpart of the unpredicated, shifted <4 x i32> scatter |
| ; test through an i16* base. It calls the same intrinsic with |
| ; `i32 16, i32 1` and expects a single `vstrh.32 q1, [r0, q0, uxtw #1]`. |
| define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value) { |
| ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrh.32 q1, [r0, q0, uxtw #1] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1) |
| ret void |
| } |
| |
| ; Unpredicated scatter store of <4 x float> %value via the vector-base |
| ; intrinsic: the <4 x i32> %addr operand plus immediate 380. Expected output |
| ; is a single `vstrw.32 q1, [q0, #380]`. |
| define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_f32(<4 x i32> %addr, <4 x float> %value) { |
| ; CHECK-LABEL: test_vstrwq_scatter_base_f32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrw.32 q1, [q0, #380] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32> %addr, i32 380, <4 x float> %value) |
| ret void |
| } |
| |
| declare void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32>, i32, <4 x float>) |
| |
| ; Predicated scatter store of <4 x float> %value via the vector-base |
| ; intrinsic with a negative immediate (-400). Expected output: the predicate |
| ; is moved from r0 into p0, then a VPT block holding a single |
| ; `vstrwt.32 q1, [q0, #-400]`. |
| define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_f32(<4 x i32> %addr, <4 x float> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrwq_scatter_base_p_f32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrwt.32 q1, [q0, #-400] |
| ; CHECK-NEXT: bx lr |
| entry: |
| ; Widen the i16 predicate to i32 and convert it into a <4 x i1> mask. |
| %0 = zext i16 %p to i32 |
| %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) |
| call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4f32.v4i1(<4 x i32> %addr, i32 -400, <4 x float> %value, <4 x i1> %1) |
| ret void |
| } |
| |
| declare void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4f32.v4i1(<4 x i32>, i32, <4 x float>, <4 x i1>) |
| |
| ; Predicated scatter store of <4 x i32> %value via the vector-base intrinsic |
| ; with immediate 48. Expected output: vmsr from r0, vpst, then a single |
| ; `vstrwt.32 q1, [q0, #48]`. |
| define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_s32(<4 x i32> %addr, <4 x i32> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrwq_scatter_base_p_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrwt.32 q1, [q0, #48] |
| ; CHECK-NEXT: bx lr |
| entry: |
| ; Widen the i16 predicate to i32 and convert it into a <4 x i1> mask. |
| %0 = zext i16 %p to i32 |
| %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) |
| call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 48, <4 x i32> %value, <4 x i1> %1) |
| ret void |
| } |
| |
| declare void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i32>, <4 x i1>) |
| |
| ; Unsigned-named predicated vector-base scatter test for <4 x i32>, using a |
| ; negative immediate (-376). Expected output: vmsr from r0, vpst, then a |
| ; single `vstrwt.32 q1, [q0, #-376]`. |
| define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_u32(<4 x i32> %addr, <4 x i32> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrwq_scatter_base_p_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r0 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrwt.32 q1, [q0, #-376] |
| ; CHECK-NEXT: bx lr |
| entry: |
| ; Widen the i16 predicate to i32 and convert it into a <4 x i1> mask. |
| %0 = zext i16 %p to i32 |
| %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) |
| call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 -376, <4 x i32> %value, <4 x i1> %1) |
| ret void |
| } |
| |
| ; Unpredicated scatter store of <4 x i32> %value via the vector-base |
| ; intrinsic with immediate 156. Expected output is a single |
| ; `vstrw.32 q1, [q0, #156]`. |
| define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_s32(<4 x i32> %addr, <4 x i32> %value) { |
| ; CHECK-LABEL: test_vstrwq_scatter_base_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrw.32 q1, [q0, #156] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> %addr, i32 156, <4 x i32> %value) |
| ret void |
| } |
| |
| declare void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32>, i32, <4 x i32>) |
| |
| ; Unsigned-named unpredicated vector-base scatter test for <4 x i32>, using |
| ; immediate 212. Expected output is a single `vstrw.32 q1, [q0, #212]`. |
| define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_u32(<4 x i32> %addr, <4 x i32> %value) { |
| ; CHECK-LABEL: test_vstrwq_scatter_base_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrw.32 q1, [q0, #212] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> %addr, i32 212, <4 x i32> %value) |
| ret void |
| } |
| |
| ; Write-back form of the vector-base scatter store for <4 x float>, with |
| ; immediate -412. The base vector is loaded from %addr, passed to the wb |
| ; intrinsic, and the intrinsic's <4 x i32> result is stored back to %addr. |
| ; Expected output: vldrw of the base into q1, `vstrw.32 q0, [q1, #-412]!`, |
| ; then a vstrw of q1 back to [r0]. |
| define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_f32(<4 x i32>* %addr, <4 x float> %value) { |
| ; CHECK-LABEL: test_vstrwq_scatter_base_wb_f32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrw.u32 q1, [r0] |
| ; CHECK-NEXT: vstrw.32 q0, [q1, #-412]! |
| ; CHECK-NEXT: vstrw.32 q1, [r0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = load <4 x i32>, <4 x i32>* %addr, align 8 |
| ; %1 is presumably the updated base vector (it maps to the `!` form above). |
| %1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4f32(<4 x i32> %0, i32 -412, <4 x float> %value) |
| store <4 x i32> %1, <4 x i32>* %addr, align 8 |
| ret void |
| } |
| |
| declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4f32(<4 x i32>, i32, <4 x float>) |
| |
| ; Predicated write-back form of the vector-base scatter store for |
| ; <4 x float>, with immediate 236. The base vector is loaded from %addr and |
| ; the intrinsic's <4 x i32> result is stored back to %addr. Expected output: |
| ; vldrw of the base into q1, vmsr from r1, vpst, |
| ; `vstrwt.32 q0, [q1, #236]!`, then a vstrw of q1 back to [r0]. |
| define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_f32(<4 x i32>* %addr, <4 x float> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_f32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrw.u32 q1, [r0] |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrwt.32 q0, [q1, #236]! |
| ; CHECK-NEXT: vstrw.32 q1, [r0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = load <4 x i32>, <4 x i32>* %addr, align 8 |
| ; Widen the i16 predicate to i32 and convert it into a <4 x i1> mask. |
| %1 = zext i16 %p to i32 |
| %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) |
| %3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4f32.v4i1(<4 x i32> %0, i32 236, <4 x float> %value, <4 x i1> %2) |
| store <4 x i32> %3, <4 x i32>* %addr, align 8 |
| ret void |
| } |
| |
| declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4f32.v4i1(<4 x i32>, i32, <4 x float>, <4 x i1>) |
| |
| ; Predicated write-back form of the vector-base scatter store for <4 x i32>, |
| ; with immediate 328. The base vector is loaded from %addr and the |
| ; intrinsic's <4 x i32> result is stored back to %addr. Expected output: |
| ; vldrw of the base into q1, vmsr from r1, vpst, |
| ; `vstrwt.32 q0, [q1, #328]!`, then a vstrw of q1 back to [r0]. |
| define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_s32(<4 x i32>* %addr, <4 x i32> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrw.u32 q1, [r0] |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrwt.32 q0, [q1, #328]! |
| ; CHECK-NEXT: vstrw.32 q1, [r0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = load <4 x i32>, <4 x i32>* %addr, align 8 |
| ; Widen the i16 predicate to i32 and convert it into a <4 x i1> mask. |
| %1 = zext i16 %p to i32 |
| %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) |
| %3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 328, <4 x i32> %value, <4 x i1> %2) |
| store <4 x i32> %3, <4 x i32>* %addr, align 8 |
| ret void |
| } |
| |
| declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i32>, <4 x i1>) |
| |
| ; Unsigned-named predicated write-back vector-base scatter test for |
| ; <4 x i32>, with immediate 412. The base vector is loaded from %addr and the |
| ; intrinsic's <4 x i32> result is stored back to %addr. Expected output: |
| ; vldrw of the base into q1, vmsr from r1, vpst, |
| ; `vstrwt.32 q0, [q1, #412]!`, then a vstrw of q1 back to [r0]. |
| define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_u32(<4 x i32>* %addr, <4 x i32> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrw.u32 q1, [r0] |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrwt.32 q0, [q1, #412]! |
| ; CHECK-NEXT: vstrw.32 q1, [r0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = load <4 x i32>, <4 x i32>* %addr, align 8 |
| ; Widen the i16 predicate to i32 and convert it into a <4 x i1> mask. |
| %1 = zext i16 %p to i32 |
| %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) |
| %3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 412, <4 x i32> %value, <4 x i1> %2) |
| store <4 x i32> %3, <4 x i32>* %addr, align 8 |
| ret void |
| } |
| |
| ; Write-back form of the vector-base scatter store for <4 x i32>, with |
| ; immediate -152. The base vector is loaded from %addr, passed to the wb |
| ; intrinsic, and the intrinsic's <4 x i32> result is stored back to %addr. |
| ; Expected output: vldrw of the base into q1, `vstrw.32 q0, [q1, #-152]!`, |
| ; then a vstrw of q1 back to [r0]. |
| define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_s32(<4 x i32>* %addr, <4 x i32> %value) { |
| ; CHECK-LABEL: test_vstrwq_scatter_base_wb_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrw.u32 q1, [r0] |
| ; CHECK-NEXT: vstrw.32 q0, [q1, #-152]! |
| ; CHECK-NEXT: vstrw.32 q1, [r0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = load <4 x i32>, <4 x i32>* %addr, align 8 |
| ; %1 is presumably the updated base vector (it maps to the `!` form above). |
| %1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> %0, i32 -152, <4 x i32> %value) |
| store <4 x i32> %1, <4 x i32>* %addr, align 8 |
| ret void |
| } |
| |
| declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32>, i32, <4 x i32>) |
| |
| ; Unsigned-named write-back vector-base scatter test for <4 x i32>, with |
| ; immediate 64. The base vector is loaded from %addr, passed to the wb |
| ; intrinsic, and the intrinsic's <4 x i32> result is stored back to %addr. |
| ; Expected output: vldrw of the base into q1, `vstrw.32 q0, [q1, #64]!`, |
| ; then a vstrw of q1 back to [r0]. |
| define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_u32(<4 x i32>* %addr, <4 x i32> %value) { |
| ; CHECK-LABEL: test_vstrwq_scatter_base_wb_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vldrw.u32 q1, [r0] |
| ; CHECK-NEXT: vstrw.32 q0, [q1, #64]! |
| ; CHECK-NEXT: vstrw.32 q1, [r0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| %0 = load <4 x i32>, <4 x i32>* %addr, align 8 |
| ; %1 is presumably the updated base vector (it maps to the `!` form above). |
| %1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> %0, i32 64, <4 x i32> %value) |
| store <4 x i32> %1, <4 x i32>* %addr, align 8 |
| ret void |
| } |
| |
| ; Unpredicated scatter store of <4 x float> %value via the offset intrinsic, |
| ; called with trailing operands `i32 32, i32 0`. Expected output is a single |
| ; `vstrw.32 q1, [r0, q0]`. |
| define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_f32(float* %base, <4 x i32> %offset, <4 x float> %value) { |
| ; CHECK-LABEL: test_vstrwq_scatter_offset_f32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrw.32 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 0) |
| ret void |
| } |
| |
| declare void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float*, <4 x i32>, <4 x float>, i32, i32) |
| |
| ; Predicated scatter store of <4 x float> %value via the offset intrinsic, |
| ; called with trailing operands `i32 32, i32 0`. Expected output: vmsr from |
| ; r1, vpst, then a single `vstrwt.32 q1, [r0, q0]`. |
| define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_f32(float* %base, <4 x i32> %offset, <4 x float> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrwq_scatter_offset_p_f32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrwt.32 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| ; Widen the i16 predicate to i32 and convert it into a <4 x i1> mask. |
| %0 = zext i16 %p to i32 |
| %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 0, <4 x i1> %1) |
| ret void |
| } |
| |
| declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float*, <4 x i32>, <4 x float>, i32, i32, <4 x i1>) |
| |
| ; Predicated scatter store of <4 x i32> %value via the offset intrinsic, |
| ; called with trailing operands `i32 32, i32 0`. Expected output: vmsr from |
| ; r1, vpst, then a single `vstrwt.32 q1, [r0, q0]`. |
| define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrwq_scatter_offset_p_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrwt.32 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| ; Widen the i16 predicate to i32 and convert it into a <4 x i1> mask. |
| %0 = zext i16 %p to i32 |
| %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0, <4 x i1> %1) |
| ret void |
| } |
| |
| declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32*, <4 x i32>, <4 x i32>, i32, i32, <4 x i1>) |
| |
| ; Unsigned-named counterpart of the predicated <4 x i32> word offset scatter |
| ; test. It calls the same predicated intrinsic with `i32 32, i32 0` and |
| ; expects vmsr/vpst followed by a single `vstrwt.32 q1, [r0, q0]`. |
| define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrwq_scatter_offset_p_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrwt.32 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| ; Widen the i16 predicate to i32 and convert it into a <4 x i1> mask. |
| %0 = zext i16 %p to i32 |
| %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0, <4 x i1> %1) |
| ret void |
| } |
| |
| ; Unpredicated scatter store of <4 x i32> %value via the offset intrinsic, |
| ; called with trailing operands `i32 32, i32 0`. Expected output is a single |
| ; `vstrw.32 q1, [r0, q0]`. |
| define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value) { |
| ; CHECK-LABEL: test_vstrwq_scatter_offset_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrw.32 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0) |
| ret void |
| } |
| |
| declare void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32*, <4 x i32>, <4 x i32>, i32, i32) |
| |
| ; Unsigned-named counterpart of the unpredicated <4 x i32> word offset |
| ; scatter test. It calls the same intrinsic with `i32 32, i32 0` and expects |
| ; a single `vstrw.32 q1, [r0, q0]`. |
| define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value) { |
| ; CHECK-LABEL: test_vstrwq_scatter_offset_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrw.32 q1, [r0, q0] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0) |
| ret void |
| } |
| |
| ; Unpredicated scatter store of <4 x float> %value via the offset intrinsic |
| ; with a final operand of `i32 2`, which pairs with the `uxtw #2` in the |
| ; expected address operand: `vstrw.32 q1, [r0, q0, uxtw #2]`. |
| define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_f32(float* %base, <4 x i32> %offset, <4 x float> %value) { |
| ; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_f32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrw.32 q1, [r0, q0, uxtw #2] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 2) |
| ret void |
| } |
| |
| ; Predicated scatter store of <4 x float> %value via the offset intrinsic |
| ; with shift operand `i32 2`. Expected output: vmsr from r1, vpst, then a |
| ; single `vstrwt.32 q1, [r0, q0, uxtw #2]`. |
| define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_f32(float* %base, <4 x i32> %offset, <4 x float> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_f32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrwt.32 q1, [r0, q0, uxtw #2] |
| ; CHECK-NEXT: bx lr |
| entry: |
| ; Widen the i16 predicate to i32 and convert it into a <4 x i1> mask. |
| %0 = zext i16 %p to i32 |
| %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 2, <4 x i1> %1) |
| ret void |
| } |
| |
| ; Predicated scatter store of <4 x i32> %value via the offset intrinsic with |
| ; shift operand `i32 2`. Expected output: vmsr from r1, vpst, then a single |
| ; `vstrwt.32 q1, [r0, q0, uxtw #2]`. |
| define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrwt.32 q1, [r0, q0, uxtw #2] |
| ; CHECK-NEXT: bx lr |
| entry: |
| ; Widen the i16 predicate to i32 and convert it into a <4 x i1> mask. |
| %0 = zext i16 %p to i32 |
| %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2, <4 x i1> %1) |
| ret void |
| } |
| |
| ; Unsigned-named counterpart of the predicated, shifted <4 x i32> word |
| ; scatter test. It calls the same predicated intrinsic with `i32 32, i32 2` |
| ; and expects vmsr/vpst followed by `vstrwt.32 q1, [r0, q0, uxtw #2]`. |
| define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { |
| ; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vmsr p0, r1 |
| ; CHECK-NEXT: vpst |
| ; CHECK-NEXT: vstrwt.32 q1, [r0, q0, uxtw #2] |
| ; CHECK-NEXT: bx lr |
| entry: |
| ; Widen the i16 predicate to i32 and convert it into a <4 x i1> mask. |
| %0 = zext i16 %p to i32 |
| %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) |
| call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2, <4 x i1> %1) |
| ret void |
| } |
| |
| ; Unpredicated scatter store of <4 x i32> %value via the offset intrinsic |
| ; with shift operand `i32 2`. Expected output is a single |
| ; `vstrw.32 q1, [r0, q0, uxtw #2]`. |
| define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value) { |
| ; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_s32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrw.32 q1, [r0, q0, uxtw #2] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2) |
| ret void |
| } |
| |
| ; Unsigned-named counterpart of the unpredicated, shifted <4 x i32> word |
| ; scatter test. It calls the same intrinsic with `i32 32, i32 2` and expects |
| ; a single `vstrw.32 q1, [r0, q0, uxtw #2]`. |
| define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value) { |
| ; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_u32: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: vstrw.32 q1, [r0, q0, uxtw #2] |
| ; CHECK-NEXT: bx lr |
| entry: |
| call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2) |
| ret void |
| } |