; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -o - %s | FileCheck %s
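
; Test that calls to MVE and CDE intrinsics which operate on 64-bit vector
; elements but use the legacy <4 x i1> predicate type (declared below) are
; auto-upgraded when the IR is parsed: each call is retargeted to the
; <2 x i1> form of the intrinsic, with llvm.arm.mve.pred.v2i/i2v conversion
; pairs inserted to adapt the surrounding <4 x i1> values.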

declare <4 x i1> @llvm.arm.mve.vctp64(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
declare i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1>)
declare <2 x i64> @llvm.arm.mve.mull.int.predicated.v2i64.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, i32, <4 x i1>, <2 x i64>)
declare <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <2 x i64>)

declare <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>)
declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>)
declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1(ptr, <2 x i64>, i32, i32, i32, <4 x i1>)
declare void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>)
declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>)
declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1(ptr, <2 x i64>, <2 x i64>, i32, i32, <4 x i1>)

declare <2 x i64> @llvm.arm.cde.vcx1q.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, i32 immarg, <4 x i1>)
declare <2 x i64> @llvm.arm.cde.vcx1qa.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, i32 immarg, <4 x i1>)
declare <2 x i64> @llvm.arm.cde.vcx2q.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, <16 x i8>, i32 immarg, <4 x i1>)
declare <2 x i64> @llvm.arm.cde.vcx2qa.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, <16 x i8>, i32 immarg, <4 x i1>)
declare <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, <16 x i8>, <16 x i8>, i32 immarg, <4 x i1>)
declare <2 x i64> @llvm.arm.cde.vcx3qa.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, <16 x i8>, <16 x i8>, i32 immarg, <4 x i1>)

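; vctp64 produces one predicate lane per 64-bit element, so the upgraded
; call returns <2 x i1>; a pred.v2i/pred.i2v round trip reconnects it to
; the remaining <4 x i1> uses.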
define arm_aapcs_vfpcc zeroext i16 @test_vctp64q(i32 %a) {
; CHECK-LABEL: @test_vctp64q(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call <2 x i1> @llvm.arm.mve.vctp64(i32 [[A:%.*]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1> [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; CHECK-NEXT:    ret i16 [[TMP4]]
;
entry:
  %0 = call <4 x i1> @llvm.arm.mve.vctp64(i32 %a)
  %1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %0)
  %2 = trunc i32 %1 to i16
  ret i16 %2
}

define arm_aapcs_vfpcc zeroext i16 @test_vctp64q_m(i32 %a, i16 zeroext %p) {
; CHECK-LABEL: @test_vctp64q_m(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i1> @llvm.arm.mve.vctp64(i32 [[A:%.*]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i1> [[TMP1]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; CHECK-NEXT:    ret i16 [[TMP7]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i1> @llvm.arm.mve.vctp64(i32 %a)
  %3 = and <4 x i1> %1, %2
  %4 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %3)
  %5 = trunc i32 %4 to i16
  ret i16 %5
}

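; For the predicated multiplies producing <2 x i64> results, only the
; predicate operand needs converting before the call is retargeted to the
; v2i1 variant of the intrinsic.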
define arm_aapcs_vfpcc <2 x i64> @test_vmullbq_int_m_s32(<2 x i64> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: @test_vmullbq_int_m_s32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.arm.mve.mull.int.predicated.v2i64.v4i32.v2i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, i32 0, <2 x i1> [[TMP3]], <2 x i64> [[INACTIVE:%.*]])
; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <2 x i64> @llvm.arm.mve.mull.int.predicated.v2i64.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, i32 0, <4 x i1> %1, <2 x i64> %inactive)
  ret <2 x i64> %2
}

define arm_aapcs_vfpcc <2 x i64> @test_vqdmullbq_m_s32(<2 x i64> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: @test_vqdmullbq_m_s32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v2i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <2 x i1> [[TMP3]], <2 x i64> [[INACTIVE:%.*]])
; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, <4 x i1> %1, <2 x i64> %inactive)
  ret <2 x i64> %2
}

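; 64-bit gathers and scatters, including the writeback variants that also
; return an updated base vector, get the same predicate conversion.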
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_s64(<2 x i64> %addr, i16 zeroext %p) {
; CHECK-LABEL: @test_vldrdq_gather_base_z_s64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v2i1(<2 x i64> [[ADDR:%.*]], i32 888, <2 x i1> [[TMP3]])
; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 888, <4 x i1> %1)
  ret <2 x i64> %2
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_s64(ptr %addr, i16 zeroext %p) {
; CHECK-LABEL: @test_vldrdq_gather_base_wb_z_s64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr [[ADDR:%.*]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> [[TMP0]], i32 664, <2 x i1> [[TMP4]])
; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP5]], 1
; CHECK-NEXT:    store <2 x i64> [[TMP6]], ptr [[ADDR]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP5]], 0
; CHECK-NEXT:    ret <2 x i64> [[TMP7]]
;
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 664, <4 x i1> %2)
  %4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1
  store <2 x i64> %4, ptr %addr, align 8
  %5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0
  ret <2 x i64> %5
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_s64(ptr %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: @test_vldrdq_gather_offset_z_s64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v2i1(ptr [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 0, i32 0, <2 x i1> [[TMP3]])
; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1(ptr %base, <2 x i64> %offset, i32 64, i32 0, i32 0, <4 x i1> %1)
  ret <2 x i64> %2
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_s64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: @test_vstrdq_scatter_base_p_s64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v2i1(<2 x i64> [[ADDR:%.*]], i32 888, <2 x i64> [[VALUE:%.*]], <2 x i1> [[TMP3]])
; CHECK-NEXT:    ret void
;
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 888, <2 x i64> %value, <4 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_s64(ptr %addr, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: @test_vstrdq_scatter_base_wb_p_s64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr [[ADDR:%.*]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> [[TMP0]], i32 248, <2 x i64> [[VALUE:%.*]], <2 x i1> [[TMP4]])
; CHECK-NEXT:    store <2 x i64> [[TMP5]], ptr [[ADDR]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 248, <2 x i64> %value, <4 x i1> %2)
  store <2 x i64> %3, ptr %addr, align 8
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_s64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: @test_vstrdq_scatter_offset_p_s64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v2i1(ptr [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 0, <2 x i1> [[TMP3]])
; CHECK-NEXT:    ret void
;
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <4 x i1> %1)
  ret void
}

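; The CDE intrinsics acting on <2 x i64> vectors follow the same pattern:
; the predicate operand becomes <2 x i1>, while the coprocessor and
; immediate operands are passed through unchanged.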
define <8 x i16> @test_vcx1q_m(<2 x i64> %inactive, i16 zeroext %p) {
; CHECK-LABEL: @test_vcx1q_m(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.arm.cde.vcx1q.predicated.v2i64.v2i1(i32 0, <2 x i64> [[INACTIVE:%.*]], i32 1111, <2 x i1> [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <8 x i16>
; CHECK-NEXT:    ret <8 x i16> [[TMP5]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <2 x i64> @llvm.arm.cde.vcx1q.predicated.v2i64.v4i1(i32 0, <2 x i64> %inactive, i32 1111, <4 x i1> %1)
  %3 = bitcast <2 x i64> %2 to <8 x i16>
  ret <8 x i16> %3
}

define <16 x i8> @test_vcx1qa_m(<2 x i64> %acc, i16 zeroext %p) {
; CHECK-LABEL: @test_vcx1qa_m(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.arm.cde.vcx1qa.predicated.v2i64.v2i1(i32 0, <2 x i64> [[ACC:%.*]], i32 1112, <2 x i1> [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8>
; CHECK-NEXT:    ret <16 x i8> [[TMP5]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <2 x i64> @llvm.arm.cde.vcx1qa.predicated.v2i64.v4i1(i32 0, <2 x i64> %acc, i32 1112, <4 x i1> %1)
  %3 = bitcast <2 x i64> %2 to <16 x i8>
  ret <16 x i8> %3
}

define <4 x i32> @test_vcx2q_m(<2 x i64> %inactive, <4 x float> %n, i16 zeroext %p) {
; CHECK-LABEL: @test_vcx2q_m(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[N:%.*]] to <16 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x i64> @llvm.arm.cde.vcx2q.predicated.v2i64.v2i1(i32 0, <2 x i64> [[INACTIVE:%.*]], <16 x i8> [[TMP0]], i32 111, <2 x i1> [[TMP4]])
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[TMP6]]
;
entry:
  %0 = bitcast <4 x float> %n to <16 x i8>
  %1 = zext i16 %p to i32
  %2 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = tail call <2 x i64> @llvm.arm.cde.vcx2q.predicated.v2i64.v4i1(i32 0, <2 x i64> %inactive, <16 x i8> %0, i32 111, <4 x i1> %2)
  %4 = bitcast <2 x i64> %3 to <4 x i32>
  ret <4 x i32> %4
}

define <4 x float> @test_vcx2qa_m(<2 x i64> %acc, <8 x half> %n, i16 zeroext %p) {
; CHECK-LABEL: @test_vcx2qa_m(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[N:%.*]] to <16 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x i64> @llvm.arm.cde.vcx2qa.predicated.v2i64.v2i1(i32 0, <2 x i64> [[ACC:%.*]], <16 x i8> [[TMP0]], i32 112, <2 x i1> [[TMP4]])
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <4 x float>
; CHECK-NEXT:    ret <4 x float> [[TMP6]]
;
entry:
  %0 = bitcast <8 x half> %n to <16 x i8>
  %1 = zext i16 %p to i32
  %2 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = tail call <2 x i64> @llvm.arm.cde.vcx2qa.predicated.v2i64.v4i1(i32 0, <2 x i64> %acc, <16 x i8> %0, i32 112, <4 x i1> %2)
  %4 = bitcast <2 x i64> %3 to <4 x float>
  ret <4 x float> %4
}

define <2 x i64> @test_vcx3q_m(<2 x i64> %inactive, <4 x float> %n, <16 x i8> %m, i16 zeroext %p) {
; CHECK-LABEL: @test_vcx3q_m(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[N:%.*]] to <16 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v2i1(i32 0, <2 x i64> [[INACTIVE:%.*]], <16 x i8> [[TMP0]], <16 x i8> [[M:%.*]], i32 11, <2 x i1> [[TMP4]])
; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
;
entry:
  %0 = bitcast <4 x float> %n to <16 x i8>
  %1 = zext i16 %p to i32
  %2 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = tail call <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v4i1(i32 0, <2 x i64> %inactive, <16 x i8> %0, <16 x i8> %m, i32 11, <4 x i1> %2)
  ret <2 x i64> %3
}

define <8 x half> @test_vcx3qa_m(<2 x i64> %inactive, <8 x half> %n, <4 x i32> %m, i16 zeroext %p) {
; CHECK-LABEL: @test_vcx3qa_m(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[N:%.*]] to <16 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[M:%.*]] to <16 x i8>
; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP4]])
; CHECK-NEXT:    [[TMP6:%.*]] = call <2 x i64> @llvm.arm.cde.vcx3qa.predicated.v2i64.v2i1(i32 0, <2 x i64> [[INACTIVE:%.*]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 12, <2 x i1> [[TMP5]])
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <8 x half>
; CHECK-NEXT:    ret <8 x half> [[TMP7]]
;
entry:
  %0 = bitcast <8 x half> %n to <16 x i8>
  %1 = bitcast <4 x i32> %m to <16 x i8>
  %2 = zext i16 %p to i32
  %3 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)
  %4 = tail call <2 x i64> @llvm.arm.cde.vcx3qa.predicated.v2i64.v4i1(i32 0, <2 x i64> %inactive, <16 x i8> %0, <16 x i8> %1, i32 12, <4 x i1> %3)
  %5 = bitcast <2 x i64> %4 to <8 x half>
  ret <8 x half> %5
}