; llvm/test/CodeGen/Thumb2/cde-vec.ll - llvm-project - Git at Google

 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=thumbv8.1m.main -mattr=+cdecp0 -mattr=+cdecp1 -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

 declare <16 x i8> @llvm.arm.cde.vcx1q(i32 immarg, i32 immarg)
 declare <16 x i8> @llvm.arm.cde.vcx1qa(i32 immarg, <16 x i8>, i32 immarg)
 declare <16 x i8> @llvm.arm.cde.vcx2q(i32 immarg, <16 x i8>, i32 immarg)
 declare <16 x i8> @llvm.arm.cde.vcx2qa(i32 immarg, <16 x i8>, <16 x i8>, i32 immarg)
 declare <16 x i8> @llvm.arm.cde.vcx3q(i32 immarg, <16 x i8>, <16 x i8>, i32 immarg)
 declare <16 x i8> @llvm.arm.cde.vcx3qa(i32 immarg, <16 x i8>, <16 x i8>, <16 x i8>, i32 immarg)

 ; Unpredicated VCX1 with no vector inputs: immediate 1111 on coprocessor p0,
 ; with the <16 x i8> result returned unchanged.
 define arm_aapcs_vfpcc <16 x i8> @test_vcx1q_u8() {
 ; CHECK-LABEL: test_vcx1q_u8:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vcx1 p0, q0, #1111
 ; CHECK-NEXT:    bx lr
 entry:
   %res = call <16 x i8> @llvm.arm.cde.vcx1q(i32 0, i32 1111)
   ret <16 x i8> %res
 }

 ; Accumulating VCX1A on coprocessor p1 with immediate 1112; %acc is already
 ; <16 x i8>, so no casts are involved.
 define arm_aapcs_vfpcc <16 x i8> @test_vcx1qa_1(<16 x i8> %acc) {
 ; CHECK-LABEL: test_vcx1qa_1:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vcx1a p1, q0, #1112
 ; CHECK-NEXT:    bx lr
 entry:
   %res = call <16 x i8> @llvm.arm.cde.vcx1qa(i32 1, <16 x i8> %acc, i32 1112)
   ret <16 x i8> %res
 }

 ; VCX1A on coprocessor p0 with immediate 1113, using a <4 x i32> accumulator
 ; that is bitcast to <16 x i8> for the call and back for the return.
 define arm_aapcs_vfpcc <4 x i32> @test_vcx1qa_2(<4 x i32> %acc) {
 ; CHECK-LABEL: test_vcx1qa_2:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vcx1a p0, q0, #1113
 ; CHECK-NEXT:    bx lr
 entry:
   %acc.bytes = bitcast <4 x i32> %acc to <16 x i8>
   %res.bytes = call <16 x i8> @llvm.arm.cde.vcx1qa(i32 0, <16 x i8> %acc.bytes, i32 1113)
   %res = bitcast <16 x i8> %res.bytes to <4 x i32>
   ret <4 x i32> %res
 }

 ; VCX2 on coprocessor p1 with immediate 111; the <8 x half> input is bitcast
 ; to <16 x i8> and the byte-vector result is returned directly.
 define arm_aapcs_vfpcc <16 x i8> @test_vcx2q_u8(<8 x half> %n) {
 ; CHECK-LABEL: test_vcx2q_u8:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vcx2 p1, q0, q0, #111
 ; CHECK-NEXT:    bx lr
 entry:
   %n.bytes = bitcast <8 x half> %n to <16 x i8>
   %res = call <16 x i8> @llvm.arm.cde.vcx2q(i32 1, <16 x i8> %n.bytes, i32 111)
   ret <16 x i8> %res
 }

 ; VCX2 on coprocessor p1 with immediate 112; the <4 x float> value is bitcast
 ; to <16 x i8> for the call and the result is bitcast back to <4 x float>.
 define arm_aapcs_vfpcc <4 x float> @test_vcx2q(<4 x float> %n) {
 ; CHECK-LABEL: test_vcx2q:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vcx2 p1, q0, q0, #112
 ; CHECK-NEXT:    bx lr
 entry:
   %n.bytes = bitcast <4 x float> %n to <16 x i8>
   %res.bytes = call <16 x i8> @llvm.arm.cde.vcx2q(i32 1, <16 x i8> %n.bytes, i32 112)
   %res = bitcast <16 x i8> %res.bytes to <4 x float>
   ret <4 x float> %res
 }

 ; Accumulating VCX2A on coprocessor p0 with immediate 113; the <4 x float>
 ; accumulator and the <2 x i64> input are both passed as <16 x i8>.
 define arm_aapcs_vfpcc <4 x float> @test_vcx2qa(<4 x float> %acc, <2 x i64> %n) {
 ; CHECK-LABEL: test_vcx2qa:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vcx2a p0, q0, q1, #113
 ; CHECK-NEXT:    bx lr
 entry:
   %acc.bytes = bitcast <4 x float> %acc to <16 x i8>
   %n.bytes = bitcast <2 x i64> %n to <16 x i8>
   %res.bytes = call <16 x i8> @llvm.arm.cde.vcx2qa(i32 0, <16 x i8> %acc.bytes, <16 x i8> %n.bytes, i32 113)
   %res = bitcast <16 x i8> %res.bytes to <4 x float>
   ret <4 x float> %res
 }

 ; VCX3 on coprocessor p0 with immediate 11; the <8 x i16> and <4 x i32> inputs
 ; are bitcast to <16 x i8>, and the byte-vector result is returned directly.
 define arm_aapcs_vfpcc <16 x i8> @test_vcx3q_u8(<8 x i16> %n, <4 x i32> %m) {
 ; CHECK-LABEL: test_vcx3q_u8:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vcx3 p0, q0, q0, q1, #11
 ; CHECK-NEXT:    bx lr
 entry:
   %n.bytes = bitcast <8 x i16> %n to <16 x i8>
   %m.bytes = bitcast <4 x i32> %m to <16 x i8>
   %res = call <16 x i8> @llvm.arm.cde.vcx3q(i32 0, <16 x i8> %n.bytes, <16 x i8> %m.bytes, i32 11)
   ret <16 x i8> %res
 }

 ; VCX3 on coprocessor p1 with immediate 12; the <2 x i64> and <4 x float>
 ; inputs go in as <16 x i8> and the result is bitcast back to <2 x i64>.
 define arm_aapcs_vfpcc <2 x i64> @test_vcx3q(<2 x i64> %n, <4 x float> %m) {
 ; CHECK-LABEL: test_vcx3q:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vcx3 p1, q0, q0, q1, #12
 ; CHECK-NEXT:    bx lr
 entry:
   %n.bytes = bitcast <2 x i64> %n to <16 x i8>
   %m.bytes = bitcast <4 x float> %m to <16 x i8>
   %res.bytes = call <16 x i8> @llvm.arm.cde.vcx3q(i32 1, <16 x i8> %n.bytes, <16 x i8> %m.bytes, i32 12)
   %res = bitcast <16 x i8> %res.bytes to <2 x i64>
   ret <2 x i64> %res
 }

 ; Accumulating VCX3A on coprocessor p1 with immediate 13; %acc is used as-is
 ; while %n and %m are bitcast to <16 x i8>.
 define arm_aapcs_vfpcc <16 x i8> @test_vcx3qa(<16 x i8> %acc, <8 x i16> %n, <4 x float> %m) {
 ; CHECK-LABEL: test_vcx3qa:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vcx3a p1, q0, q1, q2, #13
 ; CHECK-NEXT:    bx lr
 entry:
   %n.bytes = bitcast <8 x i16> %n to <16 x i8>
   %m.bytes = bitcast <4 x float> %m to <16 x i8>
   %res = call <16 x i8> @llvm.arm.cde.vcx3qa(i32 1, <16 x i8> %acc, <16 x i8> %n.bytes, <16 x i8> %m.bytes, i32 13)
   ret <16 x i8> %res
 }

 declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
 declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
 declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
 declare <8 x i16> @llvm.arm.cde.vcx1q.predicated.v8i16.v8i1(i32 immarg, <8 x i16>, i32 immarg, <8 x i1>)
 declare <16 x i8> @llvm.arm.cde.vcx1qa.predicated.v16i8.v16i1(i32 immarg, <16 x i8>, i32 immarg, <16 x i1>)
 declare <4 x i32> @llvm.arm.cde.vcx2q.predicated.v4i32.v4i1(i32 immarg, <4 x i32>, <16 x i8>, i32 immarg, <4 x i1>)
 declare <4 x float> @llvm.arm.cde.vcx2qa.predicated.v4f32.v4i1(i32 immarg, <4 x float>, <16 x i8>, i32 immarg, <4 x i1>)
 declare <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, <16 x i8>, <16 x i8>, i32 immarg, <4 x i1>)
 declare <4 x float> @llvm.arm.cde.vcx3qa.predicated.v4f32.v4i1(i32 immarg, <4 x float>, <16 x i8>, <16 x i8>, i32 immarg, <4 x i1>)

 ; Predicated VCX1 (immediate 1111, coprocessor p0) on <8 x i16>: the i16 mask
 ; is zero-extended and converted to <8 x i1>; the expected output wraps the
 ; instruction in a VPST block.
 define arm_aapcs_vfpcc <8 x i16> @test_vcx1q_m(<8 x i16> %inactive, i16 zeroext %p) {
 ; CHECK-LABEL: test_vcx1q_m:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmsr p0, r0
 ; CHECK-NEXT:    vpst
 ; CHECK-NEXT:    vcx1t p0, q0, #1111
 ; CHECK-NEXT:    bx lr
 entry:
   %p.wide = zext i16 %p to i32
   %pred = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
   %res = call <8 x i16> @llvm.arm.cde.vcx1q.predicated.v8i16.v8i1(i32 0, <8 x i16> %inactive, i32 1111, <8 x i1> %pred)
   ret <8 x i16> %res
 }

 ; Predicated VCX1A (immediate 1112, coprocessor p1) on <16 x i8>, with a
 ; <16 x i1> predicate built from the zero-extended i16 mask.
 define arm_aapcs_vfpcc <16 x i8> @test_vcx1qa_m(<16 x i8> %acc, i16 zeroext %p) {
 ; CHECK-LABEL: test_vcx1qa_m:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmsr p0, r0
 ; CHECK-NEXT:    vpst
 ; CHECK-NEXT:    vcx1at p1, q0, #1112
 ; CHECK-NEXT:    bx lr
 entry:
   %p.wide = zext i16 %p to i32
   %pred = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
   %res = call <16 x i8> @llvm.arm.cde.vcx1qa.predicated.v16i8.v16i1(i32 1, <16 x i8> %acc, i32 1112, <16 x i1> %pred)
   ret <16 x i8> %res
 }

 ; Predicated VCX2 (immediate 111, coprocessor p0): <4 x i32> %inactive,
 ; <4 x float> %n passed as <16 x i8>, and a <4 x i1> predicate derived from %p.
 define arm_aapcs_vfpcc <4 x i32> @test_vcx2q_m(<4 x i32> %inactive, <4 x float> %n, i16 zeroext %p) {
 ; CHECK-LABEL: test_vcx2q_m:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmsr p0, r0
 ; CHECK-NEXT:    vpst
 ; CHECK-NEXT:    vcx2t p0, q0, q1, #111
 ; CHECK-NEXT:    bx lr
 entry:
   %n.bytes = bitcast <4 x float> %n to <16 x i8>
   %p.wide = zext i16 %p to i32
   %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
   %res = call <4 x i32> @llvm.arm.cde.vcx2q.predicated.v4i32.v4i1(i32 0, <4 x i32> %inactive, <16 x i8> %n.bytes, i32 111, <4 x i1> %pred)
   ret <4 x i32> %res
 }

 ; Predicated VCX2A (immediate 112, coprocessor p0): <4 x float> accumulator,
 ; <8 x half> %n passed as <16 x i8>, and a <4 x i1> predicate derived from %p.
 define arm_aapcs_vfpcc <4 x float> @test_vcx2qa_m(<4 x float> %acc, <8 x half> %n, i16 zeroext %p) {
 ; CHECK-LABEL: test_vcx2qa_m:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmsr p0, r0
 ; CHECK-NEXT:    vpst
 ; CHECK-NEXT:    vcx2at p0, q0, q1, #112
 ; CHECK-NEXT:    bx lr
 entry:
   %n.bytes = bitcast <8 x half> %n to <16 x i8>
   %p.wide = zext i16 %p to i32
   %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
   %res = call <4 x float> @llvm.arm.cde.vcx2qa.predicated.v4f32.v4i1(i32 0, <4 x float> %acc, <16 x i8> %n.bytes, i32 112, <4 x i1> %pred)
   ret <4 x float> %res
 }

 ; Predicated VCX3 (immediate 11, coprocessor p0): <2 x i64> %inactive, %n
 ; bitcast to <16 x i8>, %m passed as-is, and a <4 x i1> predicate from %p.
 define arm_aapcs_vfpcc <2 x i64> @test_vcx3q_m(<2 x i64> %inactive, <4 x float> %n, <16 x i8> %m, i16 zeroext %p) {
 ; CHECK-LABEL: test_vcx3q_m:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmsr p0, r0
 ; CHECK-NEXT:    vpst
 ; CHECK-NEXT:    vcx3t p0, q0, q1, q2, #11
 ; CHECK-NEXT:    bx lr
 entry:
   %n.bytes = bitcast <4 x float> %n to <16 x i8>
   %p.wide = zext i16 %p to i32
   %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
   %res = call <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v4i1(i32 0, <2 x i64> %inactive, <16 x i8> %n.bytes, <16 x i8> %m, i32 11, <4 x i1> %pred)
   ret <2 x i64> %res
 }

 ; Predicated VCX3A (immediate 12, coprocessor p0): the parameter named
 ; %inactive is passed as the <4 x float> operand of the accumulating
 ; intrinsic, and the <4 x float> result is returned as <8 x half>.
 define arm_aapcs_vfpcc <8 x half> @test_vcx3qa_m(<4 x float> %inactive, <8 x half> %n, <4 x i32> %m, i16 zeroext %p) {
 ; CHECK-LABEL: test_vcx3qa_m:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmsr p0, r0
 ; CHECK-NEXT:    vpst
 ; CHECK-NEXT:    vcx3at p0, q0, q1, q2, #12
 ; CHECK-NEXT:    bx lr
 entry:
   %n.bytes = bitcast <8 x half> %n to <16 x i8>
   %m.bytes = bitcast <4 x i32> %m to <16 x i8>
   %p.wide = zext i16 %p to i32
   %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
   %res.f32 = call <4 x float> @llvm.arm.cde.vcx3qa.predicated.v4f32.v4i1(i32 0, <4 x float> %inactive, <16 x i8> %n.bytes, <16 x i8> %m.bytes, i32 12, <4 x i1> %pred)
   %res = bitcast <4 x float> %res.f32 to <8 x half>
   ret <8 x half> %res
 }
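	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc -mtriple=thumbv8.1m.main -mattr=+cdecp0 -mattr=+cdecp1 -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
	; NOTE(review): from this point on the file repeats the llc invocation,
	; declarations, and function definitions that already appear above; this
	; looks like an extraction artifact and duplicate definitions are expected
	; to be rejected by the IR parser -- confirm before relying on this copy.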

	declare <16 x i8> @llvm.arm.cde.vcx1q(i32 immarg, i32 immarg)
	declare <16 x i8> @llvm.arm.cde.vcx1qa(i32 immarg, <16 x i8>, i32 immarg)
	declare <16 x i8> @llvm.arm.cde.vcx2q(i32 immarg, <16 x i8>, i32 immarg)
	declare <16 x i8> @llvm.arm.cde.vcx2qa(i32 immarg, <16 x i8>, <16 x i8>, i32 immarg)
	declare <16 x i8> @llvm.arm.cde.vcx3q(i32 immarg, <16 x i8>, <16 x i8>, i32 immarg)
	declare <16 x i8> @llvm.arm.cde.vcx3qa(i32 immarg, <16 x i8>, <16 x i8>, <16 x i8>, i32 immarg)

	; Unpredicated VCX1 with no vector inputs: immediate 1111 on coprocessor p0,
	; with the <16 x i8> result returned unchanged.
	define arm_aapcs_vfpcc <16 x i8> @test_vcx1q_u8() {
	; CHECK-LABEL: test_vcx1q_u8:
	; CHECK: @ %bb.0: @ %entry
	; CHECK-NEXT: vcx1 p0, q0, #1111
	; CHECK-NEXT: bx lr
	entry:
	  %res = call <16 x i8> @llvm.arm.cde.vcx1q(i32 0, i32 1111)
	  ret <16 x i8> %res
	}

	; Accumulating VCX1A on coprocessor p1 with immediate 1112; %acc is already
	; <16 x i8>, so no casts are involved.
	define arm_aapcs_vfpcc <16 x i8> @test_vcx1qa_1(<16 x i8> %acc) {
	; CHECK-LABEL: test_vcx1qa_1:
	; CHECK: @ %bb.0: @ %entry
	; CHECK-NEXT: vcx1a p1, q0, #1112
	; CHECK-NEXT: bx lr
	entry:
	  %res = call <16 x i8> @llvm.arm.cde.vcx1qa(i32 1, <16 x i8> %acc, i32 1112)
	  ret <16 x i8> %res
	}

	; VCX1A on coprocessor p0 with immediate 1113, using a <4 x i32> accumulator
	; that is bitcast to <16 x i8> for the call and back for the return.
	define arm_aapcs_vfpcc <4 x i32> @test_vcx1qa_2(<4 x i32> %acc) {
	; CHECK-LABEL: test_vcx1qa_2:
	; CHECK: @ %bb.0: @ %entry
	; CHECK-NEXT: vcx1a p0, q0, #1113
	; CHECK-NEXT: bx lr
	entry:
	  %acc.bytes = bitcast <4 x i32> %acc to <16 x i8>
	  %res.bytes = call <16 x i8> @llvm.arm.cde.vcx1qa(i32 0, <16 x i8> %acc.bytes, i32 1113)
	  %res = bitcast <16 x i8> %res.bytes to <4 x i32>
	  ret <4 x i32> %res
	}

	; VCX2 on coprocessor p1 with immediate 111; the <8 x half> input is bitcast
	; to <16 x i8> and the byte-vector result is returned directly.
	define arm_aapcs_vfpcc <16 x i8> @test_vcx2q_u8(<8 x half> %n) {
	; CHECK-LABEL: test_vcx2q_u8:
	; CHECK: @ %bb.0: @ %entry
	; CHECK-NEXT: vcx2 p1, q0, q0, #111
	; CHECK-NEXT: bx lr
	entry:
	  %n.bytes = bitcast <8 x half> %n to <16 x i8>
	  %res = call <16 x i8> @llvm.arm.cde.vcx2q(i32 1, <16 x i8> %n.bytes, i32 111)
	  ret <16 x i8> %res
	}

	; VCX2 on coprocessor p1 with immediate 112; the <4 x float> value is bitcast
	; to <16 x i8> for the call and the result is bitcast back to <4 x float>.
	define arm_aapcs_vfpcc <4 x float> @test_vcx2q(<4 x float> %n) {
	; CHECK-LABEL: test_vcx2q:
	; CHECK: @ %bb.0: @ %entry
	; CHECK-NEXT: vcx2 p1, q0, q0, #112
	; CHECK-NEXT: bx lr
	entry:
	  %n.bytes = bitcast <4 x float> %n to <16 x i8>
	  %res.bytes = call <16 x i8> @llvm.arm.cde.vcx2q(i32 1, <16 x i8> %n.bytes, i32 112)
	  %res = bitcast <16 x i8> %res.bytes to <4 x float>
	  ret <4 x float> %res
	}

	; Accumulating VCX2A on coprocessor p0 with immediate 113; the <4 x float>
	; accumulator and the <2 x i64> input are both passed as <16 x i8>.
	define arm_aapcs_vfpcc <4 x float> @test_vcx2qa(<4 x float> %acc, <2 x i64> %n) {
	; CHECK-LABEL: test_vcx2qa:
	; CHECK: @ %bb.0: @ %entry
	; CHECK-NEXT: vcx2a p0, q0, q1, #113
	; CHECK-NEXT: bx lr
	entry:
	  %acc.bytes = bitcast <4 x float> %acc to <16 x i8>
	  %n.bytes = bitcast <2 x i64> %n to <16 x i8>
	  %res.bytes = call <16 x i8> @llvm.arm.cde.vcx2qa(i32 0, <16 x i8> %acc.bytes, <16 x i8> %n.bytes, i32 113)
	  %res = bitcast <16 x i8> %res.bytes to <4 x float>
	  ret <4 x float> %res
	}

	; VCX3 on coprocessor p0 with immediate 11; the <8 x i16> and <4 x i32> inputs
	; are bitcast to <16 x i8>, and the byte-vector result is returned directly.
	define arm_aapcs_vfpcc <16 x i8> @test_vcx3q_u8(<8 x i16> %n, <4 x i32> %m) {
	; CHECK-LABEL: test_vcx3q_u8:
	; CHECK: @ %bb.0: @ %entry
	; CHECK-NEXT: vcx3 p0, q0, q0, q1, #11
	; CHECK-NEXT: bx lr
	entry:
	  %n.bytes = bitcast <8 x i16> %n to <16 x i8>
	  %m.bytes = bitcast <4 x i32> %m to <16 x i8>
	  %res = call <16 x i8> @llvm.arm.cde.vcx3q(i32 0, <16 x i8> %n.bytes, <16 x i8> %m.bytes, i32 11)
	  ret <16 x i8> %res
	}

	; VCX3 on coprocessor p1 with immediate 12; the <2 x i64> and <4 x float>
	; inputs go in as <16 x i8> and the result is bitcast back to <2 x i64>.
	define arm_aapcs_vfpcc <2 x i64> @test_vcx3q(<2 x i64> %n, <4 x float> %m) {
	; CHECK-LABEL: test_vcx3q:
	; CHECK: @ %bb.0: @ %entry
	; CHECK-NEXT: vcx3 p1, q0, q0, q1, #12
	; CHECK-NEXT: bx lr
	entry:
	  %n.bytes = bitcast <2 x i64> %n to <16 x i8>
	  %m.bytes = bitcast <4 x float> %m to <16 x i8>
	  %res.bytes = call <16 x i8> @llvm.arm.cde.vcx3q(i32 1, <16 x i8> %n.bytes, <16 x i8> %m.bytes, i32 12)
	  %res = bitcast <16 x i8> %res.bytes to <2 x i64>
	  ret <2 x i64> %res
	}

	; Accumulating VCX3A on coprocessor p1 with immediate 13; %acc is used as-is
	; while %n and %m are bitcast to <16 x i8>.
	define arm_aapcs_vfpcc <16 x i8> @test_vcx3qa(<16 x i8> %acc, <8 x i16> %n, <4 x float> %m) {
	; CHECK-LABEL: test_vcx3qa:
	; CHECK: @ %bb.0: @ %entry
	; CHECK-NEXT: vcx3a p1, q0, q1, q2, #13
	; CHECK-NEXT: bx lr
	entry:
	  %n.bytes = bitcast <8 x i16> %n to <16 x i8>
	  %m.bytes = bitcast <4 x float> %m to <16 x i8>
	  %res = call <16 x i8> @llvm.arm.cde.vcx3qa(i32 1, <16 x i8> %acc, <16 x i8> %n.bytes, <16 x i8> %m.bytes, i32 13)
	  ret <16 x i8> %res
	}

	declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
	declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
	declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
	declare <8 x i16> @llvm.arm.cde.vcx1q.predicated.v8i16.v8i1(i32 immarg, <8 x i16>, i32 immarg, <8 x i1>)
	declare <16 x i8> @llvm.arm.cde.vcx1qa.predicated.v16i8.v16i1(i32 immarg, <16 x i8>, i32 immarg, <16 x i1>)
	declare <4 x i32> @llvm.arm.cde.vcx2q.predicated.v4i32.v4i1(i32 immarg, <4 x i32>, <16 x i8>, i32 immarg, <4 x i1>)
	declare <4 x float> @llvm.arm.cde.vcx2qa.predicated.v4f32.v4i1(i32 immarg, <4 x float>, <16 x i8>, i32 immarg, <4 x i1>)
	declare <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, <16 x i8>, <16 x i8>, i32 immarg, <4 x i1>)
	declare <4 x float> @llvm.arm.cde.vcx3qa.predicated.v4f32.v4i1(i32 immarg, <4 x float>, <16 x i8>, <16 x i8>, i32 immarg, <4 x i1>)

	; Predicated VCX1 (immediate 1111, coprocessor p0) on <8 x i16>: the i16 mask
	; is zero-extended and converted to <8 x i1>; the expected output wraps the
	; instruction in a VPST block.
	define arm_aapcs_vfpcc <8 x i16> @test_vcx1q_m(<8 x i16> %inactive, i16 zeroext %p) {
	; CHECK-LABEL: test_vcx1q_m:
	; CHECK: @ %bb.0: @ %entry
	; CHECK-NEXT: vmsr p0, r0
	; CHECK-NEXT: vpst
	; CHECK-NEXT: vcx1t p0, q0, #1111
	; CHECK-NEXT: bx lr
	entry:
	  %p.wide = zext i16 %p to i32
	  %pred = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
	  %res = call <8 x i16> @llvm.arm.cde.vcx1q.predicated.v8i16.v8i1(i32 0, <8 x i16> %inactive, i32 1111, <8 x i1> %pred)
	  ret <8 x i16> %res
	}

	; Predicated VCX1A (immediate 1112, coprocessor p1) on <16 x i8>, with a
	; <16 x i1> predicate built from the zero-extended i16 mask.
	define arm_aapcs_vfpcc <16 x i8> @test_vcx1qa_m(<16 x i8> %acc, i16 zeroext %p) {
	; CHECK-LABEL: test_vcx1qa_m:
	; CHECK: @ %bb.0: @ %entry
	; CHECK-NEXT: vmsr p0, r0
	; CHECK-NEXT: vpst
	; CHECK-NEXT: vcx1at p1, q0, #1112
	; CHECK-NEXT: bx lr
	entry:
	  %p.wide = zext i16 %p to i32
	  %pred = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
	  %res = call <16 x i8> @llvm.arm.cde.vcx1qa.predicated.v16i8.v16i1(i32 1, <16 x i8> %acc, i32 1112, <16 x i1> %pred)
	  ret <16 x i8> %res
	}

	; Predicated VCX2 (immediate 111, coprocessor p0): <4 x i32> %inactive,
	; <4 x float> %n passed as <16 x i8>, and a <4 x i1> predicate derived from %p.
	define arm_aapcs_vfpcc <4 x i32> @test_vcx2q_m(<4 x i32> %inactive, <4 x float> %n, i16 zeroext %p) {
	; CHECK-LABEL: test_vcx2q_m:
	; CHECK: @ %bb.0: @ %entry
	; CHECK-NEXT: vmsr p0, r0
	; CHECK-NEXT: vpst
	; CHECK-NEXT: vcx2t p0, q0, q1, #111
	; CHECK-NEXT: bx lr
	entry:
	  %n.bytes = bitcast <4 x float> %n to <16 x i8>
	  %p.wide = zext i16 %p to i32
	  %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
	  %res = call <4 x i32> @llvm.arm.cde.vcx2q.predicated.v4i32.v4i1(i32 0, <4 x i32> %inactive, <16 x i8> %n.bytes, i32 111, <4 x i1> %pred)
	  ret <4 x i32> %res
	}

	; Predicated VCX2A (immediate 112, coprocessor p0): <4 x float> accumulator,
	; <8 x half> %n passed as <16 x i8>, and a <4 x i1> predicate derived from %p.
	define arm_aapcs_vfpcc <4 x float> @test_vcx2qa_m(<4 x float> %acc, <8 x half> %n, i16 zeroext %p) {
	; CHECK-LABEL: test_vcx2qa_m:
	; CHECK: @ %bb.0: @ %entry
	; CHECK-NEXT: vmsr p0, r0
	; CHECK-NEXT: vpst
	; CHECK-NEXT: vcx2at p0, q0, q1, #112
	; CHECK-NEXT: bx lr
	entry:
	  %n.bytes = bitcast <8 x half> %n to <16 x i8>
	  %p.wide = zext i16 %p to i32
	  %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
	  %res = call <4 x float> @llvm.arm.cde.vcx2qa.predicated.v4f32.v4i1(i32 0, <4 x float> %acc, <16 x i8> %n.bytes, i32 112, <4 x i1> %pred)
	  ret <4 x float> %res
	}

	; Predicated VCX3 (immediate 11, coprocessor p0): <2 x i64> %inactive, %n
	; bitcast to <16 x i8>, %m passed as-is, and a <4 x i1> predicate from %p.
	define arm_aapcs_vfpcc <2 x i64> @test_vcx3q_m(<2 x i64> %inactive, <4 x float> %n, <16 x i8> %m, i16 zeroext %p) {
	; CHECK-LABEL: test_vcx3q_m:
	; CHECK: @ %bb.0: @ %entry
	; CHECK-NEXT: vmsr p0, r0
	; CHECK-NEXT: vpst
	; CHECK-NEXT: vcx3t p0, q0, q1, q2, #11
	; CHECK-NEXT: bx lr
	entry:
	  %n.bytes = bitcast <4 x float> %n to <16 x i8>
	  %p.wide = zext i16 %p to i32
	  %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
	  %res = call <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v4i1(i32 0, <2 x i64> %inactive, <16 x i8> %n.bytes, <16 x i8> %m, i32 11, <4 x i1> %pred)
	  ret <2 x i64> %res
	}

	; Predicated VCX3A (immediate 12, coprocessor p0): the parameter named
	; %inactive is passed as the <4 x float> operand of the accumulating
	; intrinsic, and the <4 x float> result is returned as <8 x half>.
	define arm_aapcs_vfpcc <8 x half> @test_vcx3qa_m(<4 x float> %inactive, <8 x half> %n, <4 x i32> %m, i16 zeroext %p) {
	; CHECK-LABEL: test_vcx3qa_m:
	; CHECK: @ %bb.0: @ %entry
	; CHECK-NEXT: vmsr p0, r0
	; CHECK-NEXT: vpst
	; CHECK-NEXT: vcx3at p0, q0, q1, q2, #12
	; CHECK-NEXT: bx lr
	entry:
	  %n.bytes = bitcast <8 x half> %n to <16 x i8>
	  %m.bytes = bitcast <4 x i32> %m to <16 x i8>
	  %p.wide = zext i16 %p to i32
	  %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
	  %res.f32 = call <4 x float> @llvm.arm.cde.vcx3qa.predicated.v4f32.v4i1(i32 0, <4 x float> %inactive, <16 x i8> %n.bytes, <16 x i8> %m.bytes, i32 12, <4 x i1> %pred)
	  %res = bitcast <4 x float> %res.f32 to <8 x half>
	  ret <8 x half> %res
	}