llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll - llvm-project - Git at Google

 ; RUN: opt -S -aarch64-sve-intrinsic-opts -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck --check-prefix OPT %s

 define <vscale x 8 x i1> @reinterpret_test_h(<vscale x 8 x i1> %a) {
 ; OPT-LABEL: @reinterpret_test_h(
 ; OPT-NOT: convert
 ; OPT: ret <vscale x 8 x i1> %a
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
   %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1)
   ret <vscale x 8 x i1> %2
 }

 ; Reinterprets are not redundant because the second reinterpret zeros the
 ; lanes that don't exist within its input.
 define <vscale x 16 x i1> @reinterpret_test_h_rev(<vscale x 16 x i1> %a) {
 ; OPT-LABEL: @reinterpret_test_h_rev(
 ; OPT: %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %a)
 ; OPT-NEXT: %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
 ; OPT-NEXT: ret <vscale x 16 x i1> %2
   %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %a)
   %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
   ret <vscale x 16 x i1> %2
 }

 define <vscale x 4 x i1> @reinterpret_test_w(<vscale x 4 x i1> %a) {
 ; OPT-LABEL: @reinterpret_test_w(
 ; OPT-NOT: convert
 ; OPT: ret <vscale x 4 x i1> %a
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
   %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
   ret <vscale x 4 x i1> %2
 }

 ; Reinterprets are not redundant because the second reinterpret zeros the
 ; lanes that don't exist within its input.
 define <vscale x 16 x i1> @reinterpret_test_w_rev(<vscale x 16 x i1> %a) {
 ; OPT-LABEL: @reinterpret_test_w_rev(
 ; OPT: %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %a)
 ; OPT-NEXT: %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
 ; OPT-NEXT: ret <vscale x 16 x i1> %2
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %a)
   %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
   ret <vscale x 16 x i1> %2
 }

 define <vscale x 2 x i1> @reinterpret_test_d(<vscale x 2 x i1> %a) {
 ; OPT-LABEL: @reinterpret_test_d(
 ; OPT-NOT: convert
 ; OPT: ret <vscale x 2 x i1> %a
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
   %2 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %1)
   ret <vscale x 2 x i1> %2
 }

 ; Reinterprets are not redundant because the second reinterpret zeros the
 ; lanes that don't exist within its input.
 define <vscale x 16 x i1> @reinterpret_test_d_rev(<vscale x 16 x i1> %a) {
 ; OPT-LABEL: @reinterpret_test_d_rev(
 ; OPT: %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %a)
 ; OPT-NEXT: %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %1)
 ; OPT-NEXT: ret <vscale x 16 x i1> %2
   %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %a)
   %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %1)
   ret <vscale x 16 x i1> %2
 }

 define <vscale x 2 x i1> @reinterpret_test_full_chain(<vscale x 2 x i1> %a) {
 ; OPT-LABEL: @reinterpret_test_full_chain(
 ; OPT: ret <vscale x 2 x i1> %a
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
   %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
   %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
   %4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
   %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %4)
   %6 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %5)
   ret <vscale x 2 x i1> %6
 }

 ; The last two reinterprets are not necessary, since they are doing the same
 ; work as the first two.
 define <vscale x 4 x i1> @reinterpret_test_partial_chain(<vscale x 2 x i1> %a) {
 ; OPT-LABEL: @reinterpret_test_partial_chain(
 ; OPT: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
 ; OPT-NEXT: %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
 ; OPT-NEXT: ret <vscale x 4 x i1> %2
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
   %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
   %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
   %4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
   ret <vscale x 4 x i1> %4
 }

 ; The chain cannot be reduced because of the second reinterpret, which causes
 ; zeroing.
 define <vscale x 8 x i1> @reinterpret_test_irreducible_chain(<vscale x 8 x i1> %a) {
 ; OPT-LABEL: @reinterpret_test_irreducible_chain(
 ; OPT: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
 ; OPT-NEXT: %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
 ; OPT-NEXT: %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
 ; OPT-NEXT: %4 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %3)
 ; OPT-NEXT: ret <vscale x 8 x i1> %4
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
   %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
   %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
   %4 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %3)
   ret <vscale x 8 x i1> %4
 }

 ; Here, the candidate list is larger than the number of instructions that we
 ; end up removing.
 define <vscale x 4 x i1> @reinterpret_test_keep_some_candidates(<vscale x 8 x i1> %a) {
 ; OPT-LABEL: @reinterpret_test_keep_some_candidates(
 ; OPT: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
 ; OPT-NEXT: %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
 ; OPT-NEXT: ret <vscale x 4 x i1> %2
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
   %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
   %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
   %4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
   ret <vscale x 4 x i1> %4
 }

 define <vscale x 2 x i1> @reinterpret_reductions(i32 %cond, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c) {
 ; OPT-LABEL: reinterpret_reductions
 ; OPT-NOT: convert
 ; OPT-NOT: phi <vscale x 16 x i1>
 ; OPT: phi <vscale x 2 x i1> [ %a, %br_phi_a ], [ %b, %br_phi_b ], [ %c, %br_phi_c ]
 ; OPT-NOT: convert
 ; OPT: ret

 entry:
   switch i32 %cond, label %br_phi_c [
          i32 43, label %br_phi_a
          i32 45, label %br_phi_b
   ]

 br_phi_a:
   %a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
   br label %join

 br_phi_b:
   %b1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %b)
   br label %join

 br_phi_c:
   %c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
   br label %join

 join:
   %pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
   %pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
   ret <vscale x 2 x i1> %pg1
 }

 ; No transform as the reinterprets are converting from different types (nxv2i1 & nxv4i1)
 ; As the incoming values to the phi must all be the same type, we cannot remove the reinterprets.
 define <vscale x 2 x i1> @reinterpret_reductions_1(i32 %cond, <vscale x 2 x i1> %a, <vscale x 4 x i1> %b, <vscale x 2 x i1> %c) {
 ; OPT-LABEL: reinterpret_reductions_1
 ; OPT: convert
 ; OPT: phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
 ; OPT-NOT: phi <vscale x 2 x i1>
 ; OPT: tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
 ; OPT: ret

 entry:
   switch i32 %cond, label %br_phi_c [
          i32 43, label %br_phi_a
          i32 45, label %br_phi_b
   ]

 br_phi_a:
   %a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
   br label %join

 br_phi_b:
   %b1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %b)
   br label %join

 br_phi_c:
   %c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
   br label %join

 join:
   %pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
   %pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
   ret <vscale x 2 x i1> %pg1
 }

 ; No transform. Similar to the the test above, but here only two of the arguments need to
 ; be converted to svbool.
 define <vscale x 2 x i1> @reinterpret_reductions_2(i32 %cond, <vscale x 2 x i1> %a, <vscale x 16 x i1> %b, <vscale x 2 x i1> %c) {
 ; OPT-LABEL: reinterpret_reductions_2
 ; OPT: convert
 ; OPT: phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b, %br_phi_b ], [ %c1, %br_phi_c ]
 ; OPT-NOT: phi <vscale x 2 x i1>
 ; OPT: tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
 ; OPT: ret

 entry:
   switch i32 %cond, label %br_phi_c [
          i32 43, label %br_phi_a
          i32 45, label %br_phi_b
   ]

 br_phi_a:
   %a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
   br label %join

 br_phi_b:
   br label %join

 br_phi_c:
   %c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
   br label %join

 join:
   %pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b, %br_phi_b ], [ %c1, %br_phi_c ]
   %pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
   ret <vscale x 2 x i1> %pg1
 }

 ; Similar to reinterpret_reductions but the reinterprets remain because the
 ; original phi cannot be removed (i.e. prefer reinterprets over multiple phis).
 define <vscale x 16 x i1> @reinterpret_reductions3(i32 %cond, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c) {
 ; OPT-LABEL: reinterpret_reductions3
 ; OPT: phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
 ; OPT-NOT: phi <vscale x 2 x i1>
 ; OPT: tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
 ; OPT-NEXT: ret <vscale x 16 x i1> %pg

 entry:
   switch i32 %cond, label %br_phi_c [
          i32 43, label %br_phi_a
          i32 45, label %br_phi_b
   ]

 br_phi_a:
   %a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
   br label %join

 br_phi_b:
   %b1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %b)
   br label %join

 br_phi_c:
   %c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
   br label %join

 join:
   %pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
   %pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
   ret <vscale x 16 x i1> %pg
 }

 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
 declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
 declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
	; RUN: opt -S -aarch64-sve-intrinsic-opts -mtriple=aarch64-linux-gnu -mattr=+sve < %s \| FileCheck --check-prefix OPT %s

	define <vscale x 8 x i1> @reinterpret_test_h(<vscale x 8 x i1> %a) {
	; OPT-LABEL: @reinterpret_test_h(
	; OPT-NOT: convert
	; OPT: ret <vscale x 8 x i1> %a
	%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
	%2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1)
	ret <vscale x 8 x i1> %2
	}

	; Reinterprets are not redundant because the second reinterpret zeros the
	; lanes that don't exist within its input.
	define <vscale x 16 x i1> @reinterpret_test_h_rev(<vscale x 16 x i1> %a) {
	; OPT-LABEL: @reinterpret_test_h_rev(
	; OPT: %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %a)
	; OPT-NEXT: %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
	; OPT-NEXT: ret <vscale x 16 x i1> %2
	%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %a)
	%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
	ret <vscale x 16 x i1> %2
	}

	define <vscale x 4 x i1> @reinterpret_test_w(<vscale x 4 x i1> %a) {
	; OPT-LABEL: @reinterpret_test_w(
	; OPT-NOT: convert
	; OPT: ret <vscale x 4 x i1> %a
	%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
	%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
	ret <vscale x 4 x i1> %2
	}

	; Reinterprets are not redundant because the second reinterpret zeros the
	; lanes that don't exist within its input.
	define <vscale x 16 x i1> @reinterpret_test_w_rev(<vscale x 16 x i1> %a) {
	; OPT-LABEL: @reinterpret_test_w_rev(
	; OPT: %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %a)
	; OPT-NEXT: %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
	; OPT-NEXT: ret <vscale x 16 x i1> %2
	%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %a)
	%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
	ret <vscale x 16 x i1> %2
	}

	define <vscale x 2 x i1> @reinterpret_test_d(<vscale x 2 x i1> %a) {
	; OPT-LABEL: @reinterpret_test_d(
	; OPT-NOT: convert
	; OPT: ret <vscale x 2 x i1> %a
	%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
	%2 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %1)
	ret <vscale x 2 x i1> %2
	}

	; Reinterprets are not redundant because the second reinterpret zeros the
	; lanes that don't exist within its input.
	define <vscale x 16 x i1> @reinterpret_test_d_rev(<vscale x 16 x i1> %a) {
	; OPT-LABEL: @reinterpret_test_d_rev(
	; OPT: %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %a)
	; OPT-NEXT: %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %1)
	; OPT-NEXT: ret <vscale x 16 x i1> %2
	%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %a)
	%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %1)
	ret <vscale x 16 x i1> %2
	}

	define <vscale x 2 x i1> @reinterpret_test_full_chain(<vscale x 2 x i1> %a) {
	; OPT-LABEL: @reinterpret_test_full_chain(
	; OPT: ret <vscale x 2 x i1> %a
	%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
	%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
	%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
	%4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
	%5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %4)
	%6 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %5)
	ret <vscale x 2 x i1> %6
	}

	; The last two reinterprets are not necessary, since they are doing the same
	; work as the first two.
	define <vscale x 4 x i1> @reinterpret_test_partial_chain(<vscale x 2 x i1> %a) {
	; OPT-LABEL: @reinterpret_test_partial_chain(
	; OPT: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
	; OPT-NEXT: %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
	; OPT-NEXT: ret <vscale x 4 x i1> %2
	%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
	%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
	%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
	%4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
	ret <vscale x 4 x i1> %4
	}

	; The chain cannot be reduced because of the second reinterpret, which causes
	; zeroing.
	define <vscale x 8 x i1> @reinterpret_test_irreducible_chain(<vscale x 8 x i1> %a) {
	; OPT-LABEL: @reinterpret_test_irreducible_chain(
	; OPT: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
	; OPT-NEXT: %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
	; OPT-NEXT: %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
	; OPT-NEXT: %4 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %3)
	; OPT-NEXT: ret <vscale x 8 x i1> %4
	%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
	%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
	%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
	%4 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %3)
	ret <vscale x 8 x i1> %4
	}

	; Here, the candidate list is larger than the number of instructions that we
	; end up removing.
	define <vscale x 4 x i1> @reinterpret_test_keep_some_candidates(<vscale x 8 x i1> %a) {
	; OPT-LABEL: @reinterpret_test_keep_some_candidates(
	; OPT: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
	; OPT-NEXT: %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
	; OPT-NEXT: ret <vscale x 4 x i1> %2
	%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
	%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
	%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
	%4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
	ret <vscale x 4 x i1> %4
	}

	define <vscale x 2 x i1> @reinterpret_reductions(i32 %cond, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c) {
	; OPT-LABEL: reinterpret_reductions
	; OPT-NOT: convert
	; OPT-NOT: phi <vscale x 16 x i1>
	; OPT: phi <vscale x 2 x i1> [ %a, %br_phi_a ], [ %b, %br_phi_b ], [ %c, %br_phi_c ]
	; OPT-NOT: convert
	; OPT: ret

	entry:
	switch i32 %cond, label %br_phi_c [
	i32 43, label %br_phi_a
	i32 45, label %br_phi_b
	]

	br_phi_a:
	%a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
	br label %join

	br_phi_b:
	%b1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %b)
	br label %join

	br_phi_c:
	%c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
	br label %join

	join:
	%pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
	%pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
	ret <vscale x 2 x i1> %pg1
	}

	; No transform as the reinterprets are converting from different types (nxv2i1 & nxv4i1)
	; As the incoming values to the phi must all be the same type, we cannot remove the reinterprets.
	define <vscale x 2 x i1> @reinterpret_reductions_1(i32 %cond, <vscale x 2 x i1> %a, <vscale x 4 x i1> %b, <vscale x 2 x i1> %c) {
	; OPT-LABEL: reinterpret_reductions_1
	; OPT: convert
	; OPT: phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
	; OPT-NOT: phi <vscale x 2 x i1>
	; OPT: tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
	; OPT: ret

	entry:
	switch i32 %cond, label %br_phi_c [
	i32 43, label %br_phi_a
	i32 45, label %br_phi_b
	]

	br_phi_a:
	%a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
	br label %join

	br_phi_b:
	%b1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %b)
	br label %join

	br_phi_c:
	%c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
	br label %join

	join:
	%pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
	%pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
	ret <vscale x 2 x i1> %pg1
	}

	; No transform. Similar to the the test above, but here only two of the arguments need to
	; be converted to svbool.
	define <vscale x 2 x i1> @reinterpret_reductions_2(i32 %cond, <vscale x 2 x i1> %a, <vscale x 16 x i1> %b, <vscale x 2 x i1> %c) {
	; OPT-LABEL: reinterpret_reductions_2
	; OPT: convert
	; OPT: phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b, %br_phi_b ], [ %c1, %br_phi_c ]
	; OPT-NOT: phi <vscale x 2 x i1>
	; OPT: tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
	; OPT: ret

	entry:
	switch i32 %cond, label %br_phi_c [
	i32 43, label %br_phi_a
	i32 45, label %br_phi_b
	]

	br_phi_a:
	%a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
	br label %join

	br_phi_b:
	br label %join

	br_phi_c:
	%c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
	br label %join

	join:
	%pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b, %br_phi_b ], [ %c1, %br_phi_c ]
	%pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
	ret <vscale x 2 x i1> %pg1
	}

	; Similar to reinterpret_reductions but the reinterprets remain because the
	; original phi cannot be removed (i.e. prefer reinterprets over multiple phis).
	define <vscale x 16 x i1> @reinterpret_reductions3(i32 %cond, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c) {
	; OPT-LABEL: reinterpret_reductions3
	; OPT: phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
	; OPT-NOT: phi <vscale x 2 x i1>
	; OPT: tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
	; OPT-NEXT: ret <vscale x 16 x i1> %pg

	entry:
	switch i32 %cond, label %br_phi_c [
	i32 43, label %br_phi_a
	i32 45, label %br_phi_b
	]

	br_phi_a:
	%a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
	br label %join

	br_phi_b:
	%b1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %b)
	br label %join

	br_phi_c:
	%c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
	br label %join

	join:
	%pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
	%pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
	ret <vscale x 16 x i1> %pg
	}

	declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
	declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
	declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
	declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
	declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
	declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)