; test/CodeGen/AArch64/sve-extract-vector-to-predicate-store.ll - llvm-project/llvm - Git at Google

 ; Runs opt with the -aarch64-sve-intrinsic-opts pass over this file and pipes
 ; the textual IR it prints into FileCheck, which compares it against the
 ; expectation comments embedded in each function below.
 ; RUN: opt -S -aarch64-sve-intrinsic-opts < %s | FileCheck %s

 ; All functions in this module are compiled for AArch64 Linux.
 target triple = "aarch64-unknown-linux-gnu"

 ; Positive case for a <2 x i8> store, using attribute group #0.
 ; The input bitcasts the predicate to bytes, extracts the leading fixed-width
 ; subvector and stores it; the expected output stores %pred directly through
 ; a pointer cast of %addr.
 define void @pred_store_v2i8(<vscale x 16 x i1> %pred, <2 x i8>* %addr) #0 {
 ; CHECK-LABEL: @pred_store_v2i8(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i8>* %addr to <vscale x 16 x i1>*
 ; CHECK-NEXT:    store <vscale x 16 x i1> %pred, <vscale x 16 x i1>* [[TMP1]]
 ; CHECK-NEXT:    ret void
   ; Reinterpret the predicate bits as a scalable vector of bytes.
   %pred.bytes = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
   ; Fixed-width subvector starting at element 0.
   %head = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> %pred.bytes, i64 0)
   store <2 x i8> %head, <2 x i8>* %addr, align 4
   ret void
 }

 ; Positive case for a <4 x i8> store, using attribute group #1.
 ; Same shape as the <2 x i8> variant: bitcast, extract at index 0, store.
 ; The expected output is a single store of %pred through a cast of %addr.
 define void @pred_store_v4i8(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #1 {
 ; CHECK-LABEL: @pred_store_v4i8(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i8>* %addr to <vscale x 16 x i1>*
 ; CHECK-NEXT:    store <vscale x 16 x i1> %pred, <vscale x 16 x i1>* [[TMP1]]
 ; CHECK-NEXT:    ret void
   ; Reinterpret the predicate bits as a scalable vector of bytes.
   %pred.bytes = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
   ; Fixed-width subvector starting at element 0.
   %head = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %pred.bytes, i64 0)
   store <4 x i8> %head, <4 x i8>* %addr, align 4
   ret void
 }

 ; Positive case for an <8 x i8> store, using attribute group #2.
 ; Same shape as the narrower variants: bitcast, extract at index 0, store.
 ; The expected output is a single store of %pred through a cast of %addr.
 define void @pred_store_v8i8(<vscale x 16 x i1> %pred, <8 x i8>* %addr) #2 {
 ; CHECK-LABEL: @pred_store_v8i8(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8>* %addr to <vscale x 16 x i1>*
 ; CHECK-NEXT:    store <vscale x 16 x i1> %pred, <vscale x 16 x i1>* [[TMP1]]
 ; CHECK-NEXT:    ret void
   ; Reinterpret the predicate bits as a scalable vector of bytes.
   %pred.bytes = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
   ; Fixed-width subvector starting at element 0.
   %head = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> %pred.bytes, i64 0)
   store <8 x i8> %head, <8 x i8>* %addr, align 4
   ret void
 }


 ; Negative case: a vscale that is too small must block the rewrite.
 ; Attribute group #0 is paired with a <4 x i8> extract here, so the
 ; llvm.vector.extract call is expected to remain in the output.
 define void @pred_store_neg1(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #0 {
 ; CHECK-LABEL: @pred_store_neg1(
 ; CHECK:         call <4 x i8> @llvm.vector.extract
   ; Reinterpret the predicate bits as a scalable vector of bytes.
   %pred.bytes = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
   ; Fixed-width subvector starting at element 0.
   %head = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %pred.bytes, i64 0)
   store <4 x i8> %head, <4 x i8>* %addr, align 4
   ret void
 }

 ; Negative case: a vscale that is too large must block the rewrite.
 ; Attribute group #2 is paired with a <4 x i8> extract here, so the
 ; llvm.vector.extract call is expected to remain in the output.
 define void @pred_store_neg2(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #2 {
 ; CHECK-LABEL: @pred_store_neg2(
 ; CHECK:         call <4 x i8> @llvm.vector.extract
   ; Reinterpret the predicate bits as a scalable vector of bytes.
   %pred.bytes = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
   ; Fixed-width subvector starting at element 0.
   %head = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %pred.bytes, i64 0)
   store <4 x i8> %head, <4 x i8>* %addr, align 4
   ret void
 }

 ; Negative case: a non-zero extract index must block the rewrite.
 ; Uses the same attribute group (#1) and types as @pred_store_v4i8, but the
 ; extract starts at element 4, so the llvm.vector.extract call is expected
 ; to remain in the output.
 define void @pred_store_neg3(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #1 {
 ; CHECK-LABEL: @pred_store_neg3(
 ; CHECK:         call <4 x i8> @llvm.vector.extract
   ; Reinterpret the predicate bits as a scalable vector of bytes.
   %pred.bytes = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
   ; Fixed-width subvector starting at element 4 rather than 0.
   %tail = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %pred.bytes, i64 4)
   store <4 x i8> %tail, <4 x i8>* %addr, align 4
   ret void
 }

 ; Negative case: a vscale range whose minimum and maximum differ must block
 ; the rewrite. Attribute group #3 is used here, so the llvm.vector.extract
 ; call is expected to remain in the output.
 define void @pred_store_neg4(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #3 {
 ; CHECK-LABEL: @pred_store_neg4(
 ; CHECK:         call <4 x i8> @llvm.vector.extract
   ; Reinterpret the predicate bits as a scalable vector of bytes.
   %pred.bytes = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
   ; Fixed-width subvector starting at element 0.
   %head = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %pred.bytes, i64 0)
   store <4 x i8> %head, <4 x i8>* %addr, align 4
   ret void
 }

 ; Declarations of the llvm.vector.extract overloads called by the functions
 ; above: one per fixed-width result type, all taking a <vscale x 2 x i8>
 ; source vector and an i64 start index.
 declare <2 x i8> @llvm.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8>, i64)
 declare <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8>, i64)
 declare <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8>, i64)

 ; Attribute groups referenced by the functions above. Every group enables the
 ; "+sve" target feature; they differ only in vscale_range. Groups #0, #1 and
 ; #2 pass the same value for both arguments (1, 2 and 4 respectively), while
 ; #3 passes 2 and 4.
 attributes #0 = { "target-features"="+sve" vscale_range(1,1) }
 attributes #1 = { "target-features"="+sve" vscale_range(2,2) }
 attributes #2 = { "target-features"="+sve" vscale_range(4,4) }
 attributes #3 = { "target-features"="+sve" vscale_range(2,4) }
	; Runs opt with the -aarch64-sve-intrinsic-opts pass over this file and pipes
	; the textual IR it prints into FileCheck, which compares it against the
	; expectation comments embedded in each function below. The pipe must be
	; unescaped so that it is treated as a pipeline and not as an argument.
	; RUN: opt -S -aarch64-sve-intrinsic-opts < %s | FileCheck %s

	; All functions in this module are compiled for AArch64 Linux.
	target triple = "aarch64-unknown-linux-gnu"

	; Positive case for a <2 x i8> store, using attribute group #0.
	; The input bitcasts the predicate to bytes, extracts the leading fixed-width
	; subvector and stores it; the expected output stores %pred directly through
	; a pointer cast of %addr. The captured name uses the pattern %.* so any
	; temporary name is accepted, and %addr is matched as a <2 x i8>* pointer,
	; which is the type it has in the signature.
	define void @pred_store_v2i8(<vscale x 16 x i1> %pred, <2 x i8>* %addr) #0 {
	; CHECK-LABEL: @pred_store_v2i8(
	; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i8>* %addr to <vscale x 16 x i1>*
	; CHECK-NEXT:    store <vscale x 16 x i1> %pred, <vscale x 16 x i1>* [[TMP1]]
	; CHECK-NEXT:    ret void
	  ; Reinterpret the predicate bits as a scalable vector of bytes.
	  %bitcast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
	  ; Fixed-width subvector starting at element 0.
	  %extract = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
	  store <2 x i8> %extract, <2 x i8>* %addr, align 4
	  ret void
	}

	; Positive case for a <4 x i8> store, using attribute group #1.
	; The input bitcasts the predicate to bytes, extracts the leading fixed-width
	; subvector and stores it; the expected output stores %pred directly through
	; a pointer cast of %addr. The captured name uses the pattern %.* so any
	; temporary name is accepted, and %addr is matched as a <4 x i8>* pointer,
	; which is the type it has in the signature.
	define void @pred_store_v4i8(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #1 {
	; CHECK-LABEL: @pred_store_v4i8(
	; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i8>* %addr to <vscale x 16 x i1>*
	; CHECK-NEXT:    store <vscale x 16 x i1> %pred, <vscale x 16 x i1>* [[TMP1]]
	; CHECK-NEXT:    ret void
	  ; Reinterpret the predicate bits as a scalable vector of bytes.
	  %bitcast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
	  ; Fixed-width subvector starting at element 0.
	  %extract = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
	  store <4 x i8> %extract, <4 x i8>* %addr, align 4
	  ret void
	}

	; Positive case for an <8 x i8> store, using attribute group #2.
	; The input bitcasts the predicate to bytes, extracts the leading fixed-width
	; subvector and stores it; the expected output stores %pred directly through
	; a pointer cast of %addr. The captured name uses the pattern %.* so any
	; temporary name is accepted, and %addr is matched as an <8 x i8>* pointer,
	; which is the type it has in the signature.
	define void @pred_store_v8i8(<vscale x 16 x i1> %pred, <8 x i8>* %addr) #2 {
	; CHECK-LABEL: @pred_store_v8i8(
	; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8>* %addr to <vscale x 16 x i1>*
	; CHECK-NEXT:    store <vscale x 16 x i1> %pred, <vscale x 16 x i1>* [[TMP1]]
	; CHECK-NEXT:    ret void
	  ; Reinterpret the predicate bits as a scalable vector of bytes.
	  %bitcast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
	  ; Fixed-width subvector starting at element 0.
	  %extract = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
	  store <8 x i8> %extract, <8 x i8>* %addr, align 4
	  ret void
	}


	; Negative case: a vscale that is too small must block the rewrite.
	; Attribute group #0 is paired with a <4 x i8> extract here, so the
	; llvm.vector.extract call is expected to remain in the output.
	define void @pred_store_neg1(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #0 {
	; CHECK-LABEL: @pred_store_neg1(
	; CHECK: call <4 x i8> @llvm.vector.extract
	  ; Reinterpret the predicate bits as a scalable vector of bytes.
	  %as.bytes = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
	  ; Fixed-width subvector starting at element 0.
	  %fixed = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %as.bytes, i64 0)
	  store <4 x i8> %fixed, <4 x i8>* %addr, align 4
	  ret void
	}

	; Negative case: a vscale that is too large must block the rewrite.
	; Attribute group #2 is paired with a <4 x i8> extract here, so the
	; llvm.vector.extract call is expected to remain in the output.
	define void @pred_store_neg2(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #2 {
	; CHECK-LABEL: @pred_store_neg2(
	; CHECK: call <4 x i8> @llvm.vector.extract
	  ; Reinterpret the predicate bits as a scalable vector of bytes.
	  %as.bytes = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
	  ; Fixed-width subvector starting at element 0.
	  %fixed = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %as.bytes, i64 0)
	  store <4 x i8> %fixed, <4 x i8>* %addr, align 4
	  ret void
	}

	; Negative case: a non-zero extract index must block the rewrite.
	; Uses the same attribute group (#1) and types as @pred_store_v4i8, but the
	; extract starts at element 4, so the llvm.vector.extract call is expected
	; to remain in the output.
	define void @pred_store_neg3(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #1 {
	; CHECK-LABEL: @pred_store_neg3(
	; CHECK: call <4 x i8> @llvm.vector.extract
	  ; Reinterpret the predicate bits as a scalable vector of bytes.
	  %as.bytes = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
	  ; Fixed-width subvector starting at element 4 rather than 0.
	  %fixed = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %as.bytes, i64 4)
	  store <4 x i8> %fixed, <4 x i8>* %addr, align 4
	  ret void
	}

	; Negative case: a vscale range whose minimum and maximum differ must block
	; the rewrite. Attribute group #3 is used here, so the llvm.vector.extract
	; call is expected to remain in the output.
	define void @pred_store_neg4(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #3 {
	; CHECK-LABEL: @pred_store_neg4(
	; CHECK: call <4 x i8> @llvm.vector.extract
	  ; Reinterpret the predicate bits as a scalable vector of bytes.
	  %as.bytes = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
	  ; Fixed-width subvector starting at element 0.
	  %fixed = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %as.bytes, i64 0)
	  store <4 x i8> %fixed, <4 x i8>* %addr, align 4
	  ret void
	}

	; Declarations of the llvm.vector.extract overloads called by the functions
	; above: one per fixed-width result type, all taking a <vscale x 2 x i8>
	; source vector and an i64 start index.
	declare <2 x i8> @llvm.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8>, i64)
	declare <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8>, i64)
	declare <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8>, i64)

	; Attribute groups referenced by the functions above. Every group enables the
	; "+sve" target feature; they differ only in vscale_range. Groups #0, #1 and
	; #2 pass the same value for both arguments (1, 2 and 4 respectively), while
	; #3 passes 2 and 4.
	attributes #0 = { "target-features"="+sve" vscale_range(1,1) }
	attributes #1 = { "target-features"="+sve" vscale_range(2,2) }
	attributes #2 = { "target-features"="+sve" vscale_range(4,4) }
	attributes #3 = { "target-features"="+sve" vscale_range(2,4) }