[AArch64] Basic demand elements for some intrinsics
Many NEON intrinsics work lane-wise, meaning that a lane which is not
demanded in the result is not demanded in the input either. This teaches that to
AArch64TTIImpl::simplifyDemandedVectorEltsIntrinsic for some simple
single-input truncate intrinsics, which can help remove unnecessary
instructions.
Differential Revision: https://reviews.llvm.org/D117097
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 0e8c5b8..1e27674 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1168,6 +1168,32 @@
return None;
}
+Optional<Value *> AArch64TTIImpl::simplifyDemandedVectorEltsIntrinsic( // For a fixed list of NEON intrinsics, passes the demanded-element mask on to operand 0 via the callback.
+ InstCombiner &IC, IntrinsicInst &II, APInt OrigDemandedElts, // IC is not referenced in this body; II is the intrinsic call being examined.
+ APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, // UndefElts2 and UndefElts3 are not referenced in this body.
+ std::function<void(Instruction *, unsigned, APInt, APInt &)>
+ SimplifyAndSetOp) const { // Caller-supplied callback; the unsigned argument is presumably an operand index -- confirm against the caller.
+ switch (II.getIntrinsicID()) {
+ default: // Any intrinsic not listed below is left untouched.
+ break;
+ case Intrinsic::aarch64_neon_fcvtxn: // The commit message describes this group as simple single-input, lane-wise truncate intrinsics.
+ case Intrinsic::aarch64_neon_rshrn:
+ case Intrinsic::aarch64_neon_sqrshrn:
+ case Intrinsic::aarch64_neon_sqrshrun:
+ case Intrinsic::aarch64_neon_sqshrn:
+ case Intrinsic::aarch64_neon_sqshrun:
+ case Intrinsic::aarch64_neon_sqxtn:
+ case Intrinsic::aarch64_neon_sqxtun:
+ case Intrinsic::aarch64_neon_uqrshrn:
+ case Intrinsic::aarch64_neon_uqshrn:
+ case Intrinsic::aarch64_neon_uqxtn:
+ SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts); // The incoming mask is handed over unchanged for operand 0; UndefElts is passed by reference for the callback to fill in.
+ break;
+ }
+
+ return None; // Every path returns None; presumably this means "no replacement value" -- confirm against the TTI hook contract.
+}
+
bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
ArrayRef<const Value *> Args) {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index c3e1735..7ba645b 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -106,6 +106,12 @@
Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;
+ Optional<Value *> simplifyDemandedVectorEltsIntrinsic( // Declaration of the demanded-vector-elements hook defined in AArch64TargetTransformInfo.cpp.
+ InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, // DemandedElts is named OrigDemandedElts in the definition.
+ APInt &UndefElts2, APInt &UndefElts3,
+ std::function<void(Instruction *, unsigned, APInt, APInt &)>
+ SimplifyAndSetOp) const; // SimplifyAndSetOp is a callback provided by the caller.
+
+
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
switch (K) {
case TargetTransformInfo::RGK_Scalar: