[AArch64] Improve vector funnel shift by constant costs. (#130044)
We now have better codegen, and can have better costs to match. The
generated code should now produce a shl+usra and can be seen in
testcases such as:
https://github.com/llvm/llvm-project/blob/7e5821bae80db3f3f0fe0d5f8ce62f79e548eed5/llvm/test/CodeGen/AArch64/fsh.ll#L3941.
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index ba019e1..53c6a02 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -884,12 +884,11 @@
const auto LegalisationCost = getTypeLegalizationCost(RetTy);
if (OpInfoZ.isUniform()) {
- // FIXME: The costs could be lower if the codegen is better.
static const CostTblEntry FshlTbl[] = {
- {Intrinsic::fshl, MVT::v4i32, 3}, // ushr + shl + orr
- {Intrinsic::fshl, MVT::v2i64, 3}, {Intrinsic::fshl, MVT::v16i8, 4},
- {Intrinsic::fshl, MVT::v8i16, 4}, {Intrinsic::fshl, MVT::v2i32, 3},
- {Intrinsic::fshl, MVT::v8i8, 4}, {Intrinsic::fshl, MVT::v4i16, 4}};
+ {Intrinsic::fshl, MVT::v4i32, 2}, // shl + usra
+ {Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},
+ {Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},
+ {Intrinsic::fshl, MVT::v8i8, 2}, {Intrinsic::fshl, MVT::v4i16, 2}};
// Costs for both fshl & fshr are the same, so just pass Intrinsic::fshl
// to avoid having to duplicate the costs.
const auto *Entry =