[Analysis][AArch64] Make fixed-width ordered reductions slightly more expensive
For tight loops like this:
  float r = 0;
  for (int i = 0; i < n; i++) {
    r += a[i];
  }
it's better not to vectorise at -O3 using fixed-width ordered reductions
on AArch64 targets. Although the resulting number of instructions in the
generated code ends up being comparable to not vectorising at all, there
may be additional costs on some CPUs; for example, the scheduling may be
worse. It makes sense to deter vectorisation in tight loops.
Differential Revision: https://reviews.llvm.org/D108292
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 1c20ddd..869c2c1 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1999,8 +1999,13 @@
                                            Optional<FastMathFlags> FMF,
                                            TTI::TargetCostKind CostKind) {
   if (TTI::requiresOrderedReduction(FMF)) {
-    if (!isa<ScalableVectorType>(ValTy))
-      return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
+    if (auto *FixedVTy = dyn_cast<FixedVectorType>(ValTy)) {
+      InstructionCost BaseCost =
+          BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
+      // Add on extra cost to reflect the extra overhead on some CPUs. We still
+      // end up vectorizing for more computationally intensive loops.
+      return BaseCost + FixedVTy->getNumElements();
+    }

     if (Opcode != Instruction::FAdd)
       return InstructionCost::getInvalid();