//===- ExpandReductions.cpp - Expand reduction intrinsics -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass implements IR expansion for reduction intrinsics, allowing targets
// to enable the intrinsics until just before codegen.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ExpandReductions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

using namespace llvm;

namespace {

bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
  bool Changed = false;
  SmallVector<IntrinsicInst *, 4> Worklist;
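  // Collect the candidate reduction intrinsics into a worklist first;
  // expanding them while walking the function would invalidate the
  // instruction iterator.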
  for (auto &I : instructions(F)) {
    if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
      switch (II->getIntrinsicID()) {
      default: break;
      case Intrinsic::vector_reduce_fadd:
      case Intrinsic::vector_reduce_fmul:
      case Intrinsic::vector_reduce_add:
      case Intrinsic::vector_reduce_mul:
      case Intrinsic::vector_reduce_and:
      case Intrinsic::vector_reduce_or:
      case Intrinsic::vector_reduce_xor:
      case Intrinsic::vector_reduce_smax:
      case Intrinsic::vector_reduce_smin:
      case Intrinsic::vector_reduce_umax:
      case Intrinsic::vector_reduce_umin:
      case Intrinsic::vector_reduce_fmax:
      case Intrinsic::vector_reduce_fmin:
        if (TTI->shouldExpandReduction(II))
          Worklist.push_back(II);

        break;
      }
    }
  }

  for (auto *II : Worklist) {
    FastMathFlags FMF =
        isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
    Intrinsic::ID ID = II->getIntrinsicID();
    RecurKind RK = getMinMaxReductionRecurKind(ID);
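    // RK is only meaningful for the min/max reductions; for the other
    // intrinsic IDs getMinMaxReductionRecurKind returns RecurKind::None.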

    Value *Rdx = nullptr;
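    // Scope the builder's fast-math flags to this expansion and seed them from
    // the call, so every FP instruction created below inherits them.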
    IRBuilder<> Builder(II);
    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
    Builder.setFastMathFlags(FMF);
    switch (ID) {
    default: llvm_unreachable("Unexpected intrinsic!");
    case Intrinsic::vector_reduce_fadd:
    case Intrinsic::vector_reduce_fmul: {
      // FMFs must be attached to the call, otherwise it's an ordered reduction
      // and it can't be handled by generating a shuffle sequence.
      Value *Acc = II->getArgOperand(0);
      Value *Vec = II->getArgOperand(1);
      unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
      if (!FMF.allowReassoc())
        Rdx = getOrderedReduction(Builder, Acc, Vec, RdxOpcode, RK);
      else {
        if (!isPowerOf2_32(
                cast<FixedVectorType>(Vec->getType())->getNumElements()))
          continue;
        Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RK);
        Rdx = Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, Acc, Rdx,
                                  "bin.rdx");
      }
      break;
    }
    case Intrinsic::vector_reduce_and:
    case Intrinsic::vector_reduce_or: {
      // Canonicalize logical or/and reductions:
      // Or reduction for i1 is represented as:
      // %val = bitcast <ReduxWidth x i1> to iReduxWidth
      // %res = cmp ne iReduxWidth %val, 0
      // And reduction for i1 is represented as:
      // %val = bitcast <ReduxWidth x i1> to iReduxWidth
      // %res = cmp eq iReduxWidth %val, 11111
      Value *Vec = II->getArgOperand(0);
      auto *FTy = cast<FixedVectorType>(Vec->getType());
      unsigned NumElts = FTy->getNumElements();
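      // The shuffle-based expansion below only handles power-of-two element
      // counts; otherwise leave the intrinsic in place for the backend.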
      if (!isPowerOf2_32(NumElts))
        continue;

      if (FTy->getElementType() == Builder.getInt1Ty()) {
        Rdx = Builder.CreateBitCast(Vec, Builder.getIntNTy(NumElts));
        if (ID == Intrinsic::vector_reduce_and) {
          Rdx = Builder.CreateICmpEQ(
              Rdx, ConstantInt::getAllOnesValue(Rdx->getType()));
        } else {
          assert(ID == Intrinsic::vector_reduce_or && "Expected or reduction.");
          Rdx = Builder.CreateIsNotNull(Rdx);
        }
        break;
      }
      unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
      Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RK);
      break;
    }
    case Intrinsic::vector_reduce_add:
    case Intrinsic::vector_reduce_mul:
    case Intrinsic::vector_reduce_xor:
    case Intrinsic::vector_reduce_smax:
    case Intrinsic::vector_reduce_smin:
    case Intrinsic::vector_reduce_umax:
    case Intrinsic::vector_reduce_umin: {
      Value *Vec = II->getArgOperand(0);
      if (!isPowerOf2_32(
              cast<FixedVectorType>(Vec->getType())->getNumElements()))
        continue;
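      // getShuffleReduction emits a log2(NumElts) sequence of vector shuffles
      // and binary ops, then extracts element 0 as the scalar result.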
      unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
      Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RK);
      break;
    }
    case Intrinsic::vector_reduce_fmax:
    case Intrinsic::vector_reduce_fmin: {
      // We require "nnan" to use a shuffle reduction; "nsz" is implied by the
      // semantics of the reduction.
      Value *Vec = II->getArgOperand(0);
      if (!isPowerOf2_32(
              cast<FixedVectorType>(Vec->getType())->getNumElements()) ||
          !FMF.noNaNs())
        continue;
      unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
      Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RK);
      break;
    }
    }
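    // Replace all uses of the intrinsic with the expanded scalar result and
    // delete the original call.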
    II->replaceAllUsesWith(Rdx);
    II->eraseFromParent();
    Changed = true;
  }
  return Changed;
}

class ExpandReductions : public FunctionPass {
public:
  static char ID;
  ExpandReductions() : FunctionPass(ID) {
    initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    return expandReductions(F, TTI);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesCFG();
  }
};
}

char ExpandReductions::ID;
INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
                      "Expand reduction intrinsics", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
                    "Expand reduction intrinsics", false, false)

FunctionPass *llvm::createExpandReductionsPass() {
  return new ExpandReductions();
}

PreservedAnalyses ExpandReductionsPass::run(Function &F,
                                            FunctionAnalysisManager &AM) {
  const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  if (!expandReductions(F, &TTI))
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}