|  | //=== ReplaceWithVeclib.cpp - Replace vector intrinsics with veclib calls -===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | // Replaces calls to LLVM Intrinsics with matching calls to functions from a | 
|  | // vector library (e.g libmvec, SVML) using TargetLibraryInfo interface. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "llvm/CodeGen/ReplaceWithVeclib.h" | 
|  | #include "llvm/ADT/STLExtras.h" | 
|  | #include "llvm/ADT/Statistic.h" | 
|  | #include "llvm/ADT/StringRef.h" | 
|  | #include "llvm/Analysis/DemandedBits.h" | 
|  | #include "llvm/Analysis/GlobalsModRef.h" | 
|  | #include "llvm/Analysis/OptimizationRemarkEmitter.h" | 
|  | #include "llvm/Analysis/TargetLibraryInfo.h" | 
|  | #include "llvm/Analysis/VectorUtils.h" | 
|  | #include "llvm/CodeGen/Passes.h" | 
|  | #include "llvm/IR/DerivedTypes.h" | 
|  | #include "llvm/IR/IRBuilder.h" | 
|  | #include "llvm/IR/InstIterator.h" | 
|  | #include "llvm/IR/IntrinsicInst.h" | 
|  | #include "llvm/IR/VFABIDemangler.h" | 
|  | #include "llvm/Support/TypeSize.h" | 
|  | #include "llvm/Transforms/Utils/ModuleUtils.h" | 
|  |  | 
|  | using namespace llvm; | 
|  |  | 
|  | #define DEBUG_TYPE "replace-with-veclib" | 
|  |  | 
|  | STATISTIC(NumCallsReplaced, | 
|  | "Number of calls to intrinsics that have been replaced."); | 
|  |  | 
|  | STATISTIC(NumTLIFuncDeclAdded, | 
|  | "Number of vector library function declarations added."); | 
|  |  | 
|  | STATISTIC(NumFuncUsedAdded, | 
|  | "Number of functions added to `llvm.compiler.used`"); | 
|  |  | 
|  | /// Returns a vector Function that it adds to the Module \p M. When an \p | 
|  | /// ScalarFunc is not null, it copies its attributes to the newly created | 
|  | /// Function. | 
|  | Function *getTLIFunction(Module *M, FunctionType *VectorFTy, | 
|  | const StringRef TLIName, | 
|  | Function *ScalarFunc = nullptr) { | 
|  | Function *TLIFunc = M->getFunction(TLIName); | 
|  | if (!TLIFunc) { | 
|  | TLIFunc = | 
|  | Function::Create(VectorFTy, Function::ExternalLinkage, TLIName, *M); | 
|  | if (ScalarFunc) | 
|  | TLIFunc->copyAttributesFrom(ScalarFunc); | 
|  |  | 
|  | LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added vector library function `" | 
|  | << TLIName << "` of type `" << *(TLIFunc->getType()) | 
|  | << "` to module.\n"); | 
|  |  | 
|  | ++NumTLIFuncDeclAdded; | 
|  | // Add the freshly created function to llvm.compiler.used, similar to as it | 
|  | // is done in InjectTLIMappings. | 
|  | appendToCompilerUsed(*M, {TLIFunc}); | 
|  | LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << TLIName | 
|  | << "` to `@llvm.compiler.used`.\n"); | 
|  | ++NumFuncUsedAdded; | 
|  | } | 
|  | return TLIFunc; | 
|  | } | 
|  |  | 
|  | /// Replace the intrinsic call \p II to \p TLIVecFunc, which is the | 
|  | /// corresponding function from the vector library. | 
|  | static void replaceWithTLIFunction(IntrinsicInst *II, VFInfo &Info, | 
|  | Function *TLIVecFunc) { | 
|  | IRBuilder<> IRBuilder(II); | 
|  | SmallVector<Value *> Args(II->args()); | 
|  | if (auto OptMaskpos = Info.getParamIndexForOptionalMask()) { | 
|  | auto *MaskTy = | 
|  | VectorType::get(Type::getInt1Ty(II->getContext()), Info.Shape.VF); | 
|  | Args.insert(Args.begin() + OptMaskpos.value(), | 
|  | Constant::getAllOnesValue(MaskTy)); | 
|  | } | 
|  |  | 
|  | // Preserve the operand bundles. | 
|  | SmallVector<OperandBundleDef, 1> OpBundles; | 
|  | II->getOperandBundlesAsDefs(OpBundles); | 
|  |  | 
|  | auto *Replacement = IRBuilder.CreateCall(TLIVecFunc, Args, OpBundles); | 
|  | II->replaceAllUsesWith(Replacement); | 
|  | // Preserve fast math flags for FP math. | 
|  | if (isa<FPMathOperator>(Replacement)) | 
|  | Replacement->copyFastMathFlags(II); | 
|  | } | 
|  |  | 
|  | /// Returns true when successfully replaced \p II, which is a call to a | 
|  | /// vectorized intrinsic, with a suitable function taking vector arguments, | 
|  | /// based on available mappings in the \p TLI. | 
|  | static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI, | 
|  | IntrinsicInst *II) { | 
|  | assert(II != nullptr && "Intrinsic cannot be null"); | 
|  | Intrinsic::ID IID = II->getIntrinsicID(); | 
|  | Type *RetTy = II->getType(); | 
|  | Type *ScalarRetTy = RetTy->getScalarType(); | 
|  | // At the moment VFABI assumes the return type is always widened unless it is | 
|  | // a void type. | 
|  | auto *VTy = dyn_cast<VectorType>(RetTy); | 
|  | ElementCount EC(VTy ? VTy->getElementCount() : ElementCount::getFixed(0)); | 
|  |  | 
|  | // OloadTys collects types used in scalar intrinsic overload name. | 
|  | SmallVector<Type *, 3> OloadTys; | 
|  | if (!RetTy->isVoidTy() && | 
|  | isVectorIntrinsicWithOverloadTypeAtArg(IID, -1, /*TTI=*/nullptr)) | 
|  | OloadTys.push_back(ScalarRetTy); | 
|  |  | 
|  | // Compute the argument types of the corresponding scalar call and check that | 
|  | // all vector operands match the previously found EC. | 
|  | SmallVector<Type *, 8> ScalarArgTypes; | 
|  | for (auto Arg : enumerate(II->args())) { | 
|  | auto *ArgTy = Arg.value()->getType(); | 
|  | bool IsOloadTy = isVectorIntrinsicWithOverloadTypeAtArg(IID, Arg.index(), | 
|  | /*TTI=*/nullptr); | 
|  | if (isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index(), /*TTI=*/nullptr)) { | 
|  | ScalarArgTypes.push_back(ArgTy); | 
|  | if (IsOloadTy) | 
|  | OloadTys.push_back(ArgTy); | 
|  | } else if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) { | 
|  | auto *ScalarArgTy = VectorArgTy->getElementType(); | 
|  | ScalarArgTypes.push_back(ScalarArgTy); | 
|  | if (IsOloadTy) | 
|  | OloadTys.push_back(ScalarArgTy); | 
|  | // When return type is void, set EC to the first vector argument, and | 
|  | // disallow vector arguments with different ECs. | 
|  | if (EC.isZero()) | 
|  | EC = VectorArgTy->getElementCount(); | 
|  | else if (EC != VectorArgTy->getElementCount()) | 
|  | return false; | 
|  | } else | 
|  | // Exit when it is supposed to be a vector argument but it isn't. | 
|  | return false; | 
|  | } | 
|  |  | 
|  | // Try to reconstruct the name for the scalar version of the instruction, | 
|  | // using scalar argument types. | 
|  | std::string ScalarName = | 
|  | Intrinsic::isOverloaded(IID) | 
|  | ? Intrinsic::getName(IID, OloadTys, II->getModule()) | 
|  | : Intrinsic::getName(IID).str(); | 
|  |  | 
|  | // Try to find the mapping for the scalar version of this intrinsic and the | 
|  | // exact vector width of the call operands in the TargetLibraryInfo. First, | 
|  | // check with a non-masked variant, and if that fails try with a masked one. | 
|  | const VecDesc *VD = | 
|  | TLI.getVectorMappingInfo(ScalarName, EC, /*Masked*/ false); | 
|  | if (!VD && !(VD = TLI.getVectorMappingInfo(ScalarName, EC, /*Masked*/ true))) | 
|  | return false; | 
|  |  | 
|  | LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI mapping from: `" << ScalarName | 
|  | << "` and vector width " << EC << " to: `" | 
|  | << VD->getVectorFnName() << "`.\n"); | 
|  |  | 
|  | // Replace the call to the intrinsic with a call to the vector library | 
|  | // function. | 
|  | FunctionType *ScalarFTy = | 
|  | FunctionType::get(ScalarRetTy, ScalarArgTypes, /*isVarArg*/ false); | 
|  | const std::string MangledName = VD->getVectorFunctionABIVariantString(); | 
|  | auto OptInfo = VFABI::tryDemangleForVFABI(MangledName, ScalarFTy); | 
|  | if (!OptInfo) | 
|  | return false; | 
|  |  | 
|  | // There is no guarantee that the vectorized instructions followed the VFABI | 
|  | // specification when being created, this is why we need to add extra check to | 
|  | // make sure that the operands of the vector function obtained via VFABI match | 
|  | // the operands of the original vector instruction. | 
|  | for (auto &VFParam : OptInfo->Shape.Parameters) { | 
|  | if (VFParam.ParamKind == VFParamKind::GlobalPredicate) | 
|  | continue; | 
|  |  | 
|  | // tryDemangleForVFABI must return valid ParamPos, otherwise it could be | 
|  | // a bug in the VFABI parser. | 
|  | assert(VFParam.ParamPos < II->arg_size() && "ParamPos has invalid range"); | 
|  | Type *OrigTy = II->getArgOperand(VFParam.ParamPos)->getType(); | 
|  | if (OrigTy->isVectorTy() != (VFParam.ParamKind == VFParamKind::Vector)) { | 
|  | LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Will not replace: " << ScalarName | 
|  | << ". Wrong type at index " << VFParam.ParamPos << ": " | 
|  | << *OrigTy << "\n"); | 
|  | return false; | 
|  | } | 
|  | } | 
|  |  | 
|  | FunctionType *VectorFTy = VFABI::createFunctionType(*OptInfo, ScalarFTy); | 
|  | if (!VectorFTy) | 
|  | return false; | 
|  |  | 
|  | Function *TLIFunc = | 
|  | getTLIFunction(II->getModule(), VectorFTy, VD->getVectorFnName(), | 
|  | II->getCalledFunction()); | 
|  | replaceWithTLIFunction(II, *OptInfo, TLIFunc); | 
|  | LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `" << ScalarName | 
|  | << "` with call to `" << TLIFunc->getName() << "`.\n"); | 
|  | ++NumCallsReplaced; | 
|  | return true; | 
|  | } | 
|  |  | 
|  | static bool runImpl(const TargetLibraryInfo &TLI, Function &F) { | 
|  | SmallVector<Instruction *> ReplacedCalls; | 
|  | for (auto &I : instructions(F)) { | 
|  | // Process only intrinsic calls that return void or a vector. | 
|  | if (auto *II = dyn_cast<IntrinsicInst>(&I)) { | 
|  | if (II->getIntrinsicID() == Intrinsic::not_intrinsic) | 
|  | continue; | 
|  | if (!II->getType()->isVectorTy() && !II->getType()->isVoidTy()) | 
|  | continue; | 
|  |  | 
|  | if (replaceWithCallToVeclib(TLI, II)) | 
|  | ReplacedCalls.push_back(&I); | 
|  | } | 
|  | } | 
|  | // Erase any intrinsic calls that were replaced with vector library calls. | 
|  | for (auto *I : ReplacedCalls) | 
|  | I->eraseFromParent(); | 
|  | return !ReplacedCalls.empty(); | 
|  | } | 
|  |  | 
|  | //////////////////////////////////////////////////////////////////////////////// | 
|  | // New pass manager implementation. | 
|  | //////////////////////////////////////////////////////////////////////////////// | 
|  | PreservedAnalyses ReplaceWithVeclib::run(Function &F, | 
|  | FunctionAnalysisManager &AM) { | 
|  | const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F); | 
|  | auto Changed = runImpl(TLI, F); | 
|  | if (Changed) { | 
|  | LLVM_DEBUG(dbgs() << "Intrinsic calls replaced with vector libraries: " | 
|  | << NumCallsReplaced << "\n"); | 
|  |  | 
|  | PreservedAnalyses PA; | 
|  | PA.preserveSet<CFGAnalyses>(); | 
|  | PA.preserve<TargetLibraryAnalysis>(); | 
|  | PA.preserve<ScalarEvolutionAnalysis>(); | 
|  | PA.preserve<LoopAccessAnalysis>(); | 
|  | PA.preserve<DemandedBitsAnalysis>(); | 
|  | PA.preserve<OptimizationRemarkEmitterAnalysis>(); | 
|  | return PA; | 
|  | } | 
|  |  | 
|  | // The pass did not replace any calls, hence it preserves all analyses. | 
|  | return PreservedAnalyses::all(); | 
|  | } | 
|  |  | 
|  | //////////////////////////////////////////////////////////////////////////////// | 
|  | // Legacy PM Implementation. | 
|  | //////////////////////////////////////////////////////////////////////////////// | 
|  | bool ReplaceWithVeclibLegacy::runOnFunction(Function &F) { | 
|  | const TargetLibraryInfo &TLI = | 
|  | getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); | 
|  | return runImpl(TLI, F); | 
|  | } | 
|  |  | 
|  | void ReplaceWithVeclibLegacy::getAnalysisUsage(AnalysisUsage &AU) const { | 
|  | AU.setPreservesCFG(); | 
|  | AU.addRequired<TargetLibraryInfoWrapperPass>(); | 
|  | AU.addPreserved<TargetLibraryInfoWrapperPass>(); | 
|  | AU.addPreserved<ScalarEvolutionWrapperPass>(); | 
|  | AU.addPreserved<AAResultsWrapperPass>(); | 
|  | AU.addPreserved<OptimizationRemarkEmitterWrapperPass>(); | 
|  | AU.addPreserved<GlobalsAAWrapperPass>(); | 
|  | } | 
|  |  | 
|  | //////////////////////////////////////////////////////////////////////////////// | 
|  | // Legacy Pass manager initialization | 
|  | //////////////////////////////////////////////////////////////////////////////// | 
|  | char ReplaceWithVeclibLegacy::ID = 0; | 
|  |  | 
|  | INITIALIZE_PASS_BEGIN(ReplaceWithVeclibLegacy, DEBUG_TYPE, | 
|  | "Replace intrinsics with calls to vector library", false, | 
|  | false) | 
|  | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) | 
|  | INITIALIZE_PASS_END(ReplaceWithVeclibLegacy, DEBUG_TYPE, | 
|  | "Replace intrinsics with calls to vector library", false, | 
|  | false) | 
|  |  | 
|  | FunctionPass *llvm::createReplaceWithVeclibLegacyPass() { | 
|  | return new ReplaceWithVeclibLegacy(); | 
|  | } |