| //===--- ExpandFp.cpp - Expand fp instructions ----------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // This pass expands certain floating point instructions at the IR level. |
| // |
| // It expands 'fptoui .. to', 'fptosi .. to', 'uitofp .. to', and 'sitofp |
| // .. to' instructions whose integer type is wider than a configurable |
| // threshold. This is useful for targets like x86_64 that cannot lower fp |
| // conversions with more than 128 bits. It also expands 'frem' instructions |
| // for targets that neither support the instruction natively nor provide a |
| // libcall for it. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm/CodeGen/ExpandFp.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/Analysis/AssumptionCache.h" |
| #include "llvm/Analysis/GlobalsModRef.h" |
| #include "llvm/Analysis/SimplifyQuery.h" |
| #include "llvm/Analysis/ValueTracking.h" |
| #include "llvm/CodeGen/ISDOpcodes.h" |
| #include "llvm/CodeGen/Passes.h" |
| #include "llvm/CodeGen/TargetLowering.h" |
| #include "llvm/CodeGen/TargetPassConfig.h" |
| #include "llvm/CodeGen/TargetSubtargetInfo.h" |
| #include "llvm/IR/IRBuilder.h" |
| #include "llvm/IR/InstIterator.h" |
| #include "llvm/IR/Module.h" |
| #include "llvm/IR/PassManager.h" |
| #include "llvm/IR/RuntimeLibcalls.h" |
| #include "llvm/InitializePasses.h" |
| #include "llvm/Pass.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Target/TargetMachine.h" |
| #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
| #include <optional> |
| |
| #define DEBUG_TYPE "expand-fp" |
| |
| using namespace llvm; |
| |
| static cl::opt<unsigned> |
| ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden, |
| cl::init(llvm::IntegerType::MAX_INT_BITS), |
| cl::desc("fp convert instructions on integers with " |
| "more than <N> bits are expanded.")); |
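| // Example (hypothetical invocation; only the option name above is taken from |
| // this file, and the pass name is assumed from DEBUG_TYPE): |
| //   opt -passes=expand-fp -expand-fp-convert-bits=128 in.ll -S |
| // This would force expansion of fp conversions involving integers wider than |
| // 128 bits, overriding the limit reported by the target. |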
| |
| namespace { |
| /// This class implements a precise expansion of the frem instruction. |
| /// The generated code is based on the fmod implementation in the AMD device |
| /// libs. |
| class FRemExpander { |
| /// The IRBuilder to use for the expansion. |
| IRBuilder<> &B; |
| |
| /// Floating point type of the return value and the arguments of the FRem |
| /// instructions that should be expanded. |
| Type *FremTy; |
| |
|   /// Floating point type to use for the computation. This may be |
|   /// wider than \p FremTy. |
| Type *ComputeFpTy; |
| |
| /// Integer type used to hold the exponents returned by frexp. |
| Type *ExTy; |
| |
| /// How many bits of the quotient to compute per iteration of the |
| /// algorithm, stored as a value of type \p ExTy. |
| Value *Bits; |
| |
| /// Constant 1 of type \p ExTy. |
| Value *One; |
| |
| public: |
| static bool canExpandType(Type *Ty) { |
| // TODO The expansion should work for other floating point types |
| // as well, but this would require additional testing. |
| return Ty->isIEEELikeFPTy() && !Ty->isBFloatTy() && !Ty->isFP128Ty(); |
| } |
| |
| static FRemExpander create(IRBuilder<> &B, Type *Ty) { |
| assert(canExpandType(Ty)); |
| |
| // The type to use for the computation of the remainder. This may be |
| // wider than the input/result type which affects the ... |
| Type *ComputeTy = Ty; |
| // ... maximum number of iterations of the remainder computation loop |
| // to use. This value is for the case in which the computation |
| // uses the same input/result type. |
| unsigned MaxIter = 2; |
| |
| if (Ty->isHalfTy()) { |
|       // Use the wider type and fewer iterations. |
| ComputeTy = B.getFloatTy(); |
| MaxIter = 1; |
| } |
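|     // For illustration: with these choices, the per-iteration quotient width |
|     // computed below ("Bits") is 11 for half (computed in float, at most one |
|     // loop iteration), 12 for float, and 26 for double. |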
| |
| unsigned Precision = |
| llvm::APFloat::semanticsPrecision(Ty->getFltSemantics()); |
| return FRemExpander{B, Ty, Precision / MaxIter, ComputeTy}; |
| } |
| |
| /// Build the FRem expansion for the numerator \p X and the |
|   /// denominator \p Y. The type of X and Y must match \p FremTy. The |
| /// code will be generated at the insertion point of \p B and the |
| /// insertion point will be reset at exit. |
| Value *buildFRem(Value *X, Value *Y, std::optional<SimplifyQuery> &SQ) const; |
| |
| /// Build an approximate FRem expansion for the numerator \p X and |
|   /// the denominator \p Y at the insertion point of builder \p B. |
| /// The type of X and Y must match \p FremTy. |
| Value *buildApproxFRem(Value *X, Value *Y) const; |
| |
| private: |
| FRemExpander(IRBuilder<> &B, Type *FremTy, unsigned Bits, Type *ComputeFpTy) |
| : B(B), FremTy(FremTy), ComputeFpTy(ComputeFpTy), ExTy(B.getInt32Ty()), |
| Bits(ConstantInt::get(ExTy, Bits)), One(ConstantInt::get(ExTy, 1)) {}; |
| |
| Value *createRcp(Value *V, const Twine &Name) const { |
| // Leave it to later optimizations to turn this into an rcp |
| // instruction if available. |
| return B.CreateFDiv(ConstantFP::get(ComputeFpTy, 1.0), V, Name); |
| } |
| |
| // Helper function to build the UPDATE_AX code which is common to the |
| // loop body and the "final iteration". |
| Value *buildUpdateAx(Value *Ax, Value *Ay, Value *Ayinv) const { |
| // Build: |
| // float q = rint(ax * ayinv); |
| // ax = fma(-q, ay, ax); |
| // int clt = ax < 0.0f; |
| // float axp = ax + ay; |
| // ax = clt ? axp : ax; |
| Value *Q = B.CreateUnaryIntrinsic(Intrinsic::rint, B.CreateFMul(Ax, Ayinv), |
| {}, "q"); |
| Value *AxUpdate = B.CreateFMA(B.CreateFNeg(Q), Ay, Ax, {}, "ax"); |
| Value *Clt = B.CreateFCmp(CmpInst::FCMP_OLT, AxUpdate, |
| ConstantFP::getZero(ComputeFpTy), "clt"); |
| Value *Axp = B.CreateFAdd(AxUpdate, Ay, "axp"); |
| return B.CreateSelect(Clt, Axp, AxUpdate, "ax"); |
| } |
| |
| /// Build code to extract the exponent and mantissa of \p Src. |
| /// Return the exponent minus one for use as a loop bound and |
|   /// the mantissa scaled by two to the power of \p NewExp. |
| std::pair<Value *, Value *> buildExpAndPower(Value *Src, Value *NewExp, |
| const Twine &ExName, |
| const Twine &PowName) const { |
| // Build: |
| // ExName = frexp_exp(Src) - 1; |
|     //   PowName = fldexp(frexp_mant(Src), NewExp); |
| Type *Ty = Src->getType(); |
| Type *ExTy = B.getInt32Ty(); |
| Value *Frexp = B.CreateIntrinsic(Intrinsic::frexp, {Ty, ExTy}, Src); |
| Value *Mant = B.CreateExtractValue(Frexp, {0}); |
| Value *Exp = B.CreateExtractValue(Frexp, {1}); |
| |
| Exp = B.CreateSub(Exp, One, ExName); |
| Value *Pow = B.CreateLdexp(Mant, NewExp, {}, PowName); |
| |
| return {Pow, Exp}; |
| } |
| |
| /// Build the main computation of the remainder for the case in which |
| /// Ax > Ay, where Ax = |X|, Ay = |Y|, and X is the numerator and Y the |
|   /// denominator. Add the incoming edge from the computation result |
| /// to \p RetPhi. |
| void buildRemainderComputation(Value *AxInitial, Value *AyInitial, Value *X, |
| PHINode *RetPhi, FastMathFlags FMF) const { |
| IRBuilder<>::FastMathFlagGuard Guard(B); |
| B.setFastMathFlags(FMF); |
| |
| // Build: |
| // ex = frexp_exp(ax) - 1; |
| // ax = fldexp(frexp_mant(ax), bits); |
| // ey = frexp_exp(ay) - 1; |
|     //   ay = fldexp(frexp_mant(ay), 1); |
| auto [Ax, Ex] = buildExpAndPower(AxInitial, Bits, "ex", "ax"); |
| auto [Ay, Ey] = buildExpAndPower(AyInitial, One, "ey", "ay"); |
| |
| // Build: |
| // int nb = ex - ey; |
| // float ayinv = 1.0/ay; |
| Value *Nb = B.CreateSub(Ex, Ey, "nb"); |
| Value *Ayinv = createRcp(Ay, "ayinv"); |
| |
| // Build: while (nb > bits) |
| BasicBlock *PreheaderBB = B.GetInsertBlock(); |
| Function *Fun = PreheaderBB->getParent(); |
| auto *LoopBB = BasicBlock::Create(B.getContext(), "frem.loop_body", Fun); |
| auto *ExitBB = BasicBlock::Create(B.getContext(), "frem.loop_exit", Fun); |
| |
| B.CreateCondBr(B.CreateICmp(CmpInst::ICMP_SGT, Nb, Bits), LoopBB, ExitBB); |
| |
| // Build loop body: |
| // UPDATE_AX |
| // ax = fldexp(ax, bits); |
| // nb -= bits; |
| // One iteration of the loop is factored out. The code shared by |
| // the loop and this "iteration" is denoted by UPDATE_AX. |
| B.SetInsertPoint(LoopBB); |
| PHINode *NbIv = B.CreatePHI(Nb->getType(), 2, "nb_iv"); |
| NbIv->addIncoming(Nb, PreheaderBB); |
| |
| auto *AxPhi = B.CreatePHI(ComputeFpTy, 2, "ax_loop_phi"); |
| AxPhi->addIncoming(Ax, PreheaderBB); |
| |
| Value *AxPhiUpdate = buildUpdateAx(AxPhi, Ay, Ayinv); |
| AxPhiUpdate = B.CreateLdexp(AxPhiUpdate, Bits, {}, "ax_update"); |
| AxPhi->addIncoming(AxPhiUpdate, LoopBB); |
| NbIv->addIncoming(B.CreateSub(NbIv, Bits, "nb_update"), LoopBB); |
| |
| B.CreateCondBr(B.CreateICmp(CmpInst::ICMP_SGT, NbIv, Bits), LoopBB, ExitBB); |
| |
| // Build final iteration |
| // ax = fldexp(ax, nb - bits + 1); |
| // UPDATE_AX |
| B.SetInsertPoint(ExitBB); |
| |
| auto *AxPhiExit = B.CreatePHI(ComputeFpTy, 2, "ax_exit_phi"); |
| AxPhiExit->addIncoming(Ax, PreheaderBB); |
| AxPhiExit->addIncoming(AxPhi, LoopBB); |
| auto *NbExitPhi = B.CreatePHI(Nb->getType(), 2, "nb_exit_phi"); |
| NbExitPhi->addIncoming(NbIv, LoopBB); |
| NbExitPhi->addIncoming(Nb, PreheaderBB); |
| |
| Value *AxFinal = B.CreateLdexp( |
| AxPhiExit, B.CreateAdd(B.CreateSub(NbExitPhi, Bits), One), {}, "ax"); |
| AxFinal = buildUpdateAx(AxFinal, Ay, Ayinv); |
| |
| // Build: |
| // ax = fldexp(ax, ey); |
| // ret = copysign(ax,x); |
| AxFinal = B.CreateLdexp(AxFinal, Ey, {}, "ax"); |
| if (ComputeFpTy != FremTy) |
| AxFinal = B.CreateFPTrunc(AxFinal, FremTy); |
| Value *Ret = B.CreateCopySign(AxFinal, X); |
| |
| RetPhi->addIncoming(Ret, ExitBB); |
| } |
| |
| /// Build the else-branch of the conditional in the FRem |
|   /// expansion, i.e. the case in which Ax <= Ay, where Ax = |X|, Ay |
|   /// = |Y|, and X is the numerator and Y the denominator. Add the |
| /// incoming edge from the result to \p RetPhi. |
| void buildElseBranch(Value *Ax, Value *Ay, Value *X, PHINode *RetPhi) const { |
| // Build: |
| // ret = ax == ay ? copysign(0.0f, x) : x; |
| Value *ZeroWithXSign = B.CreateCopySign(ConstantFP::getZero(FremTy), X); |
| Value *Ret = B.CreateSelect(B.CreateFCmpOEQ(Ax, Ay), ZeroWithXSign, X); |
| |
| RetPhi->addIncoming(Ret, B.GetInsertBlock()); |
| } |
| |
| /// Return a value that is NaN if one of the corner cases concerning |
| /// the inputs \p X and \p Y is detected, and \p Ret otherwise. |
| Value *handleInputCornerCases(Value *Ret, Value *X, Value *Y, |
| std::optional<SimplifyQuery> &SQ, |
| bool NoInfs) const { |
| // Build: |
| // ret = (y == 0.0f || isnan(y)) ? QNAN : ret; |
| // ret = isfinite(x) ? ret : QNAN; |
| Value *Nan = ConstantFP::getQNaN(FremTy); |
| Ret = B.CreateSelect(B.CreateFCmpUEQ(Y, ConstantFP::getZero(FremTy)), Nan, |
| Ret); |
| Value *XFinite = |
| NoInfs || (SQ && isKnownNeverInfinity(X, *SQ)) |
| ? B.getTrue() |
| : B.CreateFCmpULT(B.CreateUnaryIntrinsic(Intrinsic::fabs, X), |
| ConstantFP::getInfinity(FremTy)); |
| Ret = B.CreateSelect(XFinite, Ret, Nan); |
| |
| return Ret; |
| } |
| }; |
| |
| Value *FRemExpander::buildApproxFRem(Value *X, Value *Y) const { |
| IRBuilder<>::FastMathFlagGuard Guard(B); |
| // Propagating the approximate functions flag to the |
| // division leads to an unacceptable drop in precision |
| // on AMDGPU. |
| // TODO Find out if any flags might be worth propagating. |
| B.clearFastMathFlags(); |
| |
| Value *Quot = B.CreateFDiv(X, Y); |
| Value *Trunc = B.CreateUnaryIntrinsic(Intrinsic::trunc, Quot, {}); |
| Value *Neg = B.CreateFNeg(Trunc); |
| |
| return B.CreateFMA(Neg, Y, X); |
| } |
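| // A sketch of the IR produced by buildApproxFRem above for float operands |
| // (value names are illustrative): |
| //   %q = fdiv float %x, %y |
| //   %t = call float @llvm.trunc.f32(float %q) |
| //   %n = fneg float %t |
| //   %r = call float @llvm.fma.f32(float %n, float %y, float %x) |
| // i.e. r = x - trunc(x / y) * y, an approximation of frem. |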
| |
| Value *FRemExpander::buildFRem(Value *X, Value *Y, |
| std::optional<SimplifyQuery> &SQ) const { |
| assert(X->getType() == FremTy && Y->getType() == FremTy); |
| |
| FastMathFlags FMF = B.getFastMathFlags(); |
| |
| // This function generates the following code structure: |
| // if (abs(x) > abs(y)) |
| // { ret = compute remainder } |
| // else |
| // { ret = x or 0 with sign of x } |
|   //   Adjust ret to NaN for NaN/inf inputs |
| // return ret |
| Value *Ax = B.CreateUnaryIntrinsic(Intrinsic::fabs, X, {}, "ax"); |
| Value *Ay = B.CreateUnaryIntrinsic(Intrinsic::fabs, Y, {}, "ay"); |
| if (ComputeFpTy != X->getType()) { |
| Ax = B.CreateFPExt(Ax, ComputeFpTy, "ax"); |
| Ay = B.CreateFPExt(Ay, ComputeFpTy, "ay"); |
| } |
| Value *AxAyCmp = B.CreateFCmpOGT(Ax, Ay); |
| |
| PHINode *RetPhi = B.CreatePHI(FremTy, 2, "ret"); |
| Value *Ret = RetPhi; |
| |
| // We would return NaN in all corner cases handled here. |
| // Hence, if NaNs are excluded, keep the result as it is. |
| if (!FMF.noNaNs()) |
| Ret = handleInputCornerCases(Ret, X, Y, SQ, FMF.noInfs()); |
| |
| Function *Fun = B.GetInsertBlock()->getParent(); |
| auto *ThenBB = BasicBlock::Create(B.getContext(), "frem.compute", Fun); |
| auto *ElseBB = BasicBlock::Create(B.getContext(), "frem.else", Fun); |
| SplitBlockAndInsertIfThenElse(AxAyCmp, RetPhi, &ThenBB, &ElseBB); |
| |
| auto SavedInsertPt = B.GetInsertPoint(); |
| |
| // Build remainder computation for "then" branch |
| // |
| // The ordered comparison ensures that ax and ay are not NaNs |
| // in the then-branch. Furthermore, y cannot be an infinity and the |
| // check at the end of the function ensures that the result will not |
| // be used if x is an infinity. |
| FastMathFlags ComputeFMF = FMF; |
| ComputeFMF.setNoInfs(); |
| ComputeFMF.setNoNaNs(); |
| |
| B.SetInsertPoint(ThenBB); |
|   buildRemainderComputation(Ax, Ay, X, RetPhi, ComputeFMF); |
| B.CreateBr(RetPhi->getParent()); |
| |
| // Build "else"-branch |
| B.SetInsertPoint(ElseBB); |
| buildElseBranch(Ax, Ay, X, RetPhi); |
| B.CreateBr(RetPhi->getParent()); |
| |
| B.SetInsertPoint(SavedInsertPt); |
| |
| return Ret; |
| } |
| } // namespace |
| |
| static bool expandFRem(BinaryOperator &I, std::optional<SimplifyQuery> &SQ) { |
| LLVM_DEBUG(dbgs() << "Expanding instruction: " << I << '\n'); |
| |
| Type *ReturnTy = I.getType(); |
| assert(FRemExpander::canExpandType(ReturnTy->getScalarType())); |
| |
| FastMathFlags FMF = I.getFastMathFlags(); |
| // TODO Make use of those flags for optimization? |
| FMF.setAllowReciprocal(false); |
| FMF.setAllowContract(false); |
| |
| IRBuilder<> B(&I); |
| B.setFastMathFlags(FMF); |
| B.SetCurrentDebugLocation(I.getDebugLoc()); |
| |
| Type *ElemTy = ReturnTy->getScalarType(); |
| const FRemExpander Expander = FRemExpander::create(B, ElemTy); |
| |
| Value *Ret; |
| if (ReturnTy->isFloatingPointTy()) |
| Ret = FMF.approxFunc() |
| ? Expander.buildApproxFRem(I.getOperand(0), I.getOperand(1)) |
| : Expander.buildFRem(I.getOperand(0), I.getOperand(1), SQ); |
| else { |
| auto *VecTy = cast<FixedVectorType>(ReturnTy); |
| |
| // This could use SplitBlockAndInsertForEachLane but the interface |
| // is a bit awkward for a constant number of elements and it will |
| // boil down to the same code. |
| // TODO Expand the FRem instruction only once and reuse the code. |
|     Value *Nums = I.getOperand(0); |
|     Value *Denoms = I.getOperand(1); |
|     Ret = PoisonValue::get(I.getType()); |
|     for (int Idx = 0, E = VecTy->getNumElements(); Idx != E; ++Idx) { |
|       Value *Num = B.CreateExtractElement(Nums, Idx); |
|       Value *Denom = B.CreateExtractElement(Denoms, Idx); |
|       Value *Rem = FMF.approxFunc() ? Expander.buildApproxFRem(Num, Denom) |
|                                     : Expander.buildFRem(Num, Denom, SQ); |
|       Ret = B.CreateInsertElement(Ret, Rem, Idx); |
|     } |
| } |
| |
| I.replaceAllUsesWith(Ret); |
| Ret->takeName(&I); |
| I.eraseFromParent(); |
| |
| return true; |
| } |
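| // For vector operands, e.g. `%r = frem <4 x float> %a, %b`, expandFRem above |
| // expands each lane separately and reassembles the result with insertelement; |
| // see the TODO above about expanding the instruction only once and reusing |
| // the generated code. |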
| // clang-format off: preserve formatting of the following example |
| |
| /// Generate code to convert an fp number to an integer, replacing FPToS(U)I |
| /// with the generated code. This currently generates code similar to |
| /// compiler-rt's implementations. |
| /// |
| /// An example IR generated from compiler-rt/fixsfdi.c looks like the |
| /// following: |
| /// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 { |
| /// entry: |
| /// %0 = bitcast float %a to i32 |
| /// %conv.i = zext i32 %0 to i64 |
| /// %tobool.not = icmp sgt i32 %0, -1 |
| /// %conv = select i1 %tobool.not, i64 1, i64 -1 |
| /// %and = lshr i64 %conv.i, 23 |
| /// %shr = and i64 %and, 255 |
| /// %and2 = and i64 %conv.i, 8388607 |
| /// %or = or i64 %and2, 8388608 |
| /// %cmp = icmp ult i64 %shr, 127 |
| /// br i1 %cmp, label %cleanup, label %if.end |
| /// |
| /// if.end: ; preds = %entry |
| /// %sub = add nuw nsw i64 %shr, 4294967169 |
| /// %conv5 = and i64 %sub, 4294967232 |
| /// %cmp6.not = icmp eq i64 %conv5, 0 |
| /// br i1 %cmp6.not, label %if.end12, label %if.then8 |
| /// |
| /// if.then8: ; preds = %if.end |
| ///   %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64 -9223372036854775808 |
| ///   br label %cleanup |
| /// |
| /// if.end12: ; preds = %if.end |
| /// %cmp13 = icmp ult i64 %shr, 150 |
| /// br i1 %cmp13, label %if.then15, label %if.else |
| /// |
| /// if.then15: ; preds = %if.end12 |
| /// %sub16 = sub nuw nsw i64 150, %shr |
| /// %shr17 = lshr i64 %or, %sub16 |
| /// %mul = mul nsw i64 %shr17, %conv |
| /// br label %cleanup |
| /// |
| /// if.else: ; preds = %if.end12 |
| /// %sub18 = add nsw i64 %shr, -150 |
| /// %shl = shl i64 %or, %sub18 |
| /// %mul19 = mul nsw i64 %shl, %conv |
| /// br label %cleanup |
| /// |
| /// cleanup:                                        ; preds = %entry, %if.else, %if.then15, %if.then8 |
| ///   %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], |
| ///                       [ %mul19, %if.else ], [ 0, %entry ] |
| ///   ret i64 %retval.0 |
| /// } |
| /// |
| /// Replace fp to integer with generated code. |
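| /// |
| /// For instance, expanding `fptosi fp128 %a to i256` uses BitWidth = 256, |
| /// FloatWidth = 128, FPMantissaWidth = 112, ExponentWidth = 15, and |
| /// ExponentBias = 16383 below (values derived from the code, shown for |
| /// illustration). |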
| static void expandFPToI(Instruction *FPToI) { |
| // clang-format on |
| IRBuilder<> Builder(FPToI); |
| auto *FloatVal = FPToI->getOperand(0); |
| IntegerType *IntTy = cast<IntegerType>(FPToI->getType()); |
| |
| unsigned BitWidth = FPToI->getType()->getIntegerBitWidth(); |
| unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1; |
| |
|   // FIXME: fp16's range is covered by i32. So `fptoi half` can convert |
|   // to i32 first, followed by a sext/zext to the target integer type. |
| Value *A1 = nullptr; |
| if (FloatVal->getType()->isHalfTy()) { |
| if (FPToI->getOpcode() == Instruction::FPToUI) { |
| Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getInt32Ty()); |
| A1 = Builder.CreateZExt(A0, IntTy); |
| } else { // FPToSI |
| Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getInt32Ty()); |
| A1 = Builder.CreateSExt(A0, IntTy); |
| } |
| FPToI->replaceAllUsesWith(A1); |
| FPToI->dropAllReferences(); |
| FPToI->eraseFromParent(); |
| return; |
| } |
| |
|   // fp80 conversion is implemented by first extending to fp128 and then |
|   // doing the conversion from fp128. |
| FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth; |
| unsigned FloatWidth = |
| PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits()); |
| unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1; |
| unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1; |
| Value *ImplicitBit = Builder.CreateShl( |
| Builder.getIntN(BitWidth, 1), Builder.getIntN(BitWidth, FPMantissaWidth)); |
| Value *SignificandMask = |
| Builder.CreateSub(ImplicitBit, Builder.getIntN(BitWidth, 1)); |
| Value *NegOne = Builder.CreateSExt( |
| ConstantInt::getSigned(Builder.getInt32Ty(), -1), IntTy); |
| Value *NegInf = |
| Builder.CreateShl(ConstantInt::getSigned(IntTy, 1), |
| ConstantInt::getSigned(IntTy, BitWidth - 1)); |
| |
| BasicBlock *Entry = Builder.GetInsertBlock(); |
| Function *F = Entry->getParent(); |
| Entry->setName(Twine(Entry->getName(), "fp-to-i-entry")); |
| BasicBlock *End = |
| Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup"); |
| BasicBlock *IfEnd = |
| BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end", F, End); |
| BasicBlock *IfThen5 = |
| BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then5", F, End); |
| BasicBlock *IfEnd9 = |
| BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end9", F, End); |
| BasicBlock *IfThen12 = |
| BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then12", F, End); |
| BasicBlock *IfElse = |
| BasicBlock::Create(Builder.getContext(), "fp-to-i-if-else", F, End); |
| |
| Entry->getTerminator()->eraseFromParent(); |
| |
| // entry: |
| Builder.SetInsertPoint(Entry); |
| Value *FloatVal0 = FloatVal; |
|   // fp80 conversion is implemented by first extending to fp128 and then |
|   // doing the conversion from fp128. |
| if (FloatVal->getType()->isX86_FP80Ty()) |
| FloatVal0 = |
| Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext())); |
| Value *ARep0 = |
| Builder.CreateBitCast(FloatVal0, Builder.getIntNTy(FloatWidth)); |
| Value *ARep = Builder.CreateZExt(ARep0, FPToI->getType()); |
| Value *PosOrNeg = Builder.CreateICmpSGT( |
| ARep0, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1)); |
| Value *Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1), |
| ConstantInt::getSigned(IntTy, -1)); |
| Value *And = |
| Builder.CreateLShr(ARep, Builder.getIntN(BitWidth, FPMantissaWidth)); |
| Value *And2 = Builder.CreateAnd( |
| And, Builder.getIntN(BitWidth, (1 << ExponentWidth) - 1)); |
| Value *Abs = Builder.CreateAnd(ARep, SignificandMask); |
| Value *Or = Builder.CreateOr(Abs, ImplicitBit); |
| Value *Cmp = |
| Builder.CreateICmpULT(And2, Builder.getIntN(BitWidth, ExponentBias)); |
| Builder.CreateCondBr(Cmp, End, IfEnd); |
| |
| // if.end: |
| Builder.SetInsertPoint(IfEnd); |
| Value *Add1 = Builder.CreateAdd( |
| And2, ConstantInt::getSigned( |
| IntTy, -static_cast<int64_t>(ExponentBias + BitWidth))); |
| Value *Cmp3 = Builder.CreateICmpULT( |
| Add1, ConstantInt::getSigned(IntTy, -static_cast<int64_t>(BitWidth))); |
| Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9); |
| |
| // if.then5: |
| Builder.SetInsertPoint(IfThen5); |
| Value *PosInf = Builder.CreateXor(NegOne, NegInf); |
| Value *Cond8 = Builder.CreateSelect(PosOrNeg, PosInf, NegInf); |
| Builder.CreateBr(End); |
| |
| // if.end9: |
| Builder.SetInsertPoint(IfEnd9); |
| Value *Cmp10 = Builder.CreateICmpULT( |
| And2, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth)); |
| Builder.CreateCondBr(Cmp10, IfThen12, IfElse); |
| |
| // if.then12: |
| Builder.SetInsertPoint(IfThen12); |
| Value *Sub13 = Builder.CreateSub( |
| Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth), And2); |
| Value *Shr14 = Builder.CreateLShr(Or, Sub13); |
| Value *Mul = Builder.CreateMul(Shr14, Sign); |
| Builder.CreateBr(End); |
| |
| // if.else: |
| Builder.SetInsertPoint(IfElse); |
| Value *Sub15 = Builder.CreateAdd( |
| And2, ConstantInt::getSigned( |
| IntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth))); |
| Value *Shl = Builder.CreateShl(Or, Sub15); |
| Value *Mul16 = Builder.CreateMul(Shl, Sign); |
| Builder.CreateBr(End); |
| |
| // cleanup: |
| Builder.SetInsertPoint(End, End->begin()); |
| PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 4); |
| |
| Retval0->addIncoming(Cond8, IfThen5); |
| Retval0->addIncoming(Mul, IfThen12); |
| Retval0->addIncoming(Mul16, IfElse); |
| Retval0->addIncoming(Builder.getIntN(BitWidth, 0), Entry); |
| |
| FPToI->replaceAllUsesWith(Retval0); |
| FPToI->dropAllReferences(); |
| FPToI->eraseFromParent(); |
| } |
| |
| // clang-format off: preserve formatting of the following example |
| |
| /// Generate code to convert an integer to an fp number, replacing S(U)IToFP |
| /// with the generated code. This currently generates code similar to |
| /// compiler-rt's implementations. This implementation implicitly assumes |
| /// that the integer width is larger than the fp width. |
| /// |
| /// An example IR generated from compiler-rt/floatdisf.c looks like the |
| /// following: |
| /// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 { |
| /// entry: |
| /// %cmp = icmp eq i64 %a, 0 |
| /// br i1 %cmp, label %return, label %if.end |
| /// |
| /// if.end: ; preds = %entry |
| /// %shr = ashr i64 %a, 63 |
| /// %xor = xor i64 %shr, %a |
| /// %sub = sub nsw i64 %xor, %shr |
| /// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5 |
| /// %cast = trunc i64 %0 to i32 |
| /// %sub1 = sub nuw nsw i32 64, %cast |
| /// %sub2 = xor i32 %cast, 63 |
| /// %cmp3 = icmp ult i32 %cast, 40 |
| /// br i1 %cmp3, label %if.then4, label %if.else |
| /// |
| /// if.then4: ; preds = %if.end |
| /// switch i32 %sub1, label %sw.default [ |
| /// i32 25, label %sw.bb |
| /// i32 26, label %sw.epilog |
| /// ] |
| /// |
| /// sw.bb: ; preds = %if.then4 |
| /// %shl = shl i64 %sub, 1 |
| /// br label %sw.epilog |
| /// |
| /// sw.default: ; preds = %if.then4 |
| /// %sub5 = sub nsw i64 38, %0 |
| /// %sh_prom = and i64 %sub5, 4294967295 |
| /// %shr6 = lshr i64 %sub, %sh_prom |
| /// %shr9 = lshr i64 274877906943, %0 |
| /// %and = and i64 %shr9, %sub |
| /// %cmp10 = icmp ne i64 %and, 0 |
| /// %conv11 = zext i1 %cmp10 to i64 |
| /// %or = or i64 %shr6, %conv11 |
| /// br label %sw.epilog |
| /// |
| /// sw.epilog:                                        ; preds = %sw.default, %if.then4, %sw.bb |
| ///   %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl, %sw.bb ] |
| ///   %1 = lshr i64 %a.addr.0, 2 |
| ///   %2 = and i64 %1, 1 |
| ///   %or16 = or i64 %2, %a.addr.0 |
| ///   %inc = add nsw i64 %or16, 1 |
| ///   %3 = and i64 %inc, 67108864 |
| /// %tobool.not = icmp eq i64 %3, 0 |
| /// %spec.select.v = select i1 %tobool.not, i64 2, i64 3 |
| /// %spec.select = ashr i64 %inc, %spec.select.v |
| /// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1 |
| /// br label %if.end26 |
| /// |
| /// if.else: ; preds = %if.end |
| /// %sub23 = add nuw nsw i64 %0, 4294967256 |
| /// %sh_prom24 = and i64 %sub23, 4294967295 |
| /// %shl25 = shl i64 %sub, %sh_prom24 |
| /// br label %if.end26 |
| /// |
| /// if.end26:                                        ; preds = %sw.epilog, %if.else |
| /// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ] |
| /// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ] |
| /// %conv27 = trunc i64 %shr to i32 |
| /// %and28 = and i32 %conv27, -2147483648 |
| /// %add = shl nuw nsw i32 %e.0, 23 |
| /// %shl29 = add nuw nsw i32 %add, 1065353216 |
| /// %conv31 = trunc i64 %a.addr.1 to i32 |
| /// %and32 = and i32 %conv31, 8388607 |
| /// %or30 = or i32 %and32, %and28 |
| /// %or33 = or i32 %or30, %shl29 |
| /// %4 = bitcast i32 %or33 to float |
| /// br label %return |
| /// |
| /// return:                                          ; preds = %entry, %if.end26 |
| /// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ] |
| /// ret float %retval.0 |
| /// } |
| /// |
| /// Replace integer to fp with generated code. |
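| /// |
| /// For instance, expanding `sitofp i128 %a to double` uses BitWidth = 128, |
| /// FPMantissaWidth = 52, FloatWidth = 64, and IsSigned = true below (values |
| /// derived from the code, shown for illustration). |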
| static void expandIToFP(Instruction *IToFP) { |
| // clang-format on |
| IRBuilder<> Builder(IToFP); |
| auto *IntVal = IToFP->getOperand(0); |
| IntegerType *IntTy = cast<IntegerType>(IntVal->getType()); |
| |
| unsigned BitWidth = IntVal->getType()->getIntegerBitWidth(); |
| unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1; |
|   // fp80 conversion is implemented by converting to fp128 first, followed |
|   // by an fptrunc to fp80. |
| FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth; |
|   // FIXME: As there are no related builtins added in compiler-rt, the |
|   // fp32 <-> fp16 lib calls are currently used for the implementation. |
| FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth; |
| FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth; |
| unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth); |
| bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP; |
| |
| assert(BitWidth > FloatWidth && "Unexpected conversion. expandIToFP() " |
| "assumes integer width is larger than fp."); |
| |
| Value *Temp1 = |
| Builder.CreateShl(Builder.getIntN(BitWidth, 1), |
| Builder.getIntN(BitWidth, FPMantissaWidth + 3)); |
| |
| BasicBlock *Entry = Builder.GetInsertBlock(); |
| Function *F = Entry->getParent(); |
| Entry->setName(Twine(Entry->getName(), "itofp-entry")); |
| BasicBlock *End = |
| Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return"); |
| BasicBlock *IfEnd = |
| BasicBlock::Create(Builder.getContext(), "itofp-if-end", F, End); |
| BasicBlock *IfThen4 = |
| BasicBlock::Create(Builder.getContext(), "itofp-if-then4", F, End); |
| BasicBlock *SwBB = |
| BasicBlock::Create(Builder.getContext(), "itofp-sw-bb", F, End); |
| BasicBlock *SwDefault = |
| BasicBlock::Create(Builder.getContext(), "itofp-sw-default", F, End); |
| BasicBlock *SwEpilog = |
| BasicBlock::Create(Builder.getContext(), "itofp-sw-epilog", F, End); |
| BasicBlock *IfThen20 = |
| BasicBlock::Create(Builder.getContext(), "itofp-if-then20", F, End); |
| BasicBlock *IfElse = |
| BasicBlock::Create(Builder.getContext(), "itofp-if-else", F, End); |
| BasicBlock *IfEnd26 = |
| BasicBlock::Create(Builder.getContext(), "itofp-if-end26", F, End); |
| |
| Entry->getTerminator()->eraseFromParent(); |
| |
| Function *CTLZ = |
| Intrinsic::getOrInsertDeclaration(F->getParent(), Intrinsic::ctlz, IntTy); |
| ConstantInt *True = Builder.getTrue(); |
| |
| // entry: |
| Builder.SetInsertPoint(Entry); |
| Value *Cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0)); |
| Builder.CreateCondBr(Cmp, End, IfEnd); |
| |
| // if.end: |
| Builder.SetInsertPoint(IfEnd); |
| Value *Shr = |
| Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1)); |
| Value *Xor = Builder.CreateXor(Shr, IntVal); |
| Value *Sub = Builder.CreateSub(Xor, Shr); |
| Value *Call = Builder.CreateCall(CTLZ, {IsSigned ? Sub : IntVal, True}); |
| Value *Cast = Builder.CreateTrunc(Call, Builder.getInt32Ty()); |
| int BitWidthNew = FloatWidth == 128 ? BitWidth : 32; |
| Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth), |
| FloatWidth == 128 ? Call : Cast); |
| Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1), |
| FloatWidth == 128 ? Call : Cast); |
| Value *Cmp3 = Builder.CreateICmpSGT( |
| Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1)); |
| Builder.CreateCondBr(Cmp3, IfThen4, IfElse); |
| |
| // if.then4: |
| Builder.SetInsertPoint(IfThen4); |
| llvm::SwitchInst *SI = Builder.CreateSwitch(Sub1, SwDefault); |
| SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB); |
| SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog); |
| |
| // sw.bb: |
| Builder.SetInsertPoint(SwBB); |
| Value *Shl = |
| Builder.CreateShl(IsSigned ? Sub : IntVal, Builder.getIntN(BitWidth, 1)); |
| Builder.CreateBr(SwEpilog); |
| |
| // sw.default: |
| Builder.SetInsertPoint(SwDefault); |
| Value *Sub5 = Builder.CreateSub( |
| Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3), |
| FloatWidth == 128 ? Call : Cast); |
| Value *ShProm = Builder.CreateZExt(Sub5, IntTy); |
| Value *Shr6 = Builder.CreateLShr(IsSigned ? Sub : IntVal, |
| FloatWidth == 128 ? Sub5 : ShProm); |
| Value *Sub8 = |
| Builder.CreateAdd(FloatWidth == 128 ? Call : Cast, |
| Builder.getIntN(BitWidthNew, FPMantissaWidth + 3)); |
| Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy); |
| Value *Shr9 = Builder.CreateLShr(ConstantInt::getSigned(IntTy, -1), |
| FloatWidth == 128 ? Sub8 : ShProm9); |
| Value *And = Builder.CreateAnd(Shr9, IsSigned ? Sub : IntVal); |
| Value *Cmp10 = Builder.CreateICmpNE(And, Builder.getIntN(BitWidth, 0)); |
| Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy); |
| Value *Or = Builder.CreateOr(Shr6, Conv11); |
| Builder.CreateBr(SwEpilog); |
| |
| // sw.epilog: |
| Builder.SetInsertPoint(SwEpilog); |
| PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3); |
| AAddr0->addIncoming(Or, SwDefault); |
| AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4); |
| AAddr0->addIncoming(Shl, SwBB); |
| Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty()); |
| Value *A1 = Builder.CreateLShr(A0, Builder.getInt32(2)); |
| Value *A2 = Builder.CreateAnd(A1, Builder.getInt32(1)); |
| Value *Conv16 = Builder.CreateZExt(A2, IntTy); |
| Value *Or17 = Builder.CreateOr(AAddr0, Conv16); |
| Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1)); |
| Value *Shr18 = nullptr; |
| if (IsSigned) |
| Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 2)); |
| else |
| Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 2)); |
| Value *A3 = Builder.CreateAnd(Inc, Temp1, "a3"); |
| Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(BitWidth, 0)); |
| Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth)); |
| Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(BitWidth, 32)); |
| Value *ExtractT64 = nullptr; |
| if (FloatWidth > 80) |
| ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty()); |
| else |
| ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty()); |
| Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20); |
| |
|   // if.then20: |
| Builder.SetInsertPoint(IfThen20); |
| Value *Shr21 = nullptr; |
| if (IsSigned) |
| Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 3)); |
| else |
| Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 3)); |
| Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth)); |
| Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32)); |
| Value *ExtractT62 = nullptr; |
| if (FloatWidth > 80) |
| ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getInt64Ty()); |
| else |
| ExtractT62 = Builder.CreateTrunc(Extract, Builder.getInt32Ty()); |
| Builder.CreateBr(IfEnd26); |
| |
| // if.else: |
| Builder.SetInsertPoint(IfElse); |
|   Value *Sub24 = Builder.CreateAdd( |
|       FloatWidth == 128 ? Call : Cast, |
|       ConstantInt::getSigned( |
|           Builder.getIntNTy(BitWidthNew), |
|           -static_cast<int64_t>(BitWidth - FPMantissaWidth - 1))); |
| Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy); |
| Value *Shl26 = Builder.CreateShl(IsSigned ? Sub : IntVal, |
| FloatWidth == 128 ? Sub24 : ShProm25); |
| Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth)); |
| Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32)); |
| Value *ExtractT66 = nullptr; |
| if (FloatWidth > 80) |
| ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty()); |
| else |
| ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty()); |
| Builder.CreateBr(IfEnd26); |
| |
| // if.end26: |
| Builder.SetInsertPoint(IfEnd26); |
| PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3); |
| AAddr1Off0->addIncoming(ExtractT, IfThen20); |
| AAddr1Off0->addIncoming(ExtractT60, SwEpilog); |
| AAddr1Off0->addIncoming(ExtractT61, IfElse); |
| PHINode *AAddr1Off32 = nullptr; |
| if (FloatWidth > 32) { |
| AAddr1Off32 = |
| Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3); |
| AAddr1Off32->addIncoming(ExtractT62, IfThen20); |
| AAddr1Off32->addIncoming(ExtractT64, SwEpilog); |
| AAddr1Off32->addIncoming(ExtractT66, IfElse); |
| } |
| PHINode *E0 = nullptr; |
| if (FloatWidth <= 80) { |
| E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3); |
| E0->addIncoming(Sub1, IfThen20); |
| E0->addIncoming(Sub2, SwEpilog); |
| E0->addIncoming(Sub2, IfElse); |
| } |
| Value *And29 = nullptr; |
| if (FloatWidth > 80) { |
| Value *Temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1), |
| Builder.getIntN(BitWidth, 63)); |
| And29 = Builder.CreateAnd(Shr, Temp2, "and29"); |
| } else { |
| Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getInt32Ty()); |
| And29 = Builder.CreateAnd( |
| Conv28, ConstantInt::getSigned(Builder.getInt32Ty(), 0x80000000)); |
| } |
| unsigned TempMod = FPMantissaWidth % 32; |
| Value *And34 = nullptr; |
| Value *Shl30 = nullptr; |
| if (FloatWidth > 80) { |
| TempMod += 32; |
| Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getInt64(TempMod)); |
| Shl30 = Builder.CreateAdd( |
| Add, Builder.getInt64(((1ull << (62ull - TempMod)) - 1ull) << TempMod)); |
| And34 = Builder.CreateZExt(Shl30, Builder.getInt128Ty()); |
| } else { |
| Value *Add = Builder.CreateShl(E0, Builder.getInt32(TempMod)); |
| Shl30 = Builder.CreateAdd( |
| Add, Builder.getInt32(((1 << (30 - TempMod)) - 1) << TempMod)); |
| And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0, |
| Builder.getInt32((1 << TempMod) - 1)); |
| } |
| Value *Or35 = nullptr; |
| if (FloatWidth > 80) { |
| Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getInt128Ty()); |
| Value *Or31 = Builder.CreateOr(And29Trunc, And34); |
| Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64)); |
| Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1), |
| Builder.getIntN(128, FPMantissaWidth)); |
| Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1)); |
| Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4); |
| Or35 = Builder.CreateOr(Or34, A6); |
| } else { |
| Value *Or31 = Builder.CreateOr(And34, And29); |
| Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30); |
| } |
| Value *A4 = nullptr; |
| if (IToFP->getType()->isDoubleTy()) { |
| Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth)); |
| Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32)); |
| Value *And1 = |
| Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF)); |
| Value *Or1 = Builder.CreateOr(Shl1, And1); |
| A4 = Builder.CreateBitCast(Or1, IToFP->getType()); |
| } else if (IToFP->getType()->isX86_FP80Ty()) { |
| Value *A40 = |
| Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext())); |
| A4 = Builder.CreateFPTrunc(A40, IToFP->getType()); |
| } else if (IToFP->getType()->isHalfTy() || IToFP->getType()->isBFloatTy()) { |
|     // Deal with the "half" case. This is a workaround, since there is |
|     // currently no floattihf.c in compiler-rt to refer to. |
| Value *A40 = |
| Builder.CreateBitCast(Or35, Type::getFloatTy(Builder.getContext())); |
| A4 = Builder.CreateFPTrunc(A40, IToFP->getType()); |
| } else // float type |
| A4 = Builder.CreateBitCast(Or35, IToFP->getType()); |
| Builder.CreateBr(End); |
| |
| // return: |
| Builder.SetInsertPoint(End, End->begin()); |
| PHINode *Retval0 = Builder.CreatePHI(IToFP->getType(), 2); |
| Retval0->addIncoming(A4, IfEnd26); |
| Retval0->addIncoming(ConstantFP::getZero(IToFP->getType(), false), Entry); |
| |
| IToFP->replaceAllUsesWith(Retval0); |
| IToFP->dropAllReferences(); |
| IToFP->eraseFromParent(); |
| } |
| |
| static void scalarize(Instruction *I, SmallVectorImpl<Instruction *> &Replace) { |
| VectorType *VTy = cast<FixedVectorType>(I->getType()); |
| |
| IRBuilder<> Builder(I); |
| |
| unsigned NumElements = VTy->getElementCount().getFixedValue(); |
| Value *Result = PoisonValue::get(VTy); |
| for (unsigned Idx = 0; Idx < NumElements; ++Idx) { |
| Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx); |
| Value *Cast = Builder.CreateCast(cast<CastInst>(I)->getOpcode(), Ext, |
| I->getType()->getScalarType()); |
| Result = Builder.CreateInsertElement(Result, Cast, Idx); |
| if (isa<Instruction>(Cast)) |
| Replace.push_back(cast<Instruction>(Cast)); |
| } |
| I->replaceAllUsesWith(Result); |
| I->dropAllReferences(); |
| I->eraseFromParent(); |
| } |
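| // For example, scalarize() above turns |
| //   %r = fptosi <2 x double> %v to <2 x i256> |
| // into two extractelement/fptosi/insertelement sequences; the scalar casts |
| // are collected in Replace so that they get expanded afterwards. |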
| |
| // This covers all floating point types; more than we need here. |
| // TODO Move somewhere else for general use? |
| /// Return the Libcall for a frem instruction of type \p Ty. |
| static RTLIB::Libcall fremToLibcall(Type *Ty) { |
| assert(Ty->isFloatingPointTy()); |
| if (Ty->isFloatTy() || Ty->is16bitFPTy()) |
| return RTLIB::REM_F32; |
| if (Ty->isDoubleTy()) |
| return RTLIB::REM_F64; |
| if (Ty->isFP128Ty()) |
| return RTLIB::REM_F128; |
| if (Ty->isX86_FP80Ty()) |
| return RTLIB::REM_F80; |
| if (Ty->isPPC_FP128Ty()) |
| return RTLIB::REM_PPCF128; |
| |
| llvm_unreachable("Unknown floating point type"); |
| } |
| |
| /// Return true if, according to \p TLI, the target either directly |
| /// supports the frem instruction for type \p Ty, has a custom lowering, |
| /// or uses a libcall. |
| static bool targetSupportsFrem(const TargetLowering &TLI, Type *Ty) { |
| if (!TLI.isOperationExpand(ISD::FREM, EVT::getEVT(Ty))) |
| return true; |
| |
| return TLI.getLibcallName(fremToLibcall(Ty->getScalarType())); |
| } |
| |
| static bool runImpl(Function &F, const TargetLowering &TLI, |
| AssumptionCache *AC) { |
| SmallVector<Instruction *, 4> Replace; |
| SmallVector<Instruction *, 4> ReplaceVector; |
| bool Modified = false; |
| |
| unsigned MaxLegalFpConvertBitWidth = |
| TLI.getMaxLargeFPConvertBitWidthSupported(); |
| if (ExpandFpConvertBits != llvm::IntegerType::MAX_INT_BITS) |
| MaxLegalFpConvertBitWidth = ExpandFpConvertBits; |
| |
| if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS) |
| return false; |
| |
| for (auto &I : instructions(F)) { |
| switch (I.getOpcode()) { |
| case Instruction::FRem: { |
| Type *Ty = I.getType(); |
| // TODO: This pass doesn't handle scalable vectors. |
| if (Ty->isScalableTy()) |
| continue; |
| |
| if (targetSupportsFrem(TLI, Ty) || |
| !FRemExpander::canExpandType(Ty->getScalarType())) |
| continue; |
| |
| Replace.push_back(&I); |
| Modified = true; |
| |
| break; |
| } |
| case Instruction::FPToUI: |
| case Instruction::FPToSI: { |
| // TODO: This pass doesn't handle scalable vectors. |
| if (I.getOperand(0)->getType()->isScalableTy()) |
| continue; |
| |
| auto *IntTy = cast<IntegerType>(I.getType()->getScalarType()); |
| if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth) |
| continue; |
| |
| if (I.getOperand(0)->getType()->isVectorTy()) |
| ReplaceVector.push_back(&I); |
| else |
| Replace.push_back(&I); |
| Modified = true; |
| break; |
| } |
| case Instruction::UIToFP: |
| case Instruction::SIToFP: { |
| // TODO: This pass doesn't handle scalable vectors. |
| if (I.getOperand(0)->getType()->isScalableTy()) |
| continue; |
| |
| auto *IntTy = |
| cast<IntegerType>(I.getOperand(0)->getType()->getScalarType()); |
| if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth) |
| continue; |
| |
| if (I.getOperand(0)->getType()->isVectorTy()) |
| ReplaceVector.push_back(&I); |
| else |
| Replace.push_back(&I); |
| Modified = true; |
| break; |
| } |
| default: |
| break; |
| } |
| } |
| |
| while (!ReplaceVector.empty()) { |
| Instruction *I = ReplaceVector.pop_back_val(); |
| scalarize(I, Replace); |
| } |
| |
| if (Replace.empty()) |
| return false; |
| |
| while (!Replace.empty()) { |
| Instruction *I = Replace.pop_back_val(); |
| if (I->getOpcode() == Instruction::FRem) { |
| auto SQ = [&]() -> std::optional<SimplifyQuery> { |
| if (AC) { |
| auto Res = std::make_optional<SimplifyQuery>( |
| I->getModule()->getDataLayout(), I); |
| Res->AC = AC; |
| return Res; |
| } |
| return {}; |
| }(); |
| |
| expandFRem(cast<BinaryOperator>(*I), SQ); |
| } else if (I->getOpcode() == Instruction::FPToUI || |
| I->getOpcode() == Instruction::FPToSI) { |
| expandFPToI(I); |
| } else { |
| expandIToFP(I); |
| } |
| } |
| |
| return Modified; |
| } |
| |
| namespace { |
| class ExpandFpLegacyPass : public FunctionPass { |
| CodeGenOptLevel OptLevel; |
| |
| public: |
| static char ID; |
| |
| ExpandFpLegacyPass(CodeGenOptLevel OptLevel) |
| : FunctionPass(ID), OptLevel(OptLevel) { |
| initializeExpandFpLegacyPassPass(*PassRegistry::getPassRegistry()); |
| } |
| |
| ExpandFpLegacyPass() : ExpandFpLegacyPass(CodeGenOptLevel::None) {}; |
| |
| bool runOnFunction(Function &F) override { |
| auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); |
| auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering(); |
| AssumptionCache *AC = nullptr; |
| |
| if (OptLevel != CodeGenOptLevel::None && !F.hasOptNone()) |
| AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); |
| return runImpl(F, *TLI, AC); |
| } |
| |
| void getAnalysisUsage(AnalysisUsage &AU) const override { |
| AU.addRequired<TargetPassConfig>(); |
| if (OptLevel != CodeGenOptLevel::None) |
| AU.addRequired<AssumptionCacheTracker>(); |
| AU.addPreserved<AAResultsWrapperPass>(); |
| AU.addPreserved<GlobalsAAWrapperPass>(); |
| } |
| }; |
| } // namespace |
| |
| ExpandFpPass::ExpandFpPass(const TargetMachine *TM, CodeGenOptLevel OptLevel) |
| : TM(TM), OptLevel(OptLevel) {} |
| |
| void ExpandFpPass::printPipeline( |
| raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { |
| static_cast<PassInfoMixin<ExpandFpPass> *>(this)->printPipeline( |
| OS, MapClassName2PassName); |
| OS << '<'; |
| OS << "O" << (int)OptLevel; |
| OS << '>'; |
| } |
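| // With the above, the pass prints as, e.g., "expand-fp<O2>" in pipeline |
| // dumps (assuming the pass is registered under the name "expand-fp"). |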
| |
| PreservedAnalyses ExpandFpPass::run(Function &F, FunctionAnalysisManager &FAM) { |
| const TargetSubtargetInfo *STI = TM->getSubtargetImpl(F); |
| auto &TLI = *STI->getTargetLowering(); |
| AssumptionCache *AC = nullptr; |
| if (OptLevel != CodeGenOptLevel::None) |
| AC = &FAM.getResult<AssumptionAnalysis>(F); |
| return runImpl(F, TLI, AC) ? PreservedAnalyses::none() |
| : PreservedAnalyses::all(); |
| } |
| |
| char ExpandFpLegacyPass::ID = 0; |
| INITIALIZE_PASS_BEGIN(ExpandFpLegacyPass, "expand-fp", |
| "Expand certain fp instructions", false, false) |
| INITIALIZE_PASS_END(ExpandFpLegacyPass, "expand-fp", "Expand fp", false, false) |
| |
| FunctionPass *llvm::createExpandFpPass(CodeGenOptLevel OptLevel) { |
| return new ExpandFpLegacyPass(OptLevel); |
| } |