| //===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This pass looks for instructions that can be replaced by a Test Data Class |
| // instruction, and replaces them when profitable. |
| // |
| // Roughly, the following rules are recognized: |
| // |
| // 1: fcmp pred X, 0 -> tdc X, mask |
| // 2: fcmp pred X, +-inf -> tdc X, mask |
| // 3: fcmp pred X, +-minnorm -> tdc X, mask |
| // 4: tdc (fabs X), mask -> tdc X, newmask |
| // 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit] |
| // 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask |
| // 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask |
| // 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2) |
| // 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2) |
| // 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2) |
| // |
| // The pass works in 4 steps: |
| // |
| // 1. All fcmp and icmp instructions in a function are checked for a match |
| // with rules 1-3 and 5-7. Their TDC equivalents are stored in |
| // the ConvertedInsts mapping. If the operand of a fcmp instruction is |
| // a fabs, it's also folded according to rule 4. |
| // 2. All and/or/xor i1 instructions whose both operands have been already |
| // mapped are mapped according to rules 8-10. LogicOpsWorklist is used |
| // as a queue of instructions to check. |
| // 3. All mapped instructions that are considered worthy of conversion (ie. |
| // replacing them will actually simplify the final code) are replaced |
| // with a call to the s390.tdc intrinsic. |
| // 4. All intermediate results of replaced instructions are removed if unused. |
| // |
| // Instructions that match rules 1-3 are considered unworthy of conversion |
| // on their own (since a comparison instruction is superior), but are mapped |
| // in the hopes of folding the result using rules 4 and 8-10 (likely removing |
| // the original comparison in the process). |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "SystemZ.h" |
| #include "llvm/ADT/MapVector.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/IR/IRBuilder.h" |
| #include "llvm/IR/InstIterator.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/IR/IntrinsicInst.h" |
| #include "llvm/IR/LegacyPassManager.h" |
| #include "llvm/IR/Module.h" |
| #include <deque> |
| #include <set> |
| |
| using namespace llvm; |
| |
| namespace llvm { |
| void initializeSystemZTDCPassPass(PassRegistry&); |
| } |
| |
| namespace { |
| |
| class SystemZTDCPass : public FunctionPass { |
| public: |
| static char ID; |
| SystemZTDCPass() : FunctionPass(ID) { |
| initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry()); |
| } |
| |
| bool runOnFunction(Function &F) override; |
| private: |
| // Maps seen instructions that can be mapped to a TDC, values are |
| // (TDC operand, TDC mask, worthy flag) triples. |
| MapVector<Instruction *, std::tuple<Value *, int, bool>> ConvertedInsts; |
| // The queue of and/or/xor i1 instructions to be potentially folded. |
| std::vector<BinaryOperator *> LogicOpsWorklist; |
| // Instructions matched while folding, to be removed at the end if unused. |
| std::set<Instruction *> PossibleJunk; |
| |
| // Tries to convert a fcmp instruction. |
| void convertFCmp(CmpInst &I); |
| |
| // Tries to convert an icmp instruction. |
| void convertICmp(CmpInst &I); |
| |
| // Tries to convert an i1 and/or/xor instruction, whose both operands |
| // have been already converted. |
| void convertLogicOp(BinaryOperator &I); |
| |
| // Marks an instruction as converted - adds it to ConvertedInsts and adds |
| // any and/or/xor i1 users to the queue. |
| void converted(Instruction *I, Value *V, int Mask, bool Worthy) { |
| ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy); |
| auto &M = *I->getFunction()->getParent(); |
| auto &Ctx = M.getContext(); |
| for (auto *U : I->users()) { |
| auto *LI = dyn_cast<BinaryOperator>(U); |
| if (LI && LI->getType() == Type::getInt1Ty(Ctx) && |
| (LI->getOpcode() == Instruction::And || |
| LI->getOpcode() == Instruction::Or || |
| LI->getOpcode() == Instruction::Xor)) { |
| LogicOpsWorklist.push_back(LI); |
| } |
| } |
| } |
| }; |
| |
| } // end anonymous namespace |
| |
| char SystemZTDCPass::ID = 0; |
| INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc", |
| "SystemZ Test Data Class optimization", false, false) |
| |
| FunctionPass *llvm::createSystemZTDCPass() { |
| return new SystemZTDCPass(); |
| } |
| |
| void SystemZTDCPass::convertFCmp(CmpInst &I) { |
| Value *Op0 = I.getOperand(0); |
| auto *Const = dyn_cast<ConstantFP>(I.getOperand(1)); |
| auto Pred = I.getPredicate(); |
| // Only comparisons with consts are interesting. |
| if (!Const) |
| return; |
| // Compute the smallest normal number (and its negation). |
| auto &Sem = Op0->getType()->getFltSemantics(); |
| APFloat Smallest = APFloat::getSmallestNormalized(Sem); |
| APFloat NegSmallest = Smallest; |
| NegSmallest.changeSign(); |
| // Check if Const is one of our recognized consts. |
| int WhichConst; |
| if (Const->isZero()) { |
| // All comparisons with 0 can be converted. |
| WhichConst = 0; |
| } else if (Const->isInfinity()) { |
| // Likewise for infinities. |
| WhichConst = Const->isNegative() ? 2 : 1; |
| } else if (Const->isExactlyValue(Smallest)) { |
| // For Smallest, we cannot do EQ separately from GT. |
| if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE && |
| (Pred & CmpInst::FCMP_OGE) != 0) |
| return; |
| WhichConst = 3; |
| } else if (Const->isExactlyValue(NegSmallest)) { |
| // Likewise for NegSmallest, we cannot do EQ separately from LT. |
| if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE && |
| (Pred & CmpInst::FCMP_OLE) != 0) |
| return; |
| WhichConst = 4; |
| } else { |
| // Not one of our special constants. |
| return; |
| } |
| // Partial masks to use for EQ, GT, LT, UN comparisons, respectively. |
| static const int Masks[][4] = { |
| { // 0 |
| SystemZ::TDCMASK_ZERO, // eq |
| SystemZ::TDCMASK_POSITIVE, // gt |
| SystemZ::TDCMASK_NEGATIVE, // lt |
| SystemZ::TDCMASK_NAN, // un |
| }, |
| { // inf |
| SystemZ::TDCMASK_INFINITY_PLUS, // eq |
| 0, // gt |
| (SystemZ::TDCMASK_ZERO | |
| SystemZ::TDCMASK_NEGATIVE | |
| SystemZ::TDCMASK_NORMAL_PLUS | |
| SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt |
| SystemZ::TDCMASK_NAN, // un |
| }, |
| { // -inf |
| SystemZ::TDCMASK_INFINITY_MINUS, // eq |
| (SystemZ::TDCMASK_ZERO | |
| SystemZ::TDCMASK_POSITIVE | |
| SystemZ::TDCMASK_NORMAL_MINUS | |
| SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt |
| 0, // lt |
| SystemZ::TDCMASK_NAN, // un |
| }, |
| { // minnorm |
| 0, // eq (unsupported) |
| (SystemZ::TDCMASK_NORMAL_PLUS | |
| SystemZ::TDCMASK_INFINITY_PLUS), // gt (actually ge) |
| (SystemZ::TDCMASK_ZERO | |
| SystemZ::TDCMASK_NEGATIVE | |
| SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt |
| SystemZ::TDCMASK_NAN, // un |
| }, |
| { // -minnorm |
| 0, // eq (unsupported) |
| (SystemZ::TDCMASK_ZERO | |
| SystemZ::TDCMASK_POSITIVE | |
| SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt |
| (SystemZ::TDCMASK_NORMAL_MINUS | |
| SystemZ::TDCMASK_INFINITY_MINUS), // lt (actually le) |
| SystemZ::TDCMASK_NAN, // un |
| } |
| }; |
| // Construct the mask as a combination of the partial masks. |
| int Mask = 0; |
| if (Pred & CmpInst::FCMP_OEQ) |
| Mask |= Masks[WhichConst][0]; |
| if (Pred & CmpInst::FCMP_OGT) |
| Mask |= Masks[WhichConst][1]; |
| if (Pred & CmpInst::FCMP_OLT) |
| Mask |= Masks[WhichConst][2]; |
| if (Pred & CmpInst::FCMP_UNO) |
| Mask |= Masks[WhichConst][3]; |
| // A lone fcmp is unworthy of tdc conversion on its own, but may become |
| // worthy if combined with fabs. |
| bool Worthy = false; |
| if (CallInst *CI = dyn_cast<CallInst>(Op0)) { |
| Function *F = CI->getCalledFunction(); |
| if (F && F->getIntrinsicID() == Intrinsic::fabs) { |
| // Fold with fabs - adjust the mask appropriately. |
| Mask &= SystemZ::TDCMASK_PLUS; |
| Mask |= Mask >> 1; |
| Op0 = CI->getArgOperand(0); |
| // A combination of fcmp with fabs is a win, unless the constant |
| // involved is 0 (which is handled by later passes). |
| Worthy = WhichConst != 0; |
| PossibleJunk.insert(CI); |
| } |
| } |
| converted(&I, Op0, Mask, Worthy); |
| } |
| |
| void SystemZTDCPass::convertICmp(CmpInst &I) { |
| Value *Op0 = I.getOperand(0); |
| auto *Const = dyn_cast<ConstantInt>(I.getOperand(1)); |
| auto Pred = I.getPredicate(); |
| // All our icmp rules involve comparisons with consts. |
| if (!Const) |
| return; |
| if (auto *Cast = dyn_cast<BitCastInst>(Op0)) { |
| // Check for icmp+bitcast used for signbit. |
| if (!Cast->getSrcTy()->isFloatTy() && |
| !Cast->getSrcTy()->isDoubleTy() && |
| !Cast->getSrcTy()->isFP128Ty()) |
| return; |
| Value *V = Cast->getOperand(0); |
| int Mask; |
| if (Pred == CmpInst::ICMP_SLT && Const->isZero()) { |
| // icmp slt (bitcast X), 0 - set if sign bit true |
| Mask = SystemZ::TDCMASK_MINUS; |
| } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) { |
| // icmp sgt (bitcast X), -1 - set if sign bit false |
| Mask = SystemZ::TDCMASK_PLUS; |
| } else { |
| // Not a sign bit check. |
| return; |
| } |
| PossibleJunk.insert(Cast); |
| converted(&I, V, Mask, true); |
| } else if (auto *CI = dyn_cast<CallInst>(Op0)) { |
| // Check if this is a pre-existing call of our tdc intrinsic. |
| Function *F = CI->getCalledFunction(); |
| if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc) |
| return; |
| if (!Const->isZero()) |
| return; |
| Value *V = CI->getArgOperand(0); |
| auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); |
| // Bail if the mask is not a constant. |
| if (!MaskC) |
| return; |
| int Mask = MaskC->getZExtValue(); |
| Mask &= SystemZ::TDCMASK_ALL; |
| if (Pred == CmpInst::ICMP_NE) { |
| // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC |
| } else if (Pred == CmpInst::ICMP_EQ) { |
| // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask |
| Mask ^= SystemZ::TDCMASK_ALL; |
| } else { |
| // An unknown comparison - ignore. |
| return; |
| } |
| PossibleJunk.insert(CI); |
| converted(&I, V, Mask, false); |
| } |
| } |
| |
| void SystemZTDCPass::convertLogicOp(BinaryOperator &I) { |
| Value *Op0, *Op1; |
| int Mask0, Mask1; |
| bool Worthy0, Worthy1; |
| std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))]; |
| std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))]; |
| if (Op0 != Op1) |
| return; |
| int Mask; |
| switch (I.getOpcode()) { |
| case Instruction::And: |
| Mask = Mask0 & Mask1; |
| break; |
| case Instruction::Or: |
| Mask = Mask0 | Mask1; |
| break; |
| case Instruction::Xor: |
| Mask = Mask0 ^ Mask1; |
| break; |
| default: |
| llvm_unreachable("Unknown op in convertLogicOp"); |
| } |
| converted(&I, Op0, Mask, true); |
| } |
| |
| bool SystemZTDCPass::runOnFunction(Function &F) { |
| ConvertedInsts.clear(); |
| LogicOpsWorklist.clear(); |
| PossibleJunk.clear(); |
| |
| // Look for icmp+fcmp instructions. |
| for (auto &I : instructions(F)) { |
| if (I.getOpcode() == Instruction::FCmp) |
| convertFCmp(cast<CmpInst>(I)); |
| else if (I.getOpcode() == Instruction::ICmp) |
| convertICmp(cast<CmpInst>(I)); |
| } |
| |
| // If none found, bail already. |
| if (ConvertedInsts.empty()) |
| return false; |
| |
| // Process the queue of logic instructions. |
| while (!LogicOpsWorklist.empty()) { |
| BinaryOperator *Op = LogicOpsWorklist.back(); |
| LogicOpsWorklist.pop_back(); |
| // If both operands mapped, and the instruction itself not yet mapped, |
| // convert it. |
| if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) && |
| ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) && |
| !ConvertedInsts.count(Op)) |
| convertLogicOp(*Op); |
| } |
| |
| // Time to actually replace the instructions. Do it in the reverse order |
| // of finding them, since there's a good chance the earlier ones will be |
| // unused (due to being folded into later ones). |
| Module &M = *F.getParent(); |
| auto &Ctx = M.getContext(); |
| Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0); |
| bool MadeChange = false; |
| for (auto &It : reverse(ConvertedInsts)) { |
| Instruction *I = It.first; |
| Value *V; |
| int Mask; |
| bool Worthy; |
| std::tie(V, Mask, Worthy) = It.second; |
| if (!I->user_empty()) { |
| // If used and unworthy of conversion, skip it. |
| if (!Worthy) |
| continue; |
| // Call the intrinsic, compare result with 0. |
| Function *TDCFunc = |
| Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc, V->getType()); |
| IRBuilder<> IRB(I); |
| Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask); |
| Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal}); |
| Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32); |
| I->replaceAllUsesWith(ICmp); |
| } |
| // If unused, or used and converted, remove it. |
| I->eraseFromParent(); |
| MadeChange = true; |
| } |
| |
| if (!MadeChange) |
| return false; |
| |
| // We've actually done something - now clear misc accumulated junk (fabs, |
| // bitcast). |
| for (auto *I : PossibleJunk) |
| if (I->user_empty()) |
| I->eraseFromParent(); |
| |
| return true; |
| } |