|  | //===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | // This pass looks for instructions that can be replaced by a Test Data Class | 
|  | // instruction, and replaces them when profitable. | 
|  | // | 
|  | // Roughly, the following rules are recognized: | 
|  | // | 
|  | // 1: fcmp pred X, 0 -> tdc X, mask | 
|  | // 2: fcmp pred X, +-inf -> tdc X, mask | 
|  | // 3: fcmp pred X, +-minnorm -> tdc X, mask | 
|  | // 4: tdc (fabs X), mask -> tdc X, newmask | 
|  | // 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit] | 
|  | // 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask | 
|  | // 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask | 
|  | // 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2) | 
|  | // 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2) | 
|  | // 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2) | 
|  | // | 
|  | // The pass works in 4 steps: | 
|  | // | 
|  | // 1. All fcmp and icmp instructions in a function are checked for a match | 
|  | //    with rules 1-3 and 5-7.  Their TDC equivalents are stored in | 
|  | //    the ConvertedInsts mapping.  If the operand of a fcmp instruction is | 
|  | //    a fabs, it's also folded according to rule 4. | 
|  | // 2. All and/or/xor i1 instructions whose both operands have been already | 
|  | //    mapped are mapped according to rules 8-10.  LogicOpsWorklist is used | 
|  | //    as a queue of instructions to check. | 
|  | // 3. All mapped instructions that are considered worthy of conversion (ie. | 
|  | //    replacing them will actually simplify the final code) are replaced | 
|  | //    with a call to the s390.tdc intrinsic. | 
|  | // 4. All intermediate results of replaced instructions are removed if unused. | 
|  | // | 
|  | // Instructions that match rules 1-3 are considered unworthy of conversion | 
|  | // on their own (since a comparison instruction is superior), but are mapped | 
|  | // in the hopes of folding the result using rules 4 and 8-10 (likely removing | 
|  | // the original comparison in the process). | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "SystemZ.h" | 
|  | #include "SystemZSubtarget.h" | 
|  | #include "llvm/ADT/MapVector.h" | 
|  | #include "llvm/CodeGen/TargetPassConfig.h" | 
|  | #include "llvm/IR/Constants.h" | 
|  | #include "llvm/IR/IRBuilder.h" | 
|  | #include "llvm/IR/InstIterator.h" | 
|  | #include "llvm/IR/Instructions.h" | 
|  | #include "llvm/IR/IntrinsicInst.h" | 
|  | #include "llvm/IR/IntrinsicsS390.h" | 
|  | #include "llvm/IR/LegacyPassManager.h" | 
|  | #include "llvm/IR/Module.h" | 
|  | #include "llvm/Target/TargetMachine.h" | 
|  | #include <deque> | 
|  | #include <set> | 
|  |  | 
|  | using namespace llvm; | 
|  |  | 
|  | namespace { | 
|  |  | 
|  | class SystemZTDCPass : public FunctionPass { | 
|  | public: | 
|  | static char ID; | 
|  | SystemZTDCPass() : FunctionPass(ID) { | 
|  | initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry()); | 
|  | } | 
|  |  | 
|  | bool runOnFunction(Function &F) override; | 
|  |  | 
|  | void getAnalysisUsage(AnalysisUsage &AU) const override { | 
|  | AU.addRequired<TargetPassConfig>(); | 
|  | } | 
|  |  | 
|  | private: | 
|  | // Maps seen instructions that can be mapped to a TDC, values are | 
|  | // (TDC operand, TDC mask, worthy flag) triples. | 
|  | MapVector<Instruction *, std::tuple<Value *, int, bool>> ConvertedInsts; | 
|  | // The queue of and/or/xor i1 instructions to be potentially folded. | 
|  | std::vector<BinaryOperator *> LogicOpsWorklist; | 
|  | // Instructions matched while folding, to be removed at the end if unused. | 
|  | std::set<Instruction *> PossibleJunk; | 
|  |  | 
|  | // Tries to convert a fcmp instruction. | 
|  | void convertFCmp(CmpInst &I); | 
|  |  | 
|  | // Tries to convert an icmp instruction. | 
|  | void convertICmp(CmpInst &I); | 
|  |  | 
|  | // Tries to convert an i1 and/or/xor instruction, whose both operands | 
|  | // have been already converted. | 
|  | void convertLogicOp(BinaryOperator &I); | 
|  |  | 
|  | // Marks an instruction as converted - adds it to ConvertedInsts and adds | 
|  | // any and/or/xor i1 users to the queue. | 
|  | void converted(Instruction *I, Value *V, int Mask, bool Worthy) { | 
|  | ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy); | 
|  | auto &M = *I->getFunction()->getParent(); | 
|  | auto &Ctx = M.getContext(); | 
|  | for (auto *U : I->users()) { | 
|  | auto *LI = dyn_cast<BinaryOperator>(U); | 
|  | if (LI && LI->getType() == Type::getInt1Ty(Ctx) && | 
|  | (LI->getOpcode() == Instruction::And || | 
|  | LI->getOpcode() == Instruction::Or || | 
|  | LI->getOpcode() == Instruction::Xor)) { | 
|  | LogicOpsWorklist.push_back(LI); | 
|  | } | 
|  | } | 
|  | } | 
|  | }; | 
|  |  | 
|  | } // end anonymous namespace | 
|  |  | 
|  | char SystemZTDCPass::ID = 0; | 
|  | INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc", | 
|  | "SystemZ Test Data Class optimization", false, false) | 
|  |  | 
|  | FunctionPass *llvm::createSystemZTDCPass() { | 
|  | return new SystemZTDCPass(); | 
|  | } | 
|  |  | 
|  | void SystemZTDCPass::convertFCmp(CmpInst &I) { | 
|  | Value *Op0 = I.getOperand(0); | 
|  | auto *Const = dyn_cast<ConstantFP>(I.getOperand(1)); | 
|  | auto Pred = I.getPredicate(); | 
|  | // Only comparisons with consts are interesting. | 
|  | if (!Const) | 
|  | return; | 
|  | // Compute the smallest normal number (and its negation). | 
|  | auto &Sem = Op0->getType()->getFltSemantics(); | 
|  | APFloat Smallest = APFloat::getSmallestNormalized(Sem); | 
|  | APFloat NegSmallest = Smallest; | 
|  | NegSmallest.changeSign(); | 
|  | // Check if Const is one of our recognized consts. | 
|  | int WhichConst; | 
|  | if (Const->isZero()) { | 
|  | // All comparisons with 0 can be converted. | 
|  | WhichConst = 0; | 
|  | } else if (Const->isInfinity()) { | 
|  | // Likewise for infinities. | 
|  | WhichConst = Const->isNegative() ? 2 : 1; | 
|  | } else if (Const->isExactlyValue(Smallest)) { | 
|  | // For Smallest, we cannot do EQ separately from GT. | 
|  | if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE && | 
|  | (Pred & CmpInst::FCMP_OGE) != 0) | 
|  | return; | 
|  | WhichConst = 3; | 
|  | } else if (Const->isExactlyValue(NegSmallest)) { | 
|  | // Likewise for NegSmallest, we cannot do EQ separately from LT. | 
|  | if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE && | 
|  | (Pred & CmpInst::FCMP_OLE) != 0) | 
|  | return; | 
|  | WhichConst = 4; | 
|  | } else { | 
|  | // Not one of our special constants. | 
|  | return; | 
|  | } | 
|  | // Partial masks to use for EQ, GT, LT, UN comparisons, respectively. | 
|  | static const int Masks[][4] = { | 
|  | { // 0 | 
|  | SystemZ::TDCMASK_ZERO,              // eq | 
|  | SystemZ::TDCMASK_POSITIVE,          // gt | 
|  | SystemZ::TDCMASK_NEGATIVE,          // lt | 
|  | SystemZ::TDCMASK_NAN,               // un | 
|  | }, | 
|  | { // inf | 
|  | SystemZ::TDCMASK_INFINITY_PLUS,     // eq | 
|  | 0,                                  // gt | 
|  | (SystemZ::TDCMASK_ZERO | | 
|  | SystemZ::TDCMASK_NEGATIVE | | 
|  | SystemZ::TDCMASK_NORMAL_PLUS | | 
|  | SystemZ::TDCMASK_SUBNORMAL_PLUS),  // lt | 
|  | SystemZ::TDCMASK_NAN,               // un | 
|  | }, | 
|  | { // -inf | 
|  | SystemZ::TDCMASK_INFINITY_MINUS,    // eq | 
|  | (SystemZ::TDCMASK_ZERO | | 
|  | SystemZ::TDCMASK_POSITIVE | | 
|  | SystemZ::TDCMASK_NORMAL_MINUS | | 
|  | SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt | 
|  | 0,                                  // lt | 
|  | SystemZ::TDCMASK_NAN,               // un | 
|  | }, | 
|  | { // minnorm | 
|  | 0,                                  // eq (unsupported) | 
|  | (SystemZ::TDCMASK_NORMAL_PLUS | | 
|  | SystemZ::TDCMASK_INFINITY_PLUS),   // gt (actually ge) | 
|  | (SystemZ::TDCMASK_ZERO | | 
|  | SystemZ::TDCMASK_NEGATIVE | | 
|  | SystemZ::TDCMASK_SUBNORMAL_PLUS),  // lt | 
|  | SystemZ::TDCMASK_NAN,               // un | 
|  | }, | 
|  | { // -minnorm | 
|  | 0,                                  // eq (unsupported) | 
|  | (SystemZ::TDCMASK_ZERO | | 
|  | SystemZ::TDCMASK_POSITIVE | | 
|  | SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt | 
|  | (SystemZ::TDCMASK_NORMAL_MINUS | | 
|  | SystemZ::TDCMASK_INFINITY_MINUS),  // lt (actually le) | 
|  | SystemZ::TDCMASK_NAN,               // un | 
|  | } | 
|  | }; | 
|  | // Construct the mask as a combination of the partial masks. | 
|  | int Mask = 0; | 
|  | if (Pred & CmpInst::FCMP_OEQ) | 
|  | Mask |= Masks[WhichConst][0]; | 
|  | if (Pred & CmpInst::FCMP_OGT) | 
|  | Mask |= Masks[WhichConst][1]; | 
|  | if (Pred & CmpInst::FCMP_OLT) | 
|  | Mask |= Masks[WhichConst][2]; | 
|  | if (Pred & CmpInst::FCMP_UNO) | 
|  | Mask |= Masks[WhichConst][3]; | 
|  | // A lone fcmp is unworthy of tdc conversion on its own, but may become | 
|  | // worthy if combined with fabs. | 
|  | bool Worthy = false; | 
|  | if (CallInst *CI = dyn_cast<CallInst>(Op0)) { | 
|  | Function *F = CI->getCalledFunction(); | 
|  | if (F && F->getIntrinsicID() == Intrinsic::fabs) { | 
|  | // Fold with fabs - adjust the mask appropriately. | 
|  | Mask &= SystemZ::TDCMASK_PLUS; | 
|  | Mask |= Mask >> 1; | 
|  | Op0 = CI->getArgOperand(0); | 
|  | // A combination of fcmp with fabs is a win, unless the constant | 
|  | // involved is 0 (which is handled by later passes). | 
|  | Worthy = WhichConst != 0; | 
|  | PossibleJunk.insert(CI); | 
|  | } | 
|  | } | 
|  | converted(&I, Op0, Mask, Worthy); | 
|  | } | 
|  |  | 
|  | void SystemZTDCPass::convertICmp(CmpInst &I) { | 
|  | Value *Op0 = I.getOperand(0); | 
|  | auto *Const = dyn_cast<ConstantInt>(I.getOperand(1)); | 
|  | auto Pred = I.getPredicate(); | 
|  | // All our icmp rules involve comparisons with consts. | 
|  | if (!Const) | 
|  | return; | 
|  | if (auto *Cast = dyn_cast<BitCastInst>(Op0)) { | 
|  | // Check for icmp+bitcast used for signbit. | 
|  | if (!Cast->getSrcTy()->isFloatTy() && | 
|  | !Cast->getSrcTy()->isDoubleTy() && | 
|  | !Cast->getSrcTy()->isFP128Ty()) | 
|  | return; | 
|  | Value *V = Cast->getOperand(0); | 
|  | int Mask; | 
|  | if (Pred == CmpInst::ICMP_SLT && Const->isZero()) { | 
|  | // icmp slt (bitcast X), 0 - set if sign bit true | 
|  | Mask = SystemZ::TDCMASK_MINUS; | 
|  | } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) { | 
|  | // icmp sgt (bitcast X), -1 - set if sign bit false | 
|  | Mask = SystemZ::TDCMASK_PLUS; | 
|  | } else { | 
|  | // Not a sign bit check. | 
|  | return; | 
|  | } | 
|  | PossibleJunk.insert(Cast); | 
|  | converted(&I, V, Mask, true); | 
|  | } else if (auto *CI = dyn_cast<CallInst>(Op0)) { | 
|  | // Check if this is a pre-existing call of our tdc intrinsic. | 
|  | Function *F = CI->getCalledFunction(); | 
|  | if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc) | 
|  | return; | 
|  | if (!Const->isZero()) | 
|  | return; | 
|  | Value *V = CI->getArgOperand(0); | 
|  | auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); | 
|  | // Bail if the mask is not a constant. | 
|  | if (!MaskC) | 
|  | return; | 
|  | int Mask = MaskC->getZExtValue(); | 
|  | Mask &= SystemZ::TDCMASK_ALL; | 
|  | if (Pred == CmpInst::ICMP_NE) { | 
|  | // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC | 
|  | } else if (Pred == CmpInst::ICMP_EQ) { | 
|  | // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask | 
|  | Mask ^= SystemZ::TDCMASK_ALL; | 
|  | } else { | 
|  | // An unknown comparison - ignore. | 
|  | return; | 
|  | } | 
|  | PossibleJunk.insert(CI); | 
|  | converted(&I, V, Mask, false); | 
|  | } | 
|  | } | 
|  |  | 
|  | void SystemZTDCPass::convertLogicOp(BinaryOperator &I) { | 
|  | Value *Op0, *Op1; | 
|  | int Mask0, Mask1; | 
|  | bool Worthy0, Worthy1; | 
|  | std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))]; | 
|  | std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))]; | 
|  | if (Op0 != Op1) | 
|  | return; | 
|  | int Mask; | 
|  | switch (I.getOpcode()) { | 
|  | case Instruction::And: | 
|  | Mask = Mask0 & Mask1; | 
|  | break; | 
|  | case Instruction::Or: | 
|  | Mask = Mask0 | Mask1; | 
|  | break; | 
|  | case Instruction::Xor: | 
|  | Mask = Mask0 ^ Mask1; | 
|  | break; | 
|  | default: | 
|  | llvm_unreachable("Unknown op in convertLogicOp"); | 
|  | } | 
|  | converted(&I, Op0, Mask, true); | 
|  | } | 
|  |  | 
|  | bool SystemZTDCPass::runOnFunction(Function &F) { | 
|  | auto &TPC = getAnalysis<TargetPassConfig>(); | 
|  | if (TPC.getTM<TargetMachine>() | 
|  | .getSubtarget<SystemZSubtarget>(F) | 
|  | .hasSoftFloat()) | 
|  | return false; | 
|  |  | 
|  | ConvertedInsts.clear(); | 
|  | LogicOpsWorklist.clear(); | 
|  | PossibleJunk.clear(); | 
|  |  | 
|  | // Look for icmp+fcmp instructions. | 
|  | for (auto &I : instructions(F)) { | 
|  | if (I.getOpcode() == Instruction::FCmp) | 
|  | convertFCmp(cast<CmpInst>(I)); | 
|  | else if (I.getOpcode() == Instruction::ICmp) | 
|  | convertICmp(cast<CmpInst>(I)); | 
|  | } | 
|  |  | 
|  | // If none found, bail already. | 
|  | if (ConvertedInsts.empty()) | 
|  | return false; | 
|  |  | 
|  | // Process the queue of logic instructions. | 
|  | while (!LogicOpsWorklist.empty()) { | 
|  | BinaryOperator *Op = LogicOpsWorklist.back(); | 
|  | LogicOpsWorklist.pop_back(); | 
|  | // If both operands mapped, and the instruction itself not yet mapped, | 
|  | // convert it. | 
|  | if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) && | 
|  | ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) && | 
|  | !ConvertedInsts.count(Op)) | 
|  | convertLogicOp(*Op); | 
|  | } | 
|  |  | 
|  | // Time to actually replace the instructions.  Do it in the reverse order | 
|  | // of finding them, since there's a good chance the earlier ones will be | 
|  | // unused (due to being folded into later ones). | 
|  | Module &M = *F.getParent(); | 
|  | auto &Ctx = M.getContext(); | 
|  | Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0); | 
|  | bool MadeChange = false; | 
|  | for (auto &It : reverse(ConvertedInsts)) { | 
|  | Instruction *I = It.first; | 
|  | Value *V; | 
|  | int Mask; | 
|  | bool Worthy; | 
|  | std::tie(V, Mask, Worthy) = It.second; | 
|  | if (!I->user_empty()) { | 
|  | // If used and unworthy of conversion, skip it. | 
|  | if (!Worthy) | 
|  | continue; | 
|  | // Call the intrinsic, compare result with 0. | 
|  | Function *TDCFunc = | 
|  | Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc, V->getType()); | 
|  | IRBuilder<> IRB(I); | 
|  | Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask); | 
|  | Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal}); | 
|  | Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32); | 
|  | I->replaceAllUsesWith(ICmp); | 
|  | } | 
|  | // If unused, or used and converted, remove it. | 
|  | I->eraseFromParent(); | 
|  | MadeChange = true; | 
|  | } | 
|  |  | 
|  | if (!MadeChange) | 
|  | return false; | 
|  |  | 
|  | // We've actually done something - now clear misc accumulated junk (fabs, | 
|  | // bitcast). | 
|  | for (auto *I : PossibleJunk) | 
|  | if (I->user_empty()) | 
|  | I->eraseFromParent(); | 
|  |  | 
|  | return true; | 
|  | } |