llvm/lib/Target/X86/X86FixupLEAs.cpp - llvm-project - Git at Google

 //===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This file defines the pass that finds instructions that can be
 // re-written as LEA instructions in order to reduce pipeline delays.
 // It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
 //
 //===----------------------------------------------------------------------===//

 #include "X86.h"
 #include "X86InstrInfo.h"
 #include "X86Subtarget.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineSizeOpts.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;

 #define FIXUPLEA_DESC "X86 LEA Fixup"
 #define FIXUPLEA_NAME "x86-fixup-LEAs"

 #define DEBUG_TYPE FIXUPLEA_NAME

 STATISTIC(NumLEAs, "Number of LEA instructions created");

 namespace {
 class FixupLEAPass : public MachineFunctionPass {
   enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };

   /// Given a machine register, look for the instruction
   /// which writes it in the current basic block. If found,
   /// try to replace it with an equivalent LEA instruction.
   /// If replacement succeeds, then also process the newly created
   /// instruction.
   void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
                     MachineBasicBlock &MBB);

   /// Given a memory access or LEA instruction
   /// whose address mode uses a base and/or index register, look for
   /// an opportunity to replace the instruction which sets the base or index
   /// register with an equivalent LEA instruction.
   void processInstruction(MachineBasicBlock::iterator &I,
                           MachineBasicBlock &MBB);

   /// Given a LEA instruction which is unprofitable
   /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
   void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
                                     MachineBasicBlock &MBB);

   /// Given a LEA instruction which is unprofitable
   /// on SNB+ try to replace it with other instructions.
   /// According to Intel's Optimization Reference Manual:
   /// " For LEA instructions with three source operands and some specific
   ///   situations, instruction latency has increased to 3 cycles, and must
   ///   dispatch via port 1:
   /// - LEA that has all three source operands: base, index, and offset
   /// - LEA that uses base and index registers where the base is EBP, RBP,
   ///   or R13
   /// - LEA that uses RIP relative addressing mode
   /// - LEA that uses 16-bit addressing mode "
   /// This function currently handles the first 2 cases only.
   void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
                                  MachineBasicBlock &MBB, bool OptIncDec);

   /// Look for LEAs that are really two address LEAs that we might be able to
   /// turn into regular ADD instructions.
   bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
                      MachineBasicBlock &MBB, bool OptIncDec,
                      bool UseLEAForSP) const;

   /// Look for and transform the sequence
   ///     lea (reg1, reg2), reg3
   ///     sub reg3, reg4
   /// to
   ///     sub reg1, reg4
   ///     sub reg2, reg4
   /// It can also optimize the sequence lea/add similarly.
   bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const;

   /// Step forwards in MBB, looking for an ADD/SUB instruction which uses
   /// the dest register of LEA instruction I.
   MachineBasicBlock::iterator searchALUInst(MachineBasicBlock::iterator &I,
                                             MachineBasicBlock &MBB) const;

   /// Check instructions between LeaI and AluI (exclusively).
   /// Set BaseIndexDef to true if base or index register from LeaI is defined.
   /// Set AluDestRef to true if the dest register of AluI is used or defined.
   /// *KilledBase is set to the killed base register usage.
   /// *KilledIndex is set to the killed index register usage.
   void checkRegUsage(MachineBasicBlock::iterator &LeaI,
                      MachineBasicBlock::iterator &AluI, bool &BaseIndexDef,
                      bool &AluDestRef, MachineOperand **KilledBase,
                      MachineOperand **KilledIndex) const;

   /// Determine if an instruction references a machine register
   /// and, if so, whether it reads or writes the register.
   RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);

   /// Step backwards through a basic block, looking
   /// for an instruction which writes a register within
   /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
   MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
                                               MachineBasicBlock::iterator &I,
                                               MachineBasicBlock &MBB);

   /// if an instruction can be converted to an
   /// equivalent LEA, insert the new instruction into the basic block
   /// and return a pointer to it. Otherwise, return zero.
   MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator &MBBI) const;

 public:
   static char ID;

   StringRef getPassName() const override { return FIXUPLEA_DESC; }

   FixupLEAPass() : MachineFunctionPass(ID) { }

   /// Loop over all of the basic blocks,
   /// replacing instructions by equivalent LEA instructions
   /// if needed and when possible.
   bool runOnMachineFunction(MachineFunction &MF) override;

   // This pass runs after regalloc and doesn't support VReg operands.
   MachineFunctionProperties getRequiredProperties() const override {
     return MachineFunctionProperties().set(
         MachineFunctionProperties::Property::NoVRegs);
   }

   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<ProfileSummaryInfoWrapperPass>();
     AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
     MachineFunctionPass::getAnalysisUsage(AU);
   }

 private:
   TargetSchedModel TSM;
   const X86InstrInfo *TII = nullptr;
   const X86RegisterInfo *TRI = nullptr;
 };
 }

 char FixupLEAPass::ID = 0;

 INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)

 MachineInstr *
 FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator &MBBI) const {
   MachineInstr &MI = *MBBI;
   switch (MI.getOpcode()) {
   case X86::MOV32rr:
   case X86::MOV64rr: {
     const MachineOperand &Src = MI.getOperand(1);
     const MachineOperand &Dest = MI.getOperand(0);
     MachineInstr *NewMI =
         BuildMI(MBB, MBBI, MI.getDebugLoc(),
                 TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
                                                         : X86::LEA64r))
             .add(Dest)
             .add(Src)
             .addImm(1)
             .addReg(0)
             .addImm(0)
             .addReg(0);
     return NewMI;
   }
   }

   if (!MI.isConvertibleTo3Addr())
     return nullptr;

   switch (MI.getOpcode()) {
   default:
     // Only convert instructions that we've verified are safe.
     return nullptr;
   case X86::ADD64ri32:
   case X86::ADD64ri8:
   case X86::ADD64ri32_DB:
   case X86::ADD64ri8_DB:
   case X86::ADD32ri:
   case X86::ADD32ri8:
   case X86::ADD32ri_DB:
   case X86::ADD32ri8_DB:
     if (!MI.getOperand(2).isImm()) {
       // convertToThreeAddress will call getImm()
       // which requires isImm() to be true
       return nullptr;
     }
     break;
   case X86::SHL64ri:
   case X86::SHL32ri:
   case X86::INC64r:
   case X86::INC32r:
   case X86::DEC64r:
   case X86::DEC32r:
   case X86::ADD64rr:
   case X86::ADD64rr_DB:
   case X86::ADD32rr:
   case X86::ADD32rr_DB:
     // These instructions are all fine to convert.
     break;
   }
   return TII->convertToThreeAddress(MI, nullptr, nullptr);
 }

 FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }

 static bool isLEA(unsigned Opcode) {
   return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
          Opcode == X86::LEA64_32r;
 }

 bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;

   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
   bool IsSlowLEA = ST.slowLEA();
   bool IsSlow3OpsLEA = ST.slow3OpsLEA();
   bool LEAUsesAG = ST.LEAusesAG();

   bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
   bool UseLEAForSP = ST.useLeaForSP();

   TSM.init(&ST);
   TII = ST.getInstrInfo();
   TRI = ST.getRegisterInfo();
   auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
   auto *MBFI = (PSI && PSI->hasProfileSummary())
                    ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
                    : nullptr;

   LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
   for (MachineBasicBlock &MBB : MF) {
     // First pass. Try to remove or optimize existing LEAs.
     bool OptIncDecPerBB =
         OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
     for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
       if (!isLEA(I->getOpcode()))
         continue;

       if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))
         continue;

       if (IsSlowLEA)
         processInstructionForSlowLEA(I, MBB);
       else if (IsSlow3OpsLEA)
         processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);
     }

     // Second pass for creating LEAs. This may reverse some of the
     // transformations above.
     if (LEAUsesAG) {
       for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
         processInstruction(I, MBB);
     }
   }

   LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);

   return true;
 }

 FixupLEAPass::RegUsageState
 FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
   RegUsageState RegUsage = RU_NotUsed;
   MachineInstr &MI = *I;

   for (const MachineOperand &MO : MI.operands()) {
     if (MO.isReg() && MO.getReg() == p.getReg()) {
       if (MO.isDef())
         return RU_Write;
       RegUsage = RU_Read;
     }
   }
   return RegUsage;
 }

 /// getPreviousInstr - Given a reference to an instruction in a basic
 /// block, return a reference to the previous instruction in the block,
 /// wrapping around to the last instruction of the block if the block
 /// branches to itself.
 static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
                                     MachineBasicBlock &MBB) {
   if (I == MBB.begin()) {
     if (MBB.isPredecessor(&MBB)) {
       I = --MBB.end();
       return true;
     } else
       return false;
   }
   --I;
   return true;
 }

 MachineBasicBlock::iterator
 FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
                               MachineBasicBlock &MBB) {
   int InstrDistance = 1;
   MachineBasicBlock::iterator CurInst;
   static const int INSTR_DISTANCE_THRESHOLD = 5;

   CurInst = I;
   bool Found;
   Found = getPreviousInstr(CurInst, MBB);
   while (Found && I != CurInst) {
     if (CurInst->isCall() || CurInst->isInlineAsm())
       break;
     if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
       break; // too far back to make a difference
     if (usesRegister(p, CurInst) == RU_Write) {
       return CurInst;
     }
     InstrDistance += TSM.computeInstrLatency(&*CurInst);
     Found = getPreviousInstr(CurInst, MBB);
   }
   return MachineBasicBlock::iterator();
 }

 static inline bool isInefficientLEAReg(unsigned Reg) {
   return Reg == X86::EBP || Reg == X86::RBP ||
          Reg == X86::R13D || Reg == X86::R13;
 }

 /// Returns true if this LEA uses base an index registers, and the base register
 /// is known to be inefficient for the subtarget.
 // TODO: use a variant scheduling class to model the latency profile
 // of LEA instructions, and implement this logic as a scheduling predicate.
 static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
                                             const MachineOperand &Index) {
   return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
          Index.getReg() != X86::NoRegister;
 }

 static inline bool hasLEAOffset(const MachineOperand &Offset) {
   return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
 }

 static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
   switch (LEAOpcode) {
   default:
     llvm_unreachable("Unexpected LEA instruction");
   case X86::LEA32r:
   case X86::LEA64_32r:
     return X86::ADD32rr;
   case X86::LEA64r:
     return X86::ADD64rr;
   }
 }

 static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) {
   switch (LEAOpcode) {
   default:
     llvm_unreachable("Unexpected LEA instruction");
   case X86::LEA32r:
   case X86::LEA64_32r:
     return X86::SUB32rr;
   case X86::LEA64r:
     return X86::SUB64rr;
   }
 }

 static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
                                        const MachineOperand &Offset) {
   bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
   switch (LEAOpcode) {
   default:
     llvm_unreachable("Unexpected LEA instruction");
   case X86::LEA32r:
   case X86::LEA64_32r:
     return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
   case X86::LEA64r:
     return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
   }
 }

 static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
   switch (LEAOpcode) {
   default:
     llvm_unreachable("Unexpected LEA instruction");
   case X86::LEA32r:
   case X86::LEA64_32r:
     return IsINC ? X86::INC32r : X86::DEC32r;
   case X86::LEA64r:
     return IsINC ? X86::INC64r : X86::DEC64r;
   }
 }

 MachineBasicBlock::iterator
 FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I,
                             MachineBasicBlock &MBB) const {
   const int InstrDistanceThreshold = 5;
   int InstrDistance = 1;
   MachineBasicBlock::iterator CurInst = std::next(I);

   unsigned LEAOpcode = I->getOpcode();
   unsigned AddOpcode = getADDrrFromLEA(LEAOpcode);
   unsigned SubOpcode = getSUBrrFromLEA(LEAOpcode);
   Register DestReg = I->getOperand(0).getReg();

   while (CurInst != MBB.end()) {
     if (CurInst->isCall() || CurInst->isInlineAsm())
       break;
     if (InstrDistance > InstrDistanceThreshold)
       break;

     // Check if the lea dest register is used in an add/sub instruction only.
     for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
       MachineOperand &Opnd = CurInst->getOperand(I);
       if (Opnd.isReg()) {
         if (Opnd.getReg() == DestReg) {
           if (Opnd.isDef() || !Opnd.isKill())
             return MachineBasicBlock::iterator();

           unsigned AluOpcode = CurInst->getOpcode();
           if (AluOpcode != AddOpcode && AluOpcode != SubOpcode)
             return MachineBasicBlock::iterator();

           MachineOperand &Opnd2 = CurInst->getOperand(3 - I);
           MachineOperand AluDest = CurInst->getOperand(0);
           if (Opnd2.getReg() != AluDest.getReg())
             return MachineBasicBlock::iterator();

           // X - (Y + Z) may generate different flags than (X - Y) - Z when
           // there is overflow. So we can't change the alu instruction if the
           // flags register is live.
           if (!CurInst->registerDefIsDead(X86::EFLAGS, TRI))
             return MachineBasicBlock::iterator();

           return CurInst;
         }
         if (TRI->regsOverlap(DestReg, Opnd.getReg()))
           return MachineBasicBlock::iterator();
       }
     }

     InstrDistance++;
     ++CurInst;
   }
   return MachineBasicBlock::iterator();
 }

 void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,
                                  MachineBasicBlock::iterator &AluI,
                                  bool &BaseIndexDef, bool &AluDestRef,
                                  MachineOperand **KilledBase,
                                  MachineOperand **KilledIndex) const {
   BaseIndexDef = AluDestRef = false;
   *KilledBase = *KilledIndex = nullptr;
   Register BaseReg = LeaI->getOperand(1 + X86::AddrBaseReg).getReg();
   Register IndexReg = LeaI->getOperand(1 + X86::AddrIndexReg).getReg();
   Register AluDestReg = AluI->getOperand(0).getReg();

   MachineBasicBlock::iterator CurInst = std::next(LeaI);
   while (CurInst != AluI) {
     for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
       MachineOperand &Opnd = CurInst->getOperand(I);
       if (!Opnd.isReg())
         continue;
       Register Reg = Opnd.getReg();
       if (TRI->regsOverlap(Reg, AluDestReg))
         AluDestRef = true;
       if (TRI->regsOverlap(Reg, BaseReg)) {
         if (Opnd.isDef())
           BaseIndexDef = true;
         else if (Opnd.isKill())
           *KilledBase = &Opnd;
       }
       if (TRI->regsOverlap(Reg, IndexReg)) {
         if (Opnd.isDef())
           BaseIndexDef = true;
         else if (Opnd.isKill())
           *KilledIndex = &Opnd;
       }
     }
     ++CurInst;
   }
 }

 bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I,
                              MachineBasicBlock &MBB) const {
   // Look for an add/sub instruction which uses the result of lea.
   MachineBasicBlock::iterator AluI = searchALUInst(I, MBB);
   if (AluI == MachineBasicBlock::iterator())
     return false;

   // Check if there are any related register usage between lea and alu.
   bool BaseIndexDef, AluDestRef;
   MachineOperand *KilledBase, *KilledIndex;
   checkRegUsage(I, AluI, BaseIndexDef, AluDestRef, &KilledBase, &KilledIndex);

   MachineBasicBlock::iterator InsertPos = AluI;
   if (BaseIndexDef) {
     if (AluDestRef)
       return false;
     InsertPos = I;
     KilledBase = KilledIndex = nullptr;
   }

   // Check if there are same registers.
   Register AluDestReg = AluI->getOperand(0).getReg();
   Register BaseReg = I->getOperand(1 + X86::AddrBaseReg).getReg();
   Register IndexReg = I->getOperand(1 + X86::AddrIndexReg).getReg();
   if (I->getOpcode() == X86::LEA64_32r) {
     BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
     IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
   }
   if (AluDestReg == IndexReg) {
     if (BaseReg == IndexReg)
       return false;
     std::swap(BaseReg, IndexReg);
     std::swap(KilledBase, KilledIndex);
   }
   if (BaseReg == IndexReg)
     KilledBase = nullptr;

   // Now it's safe to change instructions.
   MachineInstr *NewMI1, *NewMI2;
   unsigned NewOpcode = AluI->getOpcode();
   NewMI1 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
                    AluDestReg)
                .addReg(AluDestReg, RegState::Kill)
                .addReg(BaseReg, KilledBase ? RegState::Kill : 0);
   NewMI1->addRegisterDead(X86::EFLAGS, TRI);
   NewMI2 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
                    AluDestReg)
                .addReg(AluDestReg, RegState::Kill)
                .addReg(IndexReg, KilledIndex ? RegState::Kill : 0);
   NewMI2->addRegisterDead(X86::EFLAGS, TRI);

   // Clear the old Kill flags.
   if (KilledBase)
     KilledBase->setIsKill(false);
   if (KilledIndex)
     KilledIndex->setIsKill(false);

   MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI1, 1);
   MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1);
   MBB.erase(I);
   MBB.erase(AluI);
   I = NewMI1;
   return true;
 }

 bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
                                  MachineBasicBlock &MBB, bool OptIncDec,
                                  bool UseLEAForSP) const {
   MachineInstr &MI = *I;

   const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
   const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
   const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
   const MachineOperand &Disp =    MI.getOperand(1 + X86::AddrDisp);
   const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

   if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
       MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I) !=
           MachineBasicBlock::LQR_Dead)
     return false;

   Register DestReg = MI.getOperand(0).getReg();
   Register BaseReg = Base.getReg();
   Register IndexReg = Index.getReg();

   // Don't change stack adjustment LEAs.
   if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
     return false;

   // LEA64_32 has 64-bit operands but 32-bit result.
   if (MI.getOpcode() == X86::LEA64_32r) {
     if (BaseReg != 0)
       BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
     if (IndexReg != 0)
       IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
   }

   MachineInstr *NewMI = nullptr;

   // Case 1.
   // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
   // which can be turned into add %reg2, %reg1
   if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
       (DestReg == BaseReg || DestReg == IndexReg)) {
     unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
     if (DestReg != BaseReg)
       std::swap(BaseReg, IndexReg);

     if (MI.getOpcode() == X86::LEA64_32r) {
       // TODO: Do we need the super register implicit use?
       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
         .addReg(BaseReg).addReg(IndexReg)
         .addReg(Base.getReg(), RegState::Implicit)
         .addReg(Index.getReg(), RegState::Implicit);
     } else {
       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
         .addReg(BaseReg).addReg(IndexReg);
     }
   } else if (DestReg == BaseReg && IndexReg == 0) {
     // Case 2.
     // This is an LEA with only a base register and a displacement,
     // We can use ADDri or INC/DEC.

     // Does this LEA have one these forms:
     // lea  %reg, 1(%reg)
     // lea  %reg, -1(%reg)
     if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
       bool IsINC = Disp.getImm() == 1;
       unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);

       if (MI.getOpcode() == X86::LEA64_32r) {
         // TODO: Do we need the super register implicit use?
         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
           .addReg(BaseReg).addReg(Base.getReg(), RegState::Implicit);
       } else {
         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
           .addReg(BaseReg);
       }
     } else {
       unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
       if (MI.getOpcode() == X86::LEA64_32r) {
         // TODO: Do we need the super register implicit use?
         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
           .addReg(BaseReg).addImm(Disp.getImm())
           .addReg(Base.getReg(), RegState::Implicit);
       } else {
         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
           .addReg(BaseReg).addImm(Disp.getImm());
       }
     }
   } else if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0) {
     // Case 3.
     // Look for and transform the sequence
     //     lea (reg1, reg2), reg3
     //     sub reg3, reg4
     return optLEAALU(I, MBB);
   } else
     return false;

   MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
   MBB.erase(I);
   I = NewMI;
   return true;
 }

 void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
                                       MachineBasicBlock &MBB) {
   // Process a load, store, or LEA instruction.
   MachineInstr &MI = *I;
   const MCInstrDesc &Desc = MI.getDesc();
   int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);
   if (AddrOffset >= 0) {
     AddrOffset += X86II::getOperandBias(Desc);
     MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
     if (p.isReg() && p.getReg() != X86::ESP) {
       seekLEAFixup(p, I, MBB);
     }
     MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
     if (q.isReg() && q.getReg() != X86::ESP) {
       seekLEAFixup(q, I, MBB);
     }
   }
 }

 void FixupLEAPass::seekLEAFixup(MachineOperand &p,
                                 MachineBasicBlock::iterator &I,
                                 MachineBasicBlock &MBB) {
   MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
   if (MBI != MachineBasicBlock::iterator()) {
     MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
     if (NewMI) {
       ++NumLEAs;
       LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
       // now to replace with an equivalent LEA...
       LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
       MBB.getParent()->substituteDebugValuesForInst(*MBI, *NewMI, 1);
       MBB.erase(MBI);
       MachineBasicBlock::iterator J =
           static_cast<MachineBasicBlock::iterator>(NewMI);
       processInstruction(J, MBB);
     }
   }
 }

 void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
                                                 MachineBasicBlock &MBB) {
   MachineInstr &MI = *I;
   const unsigned Opcode = MI.getOpcode();

   const MachineOperand &Dst =     MI.getOperand(0);
   const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
   const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
   const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
   const MachineOperand &Offset =  MI.getOperand(1 + X86::AddrDisp);
   const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

   if (Segment.getReg() != 0 || !Offset.isImm() ||
       MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
           MachineBasicBlock::LQR_Dead)
     return;
   const Register DstR = Dst.getReg();
   const Register SrcR1 = Base.getReg();
   const Register SrcR2 = Index.getReg();
   if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
     return;
   if (Scale.getImm() > 1)
     return;
   LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
   LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
   MachineInstr *NewMI = nullptr;
   // Make ADD instruction for two registers writing to LEA's destination
   if (SrcR1 != 0 && SrcR2 != 0) {
     const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
     const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
     NewMI =
         BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
     LLVM_DEBUG(NewMI->dump(););
   }
   // Make ADD instruction for immediate
   if (Offset.getImm() != 0) {
     const MCInstrDesc &ADDri =
         TII->get(getADDriFromLEA(Opcode, Offset));
     const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
     NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
                 .add(SrcR)
                 .addImm(Offset.getImm());
     LLVM_DEBUG(NewMI->dump(););
   }
   if (NewMI) {
     MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
     MBB.erase(I);
     I = NewMI;
   }
 }

 void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
                                              MachineBasicBlock &MBB,
                                              bool OptIncDec) {
   MachineInstr &MI = *I;
   const unsigned LEAOpcode = MI.getOpcode();

   const MachineOperand &Dest =    MI.getOperand(0);
   const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
   const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
   const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
   const MachineOperand &Offset =  MI.getOperand(1 + X86::AddrDisp);
   const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

   if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
       MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
           MachineBasicBlock::LQR_Dead ||
       Segment.getReg() != X86::NoRegister)
     return;

   Register DestReg = Dest.getReg();
   Register BaseReg = Base.getReg();
   Register IndexReg = Index.getReg();

   if (MI.getOpcode() == X86::LEA64_32r) {
     if (BaseReg != 0)
       BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
     if (IndexReg != 0)
       IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
   }

   bool IsScale1 = Scale.getImm() == 1;
   bool IsInefficientBase = isInefficientLEAReg(BaseReg);
   bool IsInefficientIndex = isInefficientLEAReg(IndexReg);

   // Skip these cases since it takes more than 2 instructions
   // to replace the LEA instruction.
   if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
     return;

   LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
   LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);

   MachineInstr *NewMI = nullptr;

   // First try to replace LEA with one or two (for the 3-op LEA case)
   // add instructions:
   // 1.lea (%base,%index,1), %base => add %index,%base
   // 2.lea (%base,%index,1), %index => add %base,%index
   if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) {
     unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
     if (DestReg != BaseReg)
       std::swap(BaseReg, IndexReg);

     if (MI.getOpcode() == X86::LEA64_32r) {
       // TODO: Do we need the super register implicit use?
       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                   .addReg(BaseReg)
                   .addReg(IndexReg)
                   .addReg(Base.getReg(), RegState::Implicit)
                   .addReg(Index.getReg(), RegState::Implicit);
     } else {
       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                   .addReg(BaseReg)
                   .addReg(IndexReg);
     }
   } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
     // If the base is inefficient try switching the index and base operands,
     // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
     // lea offset(%base,%index,scale),%dst =>
     // lea (%base,%index,scale); add offset,%dst
     NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
                 .add(Dest)
                 .add(IsInefficientBase ? Index : Base)
                 .add(Scale)
                 .add(IsInefficientBase ? Base : Index)
                 .addImm(0)
                 .add(Segment);
     LLVM_DEBUG(NewMI->dump(););
   }

   // If either replacement succeeded above, add the offset if needed, then
   // replace the instruction.
   if (NewMI) {
     // Create ADD instruction for the Offset in case of 3-Ops LEA.
     if (hasLEAOffset(Offset)) {
       if (OptIncDec && Offset.isImm() &&
           (Offset.getImm() == 1 || Offset.getImm() == -1)) {
         unsigned NewOpc =
             getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                     .addReg(DestReg);
         LLVM_DEBUG(NewMI->dump(););
       } else {
         unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                     .addReg(DestReg)
                     .add(Offset);
         LLVM_DEBUG(NewMI->dump(););
       }
     }

     MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
     MBB.erase(I);
     I = NewMI;
     return;
   }

   // Handle the rest of the cases with inefficient base register:
   assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
   assert(IsInefficientBase && "efficient base should be handled already!");

   // FIXME: Handle LEA64_32r.
   if (LEAOpcode == X86::LEA64_32r)
     return;

   // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
   if (IsScale1 && !hasLEAOffset(Offset)) {
     bool BIK = Base.isKill() && BaseReg != IndexReg;
     TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
     LLVM_DEBUG(MI.getPrevNode()->dump(););

     unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
     NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                 .addReg(DestReg)
                 .add(Index);
     LLVM_DEBUG(NewMI->dump(););

     MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
     MBB.erase(I);
     I = NewMI;
     return;
   }

   // lea offset(%base,%index,scale), %dst =>
   // lea offset( ,%index,scale), %dst; add %base,%dst
   NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
               .add(Dest)
               .addReg(0)
               .add(Scale)
               .add(Index)
               .add(Offset)
               .add(Segment);
   LLVM_DEBUG(NewMI->dump(););

   unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
   NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
               .addReg(DestReg)
               .add(Base);
   LLVM_DEBUG(NewMI->dump(););

   MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
   MBB.erase(I);
   I = NewMI;
 }