// lib/Target/ARM/Thumb2SizeReduction.cpp - llvm - Git at Google

 //===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//

 #define DEBUG_TYPE "t2-reduce-size"
 #include "ARM.h"
 #include "ARMAddressingModes.h"
 #include "ARMBaseRegisterInfo.h"
 #include "ARMBaseInstrInfo.h"
 #include "Thumb2InstrInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Statistic.h"
 using namespace llvm;

 // Counters for the three kinds of rewrite this pass performs. Each one is
 // incremented by the routine that performs that rewrite (ReduceToNarrow,
 // ReduceTo2Addr and ReduceLoadStore respectively).
 STATISTIC(NumNarrows,  "Number of 32-bit instrs reduced to 16-bit ones");
 STATISTIC(Num2Addrs,   "Number of 32-bit instrs reduced to 2addr 16-bit ones");
 STATISTIC(NumLdSts,    "Number of 32-bit load / store reduced to 16-bit ones");

 // Hidden command-line limits, one per counter above. The default of -1 means
 // "no limit"; otherwise the matching rewrite is refused once its counter has
 // reached the given value.

 // Caps the rewrites counted by NumNarrows.
 static cl::opt<int> ReduceLimit("t2-reduce-limit",
                                 cl::init(-1), cl::Hidden);
 // Caps the rewrites counted by Num2Addrs.
 static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
                                      cl::init(-1), cl::Hidden);
 // Caps the rewrites counted by NumLdSts.
 static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
                                      cl::init(-1), cl::Hidden);

 namespace {
   /// ReduceEntry - One row of ReduceTable. It describes how a single wide
   /// (32-bit) opcode may be rewritten as a narrow (16-bit) opcode and which
   /// constraints have to hold before the rewrite is attempted.
   ///
   /// Fields with suffix 1 apply to the plain narrow form (NarrowOpc1), fields
   /// with suffix 2 apply to the two-address narrow form (NarrowOpc2).
   struct ReduceEntry {
     unsigned WideOpc;      // Wide opcode
     unsigned NarrowOpc1;   // Narrow opcode to transform to (0 if there is none)
     unsigned NarrowOpc2;   // Narrow opcode when it's two-address (0 if none)
     uint8_t  Imm1Limit;    // Limit of immediate field (bits); the largest
                            // accepted value is (1 << Imm1Limit) - 1, times
                            // the scale where one applies. 0 disables the
                            // range check in ReduceToNarrow.
     uint8_t  Imm2Limit;    // Limit of immediate field when it's two-address.
                            // 0 makes ReduceTo2Addr treat operand 2 as a
                            // register instead of an immediate.
     unsigned LowRegs1 : 1; // Only possible if low-registers are used
     unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
     unsigned PredCC1  : 2; // How VerifyPredAndCC treats CPSR for NarrowOpc1:
                            // 0 - Narrow form sets CPSR exactly when it is not
                            //     predicated (predicated => must not set it).
                            // 2 - Accepted when the wide instruction already
                            //     defines CPSR, through its optional def or an
                            //     implicit def.
                            // Any other value (the table uses 1) - No cc
                            //     field: the narrow form never sets CPSR.
     unsigned PredCC2  : 2; // Same encoding as PredCC1, for NarrowOpc2.
     unsigned Special  : 1; // Needs to be dealt with specially (routed through
                            // ReduceSpecial instead of the generic paths)
   };

   /// ReduceTable - Static description of every wide opcode this pass knows
   /// how to shrink. The columns follow the field order of ReduceEntry:
   ///   Wide    - WideOpc, the 32-bit opcode being matched.
   ///   Narrow1 - NarrowOpc1, the plain 16-bit replacement (0 = none).
   ///   Narrow2 - NarrowOpc2, the two-address 16-bit replacement (0 = none).
   ///   imm1/2  - Imm1Limit / Imm2Limit, immediate widths in bits.
   ///   lo1/2   - LowRegs1 / LowRegs2, low-register requirement flags.
   ///   P/C     - PredCC1,PredCC2 (written as a comma pair without a space).
   ///   S       - Special.
   /// Each wide opcode must appear at most once; the pass constructor asserts
   /// on duplicates when it indexes this table.
   static const ReduceEntry ReduceTable[] = {
     // Wide,        Narrow1,      Narrow2,     imm1,imm2,  lo1, lo2, P/C, S
     { ARM::t2ADCrr, 0,            ARM::tADC,     0,   0,    0,   1,  0,0, 0 },
     { ARM::t2ADDri, ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  0,0, 0 },
     { ARM::t2ADDrr, ARM::tADDrr,  ARM::tADDhirr, 0,   0,    1,   0,  0,1, 0 },
     // Note: immediate scale is 4.
     { ARM::t2ADDrSPi,ARM::tADDrSPi,0,            8,   0,    1,   0,  1,0, 0 },
     { ARM::t2ADDSri,ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  2,2, 1 },
     { ARM::t2ADDSrr,ARM::tADDrr,  0,             0,   0,    1,   0,  2,0, 1 },
     { ARM::t2ANDrr, 0,            ARM::tAND,     0,   0,    0,   1,  0,0, 0 },
     { ARM::t2ASRri, ARM::tASRri,  0,             5,   0,    1,   0,  0,0, 0 },
     { ARM::t2ASRrr, 0,            ARM::tASRrr,   0,   0,    0,   1,  0,0, 0 },
     { ARM::t2BICrr, 0,            ARM::tBIC,     0,   0,    0,   1,  0,0, 0 },
     { ARM::t2CMNrr, ARM::tCMN,    0,             0,   0,    1,   0,  2,0, 0 },
     { ARM::t2CMPri, ARM::tCMPi8,  0,             8,   0,    1,   0,  2,0, 0 },
     { ARM::t2CMPrr, ARM::tCMPhir, 0,             0,   0,    0,   0,  2,0, 0 },
     { ARM::t2CMPzri,ARM::tCMPzi8, 0,             8,   0,    1,   0,  2,0, 0 },
     { ARM::t2CMPzrr,ARM::tCMPzhir,0,             0,   0,    0,   0,  2,0, 0 },
     { ARM::t2EORrr, 0,            ARM::tEOR,     0,   0,    0,   1,  0,0, 0 },
     // FIXME: adr.n immediate offset must be multiple of 4.
     //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0,     0,   0,    1,   0,  1,0, 0 },
     { ARM::t2LSLri, ARM::tLSLri,  0,             5,   0,    1,   0,  0,0, 0 },
     { ARM::t2LSLrr, 0,            ARM::tLSLrr,   0,   0,    0,   1,  0,0, 0 },
     { ARM::t2LSRri, ARM::tLSRri,  0,             5,   0,    1,   0,  0,0, 0 },
     { ARM::t2LSRrr, 0,            ARM::tLSRrr,   0,   0,    0,   1,  0,0, 0 },
     { ARM::t2MOVi,  ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 0 },
     // FIXME: Do we need the 16-bit 'S' variant?
     { ARM::t2MOVr,ARM::tMOVgpr2gpr,0,            0,   0,    0,   0,  1,0, 0 },
     { ARM::t2MOVCCr,0,            ARM::tMOVCCr,  0,   0,    0,   0,  0,1, 0 },
     { ARM::t2MOVCCi,0,            ARM::tMOVCCi,  0,   8,    0,   0,  0,1, 0 },
     { ARM::t2MUL,   0,            ARM::tMUL,     0,   0,    0,   1,  0,0, 0 },
     { ARM::t2MVNr,  ARM::tMVN,    0,             0,   0,    1,   0,  0,0, 0 },
     { ARM::t2ORRrr, 0,            ARM::tORR,     0,   0,    0,   1,  0,0, 0 },
     { ARM::t2REV,   ARM::tREV,    0,             0,   0,    1,   0,  1,0, 0 },
     { ARM::t2REV16, ARM::tREV16,  0,             0,   0,    1,   0,  1,0, 0 },
     { ARM::t2REVSH, ARM::tREVSH,  0,             0,   0,    1,   0,  1,0, 0 },
     { ARM::t2RORrr, 0,            ARM::tROR,     0,   0,    0,   1,  0,0, 0 },
     // The RSB rows are Special: ReduceSpecial only narrows them when the
     // immediate operand is 0.
     { ARM::t2RSBri, ARM::tRSB,    0,             0,   0,    1,   0,  0,0, 1 },
     { ARM::t2RSBSri,ARM::tRSB,    0,             0,   0,    1,   0,  2,0, 1 },
     { ARM::t2SBCrr, 0,            ARM::tSBC,     0,   0,    0,   1,  0,0, 0 },
     { ARM::t2SUBri, ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  0,0, 0 },
     { ARM::t2SUBrr, ARM::tSUBrr,  0,             0,   0,    1,   0,  0,0, 0 },
     { ARM::t2SUBSri,ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  2,2, 0 },
     { ARM::t2SUBSrr,ARM::tSUBrr,  0,             0,   0,    1,   0,  2,0, 0 },
     { ARM::t2SXTBr, ARM::tSXTB,   0,             0,   0,    1,   0,  1,0, 0 },
     { ARM::t2SXTHr, ARM::tSXTH,   0,             0,   0,    1,   0,  1,0, 0 },
     { ARM::t2TSTrr, ARM::tTST,    0,             0,   0,    1,   0,  2,0, 0 },
     { ARM::t2UXTBr, ARM::tUXTB,   0,             0,   0,    1,   0,  1,0, 0 },
     { ARM::t2UXTHr, ARM::tUXTH,   0,             0,   0,    1,   0,  1,0, 0 },

     // Loads and stores. All of these rows are Special and are handled by
     // ReduceLoadStore, which applies its own per-opcode immediate scale.
     // FIXME: Clean this up after splitting each Thumb load / store opcode
     // into multiple ones.
     { ARM::t2LDRi12,ARM::tLDR,    0,             5,   0,    1,   0,  0,0, 1 },
     { ARM::t2LDRs,  ARM::tLDR,    0,             0,   0,    1,   0,  0,0, 1 },
     { ARM::t2LDRBi12,ARM::tLDRB,  0,             5,   0,    1,   0,  0,0, 1 },
     { ARM::t2LDRBs, ARM::tLDRB,   0,             0,   0,    1,   0,  0,0, 1 },
     { ARM::t2LDRHi12,ARM::tLDRH,  0,             5,   0,    1,   0,  0,0, 1 },
     { ARM::t2LDRHs, ARM::tLDRH,   0,             0,   0,    1,   0,  0,0, 1 },
     { ARM::t2LDRSBs,ARM::tLDRSB,  0,             0,   0,    1,   0,  0,0, 1 },
     { ARM::t2LDRSHs,ARM::tLDRSH,  0,             0,   0,    1,   0,  0,0, 1 },
     { ARM::t2STRi12,ARM::tSTR,    0,             5,   0,    1,   0,  0,0, 1 },
     { ARM::t2STRs,  ARM::tSTR,    0,             0,   0,    1,   0,  0,0, 1 },
     { ARM::t2STRBi12,ARM::tSTRB,  0,             5,   0,    1,   0,  0,0, 1 },
     { ARM::t2STRBs, ARM::tSTRB,   0,             0,   0,    1,   0,  0,0, 1 },
     { ARM::t2STRHi12,ARM::tSTRH,  0,             5,   0,    1,   0,  0,0, 1 },
     { ARM::t2STRHs, ARM::tSTRH,   0,             0,   0,    1,   0,  0,0, 1 },

     // Load / store multiple. Narrow2 is the SP-relative form that
     // ReduceLoadStore selects when the base register is SP with writeback.
     { ARM::t2LDM_RET,0,           ARM::tPOP_RET, 0,   0,    1,   1,  1,1, 1 },
     { ARM::t2LDM,   ARM::tLDM,    ARM::tPOP,     0,   0,    1,   1,  1,1, 1 },
     { ARM::t2STM,   ARM::tSTM,    ARM::tPUSH,    0,   0,    1,   1,  1,1, 1 },
   };

   /// Thumb2SizeReduce - Machine function pass that visits every basic block
   /// of a function and tries to replace 32-bit instructions with the 16-bit
   /// opcodes listed in ReduceTable.
   class VISIBILITY_HIDDEN Thumb2SizeReduce : public MachineFunctionPass {
   public:
     static char ID;
     /// Builds ReduceOpcodeMap from ReduceTable.
     Thumb2SizeReduce();

     /// Instruction info of the current target; assigned at the start of
     /// runOnMachineFunction and used to look up narrow opcode descriptions.
     const Thumb2InstrInfo *TII;

     /// Runs ReduceMBB over every block of MF; returns true if any
     /// instruction was rewritten.
     virtual bool runOnMachineFunction(MachineFunction &MF);

     virtual const char *getPassName() const {
       return "Thumb2 instruction size reduction pass";
     }

   private:
     /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
     DenseMap<unsigned, unsigned> ReduceOpcodeMap;

     /// VerifyPredAndCC - Check that the narrow form selected by is2Addr is
     /// compatible with the predicate (Pred) and CPSR behaviour of MI.
     /// HasCC / CCDead describe the wide instruction's CPSR def on entry and
     /// may be updated to describe the def the narrow instruction should get.
     bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                          bool is2Addr, ARMCC::CondCodes Pred,
                          bool LiveCPSR, bool &HasCC, bool &CCDead);

     /// ReduceLoadStore - Try to narrow a load / store (or load / store
     /// multiple); on success MI is erased and replaced.
     bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
                          const ReduceEntry &Entry);

     /// ReduceSpecial - Handle table entries flagged Special: loads / stores,
     /// the flag-setting adds and the reverse subtracts.
     bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                        const ReduceEntry &Entry, bool LiveCPSR);

     /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
     /// instruction.
     bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                        const ReduceEntry &Entry,
                        bool LiveCPSR);

     /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
     /// non-two-address instruction.
     bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                         const ReduceEntry &Entry,
                         bool LiveCPSR);

     /// ReduceMBB - Reduce width of instructions in the specified basic block.
     bool ReduceMBB(MachineBasicBlock &MBB);
   };
   // Pass identification; its address is handed to the MachineFunctionPass
   // constructor.
   char Thumb2SizeReduce::ID = 0;
 }

 /// Index ReduceTable by wide opcode: for every row, record the row number
 /// under that row's WideOpc in ReduceOpcodeMap. A wide opcode that shows up
 /// twice in the table trips the assertion.
 Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(&ID) {
   const unsigned NumEntries = array_lengthof(ReduceTable);
   for (unsigned Idx = 0; Idx != NumEntries; ++Idx) {
     const bool Inserted =
       ReduceOpcodeMap.insert(std::make_pair(ReduceTable[Idx].WideOpc,
                                             Idx)).second;
     if (!Inserted)
       assert(false && "Duplicated entries?");
   }
 }

 /// HasImplicitCPSRDef - Return true if CPSR is listed among the implicit
 /// register definitions of the instruction description TID.
 ///
 /// The implicit-def list is scanned up to its zero terminator. A description
 /// without any implicit-def list (null pointer) is reported as "no CPSR def"
 /// instead of being dereferenced.
 static bool HasImplicitCPSRDef(const TargetInstrDesc &TID) {
   const unsigned *Regs = TID.ImplicitDefs;
   if (!Regs)
     return false;
   for (; *Regs; ++Regs)
     if (*Regs == ARM::CPSR)
       return true;
   return false;
 }

 /// VerifyPredAndCC - Decide whether the narrow opcode chosen by is2Addr can
 /// stand in for MI given its predicate and its effect on CPSR.
 ///
 /// \param Pred      predicate of the wide instruction (ARMCC::AL when it is
 ///                  not predicated).
 /// \param LiveCPSR  whether CPSR is live at this point.
 /// \param HasCC     in: the wide instruction defines CPSR; out: the narrow
 ///                  instruction should carry a CPSR def.
 /// \param CCDead    set to true when a CPSR def is introduced that nobody
 ///                  reads.
 /// \returns true if the rewrite is allowed.
 bool
 Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                                   bool is2Addr, ARMCC::CondCodes Pred,
                                   bool LiveCPSR, bool &HasCC, bool &CCDead) {
   const unsigned Mode = is2Addr ? Entry.PredCC2 : Entry.PredCC1;

   if (Mode == 0) {
     // Predicated: the narrow form must not set CPSR.
     if (Pred != ARMCC::AL)
       return !HasCC;

     // Not predicated: the narrow form must set CPSR.
     if (HasCC)
       return true;
     // The wide instruction did not set CPSR. Clobbering it is only harmless
     // while CPSR is not live; the new def is then dead.
     if (LiveCPSR)
       return false;
     HasCC = true;
     CCDead = true;
     return true;
   }

   if (Mode == 2) {
     // The wide opcode already defines CPSR through its optional def.
     if (HasCC)
       return true;
     // Otherwise the wide opcode has to define CPSR implicitly: the CPSR def
     // of these narrow opcodes is not meant to be thrown away (e.g. CMP).
     if (!HasImplicitCPSRDef(MI->getDesc()))
       return false;
     HasCC = true;
     return true;
   }

   // The 16-bit instruction does not set CPSR at all.
   return !HasCC;
 }

 /// VerifyLowRegs - Return true if every explicit register operand of MI is a
 /// low register. The null register and CPSR are ignored, and a few opcodes
 /// may additionally name special registers: PC for t2LDM_RET / t2LDM, LR for
 /// t2STM, and SP for any of those as well as t2ADDrSPi.
 static bool VerifyLowRegs(MachineInstr *MI) {
   const unsigned Opcode = MI->getOpcode();
   const bool AllowPC = Opcode == ARM::t2LDM_RET || Opcode == ARM::t2LDM;
   const bool AllowLR = Opcode == ARM::t2STM;
   const bool AllowSP = AllowPC || AllowLR || Opcode == ARM::t2ADDrSPi;

   for (unsigned Idx = 0, NumOps = MI->getNumOperands(); Idx != NumOps; ++Idx) {
     const MachineOperand &Op = MI->getOperand(Idx);
     if (!Op.isReg() || Op.isImplicit())
       continue;

     const unsigned Reg = Op.getReg();
     const bool Exempt = Reg == 0 || Reg == ARM::CPSR ||
                         (AllowPC && Reg == ARM::PC) ||
                         (AllowLR && Reg == ARM::LR) ||
                         (AllowSP && Reg == ARM::SP);
     if (!Exempt && !isARMLowRegister(Reg))
       return false;
   }
   return true;
 }

 /// ReduceLoadStore - Try to replace a 32-bit load / store, or load / store
 /// multiple, with the 16-bit opcode named by Entry.
 ///
 /// The rewrite is refused when:
 ///  - the t2-reduce-limit3 cap has been reached;
 ///  - a register-offset form carries a non-zero shift amount;
 ///  - an immediate offset is not a multiple of the access size or does not
 ///    fit in Entry.Imm1Limit bits after scaling;
 ///  - a load / store multiple is neither a stack pop / push nor a writeback,
 ///    increment-after transfer through a low base register.
 ///
 /// On success the new instruction is inserted in front of MI, MI is erased
 /// and NumLdSts is incremented.
 ///
 /// \returns true if MI was replaced.
 bool
 Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
                                   const ReduceEntry &Entry) {
   if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
     return false;

   unsigned Scale = 1;
   bool HasImmOffset = false;
   bool HasShift = false;
   bool isLdStMul = false;
   unsigned Opc = Entry.NarrowOpc1;
   unsigned OpNum = 3; // First 'rest' of operands.
   switch (Entry.WideOpc) {
   default:
     llvm_unreachable("Unexpected Thumb2 load / store opcode!");
   case ARM::t2LDRi12:
   case ARM::t2STRi12:
     Scale = 4;
     HasImmOffset = true;
     break;
   case ARM::t2LDRBi12:
   case ARM::t2STRBi12:
     HasImmOffset = true;
     break;
   case ARM::t2LDRHi12:
   case ARM::t2STRHi12:
     Scale = 2;
     HasImmOffset = true;
     break;
   case ARM::t2LDRs:
   case ARM::t2LDRBs:
   case ARM::t2LDRHs:
   case ARM::t2LDRSBs:
   case ARM::t2LDRSHs:
   case ARM::t2STRs:
   case ARM::t2STRBs:
   case ARM::t2STRHs:
     HasShift = true;
     OpNum = 4;
     break;
   case ARM::t2LDM_RET:
   case ARM::t2LDM:
   case ARM::t2STM: {
     OpNum = 0;
     unsigned BaseReg = MI->getOperand(0).getReg();
     unsigned Mode = MI->getOperand(1).getImm();
     bool HasWB = ARM_AM::getAM4WBFlag(Mode);
     bool isIA = ARM_AM::getAM4SubMode(Mode) == ARM_AM::ia;
     // POP is "ldmia sp!" and PUSH is "stmdb sp!". An SP-relative transfer
     // using any other sub-mode addresses different memory and therefore has
     // no 16-bit equivalent.
     bool isStackForm = (Entry.WideOpc == ARM::t2STM)
       ? ARM_AM::getAM4SubMode(Mode) == ARM_AM::db
       : isIA;
     if (BaseReg == ARM::SP && HasWB && isStackForm) {
       Opc = Entry.NarrowOpc2;
       // The base register and mode operands are implied by push / pop.
       OpNum = 2;
     } else if (Entry.WideOpc == ARM::t2LDM_RET ||
                !isARMLowRegister(BaseReg) ||
                !HasWB ||
                !isIA) {
       return false;
     }
     isLdStMul = true;
     break;
   }
   }

   unsigned OffsetReg = 0;
   bool OffsetKill = false;
   if (HasShift) {
     OffsetReg  = MI->getOperand(2).getReg();
     OffsetKill = MI->getOperand(2).isKill();
     if (MI->getOperand(3).getImm())
       // Thumb1 addressing mode doesn't support shift.
       return false;
   }

   unsigned OffsetImm = 0;
   if (HasImmOffset) {
     OffsetImm = MI->getOperand(2).getImm();
     unsigned MaxOffset = ((1 << Entry.Imm1Limit) - 1) * Scale;
     if ((OffsetImm & (Scale-1)) || OffsetImm > MaxOffset)
       // Make sure the immediate field fits.
       return false;
   }

   // Add the 16-bit load / store instruction.
   // FIXME: Thumb1 addressing mode encode both immediate and register offset.
   DebugLoc dl = MI->getDebugLoc();
   MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc));
   if (!isLdStMul) {
     // Transferred value register and base register.
     MIB.addOperand(MI->getOperand(0)).addOperand(MI->getOperand(1));
     if (Entry.NarrowOpc1 != ARM::tLDRSB && Entry.NarrowOpc1 != ARM::tLDRSH) {
       // tLDRSB and tLDRSH do not have an immediate offset field. On the other
       // hand, it must have an offset register.
       // FIXME: Remove this special case.
       MIB.addImm(OffsetImm/Scale);
     }
     assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");

     // Register 0 stands for "no offset register" in the immediate forms.
     MIB.addReg(OffsetReg, getKillRegState(OffsetKill));
   }

   // Transfer the rest of operands.
   for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
     MIB.addOperand(MI->getOperand(OpNum));

   DOUT << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB;

   MBB.erase(MI);
   ++NumLdSts;
   return true;
 }

 /// ReduceSpecial - Handle the table entries flagged Special.
 ///
 /// Loads and stores are forwarded to ReduceLoadStore. The flag-setting adds
 /// are only narrowed when they are not predicated; the immediate form tries
 /// the two-address encoding first and then the plain narrow one. The reverse
 /// subtracts are only narrowed when their immediate operand is 0.
 ///
 /// \returns true if MI was replaced by a 16-bit instruction.
 bool
 Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
                                 bool LiveCPSR) {
   if (Entry.LowRegs1 && !VerifyLowRegs(MI))
     return false;

   const TargetInstrDesc &Desc = MI->getDesc();
   if (Desc.mayLoad() || Desc.mayStore())
     return ReduceLoadStore(MBB, MI, Entry);

   const unsigned Opcode = MI->getOpcode();

   if (Opcode == ARM::t2ADDSri || Opcode == ARM::t2ADDSrr) {
     unsigned PredReg = 0;
     if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
       return false;
     if (Opcode == ARM::t2ADDSri && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR))
       return true;
     return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
   }

   if (Opcode == ARM::t2RSBri || Opcode == ARM::t2RSBSri) {
     if (MI->getOperand(2).getImm() != 0)
       return false;
     return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
   }

   return false;
 }

 /// ReduceTo2Addr - Try to replace MI with the 16-bit two-address opcode
 /// Entry.NarrowOpc2.
 ///
 /// Requirements checked here:
 ///  - the t2-reduce-limit2 cap has not been reached;
 ///  - operands 0 and 1 are the same register (a low one if LowRegs2 is set);
 ///  - operand 2 is an immediate of at most Imm2Limit bits when Imm2Limit is
 ///    non-zero, otherwise a register (a low one if LowRegs2 is set);
 ///  - a predicated MI needs a predicable narrow opcode;
 ///  - VerifyPredAndCC accepts the CPSR behaviour.
 ///
 /// On success the new instruction is inserted in front of MI, MI is erased
 /// and Num2Addrs is incremented.
 ///
 /// \returns true if MI was replaced.
 bool
 Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
                                 bool LiveCPSR) {

   if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
     return false;

   const TargetInstrDesc &TID = MI->getDesc();
   unsigned Reg0 = MI->getOperand(0).getReg();
   unsigned Reg1 = MI->getOperand(1).getReg();
   if (Reg0 != Reg1)
     return false;
   if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
     return false;
   if (Entry.Imm2Limit) {
     unsigned Imm = MI->getOperand(2).getImm();
     unsigned Limit = (1 << Entry.Imm2Limit) - 1;
     if (Imm > Limit)
       return false;
   } else {
     unsigned Reg2 = MI->getOperand(2).getReg();
     if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
       return false;
   }

   // Check if it's possible / necessary to transfer the predicate.
   const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc2);
   unsigned PredReg = 0;
   ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
   bool SkipPred = false;
   if (Pred != ARMCC::AL) {
     if (!NewTID.isPredicable())
       // Can't transfer predicate, fail.
       return false;
   } else {
     SkipPred = !NewTID.isPredicable();
   }

   // Find out whether the wide instruction's optional def is CPSR, and
   // whether that def is dead.
   bool HasCC = false;
   bool CCDead = false;
   if (TID.hasOptionalDef()) {
     unsigned NumOps = TID.getNumOperands();
     HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
     if (HasCC && MI->getOperand(NumOps-1).isDead())
       CCDead = true;
   }
   if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
     return false;

   // Add the 16-bit instruction.
   DebugLoc dl = MI->getDebugLoc();
   MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID);
   MIB.addOperand(MI->getOperand(0));
   if (NewTID.hasOptionalDef()) {
     if (HasCC)
       AddDefaultT1CC(MIB, CCDead);
     else
       AddNoT1CC(MIB);
   }

   // Transfer the rest of operands.
   unsigned NumOps = TID.getNumOperands();
   for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
     // Only the first NumOps operands are described by TID.OpInfo; operands
     // past that (e.g. implicit ones) have no entry and must not be looked up.
     bool isDescribed = i < NumOps;
     if (isDescribed && TID.OpInfo[i].isOptionalDef())
       continue;
     if (SkipPred && isDescribed && TID.OpInfo[i].isPredicate())
       continue;
     MIB.addOperand(MI->getOperand(i));
   }

   DOUT << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB;

   MBB.erase(MI);
   ++Num2Addrs;
   return true;
 }

 /// ReduceToNarrow - Try to replace MI with the 16-bit non-two-address opcode
 /// Entry.NarrowOpc1.
 ///
 /// Requirements checked here:
 ///  - the t2-reduce-limit cap has not been reached;
 ///  - if LowRegs1 is set, every described register operand is a low
 ///    register (the null register and CPSR are ignored, as is SP for
 ///    t2ADDrSPi);
 ///  - every non-predicate immediate lies in [0, Limit] and is a multiple of
 ///    the scale (4 for t2ADDrSPi, otherwise 1);
 ///  - a predicated MI needs a predicable narrow opcode;
 ///  - VerifyPredAndCC accepts the CPSR behaviour.
 ///
 /// On success the new instruction is inserted in front of MI, MI is erased
 /// and NumNarrows is incremented.
 ///
 /// \returns true if MI was replaced.
 bool
 Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                                  const ReduceEntry &Entry,
                                  bool LiveCPSR) {
   if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
     return false;

   unsigned Limit = ~0U;
   unsigned Scale = (Entry.WideOpc == ARM::t2ADDrSPi) ? 4 : 1;
   if (Entry.Imm1Limit)
     Limit = ((1 << Entry.Imm1Limit) - 1) * Scale;

   const TargetInstrDesc &TID = MI->getDesc();
   for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
     if (TID.OpInfo[i].isPredicate())
       continue;
     const MachineOperand &MO = MI->getOperand(i);
     if (MO.isReg()) {
       unsigned Reg = MO.getReg();
       if (!Reg || Reg == ARM::CPSR)
         continue;
       if (Entry.WideOpc == ARM::t2ADDrSPi && Reg == ARM::SP)
         continue;
       if (Entry.LowRegs1 && !isARMLowRegister(Reg))
         return false;
     } else if (MO.isImm()) {
       // Compare as unsigned: the immediate is a signed 64-bit value, and a
       // signed comparison would let every negative immediate through.
       if (static_cast<unsigned>(MO.getImm()) > Limit ||
           (MO.getImm() & (Scale-1)) != 0)
         return false;
     }
   }

   // Check if it's possible / necessary to transfer the predicate.
   const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc1);
   unsigned PredReg = 0;
   ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
   bool SkipPred = false;
   if (Pred != ARMCC::AL) {
     if (!NewTID.isPredicable())
       // Can't transfer predicate, fail.
       return false;
   } else {
     SkipPred = !NewTID.isPredicable();
   }

   // Find out whether the wide instruction's optional def is CPSR, and
   // whether that def is dead.
   bool HasCC = false;
   bool CCDead = false;
   if (TID.hasOptionalDef()) {
     unsigned NumOps = TID.getNumOperands();
     HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
     if (HasCC && MI->getOperand(NumOps-1).isDead())
       CCDead = true;
   }
   if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
     return false;

   // Add the 16-bit instruction.
   DebugLoc dl = MI->getDebugLoc();
   MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID);
   MIB.addOperand(MI->getOperand(0));
   if (NewTID.hasOptionalDef()) {
     if (HasCC)
       AddDefaultT1CC(MIB, CCDead);
     else
       AddNoT1CC(MIB);
   }

   // Transfer the rest of operands.
   unsigned NumOps = TID.getNumOperands();
   for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
     if (i < NumOps && TID.OpInfo[i].isOptionalDef())
       continue;
     if ((TID.getOpcode() == ARM::t2RSBSri ||
          TID.getOpcode() == ARM::t2RSBri) && i == 2)
       // Skip the zero immediate operand, it's now implicit.
       continue;
     bool isPred = (i < NumOps && TID.OpInfo[i].isPredicate());
     if (SkipPred && isPred)
       continue;
     const MachineOperand &MO = MI->getOperand(i);
     if (Scale > 1 && !isPred && MO.isImm())
       // The narrow encoding stores the immediate divided by the scale.
       MIB.addImm(MO.getImm() / Scale);
     else {
       if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
         // Skip implicit def of CPSR. Either it's modeled as an optional
         // def now or it's already an implicit def on the new instruction.
         continue;
       MIB.addOperand(MO);
     }
   }
   // The wide opcode had no predicate operands to copy, but the narrow one
   // expects them.
   if (!TID.isPredicable() && NewTID.isPredicable())
     AddDefaultPred(MIB);

   DOUT << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB;

   MBB.erase(MI);
   ++NumNarrows;
   return true;
 }

 /// UpdateCPSRDef - Compute CPSR liveness after MI's definitions: the result
 /// is true if MI has a register operand that defines CPSR, is not undef and
 /// is not marked dead; otherwise the incoming LiveCPSR is returned unchanged.
 static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR) {
   for (unsigned Idx = 0, NumOps = MI.getNumOperands(); Idx != NumOps; ++Idx) {
     const MachineOperand &Op = MI.getOperand(Idx);
     if (Op.isReg() && !Op.isUndef() && !Op.isUse() &&
         Op.getReg() == ARM::CPSR && !Op.isDead())
       return true;
   }
   return LiveCPSR;
 }

 /// UpdateCPSRUse - Compute CPSR liveness after MI's uses: if MI reads CPSR
 /// through an operand marked as a kill, CPSR is no longer live; otherwise the
 /// incoming LiveCPSR is returned unchanged. Any (non-undef) read of CPSR
 /// asserts that CPSR was tracked as live.
 static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
   for (unsigned Idx = 0, NumOps = MI.getNumOperands(); Idx != NumOps; ++Idx) {
     const MachineOperand &Op = MI.getOperand(Idx);
     const bool ReadsCPSR = Op.isReg() && !Op.isUndef() && !Op.isDef() &&
                            Op.getReg() == ARM::CPSR;
     if (!ReadsCPSR)
       continue;
     assert(LiveCPSR && "CPSR liveness tracking is wrong!");
     if (Op.isKill())
       return false;
   }
   return LiveCPSR;
 }

 /// ReduceMBB - Walk MBB from top to bottom, trying to shrink every
 /// instruction that has an entry in ReduceTable, while tracking whether CPSR
 /// is live in front of each instruction.
 ///
 /// \returns true if at least one instruction was replaced.
 bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
   // Yes, CPSR could be livein.
   bool LiveCPSR = false;
   for (MachineBasicBlock::const_livein_iterator LI = MBB.livein_begin(),
          LE = MBB.livein_end(); LI != LE && !LiveCPSR; ++LI)
     LiveCPSR = (*LI == ARM::CPSR);

   bool Modified = false;
   MachineBasicBlock::iterator NextMII;
   for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
        MII != E; MII = NextMII) {
     // Remember the successor first: a successful reduction erases *MII.
     NextMII = next(MII);

     MachineInstr *MI = &*MII;
     LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);

     bool Reduced = false;
     DenseMap<unsigned, unsigned>::iterator OPI =
       ReduceOpcodeMap.find(MI->getOpcode());
     if (OPI != ReduceOpcodeMap.end()) {
       const ReduceEntry &Entry = ReduceTable[OPI->second];
       if (Entry.Special)
         // Special entries never go through the generic paths below.
         Reduced = ReduceSpecial(MBB, MI, Entry, LiveCPSR);
       else
         // Prefer the 16-bit two-address form, then fall back to the 16-bit
         // non-two-address form.
         Reduced =
           (Entry.NarrowOpc2 && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR)) ||
           (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR));
     }

     if (Reduced) {
       Modified = true;
       // The replacement was inserted right in front of the erased
       // instruction, i.e. just before NextMII.
       MI = &*prior(NextMII);
     }

     LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR);
   }

   return Modified;
 }

 /// runOnMachineFunction - Cache the target's instruction info in TII, then
 /// run ReduceMBB over every basic block of MF.
 ///
 /// \returns true if any block was modified.
 bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
   TII = static_cast<const Thumb2InstrInfo*>(MF.getTarget().getInstrInfo());

   bool Changed = false;
   for (MachineFunction::iterator BB = MF.begin(), End = MF.end();
        BB != End; ++BB) {
     if (ReduceMBB(*BB))
       Changed = true;
   }
   return Changed;
 }

 /// createThumb2SizeReductionPass - Allocate a new Thumb2 size reduction pass
 /// and return it through the generic FunctionPass interface.
 FunctionPass *llvm::createThumb2SizeReductionPass() {
   FunctionPass *Pass = new Thumb2SizeReduce();
   return Pass;
 }
	//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
	//
	// The LLVM Compiler Infrastructure
	//
	// This file is distributed under the University of Illinois Open Source
	// License. See LICENSE.TXT for details.
	//
	//===----------------------------------------------------------------------===//

	#define DEBUG_TYPE "t2-reduce-size"
	#include "ARM.h"
	#include "ARMAddressingModes.h"
	#include "ARMBaseRegisterInfo.h"
	#include "ARMBaseInstrInfo.h"
	#include "Thumb2InstrInfo.h"
	#include "llvm/CodeGen/MachineInstr.h"
	#include "llvm/CodeGen/MachineInstrBuilder.h"
	#include "llvm/CodeGen/MachineFunctionPass.h"
	#include "llvm/Support/CommandLine.h"
	#include "llvm/Support/Compiler.h"
	#include "llvm/Support/Debug.h"
	#include "llvm/ADT/DenseMap.h"
	#include "llvm/ADT/Statistic.h"
	using namespace llvm;

	// Counters for the three kinds of rewrite this pass performs.
	STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
	STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones");
	STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones");

	// Hidden command-line options, each initialised to -1.
	// NOTE(review): presumably -1 means "no limit" and each option caps one of
	// the counters above -- the code that reads them is not part of this copy.
	static cl::opt<int> ReduceLimit("t2-reduce-limit",
	cl::init(-1), cl::Hidden);
	static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
	cl::init(-1), cl::Hidden);
	static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
	cl::init(-1), cl::Hidden);

	namespace {
	/// ReduceEntry - One row of ReduceTable. It describes how a single wide
	/// (32-bit) opcode may be rewritten as a narrow (16-bit) opcode and which
	/// constraints have to hold before the rewrite is attempted.
	///
	/// Fields with suffix 1 apply to the plain narrow form (NarrowOpc1), fields
	/// with suffix 2 apply to the two-address narrow form (NarrowOpc2).
	struct ReduceEntry {
	unsigned WideOpc; // Wide opcode
	unsigned NarrowOpc1; // Narrow opcode to transform to
	unsigned NarrowOpc2; // Narrow opcode when it's two-address
	uint8_t Imm1Limit; // Limit of immediate field (bits)
	uint8_t Imm2Limit; // Limit of immediate field when it's two-address
	unsigned LowRegs1 : 1; // Only possible if low-registers are used
	unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
	unsigned PredCC1 : 2; // How VerifyPredAndCC treats CPSR for NarrowOpc1:
	// 0 - Narrow form sets CPSR exactly when it is not predicated.
	// 2 - Accepted when the wide instruction already defines CPSR.
	// Any other value (the table uses 1) - No cc field.
	unsigned PredCC2 : 2; // Same encoding as PredCC1, for NarrowOpc2.
	unsigned Special : 1; // Needs to be dealt with specially
	};

	/// ReduceTable - Static description of every wide opcode this pass knows
	/// how to shrink. The columns follow the field order of ReduceEntry:
	/// WideOpc, NarrowOpc1, NarrowOpc2, Imm1Limit, Imm2Limit, LowRegs1,
	/// LowRegs2, PredCC1,PredCC2 (written as a comma pair), Special.
	/// Each wide opcode must appear at most once; the pass constructor asserts
	/// on duplicates when it indexes this table.
	static const ReduceEntry ReduceTable[] = {
	// Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, S
	{ ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0 },
	{ ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0 },
	{ ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0 },
	// Note: immediate scale is 4.
	{ ARM::t2ADDrSPi,ARM::tADDrSPi,0, 8, 0, 1, 0, 1,0, 0 },
	{ ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 1 },
	{ ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 1 },
	{ ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 0 },
	{ ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 0 },
	{ ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 0 },
	{ ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 0 },
	{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0 },
	{ ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0 },
	{ ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0 },
	{ ARM::t2CMPzri,ARM::tCMPzi8, 0, 8, 0, 1, 0, 2,0, 0 },
	{ ARM::t2CMPzrr,ARM::tCMPzhir,0, 0, 0, 0, 0, 2,0, 0 },
	{ ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 0 },
	// FIXME: adr.n immediate offset must be multiple of 4.
	//{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0 },
	{ ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 0 },
	{ ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 0 },
	{ ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 0 },
	{ ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 0 },
	{ ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 },
	// FIXME: Do we need the 16-bit 'S' variant?
	{ ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0 },
	{ ARM::t2MOVCCr,0, ARM::tMOVCCr, 0, 0, 0, 0, 0,1, 0 },
	{ ARM::t2MOVCCi,0, ARM::tMOVCCi, 0, 8, 0, 0, 0,1, 0 },
	{ ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 0 },
	{ ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0 },
	{ ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 0 },
	{ ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0 },
	{ ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0 },
	{ ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0 },
	{ ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 0 },
	{ ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 1 },
	{ ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 1 },
	{ ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0 },
	{ ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0 },
	{ ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0 },
	{ ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0 },
	{ ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0 },
	{ ARM::t2SXTBr, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0 },
	{ ARM::t2SXTHr, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0 },
	{ ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0 },
	{ ARM::t2UXTBr, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0 },
	{ ARM::t2UXTHr, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0 },

	// Loads and stores; every row below is flagged Special.
	// FIXME: Clean this up after splitting each Thumb load / store opcode
	// into multiple ones.
	{ ARM::t2LDRi12,ARM::tLDR, 0, 5, 0, 1, 0, 0,0, 1 },
	{ ARM::t2LDRs, ARM::tLDR, 0, 0, 0, 1, 0, 0,0, 1 },
	{ ARM::t2LDRBi12,ARM::tLDRB, 0, 5, 0, 1, 0, 0,0, 1 },
	{ ARM::t2LDRBs, ARM::tLDRB, 0, 0, 0, 1, 0, 0,0, 1 },
	{ ARM::t2LDRHi12,ARM::tLDRH, 0, 5, 0, 1, 0, 0,0, 1 },
	{ ARM::t2LDRHs, ARM::tLDRH, 0, 0, 0, 1, 0, 0,0, 1 },
	{ ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 1 },
	{ ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 1 },
	{ ARM::t2STRi12,ARM::tSTR, 0, 5, 0, 1, 0, 0,0, 1 },
	{ ARM::t2STRs, ARM::tSTR, 0, 0, 0, 1, 0, 0,0, 1 },
	{ ARM::t2STRBi12,ARM::tSTRB, 0, 5, 0, 1, 0, 0,0, 1 },
	{ ARM::t2STRBs, ARM::tSTRB, 0, 0, 0, 1, 0, 0,0, 1 },
	{ ARM::t2STRHi12,ARM::tSTRH, 0, 5, 0, 1, 0, 0,0, 1 },
	{ ARM::t2STRHs, ARM::tSTRH, 0, 0, 0, 1, 0, 0,0, 1 },

	// Load / store multiple; Narrow2 holds the pop / push opcodes.
	{ ARM::t2LDM_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 1 },
	{ ARM::t2LDM, ARM::tLDM, ARM::tPOP, 0, 0, 1, 1, 1,1, 1 },
	{ ARM::t2STM, ARM::tSTM, ARM::tPUSH, 0, 0, 1, 1, 1,1, 1 },
	};

	/// Thumb2SizeReduce - Machine function pass that tries to replace wide
	/// Thumb2 instructions with the narrow opcodes listed in ReduceTable.
	class VISIBILITY_HIDDEN Thumb2SizeReduce : public MachineFunctionPass {
	public:
	/// Pass identifier; its address is passed to the MachineFunctionPass base.
	static char ID;
	/// Populates ReduceOpcodeMap from ReduceTable.
	Thumb2SizeReduce();

	/// Instruction info of the target; assigned at the start of
	/// runOnMachineFunction and used to look up the narrow opcode descriptions.
	const Thumb2InstrInfo *TII;

	/// Runs ReduceMBB over every basic block of MF; returns true if any block
	/// was modified.
	virtual bool runOnMachineFunction(MachineFunction &MF);

	virtual const char *getPassName() const {
	return "Thumb2 instruction size reduction pass";
	}

	private:
	/// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
	DenseMap<unsigned, unsigned> ReduceOpcodeMap;

	/// VerifyPredAndCC - Check that the predicate and CPSR-def state of MI is
	/// compatible with the narrow opcode selected by is2Addr. HasCC and CCDead
	/// are in/out: they may be updated when a (dead) CPSR def is introduced.
	bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
	bool is2Addr, ARMCC::CondCodes Pred,
	bool LiveCPSR, bool &HasCC, bool &CCDead);

	/// ReduceLoadStore - Try to replace a wide load / store (including
	/// load / store multiple) with its narrow form. Returns true on success.
	bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
	const ReduceEntry &Entry);

	/// ReduceSpecial - Handle table entries flagged as Special (loads / stores,
	/// t2ADDSri / t2ADDSrr, t2RSBri / t2RSBSri). Returns true if MI was replaced.
	bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
	const ReduceEntry &Entry, bool LiveCPSR);

	/// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
	/// instruction.
	bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
	const ReduceEntry &Entry,
	bool LiveCPSR);

	/// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
	/// non-two-address instruction.
	bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
	const ReduceEntry &Entry,
	bool LiveCPSR);

	/// ReduceMBB - Reduce width of instructions in the specified basic block.
	bool ReduceMBB(MachineBasicBlock &MBB);
	};
	// Storage for the pass identifier; the constructor hands its address to the
	// MachineFunctionPass base class.
	char Thumb2SizeReduce::ID = 0;
	}

	/// Constructor - Index ReduceTable by wide opcode: every entry's WideOpc is
	/// mapped to the position of that entry in the table. A wide opcode that
	/// shows up twice trips the assertion.
	Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(&ID) {
	  const unsigned NumEntries = array_lengthof(ReduceTable);
	  for (unsigned Idx = 0; Idx != NumEntries; ++Idx) {
	    const unsigned WideOpc = ReduceTable[Idx].WideOpc;
	    const bool Inserted =
	      ReduceOpcodeMap.insert(std::make_pair(WideOpc, Idx)).second;
	    if (!Inserted)
	      assert(false && "Duplicated entries?");
	  }
	}

	/// HasImplicitCPSRDef - Return true if ARM::CPSR appears in the implicit-def
	/// register list of the instruction description TID.
	static bool HasImplicitCPSRDef(const TargetInstrDesc &TID) {
	  // The list is walked as a zero-terminated array of register numbers, so the
	  // cursor must be a pointer into it.
	  // NOTE(review): the null check is defensive; confirm whether descriptions
	  // without implicit defs carry a null pointer or an empty list.
	  for (const unsigned *Regs = TID.ImplicitDefs; Regs && *Regs; ++Regs)
	    if (*Regs == ARM::CPSR)
	      return true;
	  return false;
	}

	/// VerifyPredAndCC - Decide whether MI's predicate and CPSR-def state allow
	/// it to be replaced by the narrow opcode of Entry.
	///
	/// is2Addr selects which table field is consulted (PredCC2 for the
	/// two-address form, PredCC1 otherwise). Pred is MI's predicate and LiveCPSR
	/// tells whether CPSR is live at MI. HasCC is in/out: on entry it says
	/// whether MI defines CPSR, on exit whether the narrow instruction will.
	/// CCDead is set when a CPSR def is introduced that nobody reads.
	/// Returns true if the reduction may proceed.
	bool
	Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
	                                  bool is2Addr, ARMCC::CondCodes Pred,
	                                  bool LiveCPSR, bool &HasCC, bool &CCDead) {
	  const unsigned PredCC = is2Addr ? Entry.PredCC2 : Entry.PredCC1;

	  if (PredCC == 0) {
	    if (Pred == ARMCC::AL) {
	      // Not predicated, must set CPSR.
	      if (!HasCC) {
	        // Original instruction was not setting CPSR. That is only acceptable
	        // when CPSR is not currently live; the new CPSR def is then dead.
	        if (LiveCPSR)
	          return false;
	        HasCC = true;
	        CCDead = true;
	        return true;
	      }
	    } else if (HasCC) {
	      // Predicated, must not set CPSR.
	      return false;
	    }
	  } else if (PredCC == 2) {
	    // Old opcode has an optional def of CPSR.
	    if (HasCC)
	      return true;
	    // If the old opcode does not implicitly define CPSR either, it is not ok:
	    // the new opcode's CPSR def is not meant to be thrown away, e.g. CMP.
	    if (!HasImplicitCPSRDef(MI->getDesc()))
	      return false;
	    HasCC = true;
	  } else {
	    // 16-bit instruction does not set CPSR.
	    if (HasCC)
	      return false;
	  }

	  return true;
	}

	/// VerifyLowRegs - Return true if every explicit register operand of MI is a
	/// low register. Register 0 and CPSR are ignored. PC is tolerated for
	/// t2LDM_RET / t2LDM, LR for t2STM, and SP for those opcodes plus t2ADDrSPi.
	static bool VerifyLowRegs(MachineInstr *MI) {
	  unsigned Opc = MI->getOpcode();
	  bool isPCOk = (Opc == ARM::t2LDM_RET) || (Opc == ARM::t2LDM);
	  bool isLROk = (Opc == ARM::t2STM);
	  bool isSPOk = isPCOk || isLROk || (Opc == ARM::t2ADDrSPi);
	  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
	    const MachineOperand &MO = MI->getOperand(i);
	    // Only explicit register operands are constrained.
	    if (!MO.isReg() || MO.isImplicit())
	      continue;
	    unsigned Reg = MO.getReg();
	    if (Reg == 0 || Reg == ARM::CPSR)
	      continue;
	    if (isPCOk && Reg == ARM::PC)
	      continue;
	    if (isLROk && Reg == ARM::LR)
	      continue;
	    if (isSPOk && Reg == ARM::SP)
	      continue;
	    if (!isARMLowRegister(Reg))
	      return false;
	  }
	  return true;
	}

	/// ReduceLoadStore - Try to replace the wide load / store MI with the narrow
	/// opcode from Entry. Handles immediate-offset forms (offset must be a
	/// multiple of the access scale and fit Entry.Imm1Limit bits), register-offset
	/// forms (only with a zero shift amount) and load / store multiple (either
	/// SP-based with writeback, using Entry.NarrowOpc2, or low base register with
	/// writeback in 'ia' mode). On success the new instruction is inserted
	/// before MI, MI is erased and true is returned; otherwise MBB is untouched.
	bool
	Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
	                                  const ReduceEntry &Entry) {
	  if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
	    return false;

	  unsigned Scale = 1;
	  bool HasImmOffset = false;
	  bool HasShift = false;
	  bool isLdStMul = false;
	  unsigned Opc = Entry.NarrowOpc1;
	  unsigned OpNum = 3; // First 'rest' of operands.
	  switch (Entry.WideOpc) {
	  default:
	    llvm_unreachable("Unexpected Thumb2 load / store opcode!");
	  case ARM::t2LDRi12:
	  case ARM::t2STRi12:
	    Scale = 4;
	    HasImmOffset = true;
	    break;
	  case ARM::t2LDRBi12:
	  case ARM::t2STRBi12:
	    HasImmOffset = true;
	    break;
	  case ARM::t2LDRHi12:
	  case ARM::t2STRHi12:
	    Scale = 2;
	    HasImmOffset = true;
	    break;
	  case ARM::t2LDRs:
	  case ARM::t2LDRBs:
	  case ARM::t2LDRHs:
	  case ARM::t2LDRSBs:
	  case ARM::t2LDRSHs:
	  case ARM::t2STRs:
	  case ARM::t2STRBs:
	  case ARM::t2STRHs:
	    HasShift = true;
	    OpNum = 4;
	    break;
	  case ARM::t2LDM_RET:
	  case ARM::t2LDM:
	  case ARM::t2STM: {
	    OpNum = 0;
	    unsigned BaseReg = MI->getOperand(0).getReg();
	    unsigned Mode = MI->getOperand(1).getImm();
	    if (BaseReg == ARM::SP && ARM_AM::getAM4WBFlag(Mode)) {
	      // SP-based with writeback: use the second narrow opcode and drop the
	      // base register and mode operands.
	      Opc = Entry.NarrowOpc2;
	      OpNum = 2;
	    } else if (Entry.WideOpc == ARM::t2LDM_RET ||
	               !isARMLowRegister(BaseReg) ||
	               !ARM_AM::getAM4WBFlag(Mode) ||
	               ARM_AM::getAM4SubMode(Mode) != ARM_AM::ia) {
	      return false;
	    }
	    isLdStMul = true;
	    break;
	  }
	  }

	  unsigned OffsetReg = 0;
	  bool OffsetKill = false;
	  if (HasShift) {
	    OffsetReg = MI->getOperand(2).getReg();
	    OffsetKill = MI->getOperand(2).isKill();
	    if (MI->getOperand(3).getImm())
	      // Thumb1 addressing mode doesn't support shift.
	      return false;
	  }

	  unsigned OffsetImm = 0;
	  if (HasImmOffset) {
	    OffsetImm = MI->getOperand(2).getImm();
	    unsigned MaxOffset = ((1 << Entry.Imm1Limit) - 1) * Scale;
	    if ((OffsetImm & (Scale-1)) || OffsetImm > MaxOffset)
	      // Make sure the immediate field fits.
	      return false;
	  }

	  // Add the 16-bit load / store instruction.
	  // FIXME: Thumb1 addressing mode encode both immediate and register offset.
	  DebugLoc dl = MI->getDebugLoc();
	  MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc));
	  if (!isLdStMul) {
	    MIB.addOperand(MI->getOperand(0)).addOperand(MI->getOperand(1));
	    if (Entry.NarrowOpc1 != ARM::tLDRSB && Entry.NarrowOpc1 != ARM::tLDRSH) {
	      // tLDRSB and tLDRSH do not have an immediate offset field. On the other
	      // hand, it must have an offset register.
	      // FIXME: Remove this special case.
	      MIB.addImm(OffsetImm/Scale);
	    }
	    assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");

	    MIB.addReg(OffsetReg, getKillRegState(OffsetKill));
	  }

	  // Transfer the rest of operands.
	  for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
	    MIB.addOperand(MI->getOperand(OpNum));

	  // Dereference so the instructions themselves are printed, not addresses.
	  DOUT << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB;

	  MBB.erase(MI);
	  ++NumLdSts;
	  return true;
	}

	/// ReduceSpecial - Handle the table entries that need opcode-specific
	/// treatment. Loads / stores are forwarded to ReduceLoadStore. Unpredicated
	/// t2ADDSri tries the two-address form first and then the plain narrow form;
	/// unpredicated t2ADDSrr tries only the narrow form. t2RSBri / t2RSBSri are
	/// narrowed only when their immediate operand is zero. Returns true if MI
	/// was replaced.
	bool
	Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
	                                const ReduceEntry &Entry,
	                                bool LiveCPSR) {
	  if (Entry.LowRegs1 && !VerifyLowRegs(MI))
	    return false;

	  const TargetInstrDesc &TID = MI->getDesc();
	  if (TID.mayLoad() || TID.mayStore())
	    return ReduceLoadStore(MBB, MI, Entry);

	  unsigned Opc = MI->getOpcode();
	  switch (Opc) {
	  default: break;
	  case ARM::t2ADDSri:
	  case ARM::t2ADDSrr: {
	    unsigned PredReg = 0;
	    if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
	      break;
	    // Only the immediate form has a two-address candidate; when that
	    // attempt fails MI is untouched and the narrow form is tried next.
	    if (Opc == ARM::t2ADDSri && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR))
	      return true;
	    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
	  }
	  case ARM::t2RSBri:
	  case ARM::t2RSBSri:
	    if (MI->getOperand(2).getImm() == 0)
	      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
	    break;
	  }
	  return false;
	}

	/// ReduceTo2Addr - Try to replace MI with the two-address narrow opcode
	/// Entry.NarrowOpc2. Requires operand 0 and operand 1 to be the same
	/// register, the low-register constraint of Entry.LowRegs2 to hold, and (when
	/// Entry.Imm2Limit is non-zero) operand 2 to be an immediate that fits in
	/// Imm2Limit bits. The predicate and CPSR def must also be transferable. On
	/// success the new instruction is inserted before MI, MI is erased and true
	/// is returned.
	bool
	Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
	                                const ReduceEntry &Entry,
	                                bool LiveCPSR) {

	  if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
	    return false;

	  const TargetInstrDesc &TID = MI->getDesc();
	  unsigned Reg0 = MI->getOperand(0).getReg();
	  unsigned Reg1 = MI->getOperand(1).getReg();
	  if (Reg0 != Reg1)
	    return false;
	  if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
	    return false;
	  if (Entry.Imm2Limit) {
	    unsigned Imm = MI->getOperand(2).getImm();
	    unsigned Limit = (1 << Entry.Imm2Limit) - 1;
	    if (Imm > Limit)
	      return false;
	  } else {
	    unsigned Reg2 = MI->getOperand(2).getReg();
	    if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
	      return false;
	  }

	  // Check if it's possible / necessary to transfer the predicate.
	  const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc2);
	  unsigned PredReg = 0;
	  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
	  bool SkipPred = false;
	  if (Pred != ARMCC::AL) {
	    if (!NewTID.isPredicable())
	      // Can't transfer predicate, fail.
	      return false;
	  } else {
	    SkipPred = !NewTID.isPredicable();
	  }

	  bool HasCC = false;
	  bool CCDead = false;
	  if (TID.hasOptionalDef()) {
	    // The optional def is read from the last declared operand.
	    unsigned NumOps = TID.getNumOperands();
	    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
	    if (HasCC && MI->getOperand(NumOps-1).isDead())
	      CCDead = true;
	  }
	  if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
	    return false;

	  // Add the 16-bit instruction.
	  DebugLoc dl = MI->getDebugLoc();
	  MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID);
	  MIB.addOperand(MI->getOperand(0));
	  if (NewTID.hasOptionalDef()) {
	    if (HasCC)
	      AddDefaultT1CC(MIB, CCDead);
	    else
	      AddNoT1CC(MIB);
	  }

	  // Transfer the rest of operands.
	  unsigned NumOps = TID.getNumOperands();
	  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
	    // MI may carry more operands than TID declares, so OpInfo is only
	    // consulted for indices below NumOps.
	    if (i < NumOps && TID.OpInfo[i].isOptionalDef())
	      continue;
	    if (SkipPred && i < NumOps && TID.OpInfo[i].isPredicate())
	      continue;
	    MIB.addOperand(MI->getOperand(i));
	  }

	  // Dereference so the instructions themselves are printed, not addresses.
	  DOUT << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB;

	  MBB.erase(MI);
	  ++Num2Addrs;
	  return true;
	}

	/// ReduceToNarrow - Try to replace MI with the non-two-address narrow opcode
	/// Entry.NarrowOpc1. Every declared non-predicate register operand must
	/// satisfy Entry.LowRegs1 (SP is allowed for t2ADDrSPi; register 0 and CPSR
	/// are ignored) and every declared immediate must be a multiple of the scale
	/// (4 for t2ADDrSPi, else 1) and not exceed the limit derived from
	/// Entry.Imm1Limit. The predicate and CPSR def must also be transferable. On
	/// success the new instruction is inserted before MI, MI is erased and true
	/// is returned.
	bool
	Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
	                                 const ReduceEntry &Entry,
	                                 bool LiveCPSR) {
	  if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
	    return false;

	  unsigned Limit = ~0U;
	  unsigned Scale = (Entry.WideOpc == ARM::t2ADDrSPi) ? 4 : 1;
	  if (Entry.Imm1Limit)
	    Limit = ((1 << Entry.Imm1Limit) - 1) * Scale;

	  const TargetInstrDesc &TID = MI->getDesc();
	  for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
	    if (TID.OpInfo[i].isPredicate())
	      continue;
	    const MachineOperand &MO = MI->getOperand(i);
	    if (MO.isReg()) {
	      unsigned Reg = MO.getReg();
	      if (!Reg || Reg == ARM::CPSR)
	        continue;
	      if (Entry.WideOpc == ARM::t2ADDrSPi && Reg == ARM::SP)
	        continue;
	      if (Entry.LowRegs1 && !isARMLowRegister(Reg))
	        return false;
	    } else if (MO.isImm()) {
	      // Predicate operands were already skipped at the top of the loop.
	      if (MO.getImm() > Limit || (MO.getImm() & (Scale-1)) != 0)
	        return false;
	    }
	  }

	  // Check if it's possible / necessary to transfer the predicate.
	  const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc1);
	  unsigned PredReg = 0;
	  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
	  bool SkipPred = false;
	  if (Pred != ARMCC::AL) {
	    if (!NewTID.isPredicable())
	      // Can't transfer predicate, fail.
	      return false;
	  } else {
	    SkipPred = !NewTID.isPredicable();
	  }

	  bool HasCC = false;
	  bool CCDead = false;
	  if (TID.hasOptionalDef()) {
	    // The optional def is read from the last declared operand.
	    unsigned NumOps = TID.getNumOperands();
	    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
	    if (HasCC && MI->getOperand(NumOps-1).isDead())
	      CCDead = true;
	  }
	  if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
	    return false;

	  // Add the 16-bit instruction.
	  DebugLoc dl = MI->getDebugLoc();
	  MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID);
	  MIB.addOperand(MI->getOperand(0));
	  if (NewTID.hasOptionalDef()) {
	    if (HasCC)
	      AddDefaultT1CC(MIB, CCDead);
	    else
	      AddNoT1CC(MIB);
	  }

	  // Transfer the rest of operands.
	  unsigned NumOps = TID.getNumOperands();
	  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
	    if (i < NumOps && TID.OpInfo[i].isOptionalDef())
	      continue;
	    if ((TID.getOpcode() == ARM::t2RSBSri ||
	         TID.getOpcode() == ARM::t2RSBri) && i == 2)
	      // Skip the zero immediate operand, it's now implicit.
	      continue;
	    bool isPred = (i < NumOps && TID.OpInfo[i].isPredicate());
	    if (SkipPred && isPred)
	      continue;
	    const MachineOperand &MO = MI->getOperand(i);
	    if (Scale > 1 && !isPred && MO.isImm())
	      // The narrow form takes the immediate divided by the scale.
	      MIB.addImm(MO.getImm() / Scale);
	    else {
	      if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
	        // Skip implicit def of CPSR. Either it's modeled as an optional
	        // def now or it's already an implicit def on the new instruction.
	        continue;
	      MIB.addOperand(MO);
	    }
	  }
	  if (!TID.isPredicable() && NewTID.isPredicable())
	    AddDefaultPred(MIB);

	  // Dereference so the instructions themselves are printed, not addresses.
	  DOUT << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB;

	  MBB.erase(MI);
	  ++NumNarrows;
	  return true;
	}

	/// UpdateCPSRDef - Compute CPSR liveness after MI's defs: returns true if MI
	/// has a def of CPSR that is not marked dead, otherwise returns the incoming
	/// LiveCPSR unchanged.
	static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR) {
	  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
	    const MachineOperand &MO = MI.getOperand(i);
	    // Only register defs that are not undef are of interest.
	    if (!MO.isReg() || MO.isUndef() || MO.isUse())
	      continue;
	    if (MO.getReg() != ARM::CPSR)
	      continue;
	    if (!MO.isDead())
	      return true;
	  }

	  return LiveCPSR;
	}

	/// UpdateCPSRUse - Compute CPSR liveness after MI's uses: returns false if MI
	/// has a use of CPSR marked as kill, otherwise returns the incoming LiveCPSR.
	/// Asserts that CPSR is considered live whenever MI reads it.
	static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
	  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
	    const MachineOperand &MO = MI.getOperand(i);
	    // Only register uses that are not undef are of interest.
	    if (!MO.isReg() || MO.isUndef() || MO.isDef())
	      continue;
	    if (MO.getReg() != ARM::CPSR)
	      continue;
	    assert(LiveCPSR && "CPSR liveness tracking is wrong!");
	    if (MO.isKill()) {
	      LiveCPSR = false;
	      break;
	    }
	  }

	  return LiveCPSR;
	}

	/// ReduceMBB - Walk MBB once, tracking CPSR liveness, and try to replace each
	/// instruction whose opcode is in ReduceOpcodeMap with a narrow form. Entries
	/// flagged Special go through ReduceSpecial only; all others try the
	/// two-address form first and then the plain narrow form. Returns true if
	/// any instruction was replaced.
	bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
	  bool Modified = false;

	  bool LiveCPSR = false;
	  // Yes, CPSR could be livein.
	  for (MachineBasicBlock::const_livein_iterator I = MBB.livein_begin(),
	         E = MBB.livein_end(); I != E; ++I) {
	    if (*I == ARM::CPSR) {
	      LiveCPSR = true;
	      break;
	    }
	  }

	  MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
	  MachineBasicBlock::iterator NextMII;
	  for (; MII != E; MII = NextMII) {
	    // Remember the successor first: a successful reduction erases MI.
	    NextMII = next(MII);

	    MachineInstr *MI = &*MII;
	    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);

	    bool Reduced = false;
	    unsigned Opcode = MI->getOpcode();
	    DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
	    if (OPI != ReduceOpcodeMap.end()) {
	      const ReduceEntry &Entry = ReduceTable[OPI->second];
	      if (Entry.Special) {
	        // "Special" entries are handled exclusively by ReduceSpecial.
	        Reduced = ReduceSpecial(MBB, MI, Entry, LiveCPSR);
	      } else if (Entry.NarrowOpc2 &&
	                 ReduceTo2Addr(MBB, MI, Entry, LiveCPSR)) {
	        // Transformed to a 16-bit two-address instruction.
	        Reduced = true;
	      } else if (Entry.NarrowOpc1 &&
	                 ReduceToNarrow(MBB, MI, Entry, LiveCPSR)) {
	        // Transformed to a 16-bit non-two-address instruction.
	        Reduced = true;
	      }
	    }

	    if (Reduced) {
	      Modified = true;
	      // The replacement was inserted right before the (now erased) original,
	      // i.e. immediately before NextMII.
	      MachineBasicBlock::iterator I = prior(NextMII);
	      MI = &*I;
	    }

	    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR);
	  }

	  return Modified;
	}

	/// runOnMachineFunction - Cache the target's instruction info in TII, then
	/// run ReduceMBB on every basic block of MF. Returns true if any block was
	/// modified.
	bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
	  const TargetMachine &TM = MF.getTarget();
	  TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());

	  bool Modified = false;
	  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
	    Modified |= ReduceMBB(*I);
	  return Modified;
	}

	/// createThumb2SizeReductionPass - Factory for the Thumb2 size reduction
	/// pass: allocates a new Thumb2SizeReduce and hands it back to the caller.
	FunctionPass *llvm::createThumb2SizeReductionPass() {
	  FunctionPass *const Pass = new Thumb2SizeReduce();
	  return Pass;
	}