| //===-- X86InstrInfo.cpp - X86 Instruction Information --------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file contains the X86 implementation of the TargetInstrInfo class. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "X86InstrInfo.h" |
| #include "X86.h" |
| #include "X86InstrBuilder.h" |
| #include "X86InstrFoldTables.h" |
| #include "X86MachineFunctionInfo.h" |
| #include "X86Subtarget.h" |
| #include "X86TargetMachine.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/ADT/Sequence.h" |
| #include "llvm/CodeGen/LivePhysRegs.h" |
| #include "llvm/CodeGen/LiveVariables.h" |
| #include "llvm/CodeGen/MachineConstantPool.h" |
| #include "llvm/CodeGen/MachineDominators.h" |
| #include "llvm/CodeGen/MachineFrameInfo.h" |
| #include "llvm/CodeGen/MachineInstrBuilder.h" |
| #include "llvm/CodeGen/MachineModuleInfo.h" |
| #include "llvm/CodeGen/MachineRegisterInfo.h" |
| #include "llvm/CodeGen/StackMaps.h" |
| #include "llvm/IR/DerivedTypes.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/DebugInfoMetadata.h" |
| #include "llvm/MC/MCAsmInfo.h" |
| #include "llvm/MC/MCExpr.h" |
| #include "llvm/MC/MCInst.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include "llvm/Target/TargetOptions.h" |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "x86-instr-info" |
| |
| #define GET_INSTRINFO_CTOR_DTOR |
| #include "X86GenInstrInfo.inc" |
| |
| static cl::opt<bool> |
| NoFusing("disable-spill-fusing", |
| cl::desc("Disable fusing of spill code into instructions"), |
| cl::Hidden); |
| static cl::opt<bool> |
| PrintFailedFusing("print-failed-fuse-candidates", |
| cl::desc("Print instructions that the allocator wants to" |
| " fuse, but the X86 backend currently can't"), |
| cl::Hidden); |
| static cl::opt<bool> |
| ReMatPICStubLoad("remat-pic-stub-load", |
| cl::desc("Re-materialize load from stub in PIC mode"), |
| cl::init(false), cl::Hidden); |
| static cl::opt<unsigned> |
| PartialRegUpdateClearance("partial-reg-update-clearance", |
| cl::desc("Clearance between two register writes " |
| "for inserting XOR to avoid partial " |
| "register update"), |
| cl::init(64), cl::Hidden); |
| static cl::opt<unsigned> |
| UndefRegClearance("undef-reg-clearance", |
| cl::desc("How many idle instructions we would like before " |
| "certain undef register reads"), |
| cl::init(128), cl::Hidden); |
| |
| |
| // Pin the vtable to this file. |
| void X86InstrInfo::anchor() {} |
| |
| X86InstrInfo::X86InstrInfo(X86Subtarget &STI) |
| : X86GenInstrInfo((STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64 |
| : X86::ADJCALLSTACKDOWN32), |
| (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64 |
| : X86::ADJCALLSTACKUP32), |
| X86::CATCHRET, |
| (STI.is64Bit() ? X86::RETQ : X86::RETL)), |
| Subtarget(STI), RI(STI.getTargetTriple()) { |
| } |
| |
| bool |
| X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, |
| unsigned &SrcReg, unsigned &DstReg, |
| unsigned &SubIdx) const { |
| switch (MI.getOpcode()) { |
| default: break; |
| case X86::MOVSX16rr8: |
| case X86::MOVZX16rr8: |
| case X86::MOVSX32rr8: |
| case X86::MOVZX32rr8: |
| case X86::MOVSX64rr8: |
| if (!Subtarget.is64Bit()) |
| // It's not always legal to reference the low 8-bit of the larger |
| // register in 32-bit mode. |
| return false; |
| LLVM_FALLTHROUGH; |
| case X86::MOVSX32rr16: |
| case X86::MOVZX32rr16: |
| case X86::MOVSX64rr16: |
| case X86::MOVSX64rr32: { |
| if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg()) |
| // Be conservative. |
| return false; |
| SrcReg = MI.getOperand(1).getReg(); |
| DstReg = MI.getOperand(0).getReg(); |
| switch (MI.getOpcode()) { |
| default: llvm_unreachable("Unreachable!"); |
| case X86::MOVSX16rr8: |
| case X86::MOVZX16rr8: |
| case X86::MOVSX32rr8: |
| case X86::MOVZX32rr8: |
| case X86::MOVSX64rr8: |
| SubIdx = X86::sub_8bit; |
| break; |
| case X86::MOVSX32rr16: |
| case X86::MOVZX32rr16: |
| case X86::MOVSX64rr16: |
| SubIdx = X86::sub_16bit; |
| break; |
| case X86::MOVSX64rr32: |
| SubIdx = X86::sub_32bit; |
| break; |
| } |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const { |
| const MachineFunction *MF = MI.getParent()->getParent(); |
| const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); |
| |
| if (isFrameInstr(MI)) { |
| unsigned StackAlign = TFI->getStackAlignment(); |
| int SPAdj = alignTo(getFrameSize(MI), StackAlign); |
| SPAdj -= getFrameAdjustment(MI); |
| if (!isFrameSetup(MI)) |
| SPAdj = -SPAdj; |
| return SPAdj; |
| } |
| |
| // To know whether a call adjusts the stack, we need information |
| // that is bound to the following ADJCALLSTACKUP pseudo. |
| // Look for the next ADJCALLSTACKUP that follows the call. |
| if (MI.isCall()) { |
| const MachineBasicBlock *MBB = MI.getParent(); |
| auto I = ++MachineBasicBlock::const_iterator(MI); |
| for (auto E = MBB->end(); I != E; ++I) { |
| if (I->getOpcode() == getCallFrameDestroyOpcode() || |
| I->isCall()) |
| break; |
| } |
| |
| // If we could not find a frame destroy opcode, then it has already |
| // been simplified, so we don't care. |
| if (I->getOpcode() != getCallFrameDestroyOpcode()) |
| return 0; |
| |
| return -(I->getOperand(1).getImm()); |
| } |
| |
| // Currently handle only PUSHes we can reasonably expect to see |
| // in call sequences |
| switch (MI.getOpcode()) { |
| default: |
| return 0; |
| case X86::PUSH32i8: |
| case X86::PUSH32r: |
| case X86::PUSH32rmm: |
| case X86::PUSH32rmr: |
| case X86::PUSHi32: |
| return 4; |
| case X86::PUSH64i8: |
| case X86::PUSH64r: |
| case X86::PUSH64rmm: |
| case X86::PUSH64rmr: |
| case X86::PUSH64i32: |
| return 8; |
| } |
| } |
| |
| /// Return true and the FrameIndex if the specified |
| /// operand and follow operands form a reference to the stack frame. |
| bool X86InstrInfo::isFrameOperand(const MachineInstr &MI, unsigned int Op, |
| int &FrameIndex) const { |
| if (MI.getOperand(Op + X86::AddrBaseReg).isFI() && |
| MI.getOperand(Op + X86::AddrScaleAmt).isImm() && |
| MI.getOperand(Op + X86::AddrIndexReg).isReg() && |
| MI.getOperand(Op + X86::AddrDisp).isImm() && |
| MI.getOperand(Op + X86::AddrScaleAmt).getImm() == 1 && |
| MI.getOperand(Op + X86::AddrIndexReg).getReg() == 0 && |
| MI.getOperand(Op + X86::AddrDisp).getImm() == 0) { |
| FrameIndex = MI.getOperand(Op + X86::AddrBaseReg).getIndex(); |
| return true; |
| } |
| return false; |
| } |
| |
| static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) { |
| switch (Opcode) { |
| default: |
| return false; |
| case X86::MOV8rm: |
| case X86::KMOVBkm: |
| MemBytes = 1; |
| return true; |
| case X86::MOV16rm: |
| case X86::KMOVWkm: |
| MemBytes = 2; |
| return true; |
| case X86::MOV32rm: |
| case X86::MOVSSrm: |
| case X86::MOVSSrm_alt: |
| case X86::VMOVSSrm: |
| case X86::VMOVSSrm_alt: |
| case X86::VMOVSSZrm: |
| case X86::VMOVSSZrm_alt: |
| case X86::KMOVDkm: |
| MemBytes = 4; |
| return true; |
| case X86::MOV64rm: |
| case X86::LD_Fp64m: |
| case X86::MOVSDrm: |
| case X86::MOVSDrm_alt: |
| case X86::VMOVSDrm: |
| case X86::VMOVSDrm_alt: |
| case X86::VMOVSDZrm: |
| case X86::VMOVSDZrm_alt: |
| case X86::MMX_MOVD64rm: |
| case X86::MMX_MOVQ64rm: |
| case X86::KMOVQkm: |
| MemBytes = 8; |
| return true; |
| case X86::MOVAPSrm: |
| case X86::MOVUPSrm: |
| case X86::MOVAPDrm: |
| case X86::MOVUPDrm: |
| case X86::MOVDQArm: |
| case X86::MOVDQUrm: |
| case X86::VMOVAPSrm: |
| case X86::VMOVUPSrm: |
| case X86::VMOVAPDrm: |
| case X86::VMOVUPDrm: |
| case X86::VMOVDQArm: |
| case X86::VMOVDQUrm: |
| case X86::VMOVAPSZ128rm: |
| case X86::VMOVUPSZ128rm: |
| case X86::VMOVAPSZ128rm_NOVLX: |
| case X86::VMOVUPSZ128rm_NOVLX: |
| case X86::VMOVAPDZ128rm: |
| case X86::VMOVUPDZ128rm: |
| case X86::VMOVDQU8Z128rm: |
| case X86::VMOVDQU16Z128rm: |
| case X86::VMOVDQA32Z128rm: |
| case X86::VMOVDQU32Z128rm: |
| case X86::VMOVDQA64Z128rm: |
| case X86::VMOVDQU64Z128rm: |
| MemBytes = 16; |
| return true; |
| case X86::VMOVAPSYrm: |
| case X86::VMOVUPSYrm: |
| case X86::VMOVAPDYrm: |
| case X86::VMOVUPDYrm: |
| case X86::VMOVDQAYrm: |
| case X86::VMOVDQUYrm: |
| case X86::VMOVAPSZ256rm: |
| case X86::VMOVUPSZ256rm: |
| case X86::VMOVAPSZ256rm_NOVLX: |
| case X86::VMOVUPSZ256rm_NOVLX: |
| case X86::VMOVAPDZ256rm: |
| case X86::VMOVUPDZ256rm: |
| case X86::VMOVDQU8Z256rm: |
| case X86::VMOVDQU16Z256rm: |
| case X86::VMOVDQA32Z256rm: |
| case X86::VMOVDQU32Z256rm: |
| case X86::VMOVDQA64Z256rm: |
| case X86::VMOVDQU64Z256rm: |
| MemBytes = 32; |
| return true; |
| case X86::VMOVAPSZrm: |
| case X86::VMOVUPSZrm: |
| case X86::VMOVAPDZrm: |
| case X86::VMOVUPDZrm: |
| case X86::VMOVDQU8Zrm: |
| case X86::VMOVDQU16Zrm: |
| case X86::VMOVDQA32Zrm: |
| case X86::VMOVDQU32Zrm: |
| case X86::VMOVDQA64Zrm: |
| case X86::VMOVDQU64Zrm: |
| MemBytes = 64; |
| return true; |
| } |
| } |
| |
| static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) { |
| switch (Opcode) { |
| default: |
| return false; |
| case X86::MOV8mr: |
| case X86::KMOVBmk: |
| MemBytes = 1; |
| return true; |
| case X86::MOV16mr: |
| case X86::KMOVWmk: |
| MemBytes = 2; |
| return true; |
| case X86::MOV32mr: |
| case X86::MOVSSmr: |
| case X86::VMOVSSmr: |
| case X86::VMOVSSZmr: |
| case X86::KMOVDmk: |
| MemBytes = 4; |
| return true; |
| case X86::MOV64mr: |
| case X86::ST_FpP64m: |
| case X86::MOVSDmr: |
| case X86::VMOVSDmr: |
| case X86::VMOVSDZmr: |
| case X86::MMX_MOVD64mr: |
| case X86::MMX_MOVQ64mr: |
| case X86::MMX_MOVNTQmr: |
| case X86::KMOVQmk: |
| MemBytes = 8; |
| return true; |
| case X86::MOVAPSmr: |
| case X86::MOVUPSmr: |
| case X86::MOVAPDmr: |
| case X86::MOVUPDmr: |
| case X86::MOVDQAmr: |
| case X86::MOVDQUmr: |
| case X86::VMOVAPSmr: |
| case X86::VMOVUPSmr: |
| case X86::VMOVAPDmr: |
| case X86::VMOVUPDmr: |
| case X86::VMOVDQAmr: |
| case X86::VMOVDQUmr: |
| case X86::VMOVUPSZ128mr: |
| case X86::VMOVAPSZ128mr: |
| case X86::VMOVUPSZ128mr_NOVLX: |
| case X86::VMOVAPSZ128mr_NOVLX: |
| case X86::VMOVUPDZ128mr: |
| case X86::VMOVAPDZ128mr: |
| case X86::VMOVDQA32Z128mr: |
| case X86::VMOVDQU32Z128mr: |
| case X86::VMOVDQA64Z128mr: |
| case X86::VMOVDQU64Z128mr: |
| case X86::VMOVDQU8Z128mr: |
| case X86::VMOVDQU16Z128mr: |
| MemBytes = 16; |
| return true; |
| case X86::VMOVUPSYmr: |
| case X86::VMOVAPSYmr: |
| case X86::VMOVUPDYmr: |
| case X86::VMOVAPDYmr: |
| case X86::VMOVDQUYmr: |
| case X86::VMOVDQAYmr: |
| case X86::VMOVUPSZ256mr: |
| case X86::VMOVAPSZ256mr: |
| case X86::VMOVUPSZ256mr_NOVLX: |
| case X86::VMOVAPSZ256mr_NOVLX: |
| case X86::VMOVUPDZ256mr: |
| case X86::VMOVAPDZ256mr: |
| case X86::VMOVDQU8Z256mr: |
| case X86::VMOVDQU16Z256mr: |
| case X86::VMOVDQA32Z256mr: |
| case X86::VMOVDQU32Z256mr: |
| case X86::VMOVDQA64Z256mr: |
| case X86::VMOVDQU64Z256mr: |
| MemBytes = 32; |
| return true; |
| case X86::VMOVUPSZmr: |
| case X86::VMOVAPSZmr: |
| case X86::VMOVUPDZmr: |
| case X86::VMOVAPDZmr: |
| case X86::VMOVDQU8Zmr: |
| case X86::VMOVDQU16Zmr: |
| case X86::VMOVDQA32Zmr: |
| case X86::VMOVDQU32Zmr: |
| case X86::VMOVDQA64Zmr: |
| case X86::VMOVDQU64Zmr: |
| MemBytes = 64; |
| return true; |
| } |
| return false; |
| } |
| |
| unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI, |
| int &FrameIndex) const { |
| unsigned Dummy; |
| return X86InstrInfo::isLoadFromStackSlot(MI, FrameIndex, Dummy); |
| } |
| |
| unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI, |
| int &FrameIndex, |
| unsigned &MemBytes) const { |
| if (isFrameLoadOpcode(MI.getOpcode(), MemBytes)) |
| if (MI.getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex)) |
| return MI.getOperand(0).getReg(); |
| return 0; |
| } |
| |
| unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI, |
| int &FrameIndex) const { |
| unsigned Dummy; |
| if (isFrameLoadOpcode(MI.getOpcode(), Dummy)) { |
| unsigned Reg; |
| if ((Reg = isLoadFromStackSlot(MI, FrameIndex))) |
| return Reg; |
| // Check for post-frame index elimination operations |
| SmallVector<const MachineMemOperand *, 1> Accesses; |
| if (hasLoadFromStackSlot(MI, Accesses)) { |
| FrameIndex = |
| cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue()) |
| ->getFrameIndex(); |
| return 1; |
| } |
| } |
| return 0; |
| } |
| |
| unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI, |
| int &FrameIndex) const { |
| unsigned Dummy; |
| return X86InstrInfo::isStoreToStackSlot(MI, FrameIndex, Dummy); |
| } |
| |
| unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI, |
| int &FrameIndex, |
| unsigned &MemBytes) const { |
| if (isFrameStoreOpcode(MI.getOpcode(), MemBytes)) |
| if (MI.getOperand(X86::AddrNumOperands).getSubReg() == 0 && |
| isFrameOperand(MI, 0, FrameIndex)) |
| return MI.getOperand(X86::AddrNumOperands).getReg(); |
| return 0; |
| } |
| |
| unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI, |
| int &FrameIndex) const { |
| unsigned Dummy; |
| if (isFrameStoreOpcode(MI.getOpcode(), Dummy)) { |
| unsigned Reg; |
| if ((Reg = isStoreToStackSlot(MI, FrameIndex))) |
| return Reg; |
| // Check for post-frame index elimination operations |
| SmallVector<const MachineMemOperand *, 1> Accesses; |
| if (hasStoreToStackSlot(MI, Accesses)) { |
| FrameIndex = |
| cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue()) |
| ->getFrameIndex(); |
| return 1; |
| } |
| } |
| return 0; |
| } |
| |
| /// Return true if register is PIC base; i.e.g defined by X86::MOVPC32r. |
| static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) { |
| // Don't waste compile time scanning use-def chains of physregs. |
| if (!Register::isVirtualRegister(BaseReg)) |
| return false; |
| bool isPICBase = false; |
| for (MachineRegisterInfo::def_instr_iterator I = MRI.def_instr_begin(BaseReg), |
| E = MRI.def_instr_end(); I != E; ++I) { |
| MachineInstr *DefMI = &*I; |
| if (DefMI->getOpcode() != X86::MOVPC32r) |
| return false; |
| assert(!isPICBase && "More than one PIC base?"); |
| isPICBase = true; |
| } |
| return isPICBase; |
| } |
| |
| bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, |
| AAResults *AA) const { |
| switch (MI.getOpcode()) { |
| default: |
| // This function should only be called for opcodes with the ReMaterializable |
| // flag set. |
| llvm_unreachable("Unknown rematerializable operation!"); |
| break; |
| |
| case X86::LOAD_STACK_GUARD: |
| case X86::AVX1_SETALLONES: |
| case X86::AVX2_SETALLONES: |
| case X86::AVX512_128_SET0: |
| case X86::AVX512_256_SET0: |
| case X86::AVX512_512_SET0: |
| case X86::AVX512_512_SETALLONES: |
| case X86::AVX512_FsFLD0SD: |
| case X86::AVX512_FsFLD0SS: |
| case X86::AVX512_FsFLD0F128: |
| case X86::AVX_SET0: |
| case X86::FsFLD0SD: |
| case X86::FsFLD0SS: |
| case X86::FsFLD0F128: |
| case X86::KSET0D: |
| case X86::KSET0Q: |
| case X86::KSET0W: |
| case X86::KSET1D: |
| case X86::KSET1Q: |
| case X86::KSET1W: |
| case X86::MMX_SET0: |
| case X86::MOV32ImmSExti8: |
| case X86::MOV32r0: |
| case X86::MOV32r1: |
| case X86::MOV32r_1: |
| case X86::MOV32ri64: |
| case X86::MOV64ImmSExti8: |
| case X86::V_SET0: |
| case X86::V_SETALLONES: |
| case X86::MOV16ri: |
| case X86::MOV32ri: |
| case X86::MOV64ri: |
| case X86::MOV64ri32: |
| case X86::MOV8ri: |
| return true; |
| |
| case X86::MOV8rm: |
| case X86::MOV8rm_NOREX: |
| case X86::MOV16rm: |
| case X86::MOV32rm: |
| case X86::MOV64rm: |
| case X86::MOVSSrm: |
| case X86::MOVSSrm_alt: |
| case X86::MOVSDrm: |
| case X86::MOVSDrm_alt: |
| case X86::MOVAPSrm: |
| case X86::MOVUPSrm: |
| case X86::MOVAPDrm: |
| case X86::MOVUPDrm: |
| case X86::MOVDQArm: |
| case X86::MOVDQUrm: |
| case X86::VMOVSSrm: |
| case X86::VMOVSSrm_alt: |
| case X86::VMOVSDrm: |
| case X86::VMOVSDrm_alt: |
| case X86::VMOVAPSrm: |
| case X86::VMOVUPSrm: |
| case X86::VMOVAPDrm: |
| case X86::VMOVUPDrm: |
| case X86::VMOVDQArm: |
| case X86::VMOVDQUrm: |
| case X86::VMOVAPSYrm: |
| case X86::VMOVUPSYrm: |
| case X86::VMOVAPDYrm: |
| case X86::VMOVUPDYrm: |
| case X86::VMOVDQAYrm: |
| case X86::VMOVDQUYrm: |
| case X86::MMX_MOVD64rm: |
| case X86::MMX_MOVQ64rm: |
| // AVX-512 |
| case X86::VMOVSSZrm: |
| case X86::VMOVSSZrm_alt: |
| case X86::VMOVSDZrm: |
| case X86::VMOVSDZrm_alt: |
| case X86::VMOVAPDZ128rm: |
| case X86::VMOVAPDZ256rm: |
| case X86::VMOVAPDZrm: |
| case X86::VMOVAPSZ128rm: |
| case X86::VMOVAPSZ256rm: |
| case X86::VMOVAPSZ128rm_NOVLX: |
| case X86::VMOVAPSZ256rm_NOVLX: |
| case X86::VMOVAPSZrm: |
| case X86::VMOVDQA32Z128rm: |
| case X86::VMOVDQA32Z256rm: |
| case X86::VMOVDQA32Zrm: |
| case X86::VMOVDQA64Z128rm: |
| case X86::VMOVDQA64Z256rm: |
| case X86::VMOVDQA64Zrm: |
| case X86::VMOVDQU16Z128rm: |
| case X86::VMOVDQU16Z256rm: |
| case X86::VMOVDQU16Zrm: |
| case X86::VMOVDQU32Z128rm: |
| case X86::VMOVDQU32Z256rm: |
| case X86::VMOVDQU32Zrm: |
| case X86::VMOVDQU64Z128rm: |
| case X86::VMOVDQU64Z256rm: |
| case X86::VMOVDQU64Zrm: |
| case X86::VMOVDQU8Z128rm: |
| case X86::VMOVDQU8Z256rm: |
| case X86::VMOVDQU8Zrm: |
| case X86::VMOVUPDZ128rm: |
| case X86::VMOVUPDZ256rm: |
| case X86::VMOVUPDZrm: |
| case X86::VMOVUPSZ128rm: |
| case X86::VMOVUPSZ256rm: |
| case X86::VMOVUPSZ128rm_NOVLX: |
| case X86::VMOVUPSZ256rm_NOVLX: |
| case X86::VMOVUPSZrm: { |
| // Loads from constant pools are trivially rematerializable. |
| if (MI.getOperand(1 + X86::AddrBaseReg).isReg() && |
| MI.getOperand(1 + X86::AddrScaleAmt).isImm() && |
| MI.getOperand(1 + X86::AddrIndexReg).isReg() && |
| MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 && |
| MI.isDereferenceableInvariantLoad(AA)) { |
| Register BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg(); |
| if (BaseReg == 0 || BaseReg == X86::RIP) |
| return true; |
| // Allow re-materialization of PIC load. |
| if (!ReMatPICStubLoad && MI.getOperand(1 + X86::AddrDisp).isGlobal()) |
| return false; |
| const MachineFunction &MF = *MI.getParent()->getParent(); |
| const MachineRegisterInfo &MRI = MF.getRegInfo(); |
| return regIsPICBase(BaseReg, MRI); |
| } |
| return false; |
| } |
| |
| case X86::LEA32r: |
| case X86::LEA64r: { |
| if (MI.getOperand(1 + X86::AddrScaleAmt).isImm() && |
| MI.getOperand(1 + X86::AddrIndexReg).isReg() && |
| MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 && |
| !MI.getOperand(1 + X86::AddrDisp).isReg()) { |
| // lea fi#, lea GV, etc. are all rematerializable. |
| if (!MI.getOperand(1 + X86::AddrBaseReg).isReg()) |
| return true; |
| Register BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg(); |
| if (BaseReg == 0) |
| return true; |
| // Allow re-materialization of lea PICBase + x. |
| const MachineFunction &MF = *MI.getParent()->getParent(); |
| const MachineRegisterInfo &MRI = MF.getRegInfo(); |
| return regIsPICBase(BaseReg, MRI); |
| } |
| return false; |
| } |
| } |
| } |
| |
| void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator I, |
| unsigned DestReg, unsigned SubIdx, |
| const MachineInstr &Orig, |
| const TargetRegisterInfo &TRI) const { |
| bool ClobbersEFLAGS = Orig.modifiesRegister(X86::EFLAGS, &TRI); |
| if (ClobbersEFLAGS && !isSafeToClobberEFLAGS(MBB, I)) { |
| // The instruction clobbers EFLAGS. Re-materialize as MOV32ri to avoid side |
| // effects. |
| int Value; |
| switch (Orig.getOpcode()) { |
| case X86::MOV32r0: Value = 0; break; |
| case X86::MOV32r1: Value = 1; break; |
| case X86::MOV32r_1: Value = -1; break; |
| default: |
| llvm_unreachable("Unexpected instruction!"); |
| } |
| |
| const DebugLoc &DL = Orig.getDebugLoc(); |
| BuildMI(MBB, I, DL, get(X86::MOV32ri)) |
| .add(Orig.getOperand(0)) |
| .addImm(Value); |
| } else { |
| MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig); |
| MBB.insert(I, MI); |
| } |
| |
| MachineInstr &NewMI = *std::prev(I); |
| NewMI.substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI); |
| } |
| |
| /// True if MI has a condition code def, e.g. EFLAGS, that is not marked dead. |
| bool X86InstrInfo::hasLiveCondCodeDef(MachineInstr &MI) const { |
| for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { |
| MachineOperand &MO = MI.getOperand(i); |
| if (MO.isReg() && MO.isDef() && |
| MO.getReg() == X86::EFLAGS && !MO.isDead()) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /// Check whether the shift count for a machine operand is non-zero. |
| inline static unsigned getTruncatedShiftCount(const MachineInstr &MI, |
| unsigned ShiftAmtOperandIdx) { |
| // The shift count is six bits with the REX.W prefix and five bits without. |
| unsigned ShiftCountMask = (MI.getDesc().TSFlags & X86II::REX_W) ? 63 : 31; |
| unsigned Imm = MI.getOperand(ShiftAmtOperandIdx).getImm(); |
| return Imm & ShiftCountMask; |
| } |
| |
/// Return true if the (already truncated) shift count \p ShAmt can be
/// expressed as the scale factor of a LEA instruction, i.e. it is 1, 2 or 3.
inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
  // A left shift can become a LEA only if the shift fits the LEA scale.
  // The scale lives in the two-bit SIB.scale field, so only shift amounts
  // below 4 are encodable; a shift by 0 is excluded as well.
  return ShAmt >= 1 && ShAmt <= 3;
}
| |
| bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, |
| unsigned Opc, bool AllowSP, Register &NewSrc, |
| bool &isKill, MachineOperand &ImplicitOp, |
| LiveVariables *LV) const { |
| MachineFunction &MF = *MI.getParent()->getParent(); |
| const TargetRegisterClass *RC; |
| if (AllowSP) { |
| RC = Opc != X86::LEA32r ? &X86::GR64RegClass : &X86::GR32RegClass; |
| } else { |
| RC = Opc != X86::LEA32r ? |
| &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass; |
| } |
| Register SrcReg = Src.getReg(); |
| |
| // For both LEA64 and LEA32 the register already has essentially the right |
| // type (32-bit or 64-bit) we may just need to forbid SP. |
| if (Opc != X86::LEA64_32r) { |
| NewSrc = SrcReg; |
| isKill = Src.isKill(); |
| assert(!Src.isUndef() && "Undef op doesn't need optimization"); |
| |
| if (Register::isVirtualRegister(NewSrc) && |
| !MF.getRegInfo().constrainRegClass(NewSrc, RC)) |
| return false; |
| |
| return true; |
| } |
| |
| // This is for an LEA64_32r and incoming registers are 32-bit. One way or |
| // another we need to add 64-bit registers to the final MI. |
| if (Register::isPhysicalRegister(SrcReg)) { |
| ImplicitOp = Src; |
| ImplicitOp.setImplicit(); |
| |
| NewSrc = getX86SubSuperRegister(Src.getReg(), 64); |
| isKill = Src.isKill(); |
| assert(!Src.isUndef() && "Undef op doesn't need optimization"); |
| } else { |
| // Virtual register of the wrong class, we have to create a temporary 64-bit |
| // vreg to feed into the LEA. |
| NewSrc = MF.getRegInfo().createVirtualRegister(RC); |
| MachineInstr *Copy = |
| BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY)) |
| .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit) |
| .add(Src); |
| |
| // Which is obviously going to be dead after we're done with it. |
| isKill = true; |
| |
| if (LV) |
| LV->replaceKillInstruction(SrcReg, MI, *Copy); |
| } |
| |
| // We've set all the parameters without issue. |
| return true; |
| } |
| |
| MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA( |
| unsigned MIOpc, MachineFunction::iterator &MFI, MachineInstr &MI, |
| LiveVariables *LV, bool Is8BitOp) const { |
| // We handle 8-bit adds and various 16-bit opcodes in the switch below. |
| MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo(); |
| assert((Is8BitOp || RegInfo.getTargetRegisterInfo()->getRegSizeInBits( |
| *RegInfo.getRegClass(MI.getOperand(0).getReg())) == 16) && |
| "Unexpected type for LEA transform"); |
| |
| // TODO: For a 32-bit target, we need to adjust the LEA variables with |
| // something like this: |
| // Opcode = X86::LEA32r; |
| // InRegLEA = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass); |
| // OutRegLEA = |
| // Is8BitOp ? RegInfo.createVirtualRegister(&X86::GR32ABCD_RegClass) |
| // : RegInfo.createVirtualRegister(&X86::GR32RegClass); |
| if (!Subtarget.is64Bit()) |
| return nullptr; |
| |
| unsigned Opcode = X86::LEA64_32r; |
| Register InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass); |
| Register OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass); |
| |
| // Build and insert into an implicit UNDEF value. This is OK because |
| // we will be shifting and then extracting the lower 8/16-bits. |
| // This has the potential to cause partial register stall. e.g. |
| // movw (%rbp,%rcx,2), %dx |
| // leal -65(%rdx), %esi |
| // But testing has shown this *does* help performance in 64-bit mode (at |
| // least on modern x86 machines). |
| MachineBasicBlock::iterator MBBI = MI.getIterator(); |
| Register Dest = MI.getOperand(0).getReg(); |
| Register Src = MI.getOperand(1).getReg(); |
| bool IsDead = MI.getOperand(0).isDead(); |
| bool IsKill = MI.getOperand(1).isKill(); |
| unsigned SubReg = Is8BitOp ? X86::sub_8bit : X86::sub_16bit; |
| assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization"); |
| BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA); |
| MachineInstr *InsMI = |
| BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY)) |
| .addReg(InRegLEA, RegState::Define, SubReg) |
| .addReg(Src, getKillRegState(IsKill)); |
| |
| MachineInstrBuilder MIB = |
| BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(Opcode), OutRegLEA); |
| switch (MIOpc) { |
| default: llvm_unreachable("Unreachable!"); |
| case X86::SHL8ri: |
| case X86::SHL16ri: { |
| unsigned ShAmt = MI.getOperand(2).getImm(); |
| MIB.addReg(0).addImm(1ULL << ShAmt) |
| .addReg(InRegLEA, RegState::Kill).addImm(0).addReg(0); |
| break; |
| } |
| case X86::INC8r: |
| case X86::INC16r: |
| addRegOffset(MIB, InRegLEA, true, 1); |
| break; |
| case X86::DEC8r: |
| case X86::DEC16r: |
| addRegOffset(MIB, InRegLEA, true, -1); |
| break; |
| case X86::ADD8ri: |
| case X86::ADD8ri_DB: |
| case X86::ADD16ri: |
| case X86::ADD16ri8: |
| case X86::ADD16ri_DB: |
| case X86::ADD16ri8_DB: |
| addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm()); |
| break; |
| case X86::ADD8rr: |
| case X86::ADD8rr_DB: |
| case X86::ADD16rr: |
| case X86::ADD16rr_DB: { |
| Register Src2 = MI.getOperand(2).getReg(); |
| bool IsKill2 = MI.getOperand(2).isKill(); |
| assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need optimization"); |
| unsigned InRegLEA2 = 0; |
| MachineInstr *InsMI2 = nullptr; |
| if (Src == Src2) { |
| // ADD8rr/ADD16rr killed %reg1028, %reg1028 |
| // just a single insert_subreg. |
| addRegReg(MIB, InRegLEA, true, InRegLEA, false); |
| } else { |
| if (Subtarget.is64Bit()) |
| InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass); |
| else |
| InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass); |
| // Build and insert into an implicit UNDEF value. This is OK because |
| // we will be shifting and then extracting the lower 8/16-bits. |
| BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA2); |
| InsMI2 = BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY)) |
| .addReg(InRegLEA2, RegState::Define, SubReg) |
| .addReg(Src2, getKillRegState(IsKill2)); |
| addRegReg(MIB, InRegLEA, true, InRegLEA2, true); |
| } |
| if (LV && IsKill2 && InsMI2) |
| LV->replaceKillInstruction(Src2, MI, *InsMI2); |
| break; |
| } |
| } |
| |
| MachineInstr *NewMI = MIB; |
| MachineInstr *ExtMI = |
| BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY)) |
| .addReg(Dest, RegState::Define | getDeadRegState(IsDead)) |
| .addReg(OutRegLEA, RegState::Kill, SubReg); |
| |
| if (LV) { |
| // Update live variables. |
| LV->getVarInfo(InRegLEA).Kills.push_back(NewMI); |
| LV->getVarInfo(OutRegLEA).Kills.push_back(ExtMI); |
| if (IsKill) |
| LV->replaceKillInstruction(Src, MI, *InsMI); |
| if (IsDead) |
| LV->replaceKillInstruction(Dest, MI, *ExtMI); |
| } |
| |
| return ExtMI; |
| } |
| |
| /// This method must be implemented by targets that |
| /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target |
| /// may be able to convert a two-address instruction into a true |
| /// three-address instruction on demand. This allows the X86 target (for |
| /// example) to convert ADD and SHL instructions into LEA instructions if they |
| /// would require register copies due to two-addressness. |
| /// |
| /// This method returns a null pointer if the transformation cannot be |
| /// performed, otherwise it returns the new instruction. |
| /// |
| MachineInstr * |
| X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, |
| MachineInstr &MI, LiveVariables *LV) const { |
| // The following opcodes also sets the condition code register(s). Only |
| // convert them to equivalent lea if the condition code register def's |
| // are dead! |
| if (hasLiveCondCodeDef(MI)) |
| return nullptr; |
| |
| MachineFunction &MF = *MI.getParent()->getParent(); |
| // All instructions input are two-addr instructions. Get the known operands. |
| const MachineOperand &Dest = MI.getOperand(0); |
| const MachineOperand &Src = MI.getOperand(1); |
| |
| // Ideally, operations with undef should be folded before we get here, but we |
| // can't guarantee it. Bail out because optimizing undefs is a waste of time. |
| // Without this, we have to forward undef state to new register operands to |
| // avoid machine verifier errors. |
| if (Src.isUndef()) |
| return nullptr; |
| if (MI.getNumOperands() > 2) |
| if (MI.getOperand(2).isReg() && MI.getOperand(2).isUndef()) |
| return nullptr; |
| |
| MachineInstr *NewMI = nullptr; |
| bool Is64Bit = Subtarget.is64Bit(); |
| |
| bool Is8BitOp = false; |
| unsigned MIOpc = MI.getOpcode(); |
| switch (MIOpc) { |
| default: llvm_unreachable("Unreachable!"); |
| case X86::SHL64ri: { |
| assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!"); |
| unsigned ShAmt = getTruncatedShiftCount(MI, 2); |
| if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr; |
| |
| // LEA can't handle RSP. |
| if (Register::isVirtualRegister(Src.getReg()) && |
| !MF.getRegInfo().constrainRegClass(Src.getReg(), |
| &X86::GR64_NOSPRegClass)) |
| return nullptr; |
| |
| NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)) |
| .add(Dest) |
| .addReg(0) |
| .addImm(1ULL << ShAmt) |
| .add(Src) |
| .addImm(0) |
| .addReg(0); |
| break; |
| } |
| case X86::SHL32ri: { |
| assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!"); |
| unsigned ShAmt = getTruncatedShiftCount(MI, 2); |
| if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr; |
| |
| unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r; |
| |
| // LEA can't handle ESP. |
| bool isKill; |
| Register SrcReg; |
| MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); |
| if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, |
| SrcReg, isKill, ImplicitOp, LV)) |
| return nullptr; |
| |
| MachineInstrBuilder MIB = |
| BuildMI(MF, MI.getDebugLoc(), get(Opc)) |
| .add(Dest) |
| .addReg(0) |
| .addImm(1ULL << ShAmt) |
| .addReg(SrcReg, getKillRegState(isKill)) |
| .addImm(0) |
| .addReg(0); |
| if (ImplicitOp.getReg() != 0) |
| MIB.add(ImplicitOp); |
| NewMI = MIB; |
| |
| break; |
| } |
| case X86::SHL8ri: |
| Is8BitOp = true; |
| LLVM_FALLTHROUGH; |
| case X86::SHL16ri: { |
| assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!"); |
| unsigned ShAmt = getTruncatedShiftCount(MI, 2); |
| if (!isTruncatedShiftCountForLEA(ShAmt)) |
| return nullptr; |
| return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp); |
| } |
| case X86::INC64r: |
| case X86::INC32r: { |
| assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!"); |
| unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r : |
| (Is64Bit ? X86::LEA64_32r : X86::LEA32r); |
| bool isKill; |
| Register SrcReg; |
| MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); |
| if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill, |
| ImplicitOp, LV)) |
| return nullptr; |
| |
| MachineInstrBuilder MIB = |
| BuildMI(MF, MI.getDebugLoc(), get(Opc)) |
| .add(Dest) |
| .addReg(SrcReg, getKillRegState(isKill)); |
| if (ImplicitOp.getReg() != 0) |
| MIB.add(ImplicitOp); |
| |
| NewMI = addOffset(MIB, 1); |
| break; |
| } |
| case X86::DEC64r: |
| case X86::DEC32r: { |
| assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!"); |
| unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r |
| : (Is64Bit ? X86::LEA64_32r : X86::LEA32r); |
| |
| bool isKill; |
| Register SrcReg; |
| MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); |
| if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill, |
| ImplicitOp, LV)) |
| return nullptr; |
| |
| MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)) |
| .add(Dest) |
| .addReg(SrcReg, getKillRegState(isKill)); |
| if (ImplicitOp.getReg() != 0) |
| MIB.add(ImplicitOp); |
| |
| NewMI = addOffset(MIB, -1); |
| |
| break; |
| } |
| case X86::DEC8r: |
| case X86::INC8r: |
| Is8BitOp = true; |
| LLVM_FALLTHROUGH; |
| case X86::DEC16r: |
| case X86::INC16r: |
| return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp); |
| case X86::ADD64rr: |
| case X86::ADD64rr_DB: |
| case X86::ADD32rr: |
| case X86::ADD32rr_DB: { |
| assert(MI.getNumOperands() >= 3 && "Unknown add instruction!"); |
| unsigned Opc; |
| if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) |
| Opc = X86::LEA64r; |
| else |
| Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r; |
| |
| bool isKill; |
| Register SrcReg; |
| MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); |
| if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true, |
| SrcReg, isKill, ImplicitOp, LV)) |
| return nullptr; |
| |
| const MachineOperand &Src2 = MI.getOperand(2); |
| bool isKill2; |
| Register SrcReg2; |
| MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false); |
| if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false, |
| SrcReg2, isKill2, ImplicitOp2, LV)) |
| return nullptr; |
| |
| MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)).add(Dest); |
| if (ImplicitOp.getReg() != 0) |
| MIB.add(ImplicitOp); |
| if (ImplicitOp2.getReg() != 0) |
| MIB.add(ImplicitOp2); |
| |
| NewMI = addRegReg(MIB, SrcReg, isKill, SrcReg2, isKill2); |
| if (LV && Src2.isKill()) |
| LV->replaceKillInstruction(SrcReg2, MI, *NewMI); |
| break; |
| } |
| case X86::ADD8rr: |
| case X86::ADD8rr_DB: |
| Is8BitOp = true; |
| LLVM_FALLTHROUGH; |
| case X86::ADD16rr: |
| case X86::ADD16rr_DB: |
| return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp); |
| case X86::ADD64ri32: |
| case X86::ADD64ri8: |
| case X86::ADD64ri32_DB: |
| case X86::ADD64ri8_DB: |
| assert(MI.getNumOperands() >= 3 && "Unknown add instruction!"); |
| NewMI = addOffset( |
| BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src), |
| MI.getOperand(2)); |
| break; |
| case X86::ADD32ri: |
| case X86::ADD32ri8: |
| case X86::ADD32ri_DB: |
| case X86::ADD32ri8_DB: { |
| assert(MI.getNumOperands() >= 3 && "Unknown add instruction!"); |
| unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r; |
| |
| bool isKill; |
| Register SrcReg; |
| MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); |
| if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true, |
| SrcReg, isKill, ImplicitOp, LV)) |
| return nullptr; |
| |
| MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)) |
| .add(Dest) |
| .addReg(SrcReg, getKillRegState(isKill)); |
| if (ImplicitOp.getReg() != 0) |
| MIB.add(ImplicitOp); |
| |
| NewMI = addOffset(MIB, MI.getOperand(2)); |
| break; |
| } |
| case X86::ADD8ri: |
| case X86::ADD8ri_DB: |
| Is8BitOp = true; |
| LLVM_FALLTHROUGH; |
| case X86::ADD16ri: |
| case X86::ADD16ri8: |
| case X86::ADD16ri_DB: |
| case X86::ADD16ri8_DB: |
| return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp); |
| case X86::SUB8ri: |
| case X86::SUB16ri8: |
| case X86::SUB16ri: |
| /// FIXME: Support these similar to ADD8ri/ADD16ri*. |
| return nullptr; |
| case X86::SUB32ri8: |
| case X86::SUB32ri: { |
| if (!MI.getOperand(2).isImm()) |
| return nullptr; |
| int64_t Imm = MI.getOperand(2).getImm(); |
| if (!isInt<32>(-Imm)) |
| return nullptr; |
| |
| assert(MI.getNumOperands() >= 3 && "Unknown add instruction!"); |
| unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r; |
| |
| bool isKill; |
| Register SrcReg; |
| MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); |
| if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true, |
| SrcReg, isKill, ImplicitOp, LV)) |
| return nullptr; |
| |
| MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)) |
| .add(Dest) |
| .addReg(SrcReg, getKillRegState(isKill)); |
| if (ImplicitOp.getReg() != 0) |
| MIB.add(ImplicitOp); |
| |
| NewMI = addOffset(MIB, -Imm); |
| break; |
| } |
| |
| case X86::SUB64ri8: |
| case X86::SUB64ri32: { |
| if (!MI.getOperand(2).isImm()) |
| return nullptr; |
| int64_t Imm = MI.getOperand(2).getImm(); |
| if (!isInt<32>(-Imm)) |
| return nullptr; |
| |
| assert(MI.getNumOperands() >= 3 && "Unknown sub instruction!"); |
| |
| MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), |
| get(X86::LEA64r)).add(Dest).add(Src); |
| NewMI = addOffset(MIB, -Imm); |
| break; |
| } |
| |
| case X86::VMOVDQU8Z128rmk: |
| case X86::VMOVDQU8Z256rmk: |
| case X86::VMOVDQU8Zrmk: |
| case X86::VMOVDQU16Z128rmk: |
| case X86::VMOVDQU16Z256rmk: |
| case X86::VMOVDQU16Zrmk: |
| case X86::VMOVDQU32Z128rmk: case X86::VMOVDQA32Z128rmk: |
| case X86::VMOVDQU32Z256rmk: case X86::VMOVDQA32Z256rmk: |
| case X86::VMOVDQU32Zrmk: case X86::VMOVDQA32Zrmk: |
| case X86::VMOVDQU64Z128rmk: case X86::VMOVDQA64Z128rmk: |
| case X86::VMOVDQU64Z256rmk: case X86::VMOVDQA64Z256rmk: |
| case X86::VMOVDQU64Zrmk: case X86::VMOVDQA64Zrmk: |
| case X86::VMOVUPDZ128rmk: case X86::VMOVAPDZ128rmk: |
| case X86::VMOVUPDZ256rmk: case X86::VMOVAPDZ256rmk: |
| case X86::VMOVUPDZrmk: case X86::VMOVAPDZrmk: |
| case X86::VMOVUPSZ128rmk: case X86::VMOVAPSZ128rmk: |
| case X86::VMOVUPSZ256rmk: case X86::VMOVAPSZ256rmk: |
| case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk: |
| case X86::VBROADCASTSDZ256mk: |
| case X86::VBROADCASTSDZmk: |
| case X86::VBROADCASTSSZ128mk: |
| case X86::VBROADCASTSSZ256mk: |
| case X86::VBROADCASTSSZmk: |
| case X86::VPBROADCASTDZ128mk: |
| case X86::VPBROADCASTDZ256mk: |
| case X86::VPBROADCASTDZmk: |
| case X86::VPBROADCASTQZ128mk: |
| case X86::VPBROADCASTQZ256mk: |
| case X86::VPBROADCASTQZmk: { |
| unsigned Opc; |
| switch (MIOpc) { |
| default: llvm_unreachable("Unreachable!"); |
| case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break; |
| case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break; |
| case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break; |
| case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break; |
| case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break; |
| case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break; |
| case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break; |
| case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break; |
| case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break; |
| case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break; |
| case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break; |
| case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break; |
| case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break; |
| case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break; |
| case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break; |
| case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break; |
| case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break; |
| case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break; |
| case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break; |
| case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break; |
| case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break; |
| case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break; |
| case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break; |
| case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break; |
| case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break; |
| case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break; |
| case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break; |
| case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break; |
| case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break; |
| case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break; |
| case X86::VBROADCASTSDZ256mk: Opc = X86::VBLENDMPDZ256rmbk; break; |
| case X86::VBROADCASTSDZmk: Opc = X86::VBLENDMPDZrmbk; break; |
| case X86::VBROADCASTSSZ128mk: Opc = X86::VBLENDMPSZ128rmbk; break; |
| case X86::VBROADCASTSSZ256mk: Opc = X86::VBLENDMPSZ256rmbk; break; |
| case X86::VBROADCASTSSZmk: Opc = X86::VBLENDMPSZrmbk; break; |
| case X86::VPBROADCASTDZ128mk: Opc = X86::VPBLENDMDZ128rmbk; break; |
| case X86::VPBROADCASTDZ256mk: Opc = X86::VPBLENDMDZ256rmbk; break; |
| case X86::VPBROADCASTDZmk: Opc = X86::VPBLENDMDZrmbk; break; |
| case X86::VPBROADCASTQZ128mk: Opc = X86::VPBLENDMQZ128rmbk; break; |
| case X86::VPBROADCASTQZ256mk: Opc = X86::VPBLENDMQZ256rmbk; break; |
| case X86::VPBROADCASTQZmk: Opc = X86::VPBLENDMQZrmbk; break; |
| } |
| |
| NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc)) |
| .add(Dest) |
| .add(MI.getOperand(2)) |
| .add(Src) |
| .add(MI.getOperand(3)) |
| .add(MI.getOperand(4)) |
| .add(MI.getOperand(5)) |
| .add(MI.getOperand(6)) |
| .add(MI.getOperand(7)); |
| break; |
| } |
| |
| case X86::VMOVDQU8Z128rrk: |
| case X86::VMOVDQU8Z256rrk: |
| case X86::VMOVDQU8Zrrk: |
| case X86::VMOVDQU16Z128rrk: |
| case X86::VMOVDQU16Z256rrk: |
| case X86::VMOVDQU16Zrrk: |
| case X86::VMOVDQU32Z128rrk: case X86::VMOVDQA32Z128rrk: |
| case X86::VMOVDQU32Z256rrk: case X86::VMOVDQA32Z256rrk: |
| case X86::VMOVDQU32Zrrk: case X86::VMOVDQA32Zrrk: |
| case X86::VMOVDQU64Z128rrk: case X86::VMOVDQA64Z128rrk: |
| case X86::VMOVDQU64Z256rrk: case X86::VMOVDQA64Z256rrk: |
| case X86::VMOVDQU64Zrrk: case X86::VMOVDQA64Zrrk: |
| case X86::VMOVUPDZ128rrk: case X86::VMOVAPDZ128rrk: |
| case X86::VMOVUPDZ256rrk: case X86::VMOVAPDZ256rrk: |
| case X86::VMOVUPDZrrk: case X86::VMOVAPDZrrk: |
| case X86::VMOVUPSZ128rrk: case X86::VMOVAPSZ128rrk: |
| case X86::VMOVUPSZ256rrk: case X86::VMOVAPSZ256rrk: |
| case X86::VMOVUPSZrrk: case X86::VMOVAPSZrrk: { |
| unsigned Opc; |
| switch (MIOpc) { |
| default: llvm_unreachable("Unreachable!"); |
| case X86::VMOVDQU8Z128rrk: Opc = X86::VPBLENDMBZ128rrk; break; |
| case X86::VMOVDQU8Z256rrk: Opc = X86::VPBLENDMBZ256rrk; break; |
| case X86::VMOVDQU8Zrrk: Opc = X86::VPBLENDMBZrrk; break; |
| case X86::VMOVDQU16Z128rrk: Opc = X86::VPBLENDMWZ128rrk; break; |
| case X86::VMOVDQU16Z256rrk: Opc = X86::VPBLENDMWZ256rrk; break; |
| case X86::VMOVDQU16Zrrk: Opc = X86::VPBLENDMWZrrk; break; |
| case X86::VMOVDQU32Z128rrk: Opc = X86::VPBLENDMDZ128rrk; break; |
| case X86::VMOVDQU32Z256rrk: Opc = X86::VPBLENDMDZ256rrk; break; |
| case X86::VMOVDQU32Zrrk: Opc = X86::VPBLENDMDZrrk; break; |
| case X86::VMOVDQU64Z128rrk: Opc = X86::VPBLENDMQZ128rrk; break; |
| case X86::VMOVDQU64Z256rrk: Opc = X86::VPBLENDMQZ256rrk; break; |
| case X86::VMOVDQU64Zrrk: Opc = X86::VPBLENDMQZrrk; break; |
| case X86::VMOVUPDZ128rrk: Opc = X86::VBLENDMPDZ128rrk; break; |
| case X86::VMOVUPDZ256rrk: Opc = X86::VBLENDMPDZ256rrk; break; |
| case X86::VMOVUPDZrrk: Opc = X86::VBLENDMPDZrrk; break; |
| case X86::VMOVUPSZ128rrk: Opc = X86::VBLENDMPSZ128rrk; break; |
| case X86::VMOVUPSZ256rrk: Opc = X86::VBLENDMPSZ256rrk; break; |
| case X86::VMOVUPSZrrk: Opc = X86::VBLENDMPSZrrk; break; |
| case X86::VMOVDQA32Z128rrk: Opc = X86::VPBLENDMDZ128rrk; break; |
| case X86::VMOVDQA32Z256rrk: Opc = X86::VPBLENDMDZ256rrk; break; |
| case X86::VMOVDQA32Zrrk: Opc = X86::VPBLENDMDZrrk; break; |
| case X86::VMOVDQA64Z128rrk: Opc = X86::VPBLENDMQZ128rrk; break; |
| case X86::VMOVDQA64Z256rrk: Opc = X86::VPBLENDMQZ256rrk; break; |
| case X86::VMOVDQA64Zrrk: Opc = X86::VPBLENDMQZrrk; break; |
| case X86::VMOVAPDZ128rrk: Opc = X86::VBLENDMPDZ128rrk; break; |
| case X86::VMOVAPDZ256rrk: Opc = X86::VBLENDMPDZ256rrk; break; |
| case X86::VMOVAPDZrrk: Opc = X86::VBLENDMPDZrrk; break; |
| case X86::VMOVAPSZ128rrk: Opc = X86::VBLENDMPSZ128rrk; break; |
| case X86::VMOVAPSZ256rrk: Opc = X86::VBLENDMPSZ256rrk; break; |
| case X86::VMOVAPSZrrk: Opc = X86::VBLENDMPSZrrk; break; |
| } |
| |
| NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc)) |
| .add(Dest) |
| .add(MI.getOperand(2)) |
| .add(Src) |
| .add(MI.getOperand(3)); |
| break; |
| } |
| } |
| |
| if (!NewMI) return nullptr; |
| |
| if (LV) { // Update live variables |
| if (Src.isKill()) |
| LV->replaceKillInstruction(Src.getReg(), MI, *NewMI); |
| if (Dest.isDead()) |
| LV->replaceKillInstruction(Dest.getReg(), MI, *NewMI); |
| } |
| |
| MFI->insert(MI.getIterator(), NewMI); // Insert the new inst |
| return NewMI; |
| } |
| |
| /// This determines which of three possible cases of a three source commute |
| /// the source indexes correspond to taking into account any mask operands. |
| /// All prevents commuting a passthru operand. Returns -1 if the commute isn't |
| /// possible. |
| /// Case 0 - Possible to commute the first and second operands. |
| /// Case 1 - Possible to commute the first and third operands. |
| /// Case 2 - Possible to commute the second and third operands. |
| static unsigned getThreeSrcCommuteCase(uint64_t TSFlags, unsigned SrcOpIdx1, |
| unsigned SrcOpIdx2) { |
| // Put the lowest index to SrcOpIdx1 to simplify the checks below. |
| if (SrcOpIdx1 > SrcOpIdx2) |
| std::swap(SrcOpIdx1, SrcOpIdx2); |
| |
| unsigned Op1 = 1, Op2 = 2, Op3 = 3; |
| if (X86II::isKMasked(TSFlags)) { |
| Op2++; |
| Op3++; |
| } |
| |
| if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op2) |
| return 0; |
| if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op3) |
| return 1; |
| if (SrcOpIdx1 == Op2 && SrcOpIdx2 == Op3) |
| return 2; |
| llvm_unreachable("Unknown three src commute case."); |
| } |
| |
| unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands( |
| const MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2, |
| const X86InstrFMA3Group &FMA3Group) const { |
| |
| unsigned Opc = MI.getOpcode(); |
| |
| // TODO: Commuting the 1st operand of FMA*_Int requires some additional |
| // analysis. The commute optimization is legal only if all users of FMA*_Int |
| // use only the lowest element of the FMA*_Int instruction. Such analysis are |
| // not implemented yet. So, just return 0 in that case. |
| // When such analysis are available this place will be the right place for |
| // calling it. |
| assert(!(FMA3Group.isIntrinsic() && (SrcOpIdx1 == 1 || SrcOpIdx2 == 1)) && |
| "Intrinsic instructions can't commute operand 1"); |
| |
| // Determine which case this commute is or if it can't be done. |
| unsigned Case = getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1, |
| SrcOpIdx2); |
| assert(Case < 3 && "Unexpected case number!"); |
| |
| // Define the FMA forms mapping array that helps to map input FMA form |
| // to output FMA form to preserve the operation semantics after |
| // commuting the operands. |
| const unsigned Form132Index = 0; |
| const unsigned Form213Index = 1; |
| const unsigned Form231Index = 2; |
| static const unsigned FormMapping[][3] = { |
| // 0: SrcOpIdx1 == 1 && SrcOpIdx2 == 2; |
| // FMA132 A, C, b; ==> FMA231 C, A, b; |
| // FMA213 B, A, c; ==> FMA213 A, B, c; |
| // FMA231 C, A, b; ==> FMA132 A, C, b; |
| { Form231Index, Form213Index, Form132Index }, |
| // 1: SrcOpIdx1 == 1 && SrcOpIdx2 == 3; |
| // FMA132 A, c, B; ==> FMA132 B, c, A; |
| // FMA213 B, a, C; ==> FMA231 C, a, B; |
| // FMA231 C, a, B; ==> FMA213 B, a, C; |
| { Form132Index, Form231Index, Form213Index }, |
| // 2: SrcOpIdx1 == 2 && SrcOpIdx2 == 3; |
| // FMA132 a, C, B; ==> FMA213 a, B, C; |
| // FMA213 b, A, C; ==> FMA132 b, C, A; |
| // FMA231 c, A, B; ==> FMA231 c, B, A; |
| { Form213Index, Form132Index, Form231Index } |
| }; |
| |
| unsigned FMAForms[3]; |
| FMAForms[0] = FMA3Group.get132Opcode(); |
| FMAForms[1] = FMA3Group.get213Opcode(); |
| FMAForms[2] = FMA3Group.get231Opcode(); |
| unsigned FormIndex; |
| for (FormIndex = 0; FormIndex < 3; FormIndex++) |
| if (Opc == FMAForms[FormIndex]) |
| break; |
| |
| // Everything is ready, just adjust the FMA opcode and return it. |
| FormIndex = FormMapping[Case][FormIndex]; |
| return FMAForms[FormIndex]; |
| } |
| |
| static void commuteVPTERNLOG(MachineInstr &MI, unsigned SrcOpIdx1, |
| unsigned SrcOpIdx2) { |
| // Determine which case this commute is or if it can't be done. |
| unsigned Case = getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1, |
| SrcOpIdx2); |
| assert(Case < 3 && "Unexpected case value!"); |
| |
| // For each case we need to swap two pairs of bits in the final immediate. |
| static const uint8_t SwapMasks[3][4] = { |
| { 0x04, 0x10, 0x08, 0x20 }, // Swap bits 2/4 and 3/5. |
| { 0x02, 0x10, 0x08, 0x40 }, // Swap bits 1/4 and 3/6. |
| { 0x02, 0x04, 0x20, 0x40 }, // Swap bits 1/2 and 5/6. |
| }; |
| |
| uint8_t Imm = MI.getOperand(MI.getNumOperands()-1).getImm(); |
| // Clear out the bits we are swapping. |
| uint8_t NewImm = Imm & ~(SwapMasks[Case][0] | SwapMasks[Case][1] | |
| SwapMasks[Case][2] | SwapMasks[Case][3]); |
| // If the immediate had a bit of the pair set, then set the opposite bit. |
| if (Imm & SwapMasks[Case][0]) NewImm |= SwapMasks[Case][1]; |
| if (Imm & SwapMasks[Case][1]) NewImm |= SwapMasks[Case][0]; |
| if (Imm & SwapMasks[Case][2]) NewImm |= SwapMasks[Case][3]; |
| if (Imm & SwapMasks[Case][3]) NewImm |= SwapMasks[Case][2]; |
| MI.getOperand(MI.getNumOperands()-1).setImm(NewImm); |
| } |
| |
| // Returns true if this is a VPERMI2 or VPERMT2 instruction that can be |
| // commuted. |
| static bool isCommutableVPERMV3Instruction(unsigned Opcode) { |
| #define VPERM_CASES(Suffix) \ |
| case X86::VPERMI2##Suffix##128rr: case X86::VPERMT2##Suffix##128rr: \ |
| case X86::VPERMI2##Suffix##256rr: case X86::VPERMT2##Suffix##256rr: \ |
| case X86::VPERMI2##Suffix##rr: case X86::VPERMT2##Suffix##rr: \ |
| case X86::VPERMI2##Suffix##128rm: case X86::VPERMT2##Suffix##128rm: \ |
| case X86::VPERMI2##Suffix##256rm: case X86::VPERMT2##Suffix##256rm: \ |
| case X86::VPERMI2##Suffix##rm: case X86::VPERMT2##Suffix##rm: \ |
| case X86::VPERMI2##Suffix##128rrkz: case X86::VPERMT2##Suffix##128rrkz: \ |
| case X86::VPERMI2##Suffix##256rrkz: case X86::VPERMT2##Suffix##256rrkz: \ |
| case X86::VPERMI2##Suffix##rrkz: case X86::VPERMT2##Suffix##rrkz: \ |
| case X86::VPERMI2##Suffix##128rmkz: case X86::VPERMT2##Suffix##128rmkz: \ |
| case X86::VPERMI2##Suffix##256rmkz: case X86::VPERMT2##Suffix##256rmkz: \ |
| case X86::VPERMI2##Suffix##rmkz: case X86::VPERMT2##Suffix##rmkz: |
| |
| #define VPERM_CASES_BROADCAST(Suffix) \ |
| VPERM_CASES(Suffix) \ |
| case X86::VPERMI2##Suffix##128rmb: case X86::VPERMT2##Suffix##128rmb: \ |
| case X86::VPERMI2##Suffix##256rmb: case X86::VPERMT2##Suffix##256rmb: \ |
| case X86::VPERMI2##Suffix##rmb: case X86::VPERMT2##Suffix##rmb: \ |
| case X86::VPERMI2##Suffix##128rmbkz: case X86::VPERMT2##Suffix##128rmbkz: \ |
| case X86::VPERMI2##Suffix##256rmbkz: case X86::VPERMT2##Suffix##256rmbkz: \ |
| case X86::VPERMI2##Suffix##rmbkz: case X86::VPERMT2##Suffix##rmbkz: |
| |
| switch (Opcode) { |
| default: return false; |
| VPERM_CASES(B) |
| VPERM_CASES_BROADCAST(D) |
| VPERM_CASES_BROADCAST(PD) |
| VPERM_CASES_BROADCAST(PS) |
| VPERM_CASES_BROADCAST(Q) |
| VPERM_CASES(W) |
| return true; |
| } |
| #undef VPERM_CASES_BROADCAST |
| #undef VPERM_CASES |
| } |
| |
| // Returns commuted opcode for VPERMI2 and VPERMT2 instructions by switching |
| // from the I opcode to the T opcode and vice versa. |
| static unsigned getCommutedVPERMV3Opcode(unsigned Opcode) { |
| #define VPERM_CASES(Orig, New) \ |
| case X86::Orig##128rr: return X86::New##128rr; \ |
| case X86::Orig##128rrkz: return X86::New##128rrkz; \ |
| case X86::Orig##128rm: return X86::New##128rm; \ |
| case X86::Orig##128rmkz: return X86::New##128rmkz; \ |
| case X86::Orig##256rr: return X86::New##256rr; \ |
| case X86::Orig##256rrkz: return X86::New##256rrkz; \ |
| case X86::Orig##256rm: return X86::New##256rm; \ |
| case X86::Orig##256rmkz: return X86::New##256rmkz; \ |
| case X86::Orig##rr: return X86::New##rr; \ |
| case X86::Orig##rrkz: return X86::New##rrkz; \ |
| case X86::Orig##rm: return X86::New##rm; \ |
| case X86::Orig##rmkz: return X86::New##rmkz; |
| |
| #define VPERM_CASES_BROADCAST(Orig, New) \ |
| VPERM_CASES(Orig, New) \ |
| case X86::Orig##128rmb: return X86::New##128rmb; \ |
| case X86::Orig##128rmbkz: return X86::New##128rmbkz; \ |
| case X86::Orig##256rmb: return X86::New##256rmb; \ |
| case X86::Orig##256rmbkz: return X86::New##256rmbkz; \ |
| case X86::Orig##rmb: return X86::New##rmb; \ |
| case X86::Orig##rmbkz: return X86::New##rmbkz; |
| |
| switch (Opcode) { |
| VPERM_CASES(VPERMI2B, VPERMT2B) |
| VPERM_CASES_BROADCAST(VPERMI2D, VPERMT2D) |
| VPERM_CASES_BROADCAST(VPERMI2PD, VPERMT2PD) |
| VPERM_CASES_BROADCAST(VPERMI2PS, VPERMT2PS) |
| VPERM_CASES_BROADCAST(VPERMI2Q, VPERMT2Q) |
| VPERM_CASES(VPERMI2W, VPERMT2W) |
| VPERM_CASES(VPERMT2B, VPERMI2B) |
| VPERM_CASES_BROADCAST(VPERMT2D, VPERMI2D) |
| VPERM_CASES_BROADCAST(VPERMT2PD, VPERMI2PD) |
| VPERM_CASES_BROADCAST(VPERMT2PS, VPERMI2PS) |
| VPERM_CASES_BROADCAST(VPERMT2Q, VPERMI2Q) |
| VPERM_CASES(VPERMT2W, VPERMI2W) |
| } |
| |
| llvm_unreachable("Unreachable!"); |
| #undef VPERM_CASES_BROADCAST |
| #undef VPERM_CASES |
| } |
| |
| MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, |
| unsigned OpIdx1, |
| unsigned OpIdx2) const { |
| auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & { |
| if (NewMI) |
| return *MI.getParent()->getParent()->CloneMachineInstr(&MI); |
| return MI; |
| }; |
| |
| switch (MI.getOpcode()) { |
| case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I) |
| case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I) |
| case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I) |
| case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I) |
| case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I) |
| case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I) |
| unsigned Opc; |
| unsigned Size; |
| switch (MI.getOpcode()) { |
| default: llvm_unreachable("Unreachable!"); |
| case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break; |
| case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break; |
| case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break; |
| case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break; |
| case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break; |
| case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break; |
| } |
| unsigned Amt = MI.getOperand(3).getImm(); |
| auto &WorkingMI = cloneIfNew(MI); |
| WorkingMI.setDesc(get(Opc)); |
| WorkingMI.getOperand(3).setImm(Size - Amt); |
| return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, |
| OpIdx1, OpIdx2); |
| } |
| case X86::PFSUBrr: |
| case X86::PFSUBRrr: { |
| // PFSUB x, y: x = x - y |
| // PFSUBR x, y: x = y - x |
| unsigned Opc = |
| (X86::PFSUBRrr == MI.getOpcode() ? X86::PFSUBrr : X86::PFSUBRrr); |
| auto &WorkingMI = cloneIfNew(MI); |
| WorkingMI.setDesc(get(Opc)); |
| return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, |
| OpIdx1, OpIdx2); |
| } |
| case X86::BLENDPDrri: |
| case X86::BLENDPSrri: |
| case X86::VBLENDPDrri: |
| case X86::VBLENDPSrri: |
| // If we're optimizing for size, try to use MOVSD/MOVSS. |
| if (MI.getParent()->getParent()->getFunction().hasOptSize()) { |
| unsigned Mask, Opc; |
| switch (MI.getOpcode()) { |
| default: llvm_unreachable("Unreachable!"); |
| case X86::BLENDPDrri: Opc = X86::MOVSDrr; Mask = 0x03; break; |
| case X86::BLENDPSrri: Opc = X86::MOVSSrr; Mask = 0x0F; break; |
| case X86::VBLENDPDrri: Opc = X86::VMOVSDrr; Mask = 0x03; break; |
| case X86::VBLENDPSrri: Opc = X86::VMOVSSrr; Mask = 0x0F; break; |
| } |
| if ((MI.getOperand(3).getImm() ^ Mask) == 1) { |
| auto &WorkingMI = cloneIfNew(MI); |
| WorkingMI.setDesc(get(Opc)); |
| WorkingMI.RemoveOperand(3); |
| return TargetInstrInfo::commuteInstructionImpl(WorkingMI, |
| /*NewMI=*/false, |
| OpIdx1, OpIdx2); |
| } |
| } |
| LLVM_FALLTHROUGH; |
| case X86::PBLENDWrri: |
| case X86::VBLENDPDYrri: |
| case X86::VBLENDPSYrri: |
| case X86::VPBLENDDrri: |
| case X86::VPBLENDWrri: |
| case X86::VPBLENDDYrri: |
| case X86::VPBLENDWYrri:{ |
| int8_t Mask; |
| switch (MI.getOpcode()) { |
| default: llvm_unreachable("Unreachable!"); |
| case X86::BLENDPDrri: Mask = (int8_t)0x03; break; |
| case X86::BLENDPSrri: Mask = (int8_t)0x0F; break; |
| case X86::PBLENDWrri: Mask = (int8_t)0xFF; break; |
| case X86::VBLENDPDrri: Mask = (int8_t)0x03; break; |
| case X86::VBLENDPSrri: Mask = (int8_t)0x0F; break; |
| case X86::VBLENDPDYrri: Mask = (int8_t)0x0F; break; |
| case X86::VBLENDPSYrri: Mask = (int8_t)0xFF; break; |
| case X86::VPBLENDDrri: Mask = (int8_t)0x0F; break; |
| case X86::VPBLENDWrri: Mask = (int8_t)0xFF; break; |
| case X86::VPBLENDDYrri: Mask = (int8_t)0xFF; break; |
| case X86::VPBLENDWYrri: Mask = (int8_t)0xFF; break; |
| } |
| // Only the least significant bits of Imm are used. |
| // Using int8_t to ensure it will be sign extended to the int64_t that |
| // setImm takes in order to match isel behavior. |
| int8_t Imm = MI.getOperand(3).getImm() & Mask; |
| auto &WorkingMI = cloneIfNew(MI); |
| WorkingMI.getOperand(3).setImm(Mask ^ Imm); |
| return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, |
| OpIdx1, OpIdx2); |
| } |
| case X86::INSERTPSrr: |
| case X86::VINSERTPSrr: |
| case X86::VINSERTPSZrr: { |
| unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm(); |
| unsigned ZMask = Imm & 15; |
| unsigned DstIdx = (Imm >> 4) & 3; |
| unsigned SrcIdx = (Imm >> 6) & 3; |
| |
| // We can commute insertps if we zero 2 of the elements, the insertion is |
| // "inline" and we don't override the insertion with a zero. |
| if (DstIdx == SrcIdx && (ZMask & (1 << DstIdx)) == 0 && |
| countPopulation(ZMask) == 2) { |
| unsigned AltIdx = findFirstSet((ZMask | (1 << DstIdx)) ^ 15); |
| assert(AltIdx < 4 && "Illegal insertion index"); |
| unsigned AltImm = (AltIdx << 6) | (AltIdx << 4) | ZMask; |
| auto &WorkingMI = cloneIfNew(MI); |
| WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(AltImm); |
| return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, |
| OpIdx1, OpIdx2); |
| } |
| return nullptr; |
| } |
| case X86::MOVSDrr: |
| case X86::MOVSSrr: |
| case X86::VMOVSDrr: |
| case X86::VMOVSSrr:{ |
| // On SSE41 or later we can commute a MOVSS/MOVSD to a BLENDPS/BLENDPD. |
| if (Subtarget.hasSSE41()) { |
| unsigned Mask, Opc; |
| switch (MI.getOpcode()) { |
| default: llvm_unreachable("Unreachable!"); |
| case X86::MOVSDrr: Opc = X86::BLENDPDrri; Mask = 0x02; break; |
| case X86::MOVSSrr: Opc = X86::BLENDPSrri; Mask = 0x0E; break; |
| case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break; |
| case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break; |
| } |
| |
| auto &WorkingMI = cloneIfNew(MI); |
| WorkingMI.setDesc(get(Opc)); |
| WorkingMI.addOperand(MachineOperand::CreateImm(Mask)); |
| return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, |
| OpIdx1, OpIdx2); |
| } |
| |
| // Convert to SHUFPD. |
| assert(MI.getOpcode() == X86::MOVSDrr && |
| "Can only commute MOVSDrr without SSE4.1"); |
| |
| auto &WorkingMI = cloneIfNew(MI); |
| WorkingMI.setDesc(get(X86::SHUFPDrri)); |
| WorkingMI.addOperand(MachineOperand::CreateImm(0x02)); |
| return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, |
| OpIdx1, OpIdx2); |
| } |
| case X86::SHUFPDrri: { |
| // Commute to MOVSD. |
| assert(MI.getOperand(3).getImm() == 0x02 && "Unexpected immediate!"); |
| auto &WorkingMI = cloneIfNew(MI); |
| WorkingMI.setDesc(get(X86::MOVSDrr)); |
| WorkingMI.RemoveOperand(3); |
| return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, |
| OpIdx1, OpIdx2); |
| } |
| case X86::PCLMULQDQrr: |
| case X86::VPCLMULQDQrr: |
| case X86::VPCLMULQDQYrr: |
| case X86::VPCLMULQDQZrr: |
| case X86::VPCLMULQDQZ128rr: |
| case X86::VPCLMULQDQZ256rr: { |
| // SRC1 64bits = Imm[0] ? SRC1[127:64] : SRC1[63:0] |
| // SRC2 64bits = Imm[4] ? SRC2[127:64] : SRC2[63:0] |
| unsigned Imm = MI.getOperand(3).getImm(); |
| unsigned Src1Hi = Imm & 0x01; |
| unsigned Src2Hi = Imm & 0x10; |
| auto &WorkingMI = cloneIfNew(MI); |
| WorkingMI.getOperand(3).setImm((Src1Hi << 4) | (Src2Hi >> 4)); |
| return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, |
| OpIdx1, OpIdx2); |
| } |
| case X86::VPCMPBZ128rri: case X86::VPCMPUBZ128rri: |
| case X86::VPCMPBZ256rri: case X86::VPCMPUBZ256rri: |
| case X86::VPCMPBZrri: case X86::VPCMPUBZrri: |
| case X86::VPCMPDZ128rri: case X86::VPCMPUDZ128rri: |
| case X86::VPCMPDZ256rri: case X86::VPCMPUDZ256rri: |
| case X86::VPCMPDZrri: case X86::VPCMPUDZrri: |
| case X86::VPCMPQZ128rri: case X86::VPCMPUQZ128rri: |
| case X86::VPCMPQZ256rri: case X86::VPCMPUQZ256rri: |
| case X86::VPCMPQZrri: case X86::VPCMPUQZrri: |
| case X86::VPCMPWZ128rri: case X86::VPCMPUWZ128rri: |
| case X86::VPCMPWZ256rri: case X86::VPCMPUWZ256rri: |
| case X86::VPCMPWZrri: case X86::VPCMPUWZrri: |
| case X86::VPCMPBZ128rrik: case X86::VPCMPUBZ128rrik: |
| case X86::VPCMPBZ256rrik: case X86::VPCMPUBZ256rrik: |
| case X86::VPCMPBZrrik: case X86::VPCMPUBZrrik: |
| case X86::VPCMPDZ128rrik: case X86::VPCMPUDZ128rrik: |
| case X86::VPCMPDZ256rrik: case X86::VPCMPUDZ256rrik: |
| case X86::VPCMPDZrrik: case X86::VPCMPUDZrrik: |
| case X86::VPCMPQZ128rrik: case X86::VPCMPUQZ128rrik: |
| case X86::VPCMPQZ256rrik: case X86::VPCMPUQZ256rrik: |
| case X86::VPCMPQZrrik: case X86::VPCMPUQZrrik: |
| case X86::VPCMPWZ128rrik: case X86::VPCMPUWZ128rrik: |
| case X86::VPCMPWZ256rrik: case X86::VPCMPUWZ256rrik: |
| case X86::VPCMPWZrrik: case X86::VPCMPUWZrrik: { |
| // Flip comparison mode immediate (if necessary). |
| unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x7; |
| Imm = X86::getSwappedVPCMPImm(Imm); |
| auto &WorkingMI = cloneIfNew(MI); |
| WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm); |
| return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, |
| OpIdx1, OpIdx2); |
| } |
| case X86::VPCOMBri: case X86::VPCOMUBri: |
| case X86::VPCOMDri: case X86::VPCOMUDri: |
| case X86::VPCOMQri: case X86::VPCOMUQri: |
| case X86::VPCOMWri: case X86::VPCOMUWri: { |
| // Flip comparison mode immediate (if necessary). |
| unsigned Imm = MI.getOperand(3).getImm() & 0x7; |
| Imm = X86::getSwappedVPCOMImm(Imm); |
| auto &WorkingMI = cloneIfNew(MI); |
| WorkingMI.getOperand(3).setImm(Imm); |
| return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, |
| OpIdx1, OpIdx2); |
| } |
| case X86::VCMPSDZrr: |
| case X86::VCMPSSZrr: |
| case X86::VCMPPDZrri: |
| case X86::VCMPPSZrri: |
| case X86::VCMPPDZ128rri: |
| case X86::VCMPPSZ128rri: |
| case X86::VCMPPDZ256rri: |
| case X86::VCMPPSZ256rri: |
| case X86::VCMPPDZrrik: |
| case X86::VCMPPSZrrik: |
| case X86::VCMPPDZ128rrik: |
| case X86::VCMPPSZ128rrik: |
| case X86::VCMPPDZ256rrik: |
| case X86::VCMPPSZ256rrik: { |
| unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x1f; |
| Imm = X86::getSwappedVCMPImm(Imm); |
| auto &WorkingMI = cloneIfNew(MI); |
| WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm); |
| return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, |
| OpIdx1, OpIdx2); |
| } |
| case X86::VPERM2F128rr: |
| case X86::VPERM2I128rr: { |
| // Flip permute source immediate. |
| // Imm & 0x02: lo = if set, select Op1.lo/hi else Op0.lo/hi. |
| // Imm & 0x20: hi = if set, select Op1.lo/hi else Op0.lo/hi. |
| int8_t Imm = MI.getOperand(3).getImm() & 0xFF; |
| auto &WorkingMI = cloneIfNew(MI); |
| WorkingMI.getOperand(3).setImm(Imm ^ 0x22); |
| return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, |
| OpIdx1, OpIdx2); |
| } |
| case X86::MOVHLPSrr: |
| case X86::UNPCKHPDrr: |
| case X86::VMOVHLPSrr: |
| case X86::VUNPCKHPDrr: |
| case X86::VMOVHLPSZrr: |
| case X86::VUNPCKHPDZ128rr: { |
| assert(Subtarget.hasSSE2() && "Commuting MOVHLP/UNPCKHPD requires SSE2!"); |
| |
| unsigned Opc = MI.getOpcode(); |
| switch (Opc) { |
| default: llvm_unreachable("Unreachable!"); |
| case X86::MOVHLPSrr: Opc = X86::UNPCKHPDrr; break; |
| case X86::UNPCKHPDrr: Opc = X86::MOVHLPSrr; break; |
| case X86::VMOVHLPSrr: Opc = X86::VUNPCKHPDrr; break; |
| case X86::VUNPCKHPDrr: Opc = X86::VMOVHLPSrr; break; |
| case X86::VMOVHLPSZrr: Opc = X86::VUNPCKHPDZ128rr; break; |
| case X86::VUNPCKHPDZ128rr: Opc = X86::VMOVHLPSZrr; break; |
| } |
| auto &WorkingMI = cloneIfNew(MI); |
| WorkingMI.setDesc(get(Opc)); |
| return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, |
| OpIdx1, OpIdx2); |
| } |
| case X86::CMOV16rr: case X86::CMOV32rr: case X86::CMOV64rr: { |
| auto &WorkingMI = cloneIfNew(MI); |
| unsigned OpNo = MI.getDesc().getNumOperands() - 1; |
| X86::CondCode CC = static_cast<X86::CondCode>(MI.getOperand(OpNo).getImm()); |
| WorkingMI.getOperand(OpNo).setImm(X86::GetOppositeBranchCondition(CC)); |
| return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, |
| OpIdx1, OpIdx2); |
| } |
| case X86::VPTERNLOGDZrri: case X86::VPTERNLOGDZrmi: |
| case X86::VPTERNLOGDZ128rri: case X86::VPTERNLOGDZ128rmi: |
| case X86::VPTERNLOGDZ256rri: case X86::VPTERNLOGDZ256rmi: |
| case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi: |
| case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi: |
| case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi: |
| case X86::VPTERNLOGDZrrik: |
| case X86::VPTERNLOGDZ128rrik: |
| case X86::VPTERNLOGDZ256rrik: |
| case X86::VPTERNLOGQZrrik: |
| case X86::VPTERNLOGQZ128rrik: |
| case X86::VPTERNLOGQZ256rrik: |
| case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz: |
| case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz: |
| case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz: |
| case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz: |
| case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz: |
| case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz: |
| case X86::VPTERNLOGDZ128rmbi: |
| case X86::VPTERNLOGDZ256rmbi: |
| case X86::VPTERNLOGDZrmbi: |
| case X86::VPTERNLOGQZ128rmbi: |
| case X86::VPTERNLOGQZ256rmbi: |
| case X86::VPTERNLOGQZrmbi: |
| case X86::VPTERNLOGDZ128rmbikz: |
| case X86::VPTERNLOGDZ256rmbikz: |
| case X86::VPTERNLOGDZrmbikz: |
| case X86::VPTERNLOGQZ128rmbikz: |
| case X86::VPTERNLOGQZ256rmbikz: |
| case X86::VPTERNLOGQZrmbikz: { |
| auto &WorkingMI = cloneIfNew(MI); |
| commuteVPTERNLOG(WorkingMI, OpIdx1, OpIdx2); |
| return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, |
| OpIdx1, OpIdx2); |
| } |
| default: { |
| if (isCommutableVPERMV3Instruction(MI.getOpcode())) { |
| unsigned Opc = getCommutedVPERMV3Opcode(MI.getOpcode()); |
| auto &WorkingMI = cloneIfNew(MI); |
| WorkingMI.setDesc(get(Opc)); |
| return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, |
| OpIdx1, OpIdx2); |
| } |
| |
| const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode(), |
| MI.getDesc().TSFlags); |
| if (FMA3Group) { |
| unsigned Opc = |
| getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2, *FMA3Group); |
| auto &WorkingMI = cloneIfNew(MI); |
| WorkingMI.setDesc(get(Opc)); |
| return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, |
| OpIdx1, OpIdx2); |
| } |
| |
| return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); |
| } |
| } |
| } |
| |
| bool |
| X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI, |
| unsigned &SrcOpIdx1, |
| unsigned &SrcOpIdx2, |
| bool IsIntrinsic) const { |
| uint64_t TSFlags = MI.getDesc().TSFlags; |
| |
| unsigned FirstCommutableVecOp = 1; |
| unsigned LastCommutableVecOp = 3; |
| unsigned KMaskOp = -1U; |
| if (X86II::isKMasked(TSFlags)) { |
| // For k-zero-masked operations it is Ok to commute the first vector |
| // operand. |
| // For regular k-masked operations a conservative choice is done as the |
| // elements of the first vector operand, for which the corresponding bit |
| // in the k-mask operand is set to 0, are copied to the result of the |
| // instruction. |
| // TODO/FIXME: The commute still may be legal if it is known that the |
| // k-mask operand is set to either all ones or all zeroes. |
| // It is also Ok to commute the 1st operand if all users of MI use only |
| // the elements enabled by the k-mask operand. For example, |
| // v4 = VFMADD213PSZrk v1, k, v2, v3; // v1[i] = k[i] ? v2[i]*v1[i]+v3[i] |
| // : v1[i]; |
| // VMOVAPSZmrk <mem_addr>, k, v4; // this is the ONLY user of v4 -> |
| // // Ok, to commute v1 in FMADD213PSZrk. |
| |
| // The k-mask operand has index = 2 for masked and zero-masked operations. |
| KMaskOp = 2; |
| |
| // The operand with index = 1 is used as a source for those elements for |
| // which the corresponding bit in the k-mask is set to 0. |
| if (X86II::isKMergeMasked(TSFlags)) |
| FirstCommutableVecOp = 3; |
| |
| LastCommutableVecOp++; |
| } else if (IsIntrinsic) { |
| // Commuting the first operand of an intrinsic instruction isn't possible |
| // unless we can prove that only the lowest element of the result is used. |
| FirstCommutableVecOp = 2; |
| } |
| |
| if (isMem(MI, LastCommutableVecOp)) |
| LastCommutableVecOp--; |
| |
| // Only the first RegOpsNum operands are commutable. |
| // Also, the value 'CommuteAnyOperandIndex' is valid here as it means |
| // that the operand is not specified/fixed. |
| if (SrcOpIdx1 != CommuteAnyOperandIndex && |
| (SrcOpIdx1 < FirstCommutableVecOp || SrcOpIdx1 > LastCommutableVecOp || |
| SrcOpIdx1 == KMaskOp)) |
| return false; |
| if (SrcOpIdx2 != CommuteAnyOperandIndex && |
| (SrcOpIdx2 < FirstCommutableVecOp || SrcOpIdx2 > LastCommutableVecOp || |
| SrcOpIdx2 == KMaskOp)) |
| return false; |
| |
| // Look for two different register operands assumed to be commutable |
| // regardless of the FMA opcode. The FMA opcode is adjusted later. |
| if (SrcOpIdx1 == CommuteAnyOperandIndex || |
| SrcOpIdx2 == CommuteAnyOperandIndex) { |
| unsigned CommutableOpIdx2 = SrcOpIdx2; |
| |
| // At least one of operands to be commuted is not specified and |
| // this method is free to choose appropriate commutable operands. |
| if (SrcOpIdx1 == SrcOpIdx2) |
| // Both of operands are not fixed. By default set one of commutable |
| // operands to the last register operand of the instruction. |
| CommutableOpIdx2 = LastCommutableVecOp; |
| else if (SrcOpIdx2 == CommuteAnyOperandIndex) |
| // Only one of operands is not fixed. |
| CommutableOpIdx2 = SrcOpIdx1; |
| |
| // CommutableOpIdx2 is well defined now. Let's choose another commutable |
| // operand and assign its index to CommutableOpIdx1. |
| Register Op2Reg = MI.getOperand(CommutableOpIdx2).getReg(); |
| |
| unsigned CommutableOpIdx1; |
| for (CommutableOpIdx1 = LastCommutableVecOp; |
| CommutableOpIdx1 >= FirstCommutableVecOp; CommutableOpIdx1--) { |
| // Just ignore and skip the k-mask operand. |
| if (CommutableOpIdx1 == KMaskOp) |
| continue; |
| |
| // The commuted operands must have different registers. |
| // Otherwise, the commute transformation does not change anything and |
| // is useless then. |
| if (Op2Reg != MI.getOperand(CommutableOpIdx1).getReg()) |
| break; |
| } |
| |
| // No appropriate commutable operands were found. |
| if (CommutableOpIdx1 < FirstCommutableVecOp) |
| return false; |
| |
| // Assign the found pair of commutable indices to SrcOpIdx1 and SrcOpidx2 |
| // to return those values. |
| if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, |
| CommutableOpIdx1, CommutableOpIdx2)) |
| return false; |
| } |
| |
| return true; |
| } |
| |
| bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI, |
| unsigned &SrcOpIdx1, |
| unsigned &SrcOpIdx2) const { |
| const MCInstrDesc &Desc = MI.getDesc(); |
| if (!Desc.isCommutable()) |
| return false; |
| |
| switch (MI.getOpcode()) { |
| case X86::CMPSDrr: |
| case X86::CMPSSrr: |
| case X86::CMPPDrri: |
| case X86::CMPPSrri: |
| case X86::VCMPSDrr: |
| case X86::VCMPSSrr: |
| case X86::VCMPPDrri: |
| case X86::VCMPPSrri: |
| case X86::VCMPPDYrri: |
| case X86::VCMPPSYrri: |
| case X86::VCMPSDZrr: |
| case X86::VCMPSSZrr: |
| case X86::VCMPPDZrri: |
| case X86::VCMPPSZrri: |
| case X86::VCMPPDZ128rri: |
| case X86::VCMPPSZ128rri: |
| case X86::VCMPPDZ256rri: |
| case X86::VCMPPSZ256rri: |
| case X86::VCMPPDZrrik: |
| case X86::VCMPPSZrrik: |
| case X86::VCMPPDZ128rrik: |
| case X86::VCMPPSZ128rrik: |
| case X86::VCMPPDZ256rrik: |
| case X86::VCMPPSZ256rrik: { |
| unsigned OpOffset = X86II::isKMasked(Desc.TSFlags) ? 1 : 0; |
| |
| // Float comparison can be safely commuted for |
| // Ordered/Unordered/Equal/NotEqual tests |
| unsigned Imm = MI.getOperand(3 + OpOffset).getImm() & 0x7; |
| switch (Imm) { |
| default: |
| // EVEX versions can be commuted. |
| if ((Desc.TSFlags & X86II::EncodingMask) == X86II::EVEX) |
| break; |
| return false; |
| case 0x00: // EQUAL |
| case 0x03: // UNORDERED |
| case 0x04: // NOT EQUAL |
| case 0x07: // ORDERED |
| break; |
| } |
| |
| // The indices of the commutable operands are 1 and 2 (or 2 and 3 |
| // when masked). |
| // Assign them to the returned operand indices here. |
| return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1 + OpOffset, |
| 2 + OpOffset); |
| } |
| case X86::MOVSSrr: |
| // X86::MOVSDrr is always commutable. MOVSS is only commutable if we can |
| // form sse4.1 blend. We assume VMOVSSrr/VMOVSDrr is always commutable since |
| // AVX implies sse4.1. |
| if (Subtarget.hasSSE41()) |
| return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); |
| return false; |
| case X86::SHUFPDrri: |
| // We can commute this to MOVSD. |
| if (MI.getOperand(3).getImm() == 0x02) |
| return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); |
| return false; |
| case X86::MOVHLPSrr: |
| case X86::UNPCKHPDrr: |
| case X86::VMOVHLPSrr: |
| case X86::VUNPCKHPDrr: |
| case X86::VMOVHLPSZrr: |
| case X86::VUNPCKHPDZ128rr: |
| if (Subtarget.hasSSE2()) |
| return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); |
| return false; |
| case X86::VPTERNLOGDZrri: case X86::VPTERNLOGDZrmi: |
| case X86::VPTERNLOGDZ128rri: case X86::VPTERNLOGDZ128rmi: |
| case X86::VPTERNLOGDZ256rri: case X86::VPTERNLOGDZ256rmi: |
| case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi: |
| case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi: |
| case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi: |
| case X86::VPTERNLOGDZrrik: |
| case X86::VPTERNLOGDZ128rrik: |
| case X86::VPTERNLOGDZ256rrik: |
| case X86::VPTERNLOGQZrrik: |
| case X86::VPTERNLOGQZ128rrik: |
| case X86::VPTERNLOGQZ256rrik: |
| case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz: |
| case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz: |
| case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz: |
| case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz: |
| case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz: |
| case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz: |
| case X86::VPTERNLOGDZ128rmbi: |
| case X86::VPTERNLOGDZ256rmbi: |
| case X86::VPTERNLOGDZrmbi: |
| case X86::VPTERNLOGQZ128rmbi: |
| case X86::VPTERNLOGQZ256rmbi: |
| case X86::VPTERNLOGQZrmbi: |
| case X86::VPTERNLOGDZ128rmbikz: |
| case X86::VPTERNLOGDZ256rmbikz: |
| case X86::VPTERNLOGDZrmbikz: |
| case X86::VPTERNLOGQZ128rmbikz: |
| case X86::VPTERNLOGQZ256rmbikz: |
| case X86::VPTERNLOGQZrmbikz: |
| return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); |
| case X86::VPDPWSSDZ128r: |
| case X86::VPDPWSSDZ128rk: |
| case X86::VPDPWSSDZ128rkz: |
| case X86::VPDPWSSDZ256r: |
| case X86::VPDPWSSDZ256rk: |
| case X86::VPDPWSSDZ256rkz: |
| case X86::VPDPWSSDZr: |
| case X86::VPDPWSSDZrk: |
| case X86::VPDPWSSDZrkz: |
| case X86::VPDPWSSDSZ128r: |
| case X86::VPDPWSSDSZ128rk: |
| case X86::VPDPWSSDSZ128rkz: |
| case X86::VPDPWSSDSZ256r: |
| case X86::VPDPWSSDSZ256rk: |
| case X86::VPDPWSSDSZ256rkz: |
| case X86::VPDPWSSDSZr: |
| case X86::VPDPWSSDSZrk: |
| case X86::VPDPWSSDSZrkz: |
| case X86::VPMADD52HUQZ128r: |
| case X86::VPMADD52HUQZ128rk: |
| case X86::VPMADD52HUQZ128rkz: |
| case X86::VPMADD52HUQZ256r: |
| case X86::VPMADD52HUQZ256rk: |
| case X86::VPMADD52HUQZ256rkz: |
| case X86::VPMADD52HUQZr: |
| case X86::VPMADD52HUQZrk: |
| case X86::VPMADD52HUQZrkz: |
| case X86::VPMADD52LUQZ128r: |
| case X86::VPMADD52LUQZ128rk: |
| case X86::VPMADD52LUQZ128rkz: |
| case X86::VPMADD52LUQZ256r: |
| case X86::VPMADD52LUQZ256rk: |
| case X86::VPMADD52LUQZ256rkz: |
| case X86::VPMADD52LUQZr: |
| case X86::VPMADD52LUQZrk: |
| case X86::VPMADD52LUQZrkz: { |
| unsigned CommutableOpIdx1 = 2; |
| unsigned CommutableOpIdx2 = 3; |
| if (X86II::isKMasked(Desc.TSFlags)) { |
| // Skip the mask register. |
| ++CommutableOpIdx1; |
| ++CommutableOpIdx2; |
| } |
| if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, |
| CommutableOpIdx1, CommutableOpIdx2)) |
| return false; |
| if (!MI.getOperand(SrcOpIdx1).isReg() || |
| !MI.getOperand(SrcOpIdx2).isReg()) |
| // No idea. |
| return false; |
| return true; |
| } |
| |
| default: |
| const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode(), |
| MI.getDesc().TSFlags); |
| if (FMA3Group) |
| return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2, |
| FMA3Group->isIntrinsic()); |
| |
| // Handled masked instructions since we need to skip over the mask input |
| // and the preserved input. |
| if (X86II::isKMasked(Desc.TSFlags)) { |
| // First assume that the first input is the mask operand and skip past it. |
| unsigned CommutableOpIdx1 = Desc.getNumDefs() + 1; |
| unsigned CommutableOpIdx2 = Desc.getNumDefs() + 2; |
| // Check if the first input is tied. If there isn't one then we only |
| // need to skip the mask operand which we did above. |
| if ((MI.getDesc().getOperandConstraint(Desc.getNumDefs(), |
| MCOI::TIED_TO) != -1)) { |
| // If this is zero masking instruction with a tied operand, we need to |
| // move the first index back to the first input since this must |
| // be a 3 input instruction and we want the first two non-mask inputs. |
| // Otherwise this is a 2 input instruction with a preserved input and |
| // mask, so we need to move the indices to skip one more input. |
| if (X86II::isKMergeMasked(Desc.TSFlags)) { |
| ++CommutableOpIdx1; |
| ++CommutableOpIdx2; |
| } else { |
| --CommutableOpIdx1; |
| } |
| } |
| |
| if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, |
| CommutableOpIdx1, CommutableOpIdx2)) |
| return false; |
| |
| if (!MI.getOperand(SrcOpIdx1).isReg() || |
| !MI.getOperand(SrcOpIdx2).isReg()) |
| // No idea. |
| return false; |
| return true; |
| } |
| |
| return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); |
| } |
| return false; |
| } |
| |
| X86::CondCode X86::getCondFromBranch(const MachineInstr &MI) { |
| switch (MI.getOpcode()) { |
| default: return X86::COND_INVALID; |
| case X86::JCC_1: |
| return static_cast<X86::CondCode>( |
| MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm()); |
| } |
| } |
| |
| /// Return condition code of a SETCC opcode. |
| X86::CondCode X86::getCondFromSETCC(const MachineInstr &MI) { |
| switch (MI.getOpcode()) { |
| default: return X86::COND_INVALID; |
| case X86::SETCCr: case X86::SETCCm: |
| return static_cast<X86::CondCode>( |
| MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm()); |
| } |
| } |
| |
| /// Return condition code of a CMov opcode. |
| X86::CondCode X86::getCondFromCMov(const MachineInstr &MI) { |
| switch (MI.getOpcode()) { |
| default: return X86::COND_INVALID; |
| case X86::CMOV16rr: case X86::CMOV32rr: case X86::CMOV64rr: |
| case X86::CMOV16rm: case X86::CMOV32rm: case X86::CMOV64rm: |
| return static_cast<X86::CondCode>( |
| MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm()); |
| } |
| } |
| |
| /// Return the inverse of the specified condition, |
| /// e.g. turning COND_E to COND_NE. |
| X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { |
| switch (CC) { |
| default: llvm_unreachable("Illegal condition code!"); |
| case X86::COND_E: return X86::COND_NE; |
| case X86::COND_NE: return X86::COND_E; |
| case X86::COND_L: return X86::COND_GE; |
| case X86::COND_LE: return X86::COND_G; |
| case X86::COND_G: return X86::COND_LE; |
| case X86::COND_GE: return X86::COND_L; |
| case X86::COND_B: return X86::COND_AE; |
| case X86::COND_BE: return X86::COND_A; |
| case X86::COND_A: return X86::COND_BE; |
| case X86::COND_AE: return X86::COND_B; |
| case X86::COND_S: return X86::COND_NS; |
| case X86::COND_NS: return X86::COND_S; |
| case X86::COND_P: return X86::COND_NP; |
| case X86::COND_NP: return X86::COND_P; |
| case X86::COND_O: return X86::COND_NO; |
| case X86::COND_NO: return X86::COND_O; |
| case X86::COND_NE_OR_P: return X86::COND_E_AND_NP; |
| case X86::COND_E_AND_NP: return X86::COND_NE_OR_P; |
| } |
| } |
| |
| /// Assuming the flags are set by MI(a,b), return the condition code if we |
| /// modify the instructions such that flags are set by MI(b,a). |
| static X86::CondCode getSwappedCondition(X86::CondCode CC) { |
| switch (CC) { |
| default: return X86::COND_INVALID; |
| case X86::COND_E: return X86::COND_E; |
| case X86::COND_NE: return X86::COND_NE; |
| case X86::COND_L: return X86::COND_G; |
| case X86::COND_LE: return X86::COND_GE; |
| case X86::COND_G: return X86::COND_L; |
| case X86::COND_GE: return X86::COND_LE; |
| case X86::COND_B: return X86::COND_A; |
| case X86::COND_BE: return X86::COND_AE; |
| case X86::COND_A: return X86::COND_B; |
| case X86::COND_AE: return X86::COND_BE; |
| } |
| } |
| |
| std::pair<X86::CondCode, bool> |
| X86::getX86ConditionCode(CmpInst::Predicate Predicate) { |
| X86::CondCode CC = X86::COND_INVALID; |
| bool NeedSwap = false; |
| switch (Predicate) { |
| default: break; |
| // Floating-point Predicates |
| case CmpInst::FCMP_UEQ: CC = X86::COND_E; break; |
| case CmpInst::FCMP_OLT: NeedSwap = true; LLVM_FALLTHROUGH; |
| case CmpInst::FCMP_OGT: CC = X86::COND_A; break; |
| case CmpInst::FCMP_OLE: NeedSwap = true; LLVM_FALLTHROUGH; |
| case CmpInst::FCMP_OGE: CC = X86::COND_AE; break; |
| case CmpInst::FCMP_UGT: NeedSwap = true; LLVM_FALLTHROUGH; |
| case CmpInst::FCMP_ULT: CC = X86::COND_B; break; |
| case CmpInst::FCMP_UGE: NeedSwap = true; LLVM_FALLTHROUGH; |
| case CmpInst::FCMP_ULE: CC = X86::COND_BE; break; |
| case CmpInst::FCMP_ONE: CC = X86::COND_NE; break; |
| case CmpInst::FCMP_UNO: CC = X86::COND_P; break; |
| case CmpInst::FCMP_ORD: CC = X86::COND_NP; break; |
| case CmpInst::FCMP_OEQ: LLVM_FALLTHROUGH; |
| case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break; |
| |
| // Integer Predicates |
| case CmpInst::ICMP_EQ: CC = X86::COND_E; break; |
| case CmpInst::ICMP_NE: CC = X86::COND_NE; break; |
| case CmpInst::ICMP_UGT: CC = X86::COND_A; break; |
| case CmpInst::ICMP_UGE: CC = X86::COND_AE; break; |
| case CmpInst::ICMP_ULT: CC = X86::COND_B; break; |
| case CmpInst::ICMP_ULE: CC = X86::COND_BE; break; |
| case CmpInst::ICMP_SGT: CC = X86::COND_G; break; |
| case CmpInst::ICMP_SGE: CC = X86::COND_GE; break; |
| case CmpInst::ICMP_SLT: CC = X86::COND_L; break; |
| case CmpInst::ICMP_SLE: CC = X86::COND_LE; break; |
| } |
| |
| return std::make_pair(CC, NeedSwap); |
| } |
| |
| /// Return a setcc opcode based on whether it has memory operand. |
| unsigned X86::getSETOpc(bool HasMemoryOperand) { |
| return HasMemoryOperand ? X86::SETCCr : X86::SETCCm; |
| } |
| |
| /// Return a cmov opcode for the given register size in bytes, and operand type. |
| unsigned X86::getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand) { |
| switch(RegBytes) { |
| default: llvm_unreachable("Illegal register size!"); |
| case 2: return HasMemoryOperand ? X86::CMOV16rm : X86::CMOV16rr; |
| case 4: return HasMemoryOperand ? X86::CMOV32rm : X86::CMOV32rr; |
| case 8: return HasMemoryOperand ? X86::CMOV32rm : X86::CMOV64rr; |
| } |
| } |
| |
| /// Get the VPCMP immediate for the given condition. |
| unsigned X86::getVPCMPImmForCond(ISD::CondCode CC) { |
| switch (CC) { |
| default: llvm_unreachable("Unexpected SETCC condition"); |
| case ISD::SETNE: return 4; |
| case ISD::SETEQ: return 0; |
| case ISD::SETULT: |
| case ISD::SETLT: return 1; |
| case ISD::SETUGT: |
| case ISD::SETGT: return 6; |
| case ISD::SETUGE: |
| case ISD::SETGE: return 5; |
| case ISD::SETULE: |
| case ISD::SETLE: return 2; |
| } |
| } |
| |
| /// Get the VPCMP immediate if the operands are swapped. |
| unsigned X86::getSwappedVPCMPImm(unsigned Imm) { |
| switch (Imm) { |
| default: llvm_unreachable("Unreachable!"); |
| case 0x01: Imm = 0x06; break; // LT -> NLE |
| case 0x02: Imm = 0x05; break; // LE -> NLT |
| case 0x05: Imm = 0x02; break; // NLT -> LE |
| case 0x06: Imm = 0x01; break; // NLE -> LT |
| case 0x00: // EQ |
| case 0x03: // FALSE |
| case 0x04: // NE |
| case 0x07: // TRUE |
| break; |
| } |
| |
| return Imm; |
| } |
| |
| /// Get the VPCOM immediate if the operands are swapped. |
| unsigned X86::getSwappedVPCOMImm(unsigned Imm) { |
| switch (Imm) { |
| default: llvm_unreachable("Unreachable!"); |
| case 0x00: Imm = 0x02; break; // LT -> GT |
| case 0x01: Imm = 0x03; break; // LE -> GE |
| case 0x02: Imm = 0x00; break; // GT -> LT |
| case 0x03: Imm = 0x01; break; // GE -> LE |
| case 0x04: // EQ |
| case 0x05: // NE |
| case 0x06: // FALSE |
| case 0x07: // TRUE |
| break; |
| } |
| |
| return Imm; |
| } |
| |
| /// Get the VCMP immediate if the operands are swapped. |
| unsigned X86::getSwappedVCMPImm(unsigned Imm) { |
| // Only need the lower 2 bits to distinquish. |
| switch (Imm & 0x3) { |
| default: llvm_unreachable("Unreachable!"); |
| case 0x00: case 0x03: |
| // EQ/NE/TRUE/FALSE/ORD/UNORD don't change immediate when commuted. |
| break; |
| case 0x01: case 0x02: |
| // Need to toggle bits 3:0. Bit 4 stays the same. |
| Imm ^= 0xf; |
| break; |
| } |
| |
| return Imm; |
| } |
| |
| bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const { |
| if (!MI.isTerminator()) return false; |
| |
| // Conditional branch is a special case. |
| if (MI.isBranch() && !MI.isBarrier()) |
| return true; |
| if (!MI.isPredicable()) |
| return true; |
| return !isPredicated(MI); |
| } |
| |
| bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const { |
| switch (MI.getOpcode()) { |
| case X86::TCRETURNdi: |
| case X86::TCRETURNri: |
| case X86::TCRETURNmi: |
| case X86::TCRETURNdi64: |
| case X86::TCRETURNri64: |
| case X86::TCRETURNmi64: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool X86InstrInfo::canMakeTailCallConditional( |
| SmallVectorImpl<MachineOperand> &BranchCond, |
| const MachineInstr &TailCall) const { |
| if (TailCall.getOpcode() != X86::TCRETURNdi && |
| TailCall.getOpcode() != X86::TCRETURNdi64) { |
| // Only direct calls can be done with a conditional branch. |
| return false; |
| } |
| |
| const MachineFunction *MF = TailCall.getParent()->getParent(); |
| if (Subtarget.isTargetWin64() && MF->hasWinCFI()) { |
| // Conditional tail calls confuse the Win64 unwinder. |
| return false; |
| } |
| |
| assert(BranchCond.size() == 1); |
| if (BranchCond[0].getImm() > X86::LAST_VALID_COND) { |
| // Can't make a conditional tail call with this condition. |
| return false; |
| } |
| |
| const X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>(); |
| if (X86FI->getTCReturnAddrDelta() != 0 || |
| TailCall.getOperand(1).getImm() != 0) { |
| // A conditional tail call cannot do any stack adjustment. |
| return false; |
| } |
| |
| return true; |
| } |
| |
| void X86InstrInfo::replaceBranchWithTailCall( |
| MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &BranchCond, |
| const MachineInstr &TailCall) const { |
| assert(canMakeTailCallConditional(BranchCond, TailCall)); |
| |
| MachineBasicBlock::iterator I = MBB.end(); |
| while (I != MBB.begin()) { |
| --I; |
| if (I->isDebugInstr()) |
| continue; |
| if (!I->isBranch()) |
| assert(0 && "Can't find the branch to replace!"); |
| |
| X86::CondCode CC = X86::getCondFromBranch(*I); |
| assert(BranchCond.size() == 1); |
| if (CC != BranchCond[0].getImm()) |
| continue; |
| |
| break; |
| } |
| |
| unsigned Opc = TailCall.getOpcode() == X86::TCRETURNdi ? X86::TCRETURNdicc |
| : X86::TCRETURNdi64cc; |
| |
| auto MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opc)); |
| MIB->addOperand(TailCall.getOperand(0)); // Destination. |
| MIB.addImm(0); // Stack offset (not used). |
| MIB->addOperand(BranchCond[0]); // Condition. |
| MIB.copyImplicitOps(TailCall); // Regmask and (imp-used) parameters. |
| |
| // Add implicit uses and defs of all live regs potentially clobbered by the |
| // call. This way they still appear live across the call. |
| LivePhysRegs LiveRegs(getRegisterInfo()); |
| LiveRegs.addLiveOuts(MBB); |
| SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 8> Clobbers; |
| LiveRegs.stepForward(*MIB, Clobbers); |
| for (const auto &C : Clobbers) { |
| MIB.addReg(C.first, RegState::Implicit); |
| MIB.addReg(C.first, RegState::Implicit | RegState::Define); |
| } |
| |
| I->eraseFromParent(); |
| } |
| |
| // Given a MBB and its TBB, find the FBB which was a fallthrough MBB (it may |
| // not be a fallthrough MBB now due to layout changes). Return nullptr if the |
| // fallthrough MBB cannot be identified. |
| static MachineBasicBlock *getFallThroughMBB(MachineBasicBlock *MBB, |
| MachineBasicBlock *TBB) { |
| // Look for non-EHPad successors other than TBB. If we find exactly one, it |
| // is the fallthrough MBB. If we find zero, then TBB is both the target MBB |
| // and fallthrough MBB. If we find more than one, we cannot identify the |
| // fallthrough MBB and should return nullptr. |
| MachineBasicBlock *FallthroughBB = nullptr; |
| for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { |
| if ((*SI)->isEHPad() || (*SI == TBB && FallthroughBB)) |
| continue; |
| // Return a nullptr if we found more than one fallthrough successor. |
| if (FallthroughBB && FallthroughBB != TBB) |
| return nullptr; |
| FallthroughBB = *SI; |
| } |
| return FallthroughBB; |
| } |
| |
| bool X86InstrInfo::AnalyzeBranchImpl( |
| MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, |
| SmallVectorImpl<MachineOperand> &Cond, |
| SmallVectorImpl<MachineInstr *> &CondBranches, bool AllowModify) const { |
| |
| // Start from the bottom of the block and work up, examining the |
| // terminator instructions. |
| MachineBasicBlock::iterator I = MBB.end(); |
| MachineBasicBlock::iterator UnCondBrIter = MBB.end(); |
| while (I != MBB.begin()) { |
| --I; |
| if (I->isDebugInstr()) |
| continue; |
| |
| // Working from the bottom, when we see a non-terminator instruction, we're |
| // done. |
| if (!isUnpredicatedTerminator(*I)) |
| break; |
| |
| // A terminator that isn't a branch can't easily be handled by this |
| // analysis. |
| if (!I->isBranch()) |
| return true; |
| |
| // Handle unconditional branches. |
| if (I->getOpcode() == X86::JMP_1) { |
| UnCondBrIter = I; |
| |
| if (!AllowModify) { |
| TBB = I->getOperand(0).getMBB(); |
| continue; |
| } |
| |
| // If the block has any instructions after a JMP, delete them. |
| while (std:: |