blob: ab14ee7fadf2d7ac19396aa638dc2cc801f8fe43 [file] [log] [blame]
//===-- X86InstrInfo.cpp - X86 Instruction Information --------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
#include "X86InstrInfo.h"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrFoldTables.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
#define DEBUG_TYPE "x86-instr-info"
#define GET_INSTRINFO_CTOR_DTOR
#include "X86GenInstrInfo.inc"
static cl::opt<bool>
NoFusing("disable-spill-fusing",
cl::desc("Disable fusing of spill code into instructions"),
cl::Hidden);
static cl::opt<bool>
PrintFailedFusing("print-failed-fuse-candidates",
cl::desc("Print instructions that the allocator wants to"
" fuse, but the X86 backend currently can't"),
cl::Hidden);
static cl::opt<bool>
ReMatPICStubLoad("remat-pic-stub-load",
cl::desc("Re-materialize load from stub in PIC mode"),
cl::init(false), cl::Hidden);
static cl::opt<unsigned>
PartialRegUpdateClearance("partial-reg-update-clearance",
cl::desc("Clearance between two register writes "
"for inserting XOR to avoid partial "
"register update"),
cl::init(64), cl::Hidden);
static cl::opt<unsigned>
UndefRegClearance("undef-reg-clearance",
cl::desc("How many idle instructions we would like before "
"certain undef register reads"),
cl::init(128), cl::Hidden);
// Pin the vtable to this file.
void X86InstrInfo::anchor() {}
X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
: X86GenInstrInfo((STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64
: X86::ADJCALLSTACKDOWN32),
(STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64
: X86::ADJCALLSTACKUP32),
X86::CATCHRET,
(STI.is64Bit() ? X86::RETQ : X86::RETL)),
Subtarget(STI), RI(STI.getTargetTriple()) {
}
bool
X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SubIdx) const {
switch (MI.getOpcode()) {
default: break;
case X86::MOVSX16rr8:
case X86::MOVZX16rr8:
case X86::MOVSX32rr8:
case X86::MOVZX32rr8:
case X86::MOVSX64rr8:
if (!Subtarget.is64Bit())
// It's not always legal to reference the low 8-bit of the larger
// register in 32-bit mode.
return false;
LLVM_FALLTHROUGH;
case X86::MOVSX32rr16:
case X86::MOVZX32rr16:
case X86::MOVSX64rr16:
case X86::MOVSX64rr32: {
if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
// Be conservative.
return false;
SrcReg = MI.getOperand(1).getReg();
DstReg = MI.getOperand(0).getReg();
switch (MI.getOpcode()) {
default: llvm_unreachable("Unreachable!");
case X86::MOVSX16rr8:
case X86::MOVZX16rr8:
case X86::MOVSX32rr8:
case X86::MOVZX32rr8:
case X86::MOVSX64rr8:
SubIdx = X86::sub_8bit;
break;
case X86::MOVSX32rr16:
case X86::MOVZX32rr16:
case X86::MOVSX64rr16:
SubIdx = X86::sub_16bit;
break;
case X86::MOVSX64rr32:
SubIdx = X86::sub_32bit;
break;
}
return true;
}
}
return false;
}
int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
const MachineFunction *MF = MI.getParent()->getParent();
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
if (isFrameInstr(MI)) {
unsigned StackAlign = TFI->getStackAlignment();
int SPAdj = alignTo(getFrameSize(MI), StackAlign);
SPAdj -= getFrameAdjustment(MI);
if (!isFrameSetup(MI))
SPAdj = -SPAdj;
return SPAdj;
}
// To know whether a call adjusts the stack, we need information
// that is bound to the following ADJCALLSTACKUP pseudo.
// Look for the next ADJCALLSTACKUP that follows the call.
if (MI.isCall()) {
const MachineBasicBlock *MBB = MI.getParent();
auto I = ++MachineBasicBlock::const_iterator(MI);
for (auto E = MBB->end(); I != E; ++I) {
if (I->getOpcode() == getCallFrameDestroyOpcode() ||
I->isCall())
break;
}
// If we could not find a frame destroy opcode, then it has already
// been simplified, so we don't care.
if (I->getOpcode() != getCallFrameDestroyOpcode())
return 0;
return -(I->getOperand(1).getImm());
}
// Currently handle only PUSHes we can reasonably expect to see
// in call sequences
switch (MI.getOpcode()) {
default:
return 0;
case X86::PUSH32i8:
case X86::PUSH32r:
case X86::PUSH32rmm:
case X86::PUSH32rmr:
case X86::PUSHi32:
return 4;
case X86::PUSH64i8:
case X86::PUSH64r:
case X86::PUSH64rmm:
case X86::PUSH64rmr:
case X86::PUSH64i32:
return 8;
}
}
/// Return true and the FrameIndex if the specified
/// operand and follow operands form a reference to the stack frame.
bool X86InstrInfo::isFrameOperand(const MachineInstr &MI, unsigned int Op,
int &FrameIndex) const {
if (MI.getOperand(Op + X86::AddrBaseReg).isFI() &&
MI.getOperand(Op + X86::AddrScaleAmt).isImm() &&
MI.getOperand(Op + X86::AddrIndexReg).isReg() &&
MI.getOperand(Op + X86::AddrDisp).isImm() &&
MI.getOperand(Op + X86::AddrScaleAmt).getImm() == 1 &&
MI.getOperand(Op + X86::AddrIndexReg).getReg() == 0 &&
MI.getOperand(Op + X86::AddrDisp).getImm() == 0) {
FrameIndex = MI.getOperand(Op + X86::AddrBaseReg).getIndex();
return true;
}
return false;
}
static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
switch (Opcode) {
default:
return false;
case X86::MOV8rm:
case X86::KMOVBkm:
MemBytes = 1;
return true;
case X86::MOV16rm:
case X86::KMOVWkm:
MemBytes = 2;
return true;
case X86::MOV32rm:
case X86::MOVSSrm:
case X86::VMOVSSZrm:
case X86::VMOVSSrm:
case X86::KMOVDkm:
MemBytes = 4;
return true;
case X86::MOV64rm:
case X86::LD_Fp64m:
case X86::MOVSDrm:
case X86::VMOVSDrm:
case X86::VMOVSDZrm:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
case X86::KMOVQkm:
MemBytes = 8;
return true;
case X86::MOVAPSrm:
case X86::MOVUPSrm:
case X86::MOVAPDrm:
case X86::MOVUPDrm:
case X86::MOVDQArm:
case X86::MOVDQUrm:
case X86::VMOVAPSrm:
case X86::VMOVUPSrm:
case X86::VMOVAPDrm:
case X86::VMOVUPDrm:
case X86::VMOVDQArm:
case X86::VMOVDQUrm:
case X86::VMOVAPSZ128rm:
case X86::VMOVUPSZ128rm:
case X86::VMOVAPSZ128rm_NOVLX:
case X86::VMOVUPSZ128rm_NOVLX:
case X86::VMOVAPDZ128rm:
case X86::VMOVUPDZ128rm:
case X86::VMOVDQU8Z128rm:
case X86::VMOVDQU16Z128rm:
case X86::VMOVDQA32Z128rm:
case X86::VMOVDQU32Z128rm:
case X86::VMOVDQA64Z128rm:
case X86::VMOVDQU64Z128rm:
MemBytes = 16;
return true;
case X86::VMOVAPSYrm:
case X86::VMOVUPSYrm:
case X86::VMOVAPDYrm:
case X86::VMOVUPDYrm:
case X86::VMOVDQAYrm:
case X86::VMOVDQUYrm:
case X86::VMOVAPSZ256rm:
case X86::VMOVUPSZ256rm:
case X86::VMOVAPSZ256rm_NOVLX:
case X86::VMOVUPSZ256rm_NOVLX:
case X86::VMOVAPDZ256rm:
case X86::VMOVUPDZ256rm:
case X86::VMOVDQU8Z256rm:
case X86::VMOVDQU16Z256rm:
case X86::VMOVDQA32Z256rm:
case X86::VMOVDQU32Z256rm:
case X86::VMOVDQA64Z256rm:
case X86::VMOVDQU64Z256rm:
MemBytes = 32;
return true;
case X86::VMOVAPSZrm:
case X86::VMOVUPSZrm:
case X86::VMOVAPDZrm:
case X86::VMOVUPDZrm:
case X86::VMOVDQU8Zrm:
case X86::VMOVDQU16Zrm:
case X86::VMOVDQA32Zrm:
case X86::VMOVDQU32Zrm:
case X86::VMOVDQA64Zrm:
case X86::VMOVDQU64Zrm:
MemBytes = 64;
return true;
}
}
static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
switch (Opcode) {
default:
return false;
case X86::MOV8mr:
case X86::KMOVBmk:
MemBytes = 1;
return true;
case X86::MOV16mr:
case X86::KMOVWmk:
MemBytes = 2;
return true;
case X86::MOV32mr:
case X86::MOVSSmr:
case X86::VMOVSSmr:
case X86::VMOVSSZmr:
case X86::KMOVDmk:
MemBytes = 4;
return true;
case X86::MOV64mr:
case X86::ST_FpP64m:
case X86::MOVSDmr:
case X86::VMOVSDmr:
case X86::VMOVSDZmr:
case X86::MMX_MOVD64mr:
case X86::MMX_MOVQ64mr:
case X86::MMX_MOVNTQmr:
case X86::KMOVQmk:
MemBytes = 8;
return true;
case X86::MOVAPSmr:
case X86::MOVUPSmr:
case X86::MOVAPDmr:
case X86::MOVUPDmr:
case X86::MOVDQAmr:
case X86::MOVDQUmr:
case X86::VMOVAPSmr:
case X86::VMOVUPSmr:
case X86::VMOVAPDmr:
case X86::VMOVUPDmr:
case X86::VMOVDQAmr:
case X86::VMOVDQUmr:
case X86::VMOVUPSZ128mr:
case X86::VMOVAPSZ128mr:
case X86::VMOVUPSZ128mr_NOVLX:
case X86::VMOVAPSZ128mr_NOVLX:
case X86::VMOVUPDZ128mr:
case X86::VMOVAPDZ128mr:
case X86::VMOVDQA32Z128mr:
case X86::VMOVDQU32Z128mr:
case X86::VMOVDQA64Z128mr:
case X86::VMOVDQU64Z128mr:
case X86::VMOVDQU8Z128mr:
case X86::VMOVDQU16Z128mr:
MemBytes = 16;
return true;
case X86::VMOVUPSYmr:
case X86::VMOVAPSYmr:
case X86::VMOVUPDYmr:
case X86::VMOVAPDYmr:
case X86::VMOVDQUYmr:
case X86::VMOVDQAYmr:
case X86::VMOVUPSZ256mr:
case X86::VMOVAPSZ256mr:
case X86::VMOVUPSZ256mr_NOVLX:
case X86::VMOVAPSZ256mr_NOVLX:
case X86::VMOVUPDZ256mr:
case X86::VMOVAPDZ256mr:
case X86::VMOVDQU8Z256mr:
case X86::VMOVDQU16Z256mr:
case X86::VMOVDQA32Z256mr:
case X86::VMOVDQU32Z256mr:
case X86::VMOVDQA64Z256mr:
case X86::VMOVDQU64Z256mr:
MemBytes = 32;
return true;
case X86::VMOVUPSZmr:
case X86::VMOVAPSZmr:
case X86::VMOVUPDZmr:
case X86::VMOVAPDZmr:
case X86::VMOVDQU8Zmr:
case X86::VMOVDQU16Zmr:
case X86::VMOVDQA32Zmr:
case X86::VMOVDQU32Zmr:
case X86::VMOVDQA64Zmr:
case X86::VMOVDQU64Zmr:
MemBytes = 64;
return true;
}
return false;
}
unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
unsigned Dummy;
return X86InstrInfo::isLoadFromStackSlot(MI, FrameIndex, Dummy);
}
unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex,
unsigned &MemBytes) const {
if (isFrameLoadOpcode(MI.getOpcode(), MemBytes))
if (MI.getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
return MI.getOperand(0).getReg();
return 0;
}
unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
int &FrameIndex) const {
unsigned Dummy;
if (isFrameLoadOpcode(MI.getOpcode(), Dummy)) {
unsigned Reg;
if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
return Reg;
// Check for post-frame index elimination operations
SmallVector<const MachineMemOperand *, 1> Accesses;
if (hasLoadFromStackSlot(MI, Accesses)) {
FrameIndex =
cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
->getFrameIndex();
return 1;
}
}
return 0;
}
unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
unsigned Dummy;
return X86InstrInfo::isStoreToStackSlot(MI, FrameIndex, Dummy);
}
unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex,
unsigned &MemBytes) const {
if (isFrameStoreOpcode(MI.getOpcode(), MemBytes))
if (MI.getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
isFrameOperand(MI, 0, FrameIndex))
return MI.getOperand(X86::AddrNumOperands).getReg();
return 0;
}
unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
int &FrameIndex) const {
unsigned Dummy;
if (isFrameStoreOpcode(MI.getOpcode(), Dummy)) {
unsigned Reg;
if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
return Reg;
// Check for post-frame index elimination operations
SmallVector<const MachineMemOperand *, 1> Accesses;
if (hasStoreToStackSlot(MI, Accesses)) {
FrameIndex =
cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
->getFrameIndex();
return 1;
}
}
return 0;
}
/// Return true if register is PIC base; i.e.g defined by X86::MOVPC32r.
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
// Don't waste compile time scanning use-def chains of physregs.
if (!TargetRegisterInfo::isVirtualRegister(BaseReg))
return false;
bool isPICBase = false;
for (MachineRegisterInfo::def_instr_iterator I = MRI.def_instr_begin(BaseReg),
E = MRI.def_instr_end(); I != E; ++I) {
MachineInstr *DefMI = &*I;
if (DefMI->getOpcode() != X86::MOVPC32r)
return false;
assert(!isPICBase && "More than one PIC base?");
isPICBase = true;
}
return isPICBase;
}
bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
AliasAnalysis *AA) const {
switch (MI.getOpcode()) {
default: break;
case X86::MOV8rm:
case X86::MOV8rm_NOREX:
case X86::MOV16rm:
case X86::MOV32rm:
case X86::MOV64rm:
case X86::LD_Fp64m:
case X86::MOVSSrm:
case X86::MOVSDrm:
case X86::MOVAPSrm:
case X86::MOVUPSrm:
case X86::MOVAPDrm:
case X86::MOVUPDrm:
case X86::MOVDQArm:
case X86::MOVDQUrm:
case X86::VMOVSSrm:
case X86::VMOVSDrm:
case X86::VMOVAPSrm:
case X86::VMOVUPSrm:
case X86::VMOVAPDrm:
case X86::VMOVUPDrm:
case X86::VMOVDQArm:
case X86::VMOVDQUrm:
case X86::VMOVAPSYrm:
case X86::VMOVUPSYrm:
case X86::VMOVAPDYrm:
case X86::VMOVUPDYrm:
case X86::VMOVDQAYrm:
case X86::VMOVDQUYrm:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
// AVX-512
case X86::VMOVSSZrm:
case X86::VMOVSDZrm:
case X86::VMOVAPDZ128rm:
case X86::VMOVAPDZ256rm:
case X86::VMOVAPDZrm:
case X86::VMOVAPSZ128rm:
case X86::VMOVAPSZ256rm:
case X86::VMOVAPSZ128rm_NOVLX:
case X86::VMOVAPSZ256rm_NOVLX:
case X86::VMOVAPSZrm:
case X86::VMOVDQA32Z128rm:
case X86::VMOVDQA32Z256rm:
case X86::VMOVDQA32Zrm:
case X86::VMOVDQA64Z128rm:
case X86::VMOVDQA64Z256rm:
case X86::VMOVDQA64Zrm:
case X86::VMOVDQU16Z128rm:
case X86::VMOVDQU16Z256rm:
case X86::VMOVDQU16Zrm:
case X86::VMOVDQU32Z128rm:
case X86::VMOVDQU32Z256rm:
case X86::VMOVDQU32Zrm:
case X86::VMOVDQU64Z128rm:
case X86::VMOVDQU64Z256rm:
case X86::VMOVDQU64Zrm:
case X86::VMOVDQU8Z128rm:
case X86::VMOVDQU8Z256rm:
case X86::VMOVDQU8Zrm:
case X86::VMOVUPDZ128rm:
case X86::VMOVUPDZ256rm:
case X86::VMOVUPDZrm:
case X86::VMOVUPSZ128rm:
case X86::VMOVUPSZ256rm:
case X86::VMOVUPSZ128rm_NOVLX:
case X86::VMOVUPSZ256rm_NOVLX:
case X86::VMOVUPSZrm: {
// Loads from constant pools are trivially rematerializable.
if (MI.getOperand(1 + X86::AddrBaseReg).isReg() &&
MI.getOperand(1 + X86::AddrScaleAmt).isImm() &&
MI.getOperand(1 + X86::AddrIndexReg).isReg() &&
MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
MI.isDereferenceableInvariantLoad(AA)) {
unsigned BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
if (BaseReg == 0 || BaseReg == X86::RIP)
return true;
// Allow re-materialization of PIC load.
if (!ReMatPICStubLoad && MI.getOperand(1 + X86::AddrDisp).isGlobal())
return false;
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
return regIsPICBase(BaseReg, MRI);
}
return false;
}
case X86::LEA32r:
case X86::LEA64r: {
if (MI.getOperand(1 + X86::AddrScaleAmt).isImm() &&
MI.getOperand(1 + X86::AddrIndexReg).isReg() &&
MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
!MI.getOperand(1 + X86::AddrDisp).isReg()) {
// lea fi#, lea GV, etc. are all rematerializable.
if (!MI.getOperand(1 + X86::AddrBaseReg).isReg())
return true;
unsigned BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
if (BaseReg == 0)
return true;
// Allow re-materialization of lea PICBase + x.
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
return regIsPICBase(BaseReg, MRI);
}
return false;
}
}
// All other instructions marked M_REMATERIALIZABLE are always trivially
// rematerializable.
return true;
}
bool X86InstrInfo::isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
MachineBasicBlock::iterator E = MBB.end();
// For compile time consideration, if we are not able to determine the
// safety after visiting 4 instructions in each direction, we will assume
// it's not safe.
MachineBasicBlock::iterator Iter = I;
for (unsigned i = 0; Iter != E && i < 4; ++i) {
bool SeenDef = false;
for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
MachineOperand &MO = Iter->getOperand(j);
if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
SeenDef = true;
if (!MO.isReg())
continue;
if (MO.getReg() == X86::EFLAGS) {
if (MO.isUse())
return false;
SeenDef = true;
}
}
if (SeenDef)
// This instruction defines EFLAGS, no need to look any further.
return true;
++Iter;
// Skip over debug instructions.
while (Iter != E && Iter->isDebugInstr())
++Iter;
}
// It is safe to clobber EFLAGS at the end of a block of no successor has it
// live in.
if (Iter == E) {
for (MachineBasicBlock *S : MBB.successors())
if (S->isLiveIn(X86::EFLAGS))
return false;
return true;
}
MachineBasicBlock::iterator B = MBB.begin();
Iter = I;
for (unsigned i = 0; i < 4; ++i) {
// If we make it to the beginning of the block, it's safe to clobber
// EFLAGS iff EFLAGS is not live-in.
if (Iter == B)
return !MBB.isLiveIn(X86::EFLAGS);
--Iter;
// Skip over debug instructions.
while (Iter != B && Iter->isDebugInstr())
--Iter;
bool SawKill = false;
for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
MachineOperand &MO = Iter->getOperand(j);
// A register mask may clobber EFLAGS, but we should still look for a
// live EFLAGS def.
if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
SawKill = true;
if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
if (MO.isDef()) return MO.isDead();
if (MO.isKill()) SawKill = true;
}
}
if (SawKill)
// This instruction kills EFLAGS and doesn't redefine it, so
// there's no need to look further.
return true;
}
// Conservative answer.
return false;
}
void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SubIdx,
const MachineInstr &Orig,
const TargetRegisterInfo &TRI) const {
bool ClobbersEFLAGS = false;
for (const MachineOperand &MO : Orig.operands()) {
if (MO.isReg() && MO.isDef() && MO.getReg() == X86::EFLAGS) {
ClobbersEFLAGS = true;
break;
}
}
if (ClobbersEFLAGS && !isSafeToClobberEFLAGS(MBB, I)) {
// The instruction clobbers EFLAGS. Re-materialize as MOV32ri to avoid side
// effects.
int Value;
switch (Orig.getOpcode()) {
case X86::MOV32r0: Value = 0; break;
case X86::MOV32r1: Value = 1; break;
case X86::MOV32r_1: Value = -1; break;
default:
llvm_unreachable("Unexpected instruction!");
}
const DebugLoc &DL = Orig.getDebugLoc();
BuildMI(MBB, I, DL, get(X86::MOV32ri))
.add(Orig.getOperand(0))
.addImm(Value);
} else {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
MBB.insert(I, MI);
}
MachineInstr &NewMI = *std::prev(I);
NewMI.substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
}
/// True if MI has a condition code def, e.g. EFLAGS, that is not marked dead.
bool X86InstrInfo::hasLiveCondCodeDef(MachineInstr &MI) const {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (MO.isReg() && MO.isDef() &&
MO.getReg() == X86::EFLAGS && !MO.isDead()) {
return true;
}
}
return false;
}
/// Check whether the shift count for a machine operand is non-zero.
inline static unsigned getTruncatedShiftCount(const MachineInstr &MI,
unsigned ShiftAmtOperandIdx) {
// The shift count is six bits with the REX.W prefix and five bits without.
unsigned ShiftCountMask = (MI.getDesc().TSFlags & X86II::REX_W) ? 63 : 31;
unsigned Imm = MI.getOperand(ShiftAmtOperandIdx).getImm();
return Imm & ShiftCountMask;
}
/// Check whether the given shift count is appropriate
/// can be represented by a LEA instruction.
inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
// Left shift instructions can be transformed into load-effective-address
// instructions if we can encode them appropriately.
// A LEA instruction utilizes a SIB byte to encode its scale factor.
// The SIB.scale field is two bits wide which means that we can encode any
// shift amount less than 4.
return ShAmt < 4 && ShAmt > 0;
}
bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
unsigned Opc, bool AllowSP, unsigned &NewSrc,
bool &isKill, MachineOperand &ImplicitOp,
LiveVariables *LV) const {
MachineFunction &MF = *MI.getParent()->getParent();
const TargetRegisterClass *RC;
if (AllowSP) {
RC = Opc != X86::LEA32r ? &X86::GR64RegClass : &X86::GR32RegClass;
} else {
RC = Opc != X86::LEA32r ?
&X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass;
}
unsigned SrcReg = Src.getReg();
// For both LEA64 and LEA32 the register already has essentially the right
// type (32-bit or 64-bit) we may just need to forbid SP.
if (Opc != X86::LEA64_32r) {
NewSrc = SrcReg;
isKill = Src.isKill();
assert(!Src.isUndef() && "Undef op doesn't need optimization");
if (TargetRegisterInfo::isVirtualRegister(NewSrc) &&
!MF.getRegInfo().constrainRegClass(NewSrc, RC))
return false;
return true;
}
// This is for an LEA64_32r and incoming registers are 32-bit. One way or
// another we need to add 64-bit registers to the final MI.
if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
ImplicitOp = Src;
ImplicitOp.setImplicit();
NewSrc = getX86SubSuperRegister(Src.getReg(), 64);
isKill = Src.isKill();
assert(!Src.isUndef() && "Undef op doesn't need optimization");
} else {
// Virtual register of the wrong class, we have to create a temporary 64-bit
// vreg to feed into the LEA.
NewSrc = MF.getRegInfo().createVirtualRegister(RC);
MachineInstr *Copy =
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY))
.addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
.add(Src);
// Which is obviously going to be dead after we're done with it.
isKill = true;
if (LV)
LV->replaceKillInstruction(SrcReg, MI, *Copy);
}
// We've set all the parameters without issue.
return true;
}
MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
unsigned MIOpc, MachineFunction::iterator &MFI, MachineInstr &MI,
LiveVariables *LV) const {
// We handle 8-bit adds and various 16-bit opcodes in the switch below.
bool Is16BitOp = !(MIOpc == X86::ADD8rr || MIOpc == X86::ADD8ri);
MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
assert((!Is16BitOp || RegInfo.getTargetRegisterInfo()->getRegSizeInBits(
*RegInfo.getRegClass(MI.getOperand(0).getReg())) == 16) &&
"Unexpected type for LEA transform");
// TODO: For a 32-bit target, we need to adjust the LEA variables with
// something like this:
// Opcode = X86::LEA32r;
// InRegLEA = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
// OutRegLEA =
// Is8BitOp ? RegInfo.createVirtualRegister(&X86::GR32ABCD_RegClass)
// : RegInfo.createVirtualRegister(&X86::GR32RegClass);
if (!Subtarget.is64Bit())
return nullptr;
unsigned Opcode = X86::LEA64_32r;
unsigned InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
unsigned OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass);
// Build and insert into an implicit UNDEF value. This is OK because
// we will be shifting and then extracting the lower 8/16-bits.
// This has the potential to cause partial register stall. e.g.
// movw (%rbp,%rcx,2), %dx
// leal -65(%rdx), %esi
// But testing has shown this *does* help performance in 64-bit mode (at
// least on modern x86 machines).
MachineBasicBlock::iterator MBBI = MI.getIterator();
unsigned Dest = MI.getOperand(0).getReg();
unsigned Src = MI.getOperand(1).getReg();
bool IsDead = MI.getOperand(0).isDead();
bool IsKill = MI.getOperand(1).isKill();
unsigned SubReg = Is16BitOp ? X86::sub_16bit : X86::sub_8bit;
assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization");
BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA);
MachineInstr *InsMI =
BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
.addReg(InRegLEA, RegState::Define, SubReg)
.addReg(Src, getKillRegState(IsKill));
MachineInstrBuilder MIB =
BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(Opcode), OutRegLEA);
switch (MIOpc) {
default: llvm_unreachable("Unreachable!");
case X86::SHL16ri: {
unsigned ShAmt = MI.getOperand(2).getImm();
MIB.addReg(0).addImm(1ULL << ShAmt)
.addReg(InRegLEA, RegState::Kill).addImm(0).addReg(0);
break;
}
case X86::INC16r:
addRegOffset(MIB, InRegLEA, true, 1);
break;
case X86::DEC16r:
addRegOffset(MIB, InRegLEA, true, -1);
break;
case X86::ADD8ri:
case X86::ADD16ri:
case X86::ADD16ri8:
case X86::ADD16ri_DB:
case X86::ADD16ri8_DB:
addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm());
break;
case X86::ADD8rr:
case X86::ADD16rr:
case X86::ADD16rr_DB: {
unsigned Src2 = MI.getOperand(2).getReg();
bool IsKill2 = MI.getOperand(2).isKill();
assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need optimization");
unsigned InRegLEA2 = 0;
MachineInstr *InsMI2 = nullptr;
if (Src == Src2) {
// ADD8rr/ADD16rr killed %reg1028, %reg1028
// just a single insert_subreg.
addRegReg(MIB, InRegLEA, true, InRegLEA, false);
} else {
if (Subtarget.is64Bit())
InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
else
InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
// Build and insert into an implicit UNDEF value. This is OK because
// we will be shifting and then extracting the lower 8/16-bits.
BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA2);
InsMI2 = BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY))
.addReg(InRegLEA2, RegState::Define, SubReg)
.addReg(Src2, getKillRegState(IsKill2));
addRegReg(MIB, InRegLEA, true, InRegLEA2, true);
}
if (LV && IsKill2 && InsMI2)
LV->replaceKillInstruction(Src2, MI, *InsMI2);
break;
}
}
MachineInstr *NewMI = MIB;
MachineInstr *ExtMI =
BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
.addReg(Dest, RegState::Define | getDeadRegState(IsDead))
.addReg(OutRegLEA, RegState::Kill, SubReg);
if (LV) {
// Update live variables.
LV->getVarInfo(InRegLEA).Kills.push_back(NewMI);
LV->getVarInfo(OutRegLEA).Kills.push_back(ExtMI);
if (IsKill)
LV->replaceKillInstruction(Src, MI, *InsMI);
if (IsDead)
LV->replaceKillInstruction(Dest, MI, *ExtMI);
}
return ExtMI;
}
/// This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
/// may be able to convert a two-address instruction into a true
/// three-address instruction on demand. This allows the X86 target (for
/// example) to convert ADD and SHL instructions into LEA instructions if they
/// would require register copies due to two-addressness.
///
/// This method returns a null pointer if the transformation cannot be
/// performed, otherwise it returns the new instruction.
///
MachineInstr *
X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineInstr &MI, LiveVariables *LV) const {
// The following opcodes also sets the condition code register(s). Only
// convert them to equivalent lea if the condition code register def's
// are dead!
if (hasLiveCondCodeDef(MI))
return nullptr;
MachineFunction &MF = *MI.getParent()->getParent();
// All instructions input are two-addr instructions. Get the known operands.
const MachineOperand &Dest = MI.getOperand(0);
const MachineOperand &Src = MI.getOperand(1);
// Ideally, operations with undef should be folded before we get here, but we
// can't guarantee it. Bail out because optimizing undefs is a waste of time.
// Without this, we have to forward undef state to new register operands to
// avoid machine verifier errors.
if (Src.isUndef())
return nullptr;
if (MI.getNumOperands() > 2)
if (MI.getOperand(2).isReg() && MI.getOperand(2).isUndef())
return nullptr;
MachineInstr *NewMI = nullptr;
bool Is64Bit = Subtarget.is64Bit();
unsigned MIOpc = MI.getOpcode();
switch (MIOpc) {
default: return nullptr;
case X86::SHL64ri: {
assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
// LEA can't handle RSP.
if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
!MF.getRegInfo().constrainRegClass(Src.getReg(),
&X86::GR64_NOSPRegClass))
return nullptr;
NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r))
.add(Dest)
.addReg(0)
.addImm(1ULL << ShAmt)
.add(Src)
.addImm(0)
.addReg(0);
break;
}
case X86::SHL32ri: {
assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
// LEA can't handle ESP.
bool isKill;
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
SrcReg, isKill, ImplicitOp, LV))
return nullptr;
MachineInstrBuilder MIB =
BuildMI(MF, MI.getDebugLoc(), get(Opc))
.add(Dest)
.addReg(0)
.addImm(1ULL << ShAmt)
.addReg(SrcReg, getKillRegState(isKill))
.addImm(0)
.addReg(0);
if (ImplicitOp.getReg() != 0)
MIB.add(ImplicitOp);
NewMI = MIB;
break;
}
case X86::SHL16ri: {
assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
if (!isTruncatedShiftCountForLEA(ShAmt))
return nullptr;
return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
}
case X86::INC64r:
case X86::INC32r: {
assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r :
(Is64Bit ? X86::LEA64_32r : X86::LEA32r);
bool isKill;
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill,
ImplicitOp, LV))
return nullptr;
MachineInstrBuilder MIB =
BuildMI(MF, MI.getDebugLoc(), get(Opc))
.add(Dest)
.addReg(SrcReg, getKillRegState(isKill));
if (ImplicitOp.getReg() != 0)
MIB.add(ImplicitOp);
NewMI = addOffset(MIB, 1);
break;
}
case X86::INC16r:
return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
case X86::DEC64r:
case X86::DEC32r: {
assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
: (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
bool isKill;
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill,
ImplicitOp, LV))
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
.add(Dest)
.addReg(SrcReg, getKillRegState(isKill));
if (ImplicitOp.getReg() != 0)
MIB.add(ImplicitOp);
NewMI = addOffset(MIB, -1);
break;
}
case X86::DEC16r:
return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
case X86::ADD64rr:
case X86::ADD64rr_DB:
case X86::ADD32rr:
case X86::ADD32rr_DB: {
assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Opc;
if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB)
Opc = X86::LEA64r;
else
Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
bool isKill;
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
SrcReg, isKill, ImplicitOp, LV))
return nullptr;
const MachineOperand &Src2 = MI.getOperand(2);
bool isKill2;
unsigned SrcReg2;
MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false,
SrcReg2, isKill2, ImplicitOp2, LV))
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)).add(Dest);
if (ImplicitOp.getReg() != 0)
MIB.add(ImplicitOp);
if (ImplicitOp2.getReg() != 0)
MIB.add(ImplicitOp2);
NewMI = addRegReg(MIB, SrcReg, isKill, SrcReg2, isKill2);
if (LV && Src2.isKill())
LV->replaceKillInstruction(SrcReg2, MI, *NewMI);
break;
}
case X86::ADD8rr:
case X86::ADD16rr:
case X86::ADD16rr_DB:
return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
case X86::ADD64ri32:
case X86::ADD64ri8:
case X86::ADD64ri32_DB:
case X86::ADD64ri8_DB:
assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
NewMI = addOffset(
BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src),
MI.getOperand(2));
break;
case X86::ADD32ri:
case X86::ADD32ri8:
case X86::ADD32ri_DB:
case X86::ADD32ri8_DB: {
assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
bool isKill;
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
SrcReg, isKill, ImplicitOp, LV))
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
.add(Dest)
.addReg(SrcReg, getKillRegState(isKill));
if (ImplicitOp.getReg() != 0)
MIB.add(ImplicitOp);
NewMI = addOffset(MIB, MI.getOperand(2));
break;
}
case X86::ADD8ri:
case X86::ADD16ri:
case X86::ADD16ri8:
case X86::ADD16ri_DB:
case X86::ADD16ri8_DB:
return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
case X86::VMOVDQU8Z128rmk:
case X86::VMOVDQU8Z256rmk:
case X86::VMOVDQU8Zrmk:
case X86::VMOVDQU16Z128rmk:
case X86::VMOVDQU16Z256rmk:
case X86::VMOVDQU16Zrmk:
case X86::VMOVDQU32Z128rmk: case X86::VMOVDQA32Z128rmk:
case X86::VMOVDQU32Z256rmk: case X86::VMOVDQA32Z256rmk:
case X86::VMOVDQU32Zrmk: case X86::VMOVDQA32Zrmk:
case X86::VMOVDQU64Z128rmk: case X86::VMOVDQA64Z128rmk:
case X86::VMOVDQU64Z256rmk: case X86::VMOVDQA64Z256rmk:
case X86::VMOVDQU64Zrmk: case X86::VMOVDQA64Zrmk:
case X86::VMOVUPDZ128rmk: case X86::VMOVAPDZ128rmk:
case X86::VMOVUPDZ256rmk: case X86::VMOVAPDZ256rmk:
case X86::VMOVUPDZrmk: case X86::VMOVAPDZrmk:
case X86::VMOVUPSZ128rmk: case X86::VMOVAPSZ128rmk:
case X86::VMOVUPSZ256rmk: case X86::VMOVAPSZ256rmk:
case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk: {
unsigned Opc;
switch (MIOpc) {
default: llvm_unreachable("Unreachable!");
case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break;
case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break;
case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break;
case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break;
case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break;
case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break;
case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
}
NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
.add(Dest)
.add(MI.getOperand(2))
.add(Src)
.add(MI.getOperand(3))
.add(MI.getOperand(4))
.add(MI.getOperand(5))
.add(MI.getOperand(6))
.add(MI.getOperand(7));
break;
}
case X86::VMOVDQU8Z128rrk:
case X86::VMOVDQU8Z256rrk:
case X86::VMOVDQU8Zrrk:
case X86::VMOVDQU16Z128rrk:
case X86::VMOVDQU16Z256rrk:
case X86::VMOVDQU16Zrrk:
case X86::VMOVDQU32Z128rrk: case X86::VMOVDQA32Z128rrk:
case X86::VMOVDQU32Z256rrk: case X86::VMOVDQA32Z256rrk:
case X86::VMOVDQU32Zrrk: case X86::VMOVDQA32Zrrk:
case X86::VMOVDQU64Z128rrk: case X86::VMOVDQA64Z128rrk:
case X86::VMOVDQU64Z256rrk: case X86::VMOVDQA64Z256rrk:
case X86::VMOVDQU64Zrrk: case X86::VMOVDQA64Zrrk:
case X86::VMOVUPDZ128rrk: case X86::VMOVAPDZ128rrk:
case X86::VMOVUPDZ256rrk: case X86::VMOVAPDZ256rrk:
case X86::VMOVUPDZrrk: case X86::VMOVAPDZrrk:
case X86::VMOVUPSZ128rrk: case X86::VMOVAPSZ128rrk:
case X86::VMOVUPSZ256rrk: case X86::VMOVAPSZ256rrk:
case X86::VMOVUPSZrrk: case X86::VMOVAPSZrrk: {
unsigned Opc;
switch (MIOpc) {
default: llvm_unreachable("Unreachable!");
case X86::VMOVDQU8Z128rrk: Opc = X86::VPBLENDMBZ128rrk; break;
case X86::VMOVDQU8Z256rrk: Opc = X86::VPBLENDMBZ256rrk; break;
case X86::VMOVDQU8Zrrk: Opc = X86::VPBLENDMBZrrk; break;
case X86::VMOVDQU16Z128rrk: Opc = X86::VPBLENDMWZ128rrk; break;
case X86::VMOVDQU16Z256rrk: Opc = X86::VPBLENDMWZ256rrk; break;
case X86::VMOVDQU16Zrrk: Opc = X86::VPBLENDMWZrrk; break;
case X86::VMOVDQU32Z128rrk: Opc = X86::VPBLENDMDZ128rrk; break;
case X86::VMOVDQU32Z256rrk: Opc = X86::VPBLENDMDZ256rrk; break;
case X86::VMOVDQU32Zrrk: Opc = X86::VPBLENDMDZrrk; break;
case X86::VMOVDQU64Z128rrk: Opc = X86::VPBLENDMQZ128rrk; break;
case X86::VMOVDQU64Z256rrk: Opc = X86::VPBLENDMQZ256rrk; break;
case X86::VMOVDQU64Zrrk: Opc = X86::VPBLENDMQZrrk; break;
case X86::VMOVUPDZ128rrk: Opc = X86::VBLENDMPDZ128rrk; break;
case X86::VMOVUPDZ256rrk: Opc = X86::VBLENDMPDZ256rrk; break;
case X86::VMOVUPDZrrk: Opc = X86::VBLENDMPDZrrk; break;
case X86::VMOVUPSZ128rrk: Opc = X86::VBLENDMPSZ128rrk; break;
case X86::VMOVUPSZ256rrk: Opc = X86::VBLENDMPSZ256rrk; break;
case X86::VMOVUPSZrrk: Opc = X86::VBLENDMPSZrrk; break;
case X86::VMOVDQA32Z128rrk: Opc = X86::VPBLENDMDZ128rrk; break;
case X86::VMOVDQA32Z256rrk: Opc = X86::VPBLENDMDZ256rrk; break;
case X86::VMOVDQA32Zrrk: Opc = X86::VPBLENDMDZrrk; break;
case X86::VMOVDQA64Z128rrk: Opc = X86::VPBLENDMQZ128rrk; break;
case X86::VMOVDQA64Z256rrk: Opc = X86::VPBLENDMQZ256rrk; break;
case X86::VMOVDQA64Zrrk: Opc = X86::VPBLENDMQZrrk; break;
case X86::VMOVAPDZ128rrk: Opc = X86::VBLENDMPDZ128rrk; break;
case X86::VMOVAPDZ256rrk: Opc = X86::VBLENDMPDZ256rrk; break;
case X86::VMOVAPDZrrk: Opc = X86::VBLENDMPDZrrk; break;
case X86::VMOVAPSZ128rrk: Opc = X86::VBLENDMPSZ128rrk; break;
case X86::VMOVAPSZ256rrk: Opc = X86::VBLENDMPSZ256rrk; break;
case X86::VMOVAPSZrrk: Opc = X86::VBLENDMPSZrrk; break;
}
NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
.add(Dest)
.add(MI.getOperand(2))
.add(Src)
.add(MI.getOperand(3));
break;
}
}
if (!NewMI) return nullptr;
if (LV) { // Update live variables
if (Src.isKill())
LV->replaceKillInstruction(Src.getReg(), MI, *NewMI);
if (Dest.isDead())
LV->replaceKillInstruction(Dest.getReg(), MI, *NewMI);
}
MFI->insert(MI.getIterator(), NewMI); // Insert the new inst
return NewMI;
}
/// This determines which of three possible cases of a three source commute
/// the source indexes correspond to taking into account any mask operands.
/// All prevents commuting a passthru operand. Returns -1 if the commute isn't
/// possible.
/// Case 0 - Possible to commute the first and second operands.
/// Case 1 - Possible to commute the first and third operands.
/// Case 2 - Possible to commute the second and third operands.
static unsigned getThreeSrcCommuteCase(uint64_t TSFlags, unsigned SrcOpIdx1,
unsigned SrcOpIdx2) {
// Put the lowest index to SrcOpIdx1 to simplify the checks below.
if (SrcOpIdx1 > SrcOpIdx2)
std::swap(SrcOpIdx1, SrcOpIdx2);
unsigned Op1 = 1, Op2 = 2, Op3 = 3;
if (X86II::isKMasked(TSFlags)) {
Op2++;
Op3++;
}
if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op2)
return 0;
if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op3)
return 1;
if (SrcOpIdx1 == Op2 && SrcOpIdx2 == Op3)
return 2;
llvm_unreachable("Unknown three src commute case.");
}
unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(
const MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2,
const X86InstrFMA3Group &FMA3Group) const {
unsigned Opc = MI.getOpcode();
// TODO: Commuting the 1st operand of FMA*_Int requires some additional
// analysis. The commute optimization is legal only if all users of FMA*_Int
// use only the lowest element of the FMA*_Int instruction. Such analysis are
// not implemented yet. So, just return 0 in that case.
// When such analysis are available this place will be the right place for
// calling it.
assert(!(FMA3Group.isIntrinsic() && (SrcOpIdx1 == 1 || SrcOpIdx2 == 1)) &&
"Intrinsic instructions can't commute operand 1");
// Determine which case this commute is or if it can't be done.
unsigned Case = getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1,
SrcOpIdx2);
assert(Case < 3 && "Unexpected case number!");
// Define the FMA forms mapping array that helps to map input FMA form
// to output FMA form to preserve the operation semantics after
// commuting the operands.
const unsigned Form132Index = 0;
const unsigned Form213Index = 1;
const unsigned Form231Index = 2;
static const unsigned FormMapping[][3] = {
// 0: SrcOpIdx1 == 1 && SrcOpIdx2 == 2;
// FMA132 A, C, b; ==> FMA231 C, A, b;
// FMA213 B, A, c; ==> FMA213 A, B, c;
// FMA231 C, A, b; ==> FMA132 A, C, b;
{ Form231Index, Form213Index, Form132Index },
// 1: SrcOpIdx1 == 1 && SrcOpIdx2 == 3;
// FMA132 A, c, B; ==> FMA132 B, c, A;
// FMA213 B, a, C; ==> FMA231 C, a, B;
// FMA231 C, a, B; ==> FMA213 B, a, C;
{ Form132Index, Form231Index, Form213Index },
// 2: SrcOpIdx1 == 2 && SrcOpIdx2 == 3;
// FMA132 a, C, B; ==> FMA213 a, B, C;
// FMA213 b, A, C; ==> FMA132 b, C, A;
// FMA231 c, A, B; ==> FMA231 c, B, A;
{ Form213Index, Form132Index, Form231Index }
};
unsigned FMAForms[3];
FMAForms[0] = FMA3Group.get132Opcode();
FMAForms[1] = FMA3Group.get213Opcode();
FMAForms[2] = FMA3Group.get231Opcode();
unsigned FormIndex;
for (FormIndex = 0; FormIndex < 3; FormIndex++)
if (Opc == FMAForms[FormIndex])
break;
// Everything is ready, just adjust the FMA opcode and return it.
FormIndex = FormMapping[Case][FormIndex];
return FMAForms[FormIndex];
}
static void commuteVPTERNLOG(MachineInstr &MI, unsigned SrcOpIdx1,
unsigned SrcOpIdx2) {
// Determine which case this commute is or if it can't be done.
unsigned Case = getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1,
SrcOpIdx2);
assert(Case < 3 && "Unexpected case value!");
// For each case we need to swap two pairs of bits in the final immediate.
static const uint8_t SwapMasks[3][4] = {
{ 0x04, 0x10, 0x08, 0x20 }, // Swap bits 2/4 and 3/5.
{ 0x02, 0x10, 0x08, 0x40 }, // Swap bits 1/4 and 3/6.
{ 0x02, 0x04, 0x20, 0x40 }, // Swap bits 1/2 and 5/6.
};
uint8_t Imm = MI.getOperand(MI.getNumOperands()-1).getImm();
// Clear out the bits we are swapping.
uint8_t NewImm = Imm & ~(SwapMasks[Case][0] | SwapMasks[Case][1] |
SwapMasks[Case][2] | SwapMasks[Case][3]);
// If the immediate had a bit of the pair set, then set the opposite bit.
if (Imm & SwapMasks[Case][0]) NewImm |= SwapMasks[Case][1];
if (Imm & SwapMasks[Case][1]) NewImm |= SwapMasks[Case][0];
if (Imm & SwapMasks[Case][2]) NewImm |= SwapMasks[Case][3];
if (Imm & SwapMasks[Case][3]) NewImm |= SwapMasks[Case][2];
MI.getOperand(MI.getNumOperands()-1).setImm(NewImm);
}
// Returns true if this is a VPERMI2 or VPERMT2 instruction that can be
// commuted.
static bool isCommutableVPERMV3Instruction(unsigned Opcode) {
#define VPERM_CASES(Suffix) \
case X86::VPERMI2##Suffix##128rr: case X86::VPERMT2##Suffix##128rr: \
case X86::VPERMI2##Suffix##256rr: case X86::VPERMT2##Suffix##256rr: \
case X86::VPERMI2##Suffix##rr: case X86::VPERMT2##Suffix##rr: \
case X86::VPERMI2##Suffix##128rm: case X86::VPERMT2##Suffix##128rm: \
case X86::VPERMI2##Suffix##256rm: case X86::VPERMT2##Suffix##256rm: \
case X86::VPERMI2##Suffix##rm: case X86::VPERMT2##Suffix##rm: \
case X86::VPERMI2##Suffix##128rrkz: case X86::VPERMT2##Suffix##128rrkz: \
case X86::VPERMI2##Suffix##256rrkz: case X86::VPERMT2##Suffix##256rrkz: \
case X86::VPERMI2##Suffix##rrkz: case X86::VPERMT2##Suffix##rrkz: \
case X86::VPERMI2##Suffix##128rmkz: case X86::VPERMT2##Suffix##128rmkz: \
case X86::VPERMI2##Suffix##256rmkz: case X86::VPERMT2##Suffix##256rmkz: \
case X86::VPERMI2##Suffix##rmkz: case X86::VPERMT2##Suffix##rmkz:
#define VPERM_CASES_BROADCAST(Suffix) \
VPERM_CASES(Suffix) \
case X86::VPERMI2##Suffix##128rmb: case X86::VPERMT2##Suffix##128rmb: \
case X86::VPERMI2##Suffix##256rmb: case X86::VPERMT2##Suffix##256rmb: \
case X86::VPERMI2##Suffix##rmb: case X86::VPERMT2##Suffix##rmb: \
case X86::VPERMI2##Suffix##128rmbkz: case X86::VPERMT2##Suffix##128rmbkz: \
case X86::VPERMI2##Suffix##256rmbkz: case X86::VPERMT2##Suffix##256rmbkz: \
case X86::VPERMI2##Suffix##rmbkz: case X86::VPERMT2##Suffix##rmbkz:
switch (Opcode) {
default: return false;
VPERM_CASES(B)
VPERM_CASES_BROADCAST(D)
VPERM_CASES_BROADCAST(PD)
VPERM_CASES_BROADCAST(PS)
VPERM_CASES_BROADCAST(Q)
VPERM_CASES(W)
return true;
}
#undef VPERM_CASES_BROADCAST
#undef VPERM_CASES
}
// Returns commuted opcode for VPERMI2 and VPERMT2 instructions by switching
// from the I opcode to the T opcode and vice versa.
static unsigned getCommutedVPERMV3Opcode(unsigned Opcode) {
#define VPERM_CASES(Orig, New) \
case X86::Orig##128rr: return X86::New##128rr; \
case X86::Orig##128rrkz: return X86::New##128rrkz; \
case X86::Orig##128rm: return X86::New##128rm; \
case X86::Orig##128rmkz: return X86::New##128rmkz; \
case X86::Orig##256rr: return X86::New##256rr; \
case X86::Orig##256rrkz: return X86::New##256rrkz; \
case X86::Orig##256rm: return X86::New##256rm; \
case X86::Orig##256rmkz: return X86::New##256rmkz; \
case X86::Orig##rr: return X86::New##rr; \
case X86::Orig##rrkz: return X86::New##rrkz; \
case X86::Orig##rm: return X86::New##rm; \
case X86::Orig##rmkz: return X86::New##rmkz;
#define VPERM_CASES_BROADCAST(Orig, New) \
VPERM_CASES(Orig, New) \
case X86::Orig##128rmb: return X86::New##128rmb; \
case X86::Orig##128rmbkz: return X86::New##128rmbkz; \
case X86::Orig##256rmb: return X86::New##256rmb; \
case X86::Orig##256rmbkz: return X86::New##256rmbkz; \
case X86::Orig##rmb: return X86::New##rmb; \
case X86::Orig##rmbkz: return X86::New##rmbkz;
switch (Opcode) {
VPERM_CASES(VPERMI2B, VPERMT2B)
VPERM_CASES_BROADCAST(VPERMI2D, VPERMT2D)
VPERM_CASES_BROADCAST(VPERMI2PD, VPERMT2PD)
VPERM_CASES_BROADCAST(VPERMI2PS, VPERMT2PS)
VPERM_CASES_BROADCAST(VPERMI2Q, VPERMT2Q)
VPERM_CASES(VPERMI2W, VPERMT2W)
VPERM_CASES(VPERMT2B, VPERMI2B)
VPERM_CASES_BROADCAST(VPERMT2D, VPERMI2D)
VPERM_CASES_BROADCAST(VPERMT2PD, VPERMI2PD)
VPERM_CASES_BROADCAST(VPERMT2PS, VPERMI2PS)
VPERM_CASES_BROADCAST(VPERMT2Q, VPERMI2Q)
VPERM_CASES(VPERMT2W, VPERMI2W)
}
llvm_unreachable("Unreachable!");
#undef VPERM_CASES_BROADCAST
#undef VPERM_CASES
}
MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
unsigned OpIdx1,
unsigned OpIdx2) const {
auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {
if (NewMI)
return *MI.getParent()->getParent()->CloneMachineInstr(&MI);
return MI;
};
switch (MI.getOpcode()) {
case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I)
case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I)
unsigned Opc;
unsigned Size;
switch (MI.getOpcode()) {
default: llvm_unreachable("Unreachable!");
case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break;
case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break;
case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break;
}
unsigned Amt = MI.getOperand(3).getImm();
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.setDesc(get(Opc));
WorkingMI.getOperand(3).setImm(Size - Amt);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
case X86::PFSUBrr:
case X86::PFSUBRrr: {
// PFSUB x, y: x = x - y
// PFSUBR x, y: x = y - x
unsigned Opc =
(X86::PFSUBRrr == MI.getOpcode() ? X86::PFSUBrr : X86::PFSUBRrr);
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.setDesc(get(Opc));
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
case X86::BLENDPDrri:
case X86::BLENDPSrri:
case X86::VBLENDPDrri:
case X86::VBLENDPSrri:
// If we're optimizing for size, try to use MOVSD/MOVSS.
if (MI.getParent()->getParent()->getFunction().optForSize()) {
unsigned Mask, Opc;
switch (MI.getOpcode()) {
default: llvm_unreachable("Unreachable!");
case X86::BLENDPDrri: Opc = X86::MOVSDrr; Mask = 0x03; break;
case X86::BLENDPSrri: Opc = X86::MOVSSrr; Mask = 0x0F; break;
case X86::VBLENDPDrri: Opc = X86::VMOVSDrr; Mask = 0x03; break;
case X86::VBLENDPSrri: Opc = X86::VMOVSSrr; Mask = 0x0F; break;
}
if ((MI.getOperand(3).getImm() ^ Mask) == 1) {
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.setDesc(get(Opc));
WorkingMI.RemoveOperand(3);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI,
/*NewMI=*/false,
OpIdx1, OpIdx2);
}
}
LLVM_FALLTHROUGH;
case X86::PBLENDWrri:
case X86::VBLENDPDYrri:
case X86::VBLENDPSYrri:
case X86::VPBLENDDrri:
case X86::VPBLENDWrri:
case X86::VPBLENDDYrri:
case X86::VPBLENDWYrri:{
unsigned Mask;
switch (MI.getOpcode()) {
default: llvm_unreachable("Unreachable!");
case X86::BLENDPDrri: Mask = 0x03; break;
case X86::BLENDPSrri: Mask = 0x0F; break;
case X86::PBLENDWrri: Mask = 0xFF; break;
case X86::VBLENDPDrri: Mask = 0x03; break;
case X86::VBLENDPSrri: Mask = 0x0F; break;
case X86::VBLENDPDYrri: Mask = 0x0F; break;
case X86::VBLENDPSYrri: Mask = 0xFF; break;
case X86::VPBLENDDrri: Mask = 0x0F; break;
case X86::VPBLENDWrri: Mask = 0xFF; break;
case X86::VPBLENDDYrri: Mask = 0xFF; break;
case X86::VPBLENDWYrri: Mask = 0xFF; break;
}
// Only the least significant bits of Imm are used.
unsigned Imm = MI.getOperand(3).getImm() & Mask;
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.getOperand(3).setImm(Mask ^ Imm);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
case X86::MOVSDrr:
case X86::MOVSSrr:
case X86::VMOVSDrr:
case X86::VMOVSSrr:{
// On SSE41 or later we can commute a MOVSS/MOVSD to a BLENDPS/BLENDPD.
assert(Subtarget.hasSSE41() && "Commuting MOVSD/MOVSS requires SSE41!");
unsigned Mask, Opc;
switch (MI.getOpcode()) {
default: llvm_unreachable("Unreachable!");
case X86::MOVSDrr: Opc = X86::BLENDPDrri; Mask = 0x02; break;
case X86::MOVSSrr: Opc = X86::BLENDPSrri; Mask = 0x0E; break;
case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
}
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.setDesc(get(Opc));
WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
case X86::PCLMULQDQrr:
case X86::VPCLMULQDQrr:
case X86::VPCLMULQDQYrr:
case X86::VPCLMULQDQZrr:
case X86::VPCLMULQDQZ128rr:
case X86::VPCLMULQDQZ256rr: {
// SRC1 64bits = Imm[0] ? SRC1[127:64] : SRC1[63:0]
// SRC2 64bits = Imm[4] ? SRC2[127:64] : SRC2[63:0]
unsigned Imm = MI.getOperand(3).getImm();
unsigned Src1Hi = Imm & 0x01;
unsigned Src2Hi = Imm & 0x10;
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.getOperand(3).setImm((Src1Hi << 4) | (Src2Hi >> 4));
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
case X86::VPCMPBZ128rri: case X86::VPCMPUBZ128rri:
case X86::VPCMPBZ256rri: case X86::VPCMPUBZ256rri:
case X86::VPCMPBZrri: case X86::VPCMPUBZrri:
case X86::VPCMPDZ128rri: case X86::VPCMPUDZ128rri:
case X86::VPCMPDZ256rri: case X86::VPCMPUDZ256rri:
case X86::VPCMPDZrri: case X86::VPCMPUDZrri:
case X86::VPCMPQZ128rri: case X86::VPCMPUQZ128rri:
case X86::VPCMPQZ256rri: case X86::VPCMPUQZ256rri:
case X86::VPCMPQZrri: case X86::VPCMPUQZrri:
case X86::VPCMPWZ128rri: case X86::VPCMPUWZ128rri:
case X86::VPCMPWZ256rri: case X86::VPCMPUWZ256rri:
case X86::VPCMPWZrri: case X86::VPCMPUWZrri:
case X86::VPCMPBZ128rrik: case X86::VPCMPUBZ128rrik:
case X86::VPCMPBZ256rrik: case X86::VPCMPUBZ256rrik:
case X86::VPCMPBZrrik: case X86::VPCMPUBZrrik:
case X86::VPCMPDZ128rrik: case X86::VPCMPUDZ128rrik:
case X86::VPCMPDZ256rrik: case X86::VPCMPUDZ256rrik:
case X86::VPCMPDZrrik: case X86::VPCMPUDZrrik:
case X86::VPCMPQZ128rrik: case X86::VPCMPUQZ128rrik:
case X86::VPCMPQZ256rrik: case X86::VPCMPUQZ256rrik:
case X86::VPCMPQZrrik: case X86::VPCMPUQZrrik:
case X86::VPCMPWZ128rrik: case X86::VPCMPUWZ128rrik:
case X86::VPCMPWZ256rrik: case X86::VPCMPUWZ256rrik:
case X86::VPCMPWZrrik: case X86::VPCMPUWZrrik: {
// Flip comparison mode immediate (if necessary).
unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x7;
Imm = X86::getSwappedVPCMPImm(Imm);
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
case X86::VPCOMBri: case X86::VPCOMUBri:
case X86::VPCOMDri: case X86::VPCOMUDri:
case X86::VPCOMQri: case X86::VPCOMUQri:
case X86::VPCOMWri: case X86::VPCOMUWri: {
// Flip comparison mode immediate (if necessary).
unsigned Imm = MI.getOperand(3).getImm() & 0x7;
Imm = X86::getSwappedVPCOMImm(Imm);
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.getOperand(3).setImm(Imm);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
case X86::VPERM2F128rr:
case X86::VPERM2I128rr: {
// Flip permute source immediate.
// Imm & 0x02: lo = if set, select Op1.lo/hi else Op0.lo/hi.
// Imm & 0x20: hi = if set, select Op1.lo/hi else Op0.lo/hi.
unsigned Imm = MI.getOperand(3).getImm() & 0xFF;
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.getOperand(3).setImm(Imm ^ 0x22);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
case X86::MOVHLPSrr:
case X86::UNPCKHPDrr:
case X86::VMOVHLPSrr:
case X86::VUNPCKHPDrr:
case X86::VMOVHLPSZrr:
case X86::VUNPCKHPDZ128rr: {
assert(Subtarget.hasSSE2() && "Commuting MOVHLP/UNPCKHPD requires SSE2!");
unsigned Opc = MI.getOpcode();
switch (Opc) {
default: llvm_unreachable("Unreachable!");
case X86::MOVHLPSrr: Opc = X86::UNPCKHPDrr; break;
case X86::UNPCKHPDrr: Opc = X86::MOVHLPSrr; break;
case X86::VMOVHLPSrr: Opc = X86::VUNPCKHPDrr; break;
case X86::VUNPCKHPDrr: Opc = X86::VMOVHLPSrr; break;
case X86::VMOVHLPSZrr: Opc = X86::VUNPCKHPDZ128rr; break;
case X86::VUNPCKHPDZ128rr: Opc = X86::VMOVHLPSZrr; break;
}
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.setDesc(get(Opc));
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr:
case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr:
case X86::CMOVBE16rr: case X86::CMOVBE32rr: case X86::CMOVBE64rr:
case X86::CMOVA16rr: case X86::CMOVA32rr: case X86::CMOVA64rr:
case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr:
case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr:
case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr:
case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr:
case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr:
case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr:
case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr:
case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr:
case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr:
case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr: {
unsigned Opc;
switch (MI.getOpcode()) {
default: llvm_unreachable("Unreachable!");
case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break;
case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break;
case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break;
case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break;
case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break;
case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break;
case X86::CMOVE16rr: Opc = X86::CMOVNE16rr; break;
case X86::CMOVE32rr: Opc = X86::CMOVNE32rr; break;
case X86::CMOVE64rr: Opc = X86::CMOVNE64rr; break;
case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break;
case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break;
case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break;
case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break;
case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break;
case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break;
case X86::CMOVA16rr: Opc = X86::CMOVBE16rr; break;
case X86::CMOVA32rr: Opc = X86::CMOVBE32rr; break;
case X86::CMOVA64rr: Opc = X86::CMOVBE64rr; break;
case X86::CMOVL16rr: Opc = X86::CMOVGE16rr; break;
case X86::CMOVL32rr: Opc = X86::CMOVGE32rr; break;
case X86::CMOVL64rr: Opc = X86::CMOVGE64rr; break;
case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break;
case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break;
case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break;
case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break;
case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break;
case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break;
case X86::CMOVG16rr: Opc = X86::CMOVLE16rr; break;
case X86::CMOVG32rr: Opc = X86::CMOVLE32rr; break;
case X86::CMOVG64rr: Opc = X86::CMOVLE64rr; break;
case X86::CMOVS16rr: Opc = X86::CMOVNS16rr; break;
case X86::CMOVS32rr: Opc = X86::CMOVNS32rr; break;
case X86::CMOVS64rr: Opc = X86::CMOVNS64rr; break;
case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break;
case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break;
case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break;
case X86::CMOVP16rr: Opc = X86::CMOVNP16rr; break;
case X86::CMOVP32rr: Opc = X86::CMOVNP32rr; break;
case X86::CMOVP64rr: Opc = X86::CMOVNP64rr; break;
case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break;
case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break;
case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break;
case X86::CMOVO16rr: Opc = X86::CMOVNO16rr; break;
case X86::CMOVO32rr: Opc = X86::CMOVNO32rr; break;
case X86::CMOVO64rr: Opc = X86::CMOVNO64rr; break;
case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break;
case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break;
case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break;
}
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.setDesc(get(Opc));
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
case X86::VPTERNLOGDZrri: case X86::VPTERNLOGDZrmi:
case X86::VPTERNLOGDZ128rri: case X86::VPTERNLOGDZ128rmi:
case X86::VPTERNLOGDZ256rri: case X86::VPTERNLOGDZ256rmi:
case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi:
case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi:
case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi:
case X86::VPTERNLOGDZrrik:
case X86::VPTERNLOGDZ128rrik:
case X86::VPTERNLOGDZ256rrik:
case X86::VPTERNLOGQZrrik:
case X86::VPTERNLOGQZ128rrik:
case X86::VPTERNLOGQZ256rrik:
case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz:
case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz:
case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz:
case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz:
case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz:
case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz:
case X86::VPTERNLOGDZ128rmbi:
case X86::VPTERNLOGDZ256rmbi:
case X86::VPTERNLOGDZrmbi:
case X86::VPTERNLOGQZ128rmbi:
case X86::VPTERNLOGQZ256rmbi:
case X86::VPTERNLOGQZrmbi:
case X86::VPTERNLOGDZ128rmbikz:
case X86::VPTERNLOGDZ256rmbikz:
case X86::VPTERNLOGDZrmbikz:
case X86::VPTERNLOGQZ128rmbikz:
case X86::VPTERNLOGQZ256rmbikz:
case X86::VPTERNLOGQZrmbikz: {
auto &WorkingMI = cloneIfNew(MI);
commuteVPTERNLOG(WorkingMI, OpIdx1, OpIdx2);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
default: {
if (isCommutableVPERMV3Instruction(MI.getOpcode())) {
unsigned Opc = getCommutedVPERMV3Opcode(MI.getOpcode());
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.setDesc(get(Opc));
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode(),
MI.getDesc().TSFlags);
if (FMA3Group) {
unsigned Opc =
getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2, *FMA3Group);
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.setDesc(get(Opc));
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
}
}
bool
X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2,
bool IsIntrinsic) const {
uint64_t TSFlags = MI.getDesc().TSFlags;
unsigned FirstCommutableVecOp = 1;
unsigned LastCommutableVecOp = 3;
unsigned KMaskOp = -1U;
if (X86II::isKMasked(TSFlags)) {
// For k-zero-masked operations it is Ok to commute the first vector
// operand.
// For regular k-masked operations a conservative choice is done as the
// elements of the first vector operand, for which the corresponding bit
// in the k-mask operand is set to 0, are copied to the result of the
// instruction.
// TODO/FIXME: The commute still may be legal if it is known that the
// k-mask operand is set to either all ones or all zeroes.
// It is also Ok to commute the 1st operand if all users of MI use only
// the elements enabled by the k-mask operand. For example,
// v4 = VFMADD213PSZrk v1, k, v2, v3; // v1[i] = k[i] ? v2[i]*v1[i]+v3[i]
// : v1[i];
// VMOVAPSZmrk <mem_addr>, k, v4; // this is the ONLY user of v4 ->
// // Ok, to commute v1 in FMADD213PSZrk.
// The k-mask operand has index = 2 for masked and zero-masked operations.
KMaskOp = 2;
// The operand with index = 1 is used as a source for those elements for
// which the corresponding bit in the k-mask is set to 0.
if (X86II::isKMergeMasked(TSFlags))
FirstCommutableVecOp = 3;
LastCommutableVecOp++;
} else if (IsIntrinsic) {
// Commuting the first operand of an intrinsic instruction isn't possible
// unless we can prove that only the lowest element of the result is used.
FirstCommutableVecOp = 2;
}
if (isMem(MI, LastCommutableVecOp))
LastCommutableVecOp--;
// Only the first RegOpsNum operands are commutable.
// Also, the value 'CommuteAnyOperandIndex' is valid here as it means
// that the operand is not specified/fixed.
if (SrcOpIdx1 != CommuteAnyOperandIndex &&
(SrcOpIdx1 < FirstCommutableVecOp || SrcOpIdx1 > LastCommutableVecOp ||
SrcOpIdx1 == KMaskOp))
return false;
if (SrcOpIdx2 != CommuteAnyOperandIndex &&
(SrcOpIdx2 < FirstCommutableVecOp || SrcOpIdx2 > LastCommutableVecOp ||
SrcOpIdx2 == KMaskOp))
return false;
// Look for two different register operands assumed to be commutable
// regardless of the FMA opcode. The FMA opcode is adjusted later.
if (SrcOpIdx1 == CommuteAnyOperandIndex ||
SrcOpIdx2 == CommuteAnyOperandIndex) {
unsigned CommutableOpIdx1 = SrcOpIdx1;
unsigned CommutableOpIdx2 = SrcOpIdx2;
// At least one of operands to be commuted is not specified and
// this method is free to choose appropriate commutable operands.
if (SrcOpIdx1 == SrcOpIdx2)
// Both of operands are not fixed. By default set one of commutable
// operands to the last register operand of the instruction.
CommutableOpIdx2 = LastCommutableVecOp;
else if (SrcOpIdx2 == CommuteAnyOperandIndex)
// Only one of operands is not fixed.
CommutableOpIdx2 = SrcOpIdx1;
// CommutableOpIdx2 is well defined now. Let's choose another commutable
// operand and assign its index to CommutableOpIdx1.
unsigned Op2Reg = MI.getOperand(CommutableOpIdx2).getReg();
for (CommutableOpIdx1 = LastCommutableVecOp;
CommutableOpIdx1 >= FirstCommutableVecOp; CommutableOpIdx1--) {
// Just ignore and skip the k-mask operand.
if (CommutableOpIdx1 == KMaskOp)
continue;
// The commuted operands must have different registers.
// Otherwise, the commute transformation does not change anything and
// is useless then.
if (Op2Reg != MI.getOperand(CommutableOpIdx1).getReg())
break;
}
// No appropriate commutable operands were found.
if (CommutableOpIdx1 < FirstCommutableVecOp)
return false;
// Assign the found pair of commutable indices to SrcOpIdx1 and SrcOpidx2
// to return those values.
if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
CommutableOpIdx1, CommutableOpIdx2))
return false;
}
return true;
}
bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
const MCInstrDesc &Desc = MI.getDesc();
if (!Desc.isCommutable())
return false;
switch (MI.getOpcode()) {
case X86::CMPSDrr:
case X86::CMPSSrr:
case X86::CMPPDrri:
case X86::CMPPSrri:
case X86::VCMPSDrr:
case X86::VCMPSSrr:
case X86::VCMPPDrri:
case X86::VCMPPSrri:
case X86::VCMPPDYrri:
case X86::VCMPPSYrri:
case X86::VCMPSDZrr:
case X86::VCMPSSZrr:
case X86::VCMPPDZrri:
case X86::VCMPPSZrri:
case X86::VCMPPDZ128rri:
case X86::VCMPPSZ128rri:
case X86::VCMPPDZ256rri:
case X86::VCMPPSZ256rri: {
// Float comparison can be safely commuted for
// Ordered/Unordered/Equal/NotEqual tests
unsigned Imm = MI.getOperand(3).getImm() & 0x7;
switch (Imm) {
case 0x00: // EQUAL
case 0x03: // UNORDERED
case 0x04: // NOT EQUAL
case 0x07: // ORDERED
// The indices of the commutable operands are 1 and 2.
// Assign them to the returned operand indices here.
return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2);
}
return false;
}
case X86::MOVSDrr:
case X86::MOVSSrr:
case X86::VMOVSDrr:
case X86::VMOVSSrr:
if (Subtarget.hasSSE41())
return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
return false;
case X86::MOVHLPSrr:
case X86::UNPCKHPDrr:
case X86::VMOVHLPSrr:
case X86::VUNPCKHPDrr:
case X86::VMOVHLPSZrr:
case X86::VUNPCKHPDZ128rr:
if (Subtarget.hasSSE2())
return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
return false;
case X86::VPTERNLOGDZrri: case X86::VPTERNLOGDZrmi:
case X86::VPTERNLOGDZ128rri: case X86::VPTERNLOGDZ128rmi:
case X86::VPTERNLOGDZ256rri: case X86::VPTERNLOGDZ256rmi:
case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi:
case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi:
case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi:
case X86::VPTERNLOGDZrrik:
case X86::VPTERNLOGDZ128rrik:
case X86::VPTERNLOGDZ256rrik:
case X86::VPTERNLOGQZrrik:
case X86::VPTERNLOGQZ128rrik:
case X86::VPTERNLOGQZ256rrik:
case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz:
case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz:
case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz:
case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz:
case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz:
case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz:
case X86::VPTERNLOGDZ128rmbi:
case X86::VPTERNLOGDZ256rmbi:
case X86::VPTERNLOGDZrmbi:
case X86::VPTERNLOGQZ128rmbi:
case X86::VPTERNLOGQZ256rmbi:
case X86::VPTERNLOGQZrmbi:
case X86::VPTERNLOGDZ128rmbikz:
case X86::VPTERNLOGDZ256rmbikz:
case X86::VPTERNLOGDZrmbikz:
case X86::VPTERNLOGQZ128rmbikz:
case X86::VPTERNLOGQZ256rmbikz:
case X86::VPTERNLOGQZrmbikz:
return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
case X86::VPMADD52HUQZ128r:
case X86::VPMADD52HUQZ128rk:
case X86::VPMADD52HUQZ128rkz:
case X86::VPMADD52HUQZ256r:
case X86::VPMADD52HUQZ256rk:
case X86::VPMADD52HUQZ256rkz:
case X86::VPMADD52HUQZr:
case X86::VPMADD52HUQZrk:
case X86::VPMADD52HUQZrkz:
case X86::VPMADD52LUQZ128r:
case X86::VPMADD52LUQZ128rk:
case X86::VPMADD52LUQZ128rkz:
case X86::VPMADD52LUQZ256r:
case X86::VPMADD52LUQZ256rk:
case X86::VPMADD52LUQZ256rkz:
case X86::VPMADD52LUQZr:
case X86::VPMADD52LUQZrk:
case X86::VPMADD52LUQZrkz: {
unsigned CommutableOpIdx1 = 2;
unsigned CommutableOpIdx2 = 3;
if (X86II::isKMasked(Desc.TSFlags)) {
// Skip the mask register.
++CommutableOpIdx1;
++CommutableOpIdx2;
}
if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
CommutableOpIdx1, CommutableOpIdx2))
return false;
if (!MI.getOperand(SrcOpIdx1).isReg() ||
!MI.getOperand(SrcOpIdx2).isReg())
// No idea.
return false;
return true;
}
default:
const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode(),
MI.getDesc().TSFlags);
if (FMA3Group)
return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2,
FMA3Group->isIntrinsic());
// Handled masked instructions since we need to skip over the mask input
// and the preserved input.
if (X86II::isKMasked(Desc.TSFlags)) {
// First assume that the first input is the mask operand and skip past it.
unsigned CommutableOpIdx1 = Desc.getNumDefs() + 1;
unsigned CommutableOpIdx2 = Desc.getNumDefs() + 2;
// Check if the first input is tied. If there isn't one then we only
// need to skip the mask operand which we did above.
if ((MI.getDesc().getOperandConstraint(Desc.getNumDefs(),
MCOI::TIED_TO) != -1)) {
// If this is zero masking instruction with a tied operand, we need to
// move the first index back to the first input since this must
// be a 3 input instruction and we want the first two non-mask inputs.
// Otherwise this is a 2 input instruction with a preserved input and
// mask, so we need to move the indices to skip one more input.
if (X86II::isKMergeMasked(Desc.TSFlags)) {
++CommutableOpIdx1;
++CommutableOpIdx2;
} else {
--CommutableOpIdx1;
}
}
if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
CommutableOpIdx1, CommutableOpIdx2))
return false;
if (!MI.getOperand(SrcOpIdx1).isReg() ||
!MI.getOperand(SrcOpIdx2).isReg())
// No idea.
return false;
return true;
}
return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
}
return false;
}
X86::CondCode X86::getCondFromBranchOpc(unsigned BrOpc) {
switch (BrOpc) {
default: return X86::COND_INVALID;
case X86::JE_1: return X86::COND_E;
case X86::JNE_1: return X86::COND_NE;
case X86::JL_1: return X86::COND_L;
case X86::JLE_1: return X86::COND_LE;
case X86::JG_1: return X86::COND_G;
case X86::JGE_1: return X86::COND_GE;
case X86::JB_1: return X86::COND_B;
case X86::JBE_1: return X86::COND_BE;
case X86::JA_1: return X86::COND_A;
case X86::JAE_1: return X86::COND_AE;
case X86::JS_1: return X86::COND_S;
case X86::JNS_1: return X86::COND_NS;
case X86::JP_1: return X86::COND_P;
case X86::JNP_1: return X86::COND_NP;
case X86::JO_1: return X86::COND_O;
case X86::JNO_1: return X86::COND_NO;
}
}
/// Return condition code of a SET opcode.
X86::CondCode X86::getCondFromSETOpc(unsigned Opc) {
switch (Opc) {
default: return X86::COND_INVALID;
case X86::SETAr: case X86::SETAm: return X86::COND_A;
case X86::SETAEr: case X86::SETAEm: return X86::COND_AE;
case X86::SETBr: case X86::SETBm: return X86::COND_B;
case X86::SETBEr: case X86::SETBEm: return X86::COND_BE;
case X86::SETEr: case X86::SETEm: return X86::COND_E;
case X86::SETGr: case X86::SETGm: return X86::COND_G;
case X86::SETGEr: case X86::SETGEm: return X86::COND_GE;
case X86::SETLr: case X86::SETLm: return X86::COND_L;
case X86::SETLEr: case X86::SETLEm: return X86::COND_LE;
case X86::SETNEr: case X86::SETNEm: return X86::COND_NE;
case X86::SETNOr: case X86::SETNOm: return X86::COND_NO;
case X86::SETNPr: case X86::SETNPm: return X86::COND_NP;
case X86::SETNSr: case X86::SETNSm: return X86::COND_NS;
case X86::SETOr: case X86::SETOm: return X86::COND_O;
case X86::SETPr: case X86::SETPm: return X86::COND_P;
case X86::SETSr: case X86::SETSm: return X86::COND_S;
}
}
/// Return condition code of a CMov opcode.
X86::CondCode X86::getCondFromCMovOpc(unsigned Opc) {
switch (Opc) {
default: return X86::COND_INVALID;
case X86::CMOVA16rm: case X86::CMOVA16rr: case X86::CMOVA32rm:
case X86::CMOVA32rr: case X86::CMOVA64rm: case X86::CMOVA64rr:
return X86::COND_A;
case X86::CMOVAE16rm: case X86::CMOVAE16rr: case X86::CMOVAE32rm:
case X86::CMOVAE32rr: case X86::CMOVAE64rm: case X86::CMOVAE64rr:
return X86::COND_AE;
case X86::CMOVB16rm: case X86::CMOVB16rr: case X86::CMOVB32rm:
case X86::CMOVB32rr: case X86::CMOVB64rm: case X86::CMOVB64rr:
return X86::COND_B;
case X86::CMOVBE16rm: case X86::CMOVBE16rr: case X86::CMOVBE32rm:
case X86::CMOVBE32rr: case X86::CMOVBE64rm: case X86::CMOVBE64rr:
return X86::COND_BE;
case X86::CMOVE16rm: case X86::CMOVE16rr: case X86::CMOVE32rm:
case X86::CMOVE32rr: case X86::CMOVE64rm: case X86::CMOVE64rr:
return X86::COND_E;
case X86::CMOVG16rm: case X86::CMOVG16rr: case X86::CMOVG32rm:
case X86::CMOVG32rr: case X86::CMOVG64rm: case X86::CMOVG64rr:
return X86::COND_G;
case X86::CMOVGE16rm: case X86::CMOVGE16rr: case X86::CMOVGE32rm:
case X86::CMOVGE32rr: case X86::CMOVGE64rm: case X86::CMOVGE64rr:
return X86::COND_GE;
case X86::CMOVL16rm: case X86::CMOVL16rr: case X86::CMOVL32rm:
case X86::CMOVL32rr: case X86::CMOVL64rm: case X86::CMOVL64rr:
return X86::COND_L;
case X86::CMOVLE16rm: case X86::CMOVLE16rr: case X86::CMOVLE32rm:
case X86::CMOVLE32rr: case X86::CMOVLE64rm: case X86::CMOVLE64rr:
return X86::COND_LE;
case X86::CMOVNE16rm: case X86::CMOVNE16rr: case X86::CMOVNE32rm:
case X86::CMOVNE32rr: case X86::CMOVNE64rm: case X86::CMOVNE64rr:
return X86::COND_NE;
case X86::CMOVNO16rm: case X86::CMOVNO16rr: case X86::CMOVNO32rm:
case X86::CMOVNO32rr: case X86::CMOVNO64rm: case X86::CMOVNO64rr:
return X86::COND_NO;
case X86::CMOVNP16rm: case X86::CMOVNP16rr: case X86::CMOVNP32rm:
case X86::CMOVNP32rr: case X86::CMOVNP64rm: case X86::CMOVNP64rr:
return X86::COND_NP;
case X86::CMOVNS16rm: case X86::CMOVNS16rr: case X86::CMOVNS32rm:
case X86::CMOVNS32rr: case X86::CMOVNS64rm: case X86::CMOVNS64rr:
return X86::COND_NS;
case X86::CMOVO16rm: case X86::CMOVO16rr: case X86::CMOVO32rm:
case X86::CMOVO32rr: case X86::CMOVO64rm: case X86::CMOVO64rr:
return X86::COND_O;
case X86::CMOVP16rm: case X86::CMOVP16rr: case X86::CMOVP32rm:
case X86::CMOVP32rr: case X86::CMOVP64rm: case X86::CMOVP64rr:
return X86::COND_P;
case X86::CMOVS16rm: case X86::CMOVS16rr: case X86::CMOVS32rm:
case X86::CMOVS32rr: case X86::CMOVS64rm: case X86::CMOVS64rr:
return X86::COND_S;
}
}
unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
switch (CC) {
default: llvm_unreachable("Illegal condition code!");
case X86::COND_E: return X86::JE_1;
case X86::COND_NE: return X86::JNE_1;
case X86::COND_L: return X86::JL_1;
case X86::COND_LE: return X86::JLE_1;
case X86::COND_G: return X86::JG_1;
case X86::COND_GE: return X86::JGE_1;
case X86::COND_B: return X86::JB_1;
case X86::COND_BE: return X86::JBE_1;
case X86::COND_A: return X86::JA_1;
case X86::COND_AE: return X86::JAE_1;
case X86::COND_S: return X86::JS_1;
case X86::COND_NS: return X86::JNS_1;
case X86::COND_P: return X86::JP_1;
case X86::COND_NP: return X86::JNP_1;
case X86::COND_O: return X86::JO_1;
case X86::COND_NO: return X86::JNO_1;
}
}
/// Return the inverse of the specified condition,
/// e.g. turning COND_E to COND_NE.
X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
switch (CC) {
default: llvm_unreachable("Illegal condition code!");
case X86::COND_E: return X86::COND_NE;
case X86::COND_NE: return X86::COND_E;
case X86::COND_L: return X86::COND_GE;
case X86::COND_LE: return X86::COND_G;
case X86::COND_G: return X86::COND_LE;
case X86::COND_GE: return X86::COND_L;
case X86::COND_B: return X86::COND_AE;
case X86::COND_BE: return X86::COND_A;
case X86::COND_A: return X86::COND_BE;
case X86::COND_AE: return X86::COND_B;
case X86::COND_S: return X86::COND_NS;
case X86::COND_NS: return X86::COND_S;
case X86::COND_P: return X86::COND_NP;
case X86::COND_NP: return X86::COND_P;
case X86::COND_O: return X86::COND_NO;
case X86::COND_NO: return X86::COND_O;
case X86::COND_NE_OR_P: return X86::COND_E_AND_NP;
case X86::COND_E_AND_NP: return X86::COND_NE_OR_P;
}
}
/// Assuming the flags are set by MI(a,b), return the condition code if we
/// modify the instructions such that flags are set by MI(b,a).
static X86::CondCode getSwappedCondition(X86::CondCode CC) {
switch (CC) {
default: return X86::COND_INVALID;
case X86::COND_E: return X86::COND_E;
case X86::COND_NE: return X86::COND_NE;
case X86::COND_L: return X86::COND_G;
case X86::COND_LE: return X86::COND_GE;
case X86::COND_G: return X86::COND_L;
case X86::COND_GE: return X86::COND_LE;
case X86::COND_B: return X86::COND_A;