//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
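//
// As a simplified illustration, if the first vector instruction in a block is
// a PseudoVADD_VV with AVL %avl and SEW=32, phase 3 emits something like
// "vsetvli x0, %avl, e32, m1, ta, ma" immediately before it, unless phase 2
// proved that the incoming VL/VTYPE already satisfies the instruction's
// demands.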
//
//===----------------------------------------------------------------------===//
#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "RISCVVSETVLIInfoAnalysis.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;
using namespace RISCV;
#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");
static cl::opt<bool> EnsureWholeVectorRegisterMoveValidVTYPE(
DEBUG_TYPE "-whole-vector-register-move-valid-vtype", cl::Hidden,
cl::desc("Insert vsetvlis before vmvNr.vs to ensure vtype is valid and "
"vill is cleared"),
cl::init(true));
namespace {
/// Given a virtual register \p Reg, return the corresponding VNInfo for it.
/// This will return nullptr if the virtual register is an implicit_def or
/// if LiveIntervals is not available.
static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI,
const LiveIntervals *LIS) {
assert(Reg.isVirtual());
if (!LIS)
return nullptr;
auto &LI = LIS->getInterval(Reg);
SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI);
return LI.getVNInfoBefore(SI);
}
static unsigned getVLOpNum(const MachineInstr &MI) {
return RISCVII::getVLOpNum(MI.getDesc());
}
struct BlockData {
// The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
// block. Calculated in Phase 2.
VSETVLIInfo Exit;
// The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
// blocks. Calculated in Phase 2, and used by Phase 3.
VSETVLIInfo Pred;
// Keeps track of whether the block is already in the queue.
bool InQueue = false;
BlockData() = default;
};
enum TKTMMode {
VSETTK = 0,
VSETTM = 1,
};
class RISCVInsertVSETVLI : public MachineFunctionPass {
const RISCVSubtarget *ST;
const TargetInstrInfo *TII;
MachineRegisterInfo *MRI;
// Possibly null!
LiveIntervals *LIS;
RISCVVSETVLIInfoAnalysis VIA;
std::vector<BlockData> BlockInfo;
std::queue<const MachineBasicBlock *> WorkList;
public:
static char ID;
RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addUsedIfAvailable<LiveIntervalsWrapperPass>();
AU.addPreserved<LiveIntervalsWrapperPass>();
AU.addPreserved<SlotIndexesWrapperPass>();
AU.addPreserved<LiveDebugVariablesWrapperLegacy>();
AU.addPreserved<LiveStacksWrapperLegacy>();
MachineFunctionPass::getAnalysisUsage(AU);
}
StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }
private:
bool needVSETVLI(const DemandedFields &Used, const VSETVLIInfo &Require,
const VSETVLIInfo &CurInfo) const;
bool needVSETVLIPHI(const VSETVLIInfo &Require,
const MachineBasicBlock &MBB) const;
void insertVSETVLI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPt, DebugLoc DL,
const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
VSETVLIInfo &Info) const;
void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
void emitVSETVLIs(MachineBasicBlock &MBB);
void doPRE(MachineBasicBlock &MBB);
void insertReadVL(MachineBasicBlock &MBB);
bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI,
const DemandedFields &Used) const;
void coalesceVSETVLIs(MachineBasicBlock &MBB) const;
bool insertVSETMTK(MachineBasicBlock &MBB, TKTMMode Mode) const;
};
} // end anonymous namespace
char RISCVInsertVSETVLI::ID = 0;
char &llvm::RISCVInsertVSETVLIID = RISCVInsertVSETVLI::ID;
INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
false, false)
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPt,
DebugLoc DL, const VSETVLIInfo &Info,
const VSETVLIInfo &PrevInfo) {
++NumInsertedVSETVL;
if (Info.getTWiden()) {
if (Info.hasAVLVLMAX()) {
Register DestReg = MRI->createVirtualRegister(&RISCV::GPRNoX0RegClass);
auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoSF_VSETTNTX0))
.addReg(DestReg, RegState::Define | RegState::Dead)
.addReg(RISCV::X0, RegState::Kill)
.addImm(Info.encodeVTYPE());
if (LIS) {
LIS->InsertMachineInstrInMaps(*MI);
LIS->createAndComputeVirtRegInterval(DestReg);
}
} else {
auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoSF_VSETTNT))
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addReg(Info.getAVLReg())
.addImm(Info.encodeVTYPE());
if (LIS)
LIS->InsertMachineInstrInMaps(*MI);
}
return;
}
if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
// Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
// VLMAX.
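// For example, "vsetvli x0, x0, e32, m1, ta, ma" updates VTYPE while leaving
// VL untouched; this form is only legal when VLMAX does not change.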
if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0X0))
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addReg(RISCV::X0, RegState::Kill)
.addImm(Info.encodeVTYPE())
.addReg(RISCV::VL, RegState::Implicit);
if (LIS)
LIS->InsertMachineInstrInMaps(*MI);
return;
}
// If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
// it has the same VLMAX we want and the last VL/VTYPE we observed is the
// same, we can use the X0, X0 form.
if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) {
if (const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
DefMI && RISCVInstrInfo::isVectorConfigInstr(*DefMI)) {
VSETVLIInfo DefInfo = VIA.getInfoForVSETVLI(*DefMI);
if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
auto MI =
BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0X0))
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addReg(RISCV::X0, RegState::Kill)
.addImm(Info.encodeVTYPE())
.addReg(RISCV::VL, RegState::Implicit);
if (LIS)
LIS->InsertMachineInstrInMaps(*MI);
return;
}
}
}
}
if (Info.hasAVLImm()) {
auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addImm(Info.getAVLImm())
.addImm(Info.encodeVTYPE());
if (LIS)
LIS->InsertMachineInstrInMaps(*MI);
return;
}
if (Info.hasAVLVLMAX()) {
Register DestReg = MRI->createVirtualRegister(&RISCV::GPRNoX0RegClass);
auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
.addReg(DestReg, RegState::Define | RegState::Dead)
.addReg(RISCV::X0, RegState::Kill)
.addImm(Info.encodeVTYPE());
if (LIS) {
LIS->InsertMachineInstrInMaps(*MI);
LIS->createAndComputeVirtRegInterval(DestReg);
}
return;
}
Register AVLReg = Info.getAVLReg();
MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI))
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addReg(AVLReg)
.addImm(Info.encodeVTYPE());
if (LIS) {
LIS->InsertMachineInstrInMaps(*MI);
LiveInterval &LI = LIS->getInterval(AVLReg);
SlotIndex SI = LIS->getInstructionIndex(*MI).getRegSlot();
const VNInfo *CurVNI = Info.getAVLVNInfo();
// If the AVL value isn't live at MI, do a quick check to see if it's easily
// extendable. Otherwise, we need to copy it.
if (LI.getVNInfoBefore(SI) != CurVNI) {
if (!LI.liveAt(SI) && LI.containsOneValue())
LIS->extendToIndices(LI, SI);
else {
Register AVLCopyReg =
MRI->createVirtualRegister(&RISCV::GPRNoX0RegClass);
MachineBasicBlock *MBB = LIS->getMBBFromIndex(CurVNI->def);
MachineBasicBlock::iterator II;
if (CurVNI->isPHIDef())
II = MBB->getFirstNonPHI();
else {
II = LIS->getInstructionFromIndex(CurVNI->def);
II = std::next(II);
}
assert(II.isValid());
auto AVLCopy = BuildMI(*MBB, II, DL, TII->get(RISCV::COPY), AVLCopyReg)
.addReg(AVLReg);
LIS->InsertMachineInstrInMaps(*AVLCopy);
MI->getOperand(1).setReg(AVLCopyReg);
LIS->createAndComputeVirtRegInterval(AVLCopyReg);
}
}
}
}
/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// given a set of DemandedFields \p Used.
bool RISCVInsertVSETVLI::needVSETVLI(const DemandedFields &Used,
const VSETVLIInfo &Require,
const VSETVLIInfo &CurInfo) const {
if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
return true;
if (CurInfo.isCompatible(Used, Require, LIS))
return false;
return true;
}
// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
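// For example, if the previous state was e8/m1 (SEW/LMUL ratio of 8) and the
// new instruction only demands SEW=16, choosing m2 keeps the ratio at 8, so
// VLMAX (and thus the existing VL) is unchanged.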
static VSETVLIInfo adjustIncoming(const VSETVLIInfo &PrevInfo,
const VSETVLIInfo &NewInfo,
DemandedFields &Demanded) {
VSETVLIInfo Info = NewInfo;
if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
!PrevInfo.isUnknown()) {
if (auto NewVLMul = RISCVVType::getSameRatioLMUL(PrevInfo.getSEWLMULRatio(),
Info.getSEW()))
Info.setVLMul(*NewVLMul);
Demanded.LMUL = DemandedFields::LMULEqual;
}
return Info;
}
// Given an incoming state reaching MI, minimally modifies that state so that it
// is compatible with MI. The resulting state is guaranteed to be semantically
// legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
const MachineInstr &MI) const {
if (RISCV::isVectorCopy(ST->getRegisterInfo(), MI) &&
(Info.isUnknown() || !Info.isValid() || Info.hasSEWLMULRatioOnly())) {
// Use an arbitrary but valid AVL and VTYPE so vill will be cleared. It may
// be coalesced into another vsetvli since we won't demand any fields.
VSETVLIInfo NewInfo; // Need a new VSETVLIInfo to clear SEWLMULRatioOnly
NewInfo.setAVLImm(1);
NewInfo.setVTYPE(RISCVVType::LMUL_1, /*sew*/ 8, /*ta*/ true, /*ma*/ true,
/*AltFmt*/ false, /*W*/ 0);
Info = NewInfo;
return;
}
if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
return;
DemandedFields Demanded = getDemanded(MI, ST);
const VSETVLIInfo NewInfo = VIA.computeInfoForInstr(MI);
assert(NewInfo.isValid() && !NewInfo.isUnknown());
if (Info.isValid() && !needVSETVLI(Demanded, NewInfo, Info))
return;
const VSETVLIInfo PrevInfo = Info;
if (!Info.isValid() || Info.isUnknown())
Info = NewInfo;
const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);
// If MI only demands that VL has the same zeroness, we only need to set the
// AVL if the zeroness differs. This removes a vsetvli entirely if the types
// match or allows use of cheaper avl preserving variant if VLMAX doesn't
// change. If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype'
// variant, so we avoid the transform to prevent extending live range of an
// avl register operand.
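// For example, an immediate AVL of 2 and an immediate AVL of 4 are both known
// to be non-zero, so an instruction that only cares whether VL == 0 does not
// force an AVL update (provided VLMAX also matches).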
// TODO: We can probably relax this for immediates.
bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, LIS) &&
IncomingInfo.hasSameVLMAX(PrevInfo);
if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
Info.setAVL(IncomingInfo);
// If we only knew the sew/lmul ratio previously, replace the VTYPE.
if (Info.hasSEWLMULRatioOnly()) {
VSETVLIInfo RatiolessInfo = IncomingInfo;
RatiolessInfo.setAVL(Info);
Info = RatiolessInfo;
} else {
Info.setVTYPE(
((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
.getVLMUL(),
((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
.getSEW(),
// Prefer tail/mask agnostic since it can be relaxed to undisturbed
// later if needed.
(Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
IncomingInfo.getTailAgnostic(),
(Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
IncomingInfo.getMaskAgnostic(),
(Demanded.AltFmt ? IncomingInfo : Info).getAltFmt(),
Demanded.TWiden ? IncomingInfo.getTWiden() : 0);
}
}
// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
const MachineInstr &MI) const {
if (RISCVInstrInfo::isVectorConfigInstr(MI)) {
Info = VIA.getInfoForVSETVLI(MI);
return;
}
if (RISCVInstrInfo::isFaultOnlyFirstLoad(MI)) {
// Update AVL to the vl output of the fault-only-first load.
assert(MI.getOperand(1).getReg().isVirtual());
if (LIS) {
auto &LI = LIS->getInterval(MI.getOperand(1).getReg());
SlotIndex SI =
LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
VNInfo *VNI = LI.getVNInfoAt(SI);
Info.setAVLRegDef(VNI, MI.getOperand(1).getReg());
} else
Info.setAVLRegDef(nullptr, MI.getOperand(1).getReg());
return;
}
// If this is something that updates VL/VTYPE that we don't know about, set
// the state to unknown.
if (MI.isCall() || MI.isInlineAsm() ||
MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
Info = VSETVLIInfo::getUnknown();
}
bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
VSETVLIInfo &Info) const {
bool HadVectorOp = false;
Info = BlockInfo[MBB.getNumber()].Pred;
for (const MachineInstr &MI : MBB) {
transferBefore(Info, MI);
if (RISCVInstrInfo::isVectorConfigInstr(MI) ||
RISCVII::hasSEWOp(MI.getDesc().TSFlags) ||
RISCV::isVectorCopy(ST->getRegisterInfo(), MI) ||
RISCVInstrInfo::isXSfmmVectorConfigInstr(MI))
HadVectorOp = true;
transferAfter(Info, MI);
}
return HadVectorOp;
}
void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
BlockData &BBInfo = BlockInfo[MBB.getNumber()];
BBInfo.InQueue = false;
// Start with the previous entry so that we keep the most conservative state
// we have ever found.
VSETVLIInfo InInfo = BBInfo.Pred;
if (MBB.pred_empty()) {
// There are no predecessors, so use the default starting status.
InInfo.setUnknown();
} else {
for (MachineBasicBlock *P : MBB.predecessors())
InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
}
// If we don't have any valid predecessor value, wait until we do.
if (!InInfo.isValid())
return;
// If no change, no need to rerun block
if (InInfo == BBInfo.Pred)
return;
BBInfo.Pred = InInfo;
LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
<< " changed to " << BBInfo.Pred << "\n");
// Note: It's tempting to cache the state changes here, but due to the
// compatibility checks performed, a block's output state can change based on
// the input state. To cache, we'd have to add logic for finding
// never-compatible state changes.
VSETVLIInfo TmpStatus;
computeVLVTYPEChanges(MBB, TmpStatus);
// If the new exit value matches the old exit value, we don't need to revisit
// any blocks.
if (BBInfo.Exit == TmpStatus)
return;
BBInfo.Exit = TmpStatus;
LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
<< " changed to " << BBInfo.Exit << "\n");
// Add the successors to the work list so we can propagate the changed exit
// status.
for (MachineBasicBlock *S : MBB.successors())
if (!BlockInfo[S->getNumber()].InQueue) {
BlockInfo[S->getNumber()].InQueue = true;
WorkList.push(S);
}
}
// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL was a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
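// For example, in a loop where the AVL is
//   %avl = PHI [ %vl.preheader, %preheader ], [ %vl.latch, %latch ]
// and each incoming value is the GPR result of the trailing VSET(I)VLI in its
// predecessor with the required VTYPE, no new vsetvli is needed in the loop
// body.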
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
const MachineBasicBlock &MBB) const {
if (!Require.hasAVLReg())
return true;
if (!LIS)
return true;
// We need the AVL to have been produced by a PHI node in this basic block.
const VNInfo *Valno = Require.getAVLVNInfo();
if (!Valno->isPHIDef() || LIS->getMBBFromIndex(Valno->def) != &MBB)
return true;
const LiveRange &LR = LIS->getInterval(Require.getAVLReg());
for (auto *PBB : MBB.predecessors()) {
const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit;
// We need the PHI input to be the output of a VSET(I)VLI.
const VNInfo *Value = LR.getVNInfoBefore(LIS->getMBBEndIdx(PBB));
if (!Value)
return true;
MachineInstr *DefMI = LIS->getInstructionFromIndex(Value->def);
if (!DefMI || !RISCVInstrInfo::isVectorConfigInstr(*DefMI))
return true;
// We found a VSET(I)VLI; make sure it matches the output of the
// predecessor block.
VSETVLIInfo DefInfo = VIA.getInfoForVSETVLI(*DefMI);
if (DefInfo != PBBExit)
return true;
// Require has the same VL as PBBExit, so if the exit from the
// predecessor has the VTYPE we are looking for we might be able
// to avoid a VSETVLI.
if (PBBExit.isUnknown() || !PBBExit.hasSameVTYPE(Require))
return true;
}
// If all the incoming values to the PHI checked out, we don't need
// to insert a VSETVLI.
return false;
}
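/// Phase 3 worker: insert the vsetvlis needed within \p MBB, using the entry
/// state computed in phase 2 to avoid emitting a vsetvli before the first
/// vector instruction when the predecessors already provide the required
/// VL/VTYPE.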
void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
// Track whether the prefix of the block we've scanned is transparent
// (meaning has not yet changed the abstract state).
bool PrefixTransparent = true;
for (MachineInstr &MI : MBB) {
const VSETVLIInfo PrevInfo = CurInfo;
transferBefore(CurInfo, MI);
// If this is an explicit VSETVLI or VSETIVLI, update our state.
if (RISCVInstrInfo::isVectorConfigInstr(MI)) {
// Conservatively, mark the VL and VTYPE as live.
assert(MI.getOperand(3).getReg() == RISCV::VL &&
MI.getOperand(4).getReg() == RISCV::VTYPE &&
"Unexpected operands where VL and VTYPE should be");
MI.getOperand(3).setIsDead(false);
MI.getOperand(4).setIsDead(false);
PrefixTransparent = false;
}
if (EnsureWholeVectorRegisterMoveValidVTYPE &&
RISCV::isVectorCopy(ST->getRegisterInfo(), MI)) {
if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo);
PrefixTransparent = false;
}
MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
/*isImp*/ true));
}
uint64_t TSFlags = MI.getDesc().TSFlags;
if (RISCVII::hasSEWOp(TSFlags)) {
if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
// If this is the first implicit state change, and the state change
// requested can be proven to produce the same register contents, we
// can skip emitting the actual state change and continue as if we
// had since we know the GPR result of the implicit state change
// wouldn't be used and VL/VTYPE registers are correct. Note that
// we *do* need to model the state as if it changed as while the
// register contents are unchanged, the abstract model can change.
if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo);
PrefixTransparent = false;
}
if (RISCVII::hasVLOp(TSFlags)) {
MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
if (VLOp.isReg()) {
Register Reg = VLOp.getReg();
// Erase the AVL operand from the instruction.
VLOp.setReg(Register());
VLOp.setIsKill(false);
if (LIS) {
LiveInterval &LI = LIS->getInterval(Reg);
SmallVector<MachineInstr *> DeadMIs;
LIS->shrinkToUses(&LI, &DeadMIs);
// We might have separate components that need to be split due to
// needVSETVLIPHI causing us to skip inserting a new VL def.
SmallVector<LiveInterval *> SplitLIs;
LIS->splitSeparateComponents(LI, SplitLIs);
// If the AVL was an immediate > 31, then it would have been emitted
// as an ADDI. However, the ADDI might not have been used in the
// vsetvli, or a vsetvli might not have been emitted, so it may be
// dead now.
for (MachineInstr *DeadMI : DeadMIs) {
if (!TII->isAddImmediate(*DeadMI, Reg))
continue;
LIS->RemoveMachineInstrFromMaps(*DeadMI);
Register AddReg = DeadMI->getOperand(1).getReg();
DeadMI->eraseFromParent();
if (AddReg.isVirtual())
LIS->shrinkToUses(&LIS->getInterval(AddReg));
}
}
}
MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
/*isImp*/ true));
}
MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
/*isImp*/ true));
}
if (MI.isInlineAsm()) {
MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ true,
/*isImp*/ true));
MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ true,
/*isImp*/ true));
}
if (MI.isCall() || MI.isInlineAsm() ||
MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
PrefixTransparent = false;
transferAfter(CurInfo, MI);
}
const auto &Info = BlockInfo[MBB.getNumber()];
if (CurInfo != Info.Exit) {
LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
LLVM_DEBUG(dbgs() << " begin state: " << Info.Pred << "\n");
LLVM_DEBUG(dbgs() << " expected end state: " << Info.Exit << "\n");
LLVM_DEBUG(dbgs() << " actual end state: " << CurInfo << "\n");
}
assert(CurInfo == Info.Exit && "InsertVSETVLI dataflow invariant violated");
}
/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a single
/// block loop when it could execute once in the preheader instead.
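/// For example, a vsetvli that would otherwise be emitted at the top of a
/// single-block loop (because the preheader's exit state differs) can instead
/// be emitted once at the end of the preheader, so it no longer executes on
/// every iteration.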
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
return;
MachineBasicBlock *UnavailablePred = nullptr;
VSETVLIInfo AvailableInfo;
for (MachineBasicBlock *P : MBB.predecessors()) {
const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
if (PredInfo.isUnknown()) {
if (UnavailablePred)
return;
UnavailablePred = P;
} else if (!AvailableInfo.isValid()) {
AvailableInfo = PredInfo;
} else if (AvailableInfo != PredInfo) {
return;
}
}
// Unreachable, single pred, or full redundancy. Note that FRE is handled by
// phase 3.
if (!UnavailablePred || !AvailableInfo.isValid())
return;
if (!LIS)
return;
// If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
// the unavailable pred.
if (AvailableInfo.hasSEWLMULRatioOnly())
return;
// Critical edge - TODO: consider splitting?
if (UnavailablePred->succ_size() != 1)
return;
// If the AVL value is a register (other than our VLMAX sentinel),
// we need to prove the value is available at the point where we're going
// to insert the vsetvli.
if (AvailableInfo.hasAVLReg()) {
SlotIndex SI = AvailableInfo.getAVLVNInfo()->def;
// This is an inline dominance check which covers the case of
// UnavailablePred being the preheader of a loop.
if (LIS->getMBBFromIndex(SI) != UnavailablePred)
return;
if (!UnavailablePred->terminators().empty() &&
SI >= LIS->getInstructionIndex(*UnavailablePred->getFirstTerminator()))
return;
}
// Model the effect of changing the input state of the block MBB to
// AvailableInfo. We're looking for two issues here; one legality,
// one profitability.
// 1) If the block doesn't use some of the fields from VL or VTYPE, we
// may hit the end of the block with a different end state. We can
// not make this change without reflowing later blocks as well.
// 2) If we don't actually remove a transition, inserting a vsetvli
// into the predecessor block would be correct, but unprofitable.
VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
VSETVLIInfo CurInfo = AvailableInfo;
int TransitionsRemoved = 0;
for (const MachineInstr &MI : MBB) {
const VSETVLIInfo LastInfo = CurInfo;
const VSETVLIInfo LastOldInfo = OldInfo;
transferBefore(CurInfo, MI);
transferBefore(OldInfo, MI);
if (CurInfo == LastInfo)
TransitionsRemoved++;
if (LastOldInfo == OldInfo)
TransitionsRemoved--;
transferAfter(CurInfo, MI);
transferAfter(OldInfo, MI);
if (CurInfo == OldInfo)
// Convergence. All transitions after this must match by construction.
break;
}
if (CurInfo != OldInfo || TransitionsRemoved <= 0)
// Issues 1 and 2 above
return;
// Finally, update both data flow state and insert the actual vsetvli.
// Doing both keeps the code in sync with the dataflow results, which
// is critical for correctness of phase 3.
auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
<< UnavailablePred->getName() << " with state "
<< AvailableInfo << "\n");
BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
BlockInfo[MBB.getNumber()].Pred = AvailableInfo;
// Note there's an implicit assumption here that terminators never use
// or modify VL or VTYPE. Also, fallthrough will return end().
auto InsertPt = UnavailablePred->getFirstInstrTerminator();
insertVSETVLI(*UnavailablePred, InsertPt,
UnavailablePred->findDebugLoc(InsertPt),
AvailableInfo, OldExit);
}
// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
bool RISCVInsertVSETVLI::canMutatePriorConfig(
const MachineInstr &PrevMI, const MachineInstr &MI,
const DemandedFields &Used) const {
// If the VL values aren't equal, return false if either a) the former is
// demanded, or b) we can't rewrite the former to be the latter for
// implementation reasons.
if (!RISCVInstrInfo::isVLPreservingConfig(MI)) {
if (Used.VLAny)
return false;
if (Used.VLZeroness) {
if (RISCVInstrInfo::isVLPreservingConfig(PrevMI))
return false;
if (!VIA.getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(
VIA.getInfoForVSETVLI(MI), LIS))
return false;
}
auto &AVL = MI.getOperand(1);
// If the AVL is a register, we need to make sure its definition is the same
// at PrevMI as it was at MI.
if (AVL.isReg() && AVL.getReg() != RISCV::X0) {
VNInfo *VNI = getVNInfoFromReg(AVL.getReg(), MI, LIS);
VNInfo *PrevVNI = getVNInfoFromReg(AVL.getReg(), PrevMI, LIS);
if (!VNI || !PrevVNI || VNI != PrevVNI)
return false;
}
// If we define VL and need to move the definition up, check we can extend
// the live interval upwards from MI to PrevMI.
Register VL = MI.getOperand(0).getReg();
if (VL.isVirtual() && LIS &&
LIS->getInterval(VL).overlaps(LIS->getInstructionIndex(PrevMI),
LIS->getInstructionIndex(MI)))
return false;
}
assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
auto PriorVType = PrevMI.getOperand(2).getImm();
auto VType = MI.getOperand(2).getImm();
return areCompatibleVTYPEs(PriorVType, VType, Used);
}
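/// Walk \p MBB bottom-up, tracking which VL/VTYPE fields are demanded between
/// consecutive vector config instructions. A vsetvli whose effects are never
/// observed before the next config instruction is deleted, and an earlier
/// vsetvli is rewritten to match a later one when canMutatePriorConfig allows
/// it, so the later one can be removed.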
void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
MachineInstr *NextMI = nullptr;
// We can have arbitrary code in successors, so VL and VTYPE
// must be considered demanded.
DemandedFields Used;
Used.demandVL();
Used.demandVTYPE();
SmallVector<MachineInstr*> ToDelete;
auto dropAVLUse = [&](MachineOperand &MO) {
if (!MO.isReg() || !MO.getReg().isVirtual())
return;
Register OldVLReg = MO.getReg();
MO.setReg(Register());
if (LIS)
LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
MRI->use_nodbg_empty(OldVLReg))
ToDelete.push_back(VLOpDef);
};
for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
// TODO: Support XSfmm.
if (RISCVII::hasTWidenOp(MI.getDesc().TSFlags) ||
RISCVInstrInfo::isXSfmmVectorConfigInstr(MI)) {
NextMI = nullptr;
continue;
}
if (!RISCVInstrInfo::isVectorConfigInstr(MI)) {
Used.doUnion(getDemanded(MI, ST));
if (MI.isCall() || MI.isInlineAsm() ||
MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
NextMI = nullptr;
continue;
}
if (!MI.getOperand(0).isDead())
Used.demandVL();
if (NextMI) {
if (!Used.usedVL() && !Used.usedVTYPE()) {
dropAVLUse(MI.getOperand(1));
if (LIS)
LIS->RemoveMachineInstrFromMaps(MI);
MI.eraseFromParent();
NumCoalescedVSETVL++;
// Leave NextMI unchanged
continue;
}
if (canMutatePriorConfig(MI, *NextMI, Used)) {
if (!RISCVInstrInfo::isVLPreservingConfig(*NextMI)) {
Register DefReg = NextMI->getOperand(0).getReg();
MI.getOperand(0).setReg(DefReg);
MI.getOperand(0).setIsDead(false);
// Move the AVL from NextMI to MI
dropAVLUse(MI.getOperand(1));
if (NextMI->getOperand(1).isImm())
MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
else
MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
false);
dropAVLUse(NextMI->getOperand(1));
// The def of DefReg moved to MI, so extend the LiveInterval up to
// it.
if (DefReg.isVirtual() && LIS) {
LiveInterval &DefLI = LIS->getInterval(DefReg);
SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
SlotIndex NextMISlot =
LIS->getInstructionIndex(*NextMI).getRegSlot();
VNInfo *DefVNI = DefLI.getVNInfoAt(NextMISlot);
LiveInterval::Segment S(MISlot, NextMISlot, DefVNI);
DefLI.addSegment(S);
DefVNI->def = MISlot;
// Mark DefLI as spillable if it was previously unspillable
DefLI.setWeight(0);
// DefReg may have had no uses, in which case we need to shrink
// the LiveInterval up to MI.
LIS->shrinkToUses(&DefLI);
}
MI.setDesc(NextMI->getDesc());
}
MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
dropAVLUse(NextMI->getOperand(1));
if (LIS)
LIS->RemoveMachineInstrFromMaps(*NextMI);
NextMI->eraseFromParent();
NumCoalescedVSETVL++;
// fallthrough
}
}
NextMI = &MI;
Used = getDemanded(MI, ST);
}
// Loop over the dead AVL values, and delete them now. This has
// to be outside the above loop to avoid invalidating iterators.
for (auto *MI : ToDelete) {
assert(MI->getOpcode() == RISCV::ADDI);
Register AddReg = MI->getOperand(1).getReg();
if (LIS) {
LIS->removeInterval(MI->getOperand(0).getReg());
LIS->RemoveMachineInstrFromMaps(*MI);
}
MI->eraseFromParent();
if (LIS && AddReg.isVirtual())
LIS->shrinkToUses(&LIS->getInterval(AddReg));
}
}
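/// Expand the vl output of fault-only-first loads (VLEFF/VLSEGFF): if the vl
/// result is still used, read it back via a PseudoReadVL inserted after the
/// load, then retire the load's own GPR vl output.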
void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
MachineInstr &MI = *I++;
if (RISCVInstrInfo::isFaultOnlyFirstLoad(MI)) {
Register VLOutput = MI.getOperand(1).getReg();
assert(VLOutput.isVirtual());
if (!MI.getOperand(1).isDead()) {
auto ReadVLMI = BuildMI(MBB, I, MI.getDebugLoc(),
TII->get(RISCV::PseudoReadVL), VLOutput);
// Move the LiveInterval's definition down to PseudoReadVL.
if (LIS) {
SlotIndex NewDefSI =
LIS->InsertMachineInstrInMaps(*ReadVLMI).getRegSlot();
LiveInterval &DefLI = LIS->getInterval(VLOutput);
LiveRange::Segment *DefSeg = DefLI.getSegmentContaining(NewDefSI);
VNInfo *DefVNI = DefLI.getVNInfoAt(DefSeg->start);
DefLI.removeSegment(DefSeg->start, NewDefSI);
DefVNI->def = NewDefSI;
}
}
// We don't use the vl output of the VLEFF/VLSEGFF anymore.
MI.getOperand(1).setReg(RISCV::X0);
MI.addRegisterDefined(RISCV::VL, MRI->getTargetRegisterInfo());
}
}
}
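/// For XSfmm instructions carrying a TM or TK operand (selected by \p Mode),
/// insert the corresponding PseudoSF_VSETTM/PseudoSF_VSETTK before the
/// instruction, built from that operand's register and the instruction's SEW
/// and TWiden, and then drop the operand from the instruction.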
bool RISCVInsertVSETVLI::insertVSETMTK(MachineBasicBlock &MBB,
TKTMMode Mode) const {
bool Changed = false;
for (auto &MI : MBB) {
uint64_t TSFlags = MI.getDesc().TSFlags;
if (RISCVInstrInfo::isXSfmmVectorConfigTMTKInstr(MI) ||
!RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasTWidenOp(TSFlags))
continue;
VSETVLIInfo CurrInfo = VIA.computeInfoForInstr(MI);
if (Mode == VSETTK && !RISCVII::hasTKOp(TSFlags))
continue;
if (Mode == VSETTM && !RISCVII::hasTMOp(TSFlags))
continue;
unsigned OpNum = 0;
unsigned Opcode = 0;
switch (Mode) {
case VSETTK:
OpNum = RISCVII::getTKOpNum(MI.getDesc());
Opcode = RISCV::PseudoSF_VSETTK;
break;
case VSETTM:
OpNum = RISCVII::getTMOpNum(MI.getDesc());
Opcode = RISCV::PseudoSF_VSETTM;
break;
}
assert(OpNum && Opcode && "Invalid OpNum or Opcode");
MachineOperand &Op = MI.getOperand(OpNum);
auto TmpMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(Opcode))
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addReg(Op.getReg())
.addImm(Log2_32(CurrInfo.getSEW()))
.addImm(CurrInfo.getTWiden());
Changed = true;
Register Reg = Op.getReg();
Op.setReg(Register());
Op.setIsKill(false);
if (LIS) {
LIS->InsertMachineInstrInMaps(*TmpMI);
LiveInterval &LI = LIS->getInterval(Reg);
// The operand register was cleared above; shrink its live interval to the
// remaining uses.
LIS->shrinkToUses(&LI);
// TODO: Enable this once needVSETVLIPHI is supported.
// SmallVector<LiveInterval *> SplitLIs;
// LIS->splitSeparateComponents(LI, SplitLIs);
}
}
return Changed;
}
bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
// Skip if the vector extension is not enabled.
ST = &MF.getSubtarget<RISCVSubtarget>();
if (!ST->hasVInstructions())
return false;
LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");
TII = ST->getInstrInfo();
MRI = &MF.getRegInfo();
auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
VIA = RISCVVSETVLIInfoAnalysis(ST, LIS);
assert(BlockInfo.empty() && "Expect empty block infos");
BlockInfo.resize(MF.getNumBlockIDs());
bool HaveVectorOp = false;
// Phase 1 - determine how VL/VTYPE are affected by each block.
for (const MachineBasicBlock &MBB : MF) {
VSETVLIInfo TmpStatus;
HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
// Initial exit state is whatever change we found in the block.
BlockData &BBInfo = BlockInfo[MBB.getNumber()];
BBInfo.Exit = TmpStatus;
LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
<< " is " << BBInfo.Exit << "\n");
}
// If we didn't find any instructions that need VSETVLI, we're done.
if (!HaveVectorOp) {
BlockInfo.clear();
return false;
}
// Phase 2 - determine the exit VL/VTYPE from each block. We add all
// blocks to the list here, but will also add any that need to be revisited
// during Phase 2 processing.
for (const MachineBasicBlock &MBB : MF) {
WorkList.push(&MBB);
BlockInfo[MBB.getNumber()].InQueue = true;
}
while (!WorkList.empty()) {
const MachineBasicBlock &MBB = *WorkList.front();
WorkList.pop();
computeIncomingVLVTYPE(MBB);
}
// Perform partial redundancy elimination of vsetvli transitions.
for (MachineBasicBlock &MBB : MF)
doPRE(MBB);
// Phase 3 - add any vsetvli instructions needed in the block. Use the
// Phase 2 information to avoid adding vsetvlis before the first vector
// instruction in the block if the VL/VTYPE is satisfied by its
// predecessors.
for (MachineBasicBlock &MBB : MF)
emitVSETVLIs(MBB);
// Now that all vsetvlis are explicit, go through and do block local
// DSE and peephole based demanded fields based transforms. Note that
// this *must* be done outside the main dataflow so long as we allow
// any cross block analysis within the dataflow. We can't have both
// demanded fields based mutation and non-local analysis in the
// dataflow at the same time without introducing inconsistencies.
// We're visiting blocks from the bottom up because a VSETVLI in the
// earlier block might become dead when its uses in later blocks are
// optimized away.
for (MachineBasicBlock *MBB : post_order(&MF))
coalesceVSETVLIs(*MBB);
// Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
// of VLEFF/VLSEGFF.
for (MachineBasicBlock &MBB : MF)
insertReadVL(MBB);
for (MachineBasicBlock &MBB : MF) {
insertVSETMTK(MBB, VSETTM);
insertVSETMTK(MBB, VSETTK);
}
BlockInfo.clear();
return HaveVectorOp;
}
/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
return new RISCVInsertVSETVLI();
}