blob: 0c7acafca0d8bb3803f92a5e3eb3c4c8d00865f7 [file] [log] [blame] [edit]
//=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements a pass that removes irreducible control flow.
/// Irreducible control flow means multiple-entry loops, which this pass
/// transforms to have a single entry.
///
/// Note that LLVM has a generic pass that lowers irreducible control flow, but
/// it linearizes control flow, turning diamonds into two triangles, which is
/// both unnecessary and undesirable for WebAssembly.
///
/// The big picture: We recursively process each "region", defined as a group
/// of blocks with a single entry and no branches back to that entry. A region
/// may be the entire function body, or the inner part of a loop, i.e., the
/// loop's body without branches back to the loop entry. In each region we
/// identify all the strongly-connected components (SCCs). We fix up multi-entry
/// loops (SCCs) by adding a new block that can dispatch to each of the loop
/// entries, based on the value of a label "helper" variable, and we replace
/// direct branches to the entries with assignments to the label variable and a
/// branch to the dispatch block. Then the dispatch block is the single entry in
/// the loop containing the previous multiple entries. Each time we fix some
/// irreducibility, we recalculate the SCCs. After ensuring all the SCCs in a
/// region are reducible, we recurse into them. The total time complexity of
/// this pass is roughly:
/// O((NumBlocks + NumEdges) * (NumNestedLoops + NumIrreducibleLoops))
///
/// This pass is similar to what the Relooper [1] does. Both identify looping
/// code that requires multiple entries, and resolve it in a similar way (in
/// Relooper terminology, we implement a Multiple shape in a Loop shape). Note
/// also that like the Relooper, we implement a "minimal" intervention: we only
/// use the "label" helper for the blocks we absolutely must and no others. We
/// also prioritize code size and do not duplicate code in order to resolve
/// irreducibility. The graph algorithms for finding loops and entries and so
/// forth are also similar to the Relooper. The main differences between this
/// pass and the Relooper are:
///
/// * We just care about irreducibility, so we just look at loops.
/// * The Relooper emits structured control flow (with ifs etc.), while we
/// emit a CFG.
///
/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
/// Proceedings of the ACM international conference companion on Object oriented
/// programming systems languages and applications companion (SPLASH '11). ACM,
/// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
/// http://doi.acm.org/10.1145/2048147.2048224
///
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
#include "WebAssemblySubtarget.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/Debug.h"
#include <limits>
using namespace llvm;
#define DEBUG_TYPE "wasm-fix-irreducible-control-flow"
namespace {
// Sequence of machine basic blocks, used where a defined order is needed
// (for example the result of getSortedEntries()).
using BlockVector = SmallVector<MachineBasicBlock *, 4>;
// Set of machine basic blocks keyed by pointer; used for region membership
// and for the collections of loop entries.
using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;
// Copies the blocks of Entries into a vector ordered by ascending block
// number, giving callers an iteration order that does not depend on how the
// set happens to store its elements.
static BlockVector getSortedEntries(const BlockSet &Entries) {
  BlockVector Ordered(Entries.begin(), Entries.end());
  auto ByBlockNumber = [](const MachineBasicBlock *LHS,
                          const MachineBasicBlock *RHS) {
    return LHS->getNumber() < RHS->getNumber();
  };
  llvm::sort(Ordered, ByBlockNumber);
  return Ordered;
}
// One node of a ReachabilityGraph: a block of the region together with the
// outgoing edges that the graph keeps for it.
struct ReachabilityNode {
// The machine basic block this node stands for.
MachineBasicBlock *MBB;
// Nodes of the successors of MBB that ReachabilityGraph::calculate() keeps:
// successors inside the region other than the region entry.
SmallVector<ReachabilityNode *, 4> Succs;
// Index of the SCC this node was assigned to by
// ReachabilityGraph::calculate(); the maximum unsigned value means
// "not assigned yet".
unsigned SCCId = std::numeric_limits<unsigned>::max();
};
// Computes the strongly-connected components (SCCs) of a region and, for each
// SCC that forms a loop, the blocks through which that loop is entered.
// Branches that leave the region are not part of the graph, and neither are
// branches back to the region entry (which matters when the region is the
// inner part of a loop).
class ReachabilityGraph {
public:
  // Builds and analyzes the graph for the region made of Blocks and entered
  // at Entry. Blocks is stored by reference, so it must outlive the graph.
  ReachabilityGraph(MachineBasicBlock *Entry, const BlockSet &Blocks)
      : Entry(Entry), Blocks(Blocks) {
#ifndef NDEBUG
    // Single-entry check: every block other than Entry may only be reached
    // from blocks that are themselves part of the region.
    for (auto *MBB : Blocks) {
      if (MBB == Entry)
        continue;
      for (auto *Pred : MBB->predecessors())
        assert(inRegion(Pred));
    }
#endif
    calculate();
  }

  // All loop entries found in the region, across every SCC.
  const BlockSet &getLoopEntries() const { return LoopEntries; }

  // The loop entries that belong to the SCC with the given id.
  const BlockSet &getLoopEntriesForSCC(unsigned SCCId) const {
    return LoopEntriesBySCC[SCCId];
  }

  // The id of the SCC that MBB was assigned to. MBB must be in the region.
  unsigned getSCCId(MachineBasicBlock *MBB) const {
    return getNode(MBB)->SCCId;
  }

  // GraphTraits needs access to Entry and getNode() to drive the SCC iterator.
  friend struct GraphTraits<ReachabilityGraph *>;

private:
  // The single entry block of the region.
  MachineBasicBlock *Entry;
  // The blocks that make up the region (owned by the caller).
  const BlockSet &Blocks;
  // Union of the loop entries of all SCCs.
  BlockSet LoopEntries;
  // Loop entries per SCC, indexed by SCC id.
  SmallVector<BlockSet, 0> LoopEntriesBySCC;
  // Node storage, one element per region block.
  SmallVector<ReachabilityNode, 0> Nodes;
  // Lookup from a region block to its element in Nodes.
  DenseMap<MachineBasicBlock *, ReachabilityNode *> MBBToNodeMap;

  // True if MBB is one of the region's blocks.
  bool inRegion(MachineBasicBlock *MBB) const { return Blocks.count(MBB) != 0; }

  // The node that represents MBB; MBB must be in the region.
  ReachabilityNode *getNode(MachineBasicBlock *MBB) const {
    return MBBToNodeMap.at(MBB);
  }

  // Fills in Nodes, the SCC ids and the loop-entry sets.
  void calculate();
};
} // end anonymous namespace
namespace llvm {
// Adapter that exposes a ReachabilityGraph to the generic graph utilities
// (used in this file by the SCC iterator): the start node is the node of the
// region entry, and a node's children are the nodes stored in its Succs list.
template <> struct GraphTraits<ReachabilityGraph *> {
  using NodeRef = ReachabilityNode *;
  using ChildIteratorType = SmallVectorImpl<NodeRef>::iterator;

  static NodeRef getEntryNode(ReachabilityGraph *G) {
    MachineBasicBlock *RegionEntry = G->Entry;
    return G->getNode(RegionEntry);
  }

  static ChildIteratorType child_begin(NodeRef N) { return N->Succs.begin(); }

  static ChildIteratorType child_end(NodeRef N) { return N->Succs.end(); }
};
} // end namespace llvm
namespace {
void ReachabilityGraph::calculate() {
auto NumBlocks = Blocks.size();
Nodes.assign(NumBlocks, {});
MBBToNodeMap.clear();
MBBToNodeMap.reserve(NumBlocks);
// Initialize mappings.
unsigned MBBIdx = 0;
for (auto *MBB : Blocks) {
auto &Node = Nodes[MBBIdx++];
Node.MBB = MBB;
MBBToNodeMap[MBB] = &Node;
}
// Add all relevant direct branches.
MBBIdx = 0;
for (auto *MBB : Blocks) {
auto &Node = Nodes[MBBIdx++];
for (auto *Succ : MBB->successors()) {
if (Succ != Entry && inRegion(Succ)) {
Node.Succs.push_back(getNode(Succ));
}
}
}
unsigned CurrSCCIdx = 0;
for (auto &SCC : make_range(scc_begin(this), scc_end(this))) {
LoopEntriesBySCC.push_back({});
auto &SCCLoopEntries = LoopEntriesBySCC.back();
for (auto *Node : SCC) {
// Make sure nodes are only ever assigned one SCC
assert(Node->SCCId == std::numeric_limits<unsigned>::max());
Node->SCCId = CurrSCCIdx;
}
bool SelfLoop = false;
if (SCC.size() == 1) {
auto &Node = SCC[0];
for (auto *Succ : Node->Succs) {
if (Succ == Node) {
SelfLoop = true;
break;
}
}
}
// Blocks outside any (multi-block) loop will be isolated in their own
// single-element SCC. Thus blocks that are in a loop are those in
// multi-element SCCs or are self-looping.
if (SCC.size() > 1 || SelfLoop) {
// Find the loop entries - loop body blocks with predecessors outside
// their SCC
for (auto *Node : SCC) {
if (Node->MBB == Entry)
continue;
for (auto *Pred : Node->MBB->predecessors()) {
// This test is accurate despite not having assigned all nodes an SCC
// yet. We only care if a node has been assigned into this SCC or not.
if (getSCCId(Pred) != CurrSCCIdx) {
LoopEntries.insert(Node->MBB);
SCCLoopEntries.insert(Node->MBB);
}
}
}
}
++CurrSCCIdx;
}
#ifndef NDEBUG
// Make sure all nodes have been processed
for (auto &Node : Nodes) {
assert(Node.SCCId != std::numeric_limits<unsigned>::max());
}
#endif
}
// Machine function pass that rewrites multiple-entry loops so that each loop
// is entered through a single dispatch block.
class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
// Human-readable name of the pass.
StringRef getPassName() const override {
return "WebAssembly Fix Irreducible Control Flow";
}
// Pass entry point; returns true if the function was modified.
bool runOnMachineFunction(MachineFunction &MF) override;
// Fixes irreducibility in the region formed by Blocks and entered at Entry,
// then recurses into the region's loops. Returns true if anything changed.
bool processRegion(MachineBasicBlock *Entry, BlockSet &Blocks,
MachineFunction &MF);
// Gives the loop entered through Entries a single entry by adding a dispatch
// block and routing blocks; newly created blocks are added to Blocks.
void makeSingleEntryLoop(const BlockSet &Entries, BlockSet &Blocks,
MachineFunction &MF, const ReachabilityGraph &Graph);
public:
static char ID; // Pass identification, replacement for typeid
WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
};
// Removes irreducible control flow from one region and then from the loops
// nested inside it.
//
// Entry is the single entry block of the region and Blocks is the set of all
// blocks in the region; blocks created while fixing this region are added to
// Blocks. Returns true if the function was changed.
bool WebAssemblyFixIrreducibleControlFlow::processRegion(
    MachineBasicBlock *Entry, BlockSet &Blocks, MachineFunction &MF) {
  bool Changed = false;
  // Remove irreducibility before processing child loops, which may take
  // multiple iterations.
  while (true) {
    ReachabilityGraph Graph(Entry, Blocks);

    // Visit loop entries in block-number order, both when looking for
    // irreducibility and when recursing. The loop-entry set is keyed by
    // pointer, so iterating it directly can yield an order that depends on
    // block addresses; since fixing a loop creates new blocks and registers,
    // that order would leak into the generated code.
    const BlockVector SortedLoopEntries =
        getSortedEntries(Graph.getLoopEntries());

    bool FoundIrreducibility = false;
    for (auto *LoopEntry : SortedLoopEntries) {
      // Find mutual entries - all entries which can reach this one, and
      // are reached by it (that always includes LoopEntry itself). All mutual
      // entries must be in the same SCC, so if we have more than one, then we
      // have irreducible control flow.
      //
      // (Note that we need to sort the entries here, as otherwise the order can
      // matter: being mutual is a symmetric relationship, and each set of
      // mutuals will be handled properly no matter which we see first. However,
      // there can be multiple disjoint sets of mutuals, and which we process
      // first changes the output.)
      //
      // Note that irreducibility may involve inner loops, e.g. imagine A
      // starts one loop, and it has B inside it which starts an inner loop.
      // If we add a branch from all the way on the outside to B, then in a
      // sense B is no longer an "inner" loop, semantically speaking. We will
      // fix that irreducibility by adding a block that dispatches to either
      // A or B, so B will no longer be an inner loop in our output.
      // (A fancier approach might try to keep it as such.)
      //
      // Note that we still need to recurse into inner loops later, to handle
      // the case where the irreducibility is entirely nested - we would not
      // be able to identify that at this point, since the enclosing loop is
      // a group of blocks all of whom can reach each other. (We'll see the
      // irreducibility after removing branches to the top of that enclosing
      // loop.)
      auto &MutualLoopEntries =
          Graph.getLoopEntriesForSCC(Graph.getSCCId(LoopEntry));
      if (MutualLoopEntries.size() > 1) {
        makeSingleEntryLoop(MutualLoopEntries, Blocks, MF, Graph);
        FoundIrreducibility = true;
        Changed = true;
        break;
      }
    }

    // Only go on to actually process the inner loops when we are done
    // removing irreducible control flow and changing the graph. Modifying
    // the graph as we go is possible, and that might let us avoid looking at
    // the already-fixed loops again if we are careful, but all that is
    // complex and bug-prone. Since irreducible loops are rare, just starting
    // another iteration (which rebuilds the graph) is best.
    if (FoundIrreducibility) {
      continue;
    }

    // At this point every loop SCC has exactly one entry, so each loop is
    // visited once here.
    for (auto *LoopEntry : SortedLoopEntries) {
      // The inner region consists of all blocks in the loop entry's SCC.
      BlockSet InnerBlocks;
      auto EntrySCCId = Graph.getSCCId(LoopEntry);
      for (auto *Block : Blocks) {
        if (EntrySCCId == Graph.getSCCId(Block)) {
          InnerBlocks.insert(Block);
        }
      }
      // Each of these calls to processRegion may change the graph, but are
      // guaranteed not to interfere with each other. The only changes we make
      // to the graph are to add blocks on the way to a loop entry. As the
      // loops are disjoint, that means we may only alter branches that exit
      // another loop, which are ignored when recursing into that other loop
      // anyhow.
      if (processRegion(LoopEntry, InnerBlocks, MF)) {
        Changed = true;
      }
    }
    return Changed;
  }
}
// Given a set of entries to a single loop, create a single entry for that
// loop by creating a dispatch block for them, routing control flow using
// a helper variable. Also updates Blocks with any new blocks created, so
// that we properly track all the blocks in the region. But this does not update
// ReachabilityGraph; this will be updated in the caller of this function as
// needed.
//
// Entries: the (at least two) entry blocks of the loop.
// Blocks:  the region's block set; the dispatch and routing blocks are added.
// MF:      the function being rewritten.
// Graph:   the analysis of the region as it was before this call; only used
//          to query SCC ids.
void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
const BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF,
const ReachabilityGraph &Graph) {
assert(Entries.size() >= 2);
// Sort the entries to ensure a deterministic build.
BlockVector SortedEntries = getSortedEntries(Entries);
#ifndef NDEBUG
// Sanity checks on the sort keys: every entry has a valid block number and
// no two neighbouring entries in the sorted order share a number.
for (auto *Block : SortedEntries)
assert(Block->getNumber() != -1);
if (SortedEntries.size() > 1) {
for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; I != E;
++I) {
auto ANum = (*I)->getNumber();
auto BNum = (*(std::next(I)))->getNumber();
assert(ANum != BNum);
}
}
#endif
// Create a dispatch block which will contain a jump table to the entries.
// It is placed at the end of the function.
MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
MF.insert(MF.end(), Dispatch);
Blocks.insert(Dispatch);
// Add the jump table.
const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
MachineInstrBuilder MIB =
BuildMI(Dispatch, DebugLoc(), TII.get(WebAssembly::BR_TABLE_I32));
// Add the register which will be used to tell the jump table which block to
// jump to.
MachineRegisterInfo &MRI = MF.getRegInfo();
Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
MIB.addReg(Reg);
// Compute the indices in the superheader, one for each bad block, and
// add them as successors.
DenseMap<MachineBasicBlock *, unsigned> Indices;
for (auto *Entry : SortedEntries) {
auto Pair = Indices.try_emplace(Entry);
assert(Pair.second);
// The register added above is the first operand of the instruction, so the
// operand count minus one is the number of targets added so far, which is
// the zero-based table slot that this entry is about to occupy.
unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
Pair.first->second = Index;
MIB.addMBB(Entry);
Dispatch->addSuccessor(Entry);
}
// Rewrite the problematic successors for every block that wants to reach
// the bad blocks. For simplicity, we just introduce a new block for every
// edge we need to rewrite. (Fancier things are possible.)
//
// Collect the predecessors of all entries, leaving out the dispatch block
// itself. A block that branches to several entries is listed once per entry.
BlockVector AllPreds;
for (auto *Entry : SortedEntries) {
for (auto *Pred : Entry->predecessors()) {
if (Pred != Dispatch) {
AllPreds.push_back(Pred);
}
}
}
// This set stores predecessors within this loop, i.e. predecessors that are
// in the same SCC as an entry they branch to.
DenseSet<MachineBasicBlock *> InLoop;
for (auto *Pred : AllPreds) {
auto PredSCCId = Graph.getSCCId(Pred);
for (auto *Entry : Pred->successors()) {
if (!Entries.count(Entry))
continue;
if (Graph.getSCCId(Entry) == PredSCCId) {
InLoop.insert(Pred);
break;
}
}
}
// Record if each entry has a layout predecessor. This map stores
// <<loop entry, Predecessor is within the loop?>, layout predecessor>
DenseMap<PointerIntPair<MachineBasicBlock *, 1, bool>, MachineBasicBlock *>
EntryToLayoutPred;
for (auto *Pred : AllPreds) {
bool PredInLoop = InLoop.count(Pred);
for (auto *Entry : Pred->successors())
if (Entries.count(Entry) && Pred->isLayoutSuccessor(Entry))
EntryToLayoutPred[{Entry, PredInLoop}] = Pred;
}
// We need to create at most two routing blocks per entry: one for
// predecessors outside the loop and one for predecessors inside the loop.
// This map stores
// <<loop entry, Predecessor is within the loop?>, routing block>
DenseMap<PointerIntPair<MachineBasicBlock *, 1, bool>, MachineBasicBlock *>
Map;
for (auto *Pred : AllPreds) {
bool PredInLoop = InLoop.count(Pred);
for (auto *Entry : Pred->successors()) {
// Skip successors that are not entries, and entries that already have a
// routing block for this kind of predecessor.
if (!Entries.count(Entry) || Map.count({Entry, PredInLoop}))
continue;
// If there exists a layout predecessor of this entry and this predecessor
// is not that, we rather create a routing block after that layout
// predecessor to save a branch.
if (auto *OtherPred = EntryToLayoutPred.lookup({Entry, PredInLoop}))
if (OtherPred != Pred)
continue;
// This is a successor we need to rewrite.
// The routing block goes directly before the entry when the predecessor
// falls through to it, and at the end of the function otherwise.
MachineBasicBlock *Routing = MF.CreateMachineBasicBlock();
MF.insert(Pred->isLayoutSuccessor(Entry)
? MachineFunction::iterator(Entry)
: MF.end(),
Routing);
Blocks.insert(Routing);
// Set the jump table's register of the index of the block we wish to
// jump to, and jump to the jump table.
BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg)
.addImm(Indices[Entry]);
BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch);
Routing->addSuccessor(Dispatch);
Map[{Entry, PredInLoop}] = Routing;
}
}
// Redirect every predecessor from the entries to the matching routing blocks.
for (auto *Pred : AllPreds) {
bool PredInLoop = InLoop.count(Pred);
// Remap the terminator operands and the successor list.
for (MachineInstr &Term : Pred->terminators())
for (auto &Op : Term.explicit_uses())
if (Op.isMBB() && Indices.count(Op.getMBB()))
Op.setMBB(Map[{Op.getMBB(), PredInLoop}]);
for (auto *Succ : Pred->successors()) {
if (!Entries.count(Succ))
continue;
auto *Routing = Map[{Succ, PredInLoop}];
Pred->replaceSuccessor(Succ, Routing);
}
}
// Create a fake default label, because br_table requires one.
// It repeats the block of the last operand added so far.
MIB.addMBB(MIB.getInstr()
->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1)
.getMBB());
}
} // end anonymous namespace
// Storage for the pass ID; the pass constructor hands it to
// MachineFunctionPass.
char WebAssemblyFixIrreducibleControlFlow::ID = 0;
// Registers the pass under the DEBUG_TYPE name with a short description.
INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
"Removes irreducible control flow", false, false)
// Factory function: returns a newly allocated instance of the pass.
FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
return new WebAssemblyFixIrreducibleControlFlow();
}
// Test whether the given register has an ARGUMENT def.
static bool hasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) {
for (const auto &Def : MRI.def_instructions(Reg))
if (WebAssembly::isArgument(Def.getOpcode()))
return true;
return false;
}
// Inserts an IMPLICIT_DEF at the top of the entry block for every virtual
// register that has a non-debug use and no ARGUMENT definition, so that each
// such use is covered by a def on every path. Afterwards the ARGUMENT
// instructions of the entry block are moved to its very beginning.
// TODO: This is fairly heavy-handed; find a better approach.
static void addImplicitDefs(MachineFunction &MF) {
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
  MachineBasicBlock &EntryMBB = *MF.begin();

  const unsigned NumVRegs = MRI.getNumVirtRegs();
  for (unsigned Idx = 0; Idx < NumVRegs; ++Idx) {
    Register VReg = Register::index2VirtReg(Idx);
    // Nothing to do for registers that are never used, or that already get
    // their value from an ARGUMENT instruction.
    if (MRI.use_nodbg_empty(VReg) || hasArgumentDef(VReg, MRI))
      continue;
    const auto &ImplicitDefDesc = TII.get(WebAssembly::IMPLICIT_DEF);
    BuildMI(EntryMBB, EntryMBB.begin(), DebugLoc(), ImplicitDefDesc, VReg);
  }

  // Hoist the ARGUMENT_* instructions above everything else in the entry
  // block, so that their liveness reflects the fact that these really are
  // live-in values. The early-increment range keeps the iteration valid while
  // instructions are being moved.
  for (MachineInstr &MI : llvm::make_early_inc_range(EntryMBB)) {
    if (!WebAssembly::isArgument(MI.getOpcode()))
      continue;
    MI.removeFromParent();
    EntryMBB.insert(EntryMBB.begin(), &MI);
  }
}
// Pass entry point: treats the whole function body as one region, fixes any
// irreducible control flow in it and, if something was rewritten, renumbers
// the blocks and adds IMPLICIT_DEFs. Returns true if the function changed.
bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
    MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"
                       "********** Function: "
                    << MF.getName() << '\n');

  // The outermost region is the entire function, entered at its first block.
  BlockSet WholeFunction;
  for (MachineBasicBlock &MBB : MF)
    WholeFunction.insert(&MBB);

  const bool Rewrote = processRegion(&*MF.begin(), WholeFunction, MF);
  if (LLVM_LIKELY(!Rewrote))
    return false;

  // We rewrote part of the function; recompute relevant things.
  MF.RenumberBlocks();

  // With dispatch blocks in place, a register use may now be reachable along
  // a path that skips its def. Example: %a was defined in BB1 and used in
  // BB2, with BB1 being the only way into BB2; once a dispatch block with
  // several predecessors sits between them, BB2 can be reached without going
  // through BB1, so the def no longer dominates the use. Giving every
  // register an IMPLICIT_DEF is one simple way to fix that.
  addImplicitDefs(MF);
  return true;
}