| //===- Utils.cpp ---- Misc utilities for analysis -------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements miscellaneous analysis routines for non-loop IR |
| // structures. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "mlir/Dialect/Affine/Analysis/Utils.h" |
| |
| #include "mlir/Analysis/Presburger/PresburgerRelation.h" |
| #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h" |
| #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h" |
| #include "mlir/Dialect/Affine/IR/AffineOps.h" |
| #include "mlir/Dialect/Affine/IR/AffineValueMap.h" |
| #include "mlir/Dialect/MemRef/IR/MemRef.h" |
| #include "mlir/Dialect/Utils/StaticValueUtils.h" |
| #include "mlir/IR/IntegerSet.h" |
| #include "llvm/ADT/SetVector.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/DebugLog.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include <optional> |
| |
| #define DEBUG_TYPE "analysis-utils" |
| |
| using namespace mlir; |
| using namespace affine; |
| using namespace presburger; |
| |
| using llvm::SmallDenseMap; |
| |
| using Node = MemRefDependenceGraph::Node; |
| |
| // LoopNestStateCollector walks loop nests and collects the affine.for ops |
| // and the affine and non-affine load, store, and free operations contained |
| // in them. |
| void LoopNestStateCollector::collect(Operation *opToWalk) { |
| opToWalk->walk([&](Operation *op) { |
| if (auto forOp = dyn_cast<AffineForOp>(op)) { |
| forOps.push_back(forOp); |
| } else if (isa<AffineReadOpInterface>(op)) { |
| loadOpInsts.push_back(op); |
| } else if (isa<AffineWriteOpInterface>(op)) { |
| storeOpInsts.push_back(op); |
| } else { |
| auto memInterface = dyn_cast<MemoryEffectOpInterface>(op); |
| if (!memInterface) { |
| if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>()) |
| // This op itself is memory-effect free. |
| return; |
| // Check operands. E.g., ops like the `call` op are handled here. |
| for (Value v : op->getOperands()) { |
| if (!isa<MemRefType>(v.getType())) |
| continue; |
| // Conservatively, we assume the memref is read and written to. |
| memrefLoads.push_back(op); |
| memrefStores.push_back(op); |
| } |
| } else { |
| // Non-affine loads, stores, and frees. |
| if (hasEffect<MemoryEffects::Read>(op)) |
| memrefLoads.push_back(op); |
| if (hasEffect<MemoryEffects::Write>(op)) |
| memrefStores.push_back(op); |
| if (hasEffect<MemoryEffects::Free>(op)) |
| memrefFrees.push_back(op); |
| } |
| } |
| }); |
| } |
| |
| unsigned Node::getLoadOpCount(Value memref) const { |
| unsigned loadOpCount = 0; |
| for (Operation *loadOp : loads) { |
| // Common case: affine reads. |
| if (auto affineLoad = dyn_cast<AffineReadOpInterface>(loadOp)) { |
| if (memref == affineLoad.getMemRef()) |
| ++loadOpCount; |
| } else if (hasEffect<MemoryEffects::Read>(loadOp, memref)) { |
| ++loadOpCount; |
| } |
| } |
| return loadOpCount; |
| } |
| |
| // Returns the store op count for 'memref'. |
| unsigned Node::getStoreOpCount(Value memref) const { |
| unsigned storeOpCount = 0; |
| for (auto *storeOp : llvm::concat<Operation *const>(stores, memrefStores)) { |
| // Common case: affine writes. |
| if (auto affineStore = dyn_cast<AffineWriteOpInterface>(storeOp)) { |
| if (memref == affineStore.getMemRef()) |
| ++storeOpCount; |
| } else if (hasEffect<MemoryEffects::Write>(storeOp, memref)) { |
| ++storeOpCount; |
| } |
| } |
| return storeOpCount; |
| } |
| |
| // Returns true if there exists a store op on 'memref' in this node. |
| unsigned Node::hasStore(Value memref) const { |
| return llvm::any_of( |
| llvm::concat<Operation *const>(stores, memrefStores), |
| [&](Operation *storeOp) { |
| if (auto affineStore = dyn_cast<AffineWriteOpInterface>(storeOp)) { |
| if (memref == affineStore.getMemRef()) |
| return true; |
| } else if (hasEffect<MemoryEffects::Write>(storeOp, memref)) { |
| return true; |
| } |
| return false; |
| }); |
| } |
| |
| unsigned Node::hasFree(Value memref) const { |
| return llvm::any_of(memrefFrees, [&](Operation *freeOp) { |
| return hasEffect<MemoryEffects::Free>(freeOp, memref); |
| }); |
| } |
| |
| // Collects in 'storeOps' all store ops in this node that access 'memref'. |
| void Node::getStoreOpsForMemref(Value memref, |
| SmallVectorImpl<Operation *> *storeOps) const { |
| for (Operation *storeOp : stores) { |
| if (memref == cast<AffineWriteOpInterface>(storeOp).getMemRef()) |
| storeOps->push_back(storeOp); |
| } |
| } |
| |
| // Collects in 'loadOps' all load ops in this node that access 'memref'. |
| void Node::getLoadOpsForMemref(Value memref, |
| SmallVectorImpl<Operation *> *loadOps) const { |
| for (Operation *loadOp : loads) { |
| if (memref == cast<AffineReadOpInterface>(loadOp).getMemRef()) |
| loadOps->push_back(loadOp); |
| } |
| } |
| |
| // Collects in 'loadAndStoreMemrefSet' all memrefs on which this node has at |
| // least one load and at least one store. |
| void Node::getLoadAndStoreMemrefSet( |
| DenseSet<Value> *loadAndStoreMemrefSet) const { |
| llvm::SmallDenseSet<Value, 2> loadMemrefs; |
| for (Operation *loadOp : loads) { |
| loadMemrefs.insert(cast<AffineReadOpInterface>(loadOp).getMemRef()); |
| } |
| for (Operation *storeOp : stores) { |
| auto memref = cast<AffineWriteOpInterface>(storeOp).getMemRef(); |
| if (loadMemrefs.count(memref) > 0) |
| loadAndStoreMemrefSet->insert(memref); |
| } |
| } |
| |
| /// Returns in `values` the memref values on which this op has a memory |
| /// effect of one of the types `EffectTys`, not considering recursive effects. |
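| /// For instance, for a `memref.copy`, the copy source would be returned when |
| /// instantiated with `MemoryEffects::Read` and the copy target when |
| /// instantiated with `MemoryEffects::Write`. |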
| template <typename... EffectTys> |
| static void getEffectedValues(Operation *op, SmallVectorImpl<Value> &values) { |
| auto memOp = dyn_cast<MemoryEffectOpInterface>(op); |
| if (!memOp) { |
| if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>()) |
| // No effects. |
| return; |
| // Memref operands have to be considered as being affected. |
| for (Value operand : op->getOperands()) { |
| if (isa<MemRefType>(operand.getType())) |
| values.push_back(operand); |
| } |
| return; |
| } |
| SmallVector<SideEffects::EffectInstance<MemoryEffects::Effect>, 4> effects; |
| memOp.getEffects(effects); |
| for (auto &effect : effects) { |
| Value effectVal = effect.getValue(); |
| if (isa<EffectTys...>(effect.getEffect()) && effectVal && |
| isa<MemRefType>(effectVal.getType())) |
| values.push_back(effectVal); |
| } |
| } |
| |
| /// Adds `nodeOp` to the MDG, creating a new node and recording its memory |
| /// accesses (affine or non-affine) in `memrefAccesses` (a map from memrefs to |
| /// the list of nodes with accesses on them). |
| static Node * |
| addNodeToMDG(Operation *nodeOp, MemRefDependenceGraph &mdg, |
| DenseMap<Value, SetVector<unsigned>> &memrefAccesses) { |
| auto &nodes = mdg.nodes; |
| // Create a graph node to represent `nodeOp` and record all the load and |
| // store accesses it contains. |
| LoopNestStateCollector collector; |
| collector.collect(nodeOp); |
| unsigned newNodeId = mdg.nextNodeId++; |
| Node &node = nodes.insert({newNodeId, Node(newNodeId, nodeOp)}).first->second; |
| for (Operation *op : collector.loadOpInsts) { |
| node.loads.push_back(op); |
| auto memref = cast<AffineReadOpInterface>(op).getMemRef(); |
| memrefAccesses[memref].insert(node.id); |
| } |
| for (Operation *op : collector.storeOpInsts) { |
| node.stores.push_back(op); |
| auto memref = cast<AffineWriteOpInterface>(op).getMemRef(); |
| memrefAccesses[memref].insert(node.id); |
| } |
| for (Operation *op : collector.memrefLoads) { |
| SmallVector<Value> effectedValues; |
| getEffectedValues<MemoryEffects::Read>(op, effectedValues); |
| if (llvm::any_of(ValueRange(effectedValues).getTypes(), |
| [](Type type) { return !isa<MemRefType>(type); })) |
| // We do not know the interaction here. |
| return nullptr; |
| for (Value memref : effectedValues) |
| memrefAccesses[memref].insert(node.id); |
| node.memrefLoads.push_back(op); |
| } |
| for (Operation *op : collector.memrefStores) { |
| SmallVector<Value> effectedValues; |
| getEffectedValues<MemoryEffects::Write>(op, effectedValues); |
| if (llvm::any_of(ValueRange(effectedValues).getTypes(), |
| [](Type type) { return !isa<MemRefType>(type); })) |
| return nullptr; |
| for (Value memref : effectedValues) |
| memrefAccesses[memref].insert(node.id); |
| node.memrefStores.push_back(op); |
| } |
| for (Operation *op : collector.memrefFrees) { |
| SmallVector<Value> effectedValues; |
| getEffectedValues<MemoryEffects::Free>(op, effectedValues); |
| if (llvm::any_of(ValueRange(effectedValues).getTypes(), |
| [](Type type) { return !isa<MemRefType>(type); })) |
| return nullptr; |
| for (Value memref : effectedValues) |
| memrefAccesses[memref].insert(node.id); |
| node.memrefFrees.push_back(op); |
| } |
| |
| return &node; |
| } |
| |
| /// Returns the memref being read/written by a memref/affine load/store op. |
| static Value getMemRef(Operation *memOp) { |
| if (auto memrefLoad = dyn_cast<memref::LoadOp>(memOp)) |
| return memrefLoad.getMemRef(); |
| if (auto affineLoad = dyn_cast<AffineReadOpInterface>(memOp)) |
| return affineLoad.getMemRef(); |
| if (auto memrefStore = dyn_cast<memref::StoreOp>(memOp)) |
| return memrefStore.getMemRef(); |
| if (auto affineStore = dyn_cast<AffineWriteOpInterface>(memOp)) |
| return affineStore.getMemRef(); |
| llvm_unreachable("unexpected op"); |
| } |
| |
| /// Returns true if there may be a dependence on `memref` from srcNode's |
| /// memory ops to dstNode's memory ops, using affine memory dependence |
| /// analysis checks. The method assumes that there is at least one memory op |
| /// in srcNode's loads and stores on `memref`, and similarly for `dstNode`. |
| /// `srcNode.op` and `dstNode.op` are expected to be nested in the same block |
| /// and so the dependences are tested at the depth of that block. |
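| /// For example, for two sibling loop nests at the top level of a function: |
| /// affine.for %i = 0 to 10 { affine.store %v, %A[%i] : memref<10xf32> } // src |
| /// affine.for %i = 0 to 10 { %u = affine.load %A[%i] : memref<10xf32> } // dst |
| /// the store/load pair on %A is tested at depth 1 (there are no common |
| /// surrounding loops), where a dependence exists, and true is returned. |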
| static bool mayDependence(const Node &srcNode, const Node &dstNode, |
| Value memref) { |
| assert(srcNode.op->getBlock() == dstNode.op->getBlock()); |
| if (!isa<AffineForOp>(srcNode.op) || !isa<AffineForOp>(dstNode.op)) |
| return true; |
| |
| // Conservatively handle dependences involving non-affine loads/stores: |
| // return true if there exists a conflicting read/write access involving any |
| // such op. |
| |
| // Check whether there is a dependence from a source read/write op to a |
| // destination read/write one; all expected to be memref/affine load/store. |
| auto hasNonAffineDep = [&](ArrayRef<Operation *> srcMemOps, |
| ArrayRef<Operation *> dstMemOps) { |
| return llvm::any_of(srcMemOps, [&](Operation *srcOp) { |
| Value srcMemref = getMemRef(srcOp); |
| if (srcMemref != memref) |
| return false; |
| return llvm::any_of(dstMemOps, [&](Operation *dstOp) { |
| return srcMemref == getMemRef(dstOp); |
| }); |
| }); |
| }; |
| |
| SmallVector<Operation *> dstOps; |
| // Between non-affine src stores and dst load/store. |
| llvm::append_range(dstOps, llvm::concat<Operation *const>( |
| dstNode.loads, dstNode.stores, |
| dstNode.memrefLoads, dstNode.memrefStores)); |
| if (hasNonAffineDep(srcNode.memrefStores, dstOps)) |
| return true; |
| // Between non-affine loads and dst stores. |
| dstOps.clear(); |
| llvm::append_range(dstOps, llvm::concat<Operation *const>( |
| dstNode.stores, dstNode.memrefStores)); |
| if (hasNonAffineDep(srcNode.memrefLoads, dstOps)) |
| return true; |
| // Between affine stores and memref load/stores. |
| dstOps.clear(); |
| llvm::append_range(dstOps, llvm::concat<Operation *const>( |
| dstNode.memrefLoads, dstNode.memrefStores)); |
| if (hasNonAffineDep(srcNode.stores, dstOps)) |
| return true; |
| // Between affine loads and memref stores. |
| dstOps.clear(); |
| llvm::append_range(dstOps, dstNode.memrefStores); |
| if (hasNonAffineDep(srcNode.loads, dstOps)) |
| return true; |
| |
| // Affine load/store pairs. We don't need to check for locally allocated |
| // memrefs since the dependence analysis here is between mem ops from |
| // srcNode's for op to dstNode's for op at the depth at which those |
| // `affine.for` ops are nested, i.e., dependences at depth `d + 1` where |
| // `d` is the number of common surrounding loops. |
| for (auto *srcMemOp : |
| llvm::concat<Operation *const>(srcNode.stores, srcNode.loads)) { |
| MemRefAccess srcAcc(srcMemOp); |
| if (srcAcc.memref != memref) |
| continue; |
| for (auto *destMemOp : |
| llvm::concat<Operation *const>(dstNode.stores, dstNode.loads)) { |
| MemRefAccess destAcc(destMemOp); |
| if (destAcc.memref != memref) |
| continue; |
| // Check for a top-level dependence between srcNode and destNode's ops. |
| if (!noDependence(checkMemrefAccessDependence( |
| srcAcc, destAcc, getNestingDepth(srcNode.op) + 1))) |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| bool MemRefDependenceGraph::init(bool fullAffineDependences) { |
| LDBG() << "--- Initializing MDG ---"; |
| // Map from a memref to the set of ids of the nodes that have ops accessing |
| // the memref. |
| DenseMap<Value, SetVector<unsigned>> memrefAccesses; |
| |
| // Create graph nodes. |
| DenseMap<Operation *, unsigned> forToNodeMap; |
| for (Operation &op : block) { |
| if (auto forOp = dyn_cast<AffineForOp>(op)) { |
| Node *node = addNodeToMDG(&op, *this, memrefAccesses); |
| if (!node) |
| return false; |
| forToNodeMap[&op] = node->id; |
| } else if (isa<AffineReadOpInterface>(op)) { |
| // Create graph node for top-level load op. |
| Node node(nextNodeId++, &op); |
| node.loads.push_back(&op); |
| auto memref = cast<AffineReadOpInterface>(op).getMemRef(); |
| memrefAccesses[memref].insert(node.id); |
| nodes.insert({node.id, node}); |
| } else if (isa<AffineWriteOpInterface>(op)) { |
| // Create graph node for top-level store op. |
| Node node(nextNodeId++, &op); |
| node.stores.push_back(&op); |
| auto memref = cast<AffineWriteOpInterface>(op).getMemRef(); |
| memrefAccesses[memref].insert(node.id); |
| nodes.insert({node.id, node}); |
| } else if (op.getNumResults() > 0 && !op.use_empty()) { |
| // Create graph node for top-level producer of SSA values, which |
| // could be used by loop nest nodes. |
| Node *node = addNodeToMDG(&op, *this, memrefAccesses); |
| if (!node) |
| return false; |
| } else if (!isMemoryEffectFree(&op) && |
| (op.getNumRegions() == 0 || isa<RegionBranchOpInterface>(op))) { |
| // Create graph node for top-level op unless it is known to be |
| // memory-effect free. This covers all unknown/unregistered ops, |
| // non-affine ops with memory effects, and region-holding ops with a |
| // well-defined control flow. During the fusion validity checks, edges |
| // to/from these ops get looked at. |
| Node *node = addNodeToMDG(&op, *this, memrefAccesses); |
| if (!node) |
| return false; |
| } else if (op.getNumRegions() != 0 && !isa<RegionBranchOpInterface>(op)) { |
| // Return false if non-handled/unknown region-holding ops are found. We |
| // won't know what such ops do or what their regions mean; e.g., they may |
| // not be imperative ops. |
| LDBG() << "MDG init failed; unknown region-holding op found!"; |
| return false; |
| } |
| // Note that we don't create nodes for memory-effect-free ops that either |
| // have no regions (unless they have results that are used) or that |
| // implement RegionBranchOpInterface. |
| } |
| |
| LDBG() << "Created " << nodes.size() << " nodes"; |
| |
| // Add dependence edges between nodes which produce SSA values and their |
| // users. Load ops can be considered as the ones producing SSA values. |
| for (auto &idAndNode : nodes) { |
| const Node &node = idAndNode.second; |
| // Stores don't define SSA values; skip nodes that contain them. |
| if (!node.stores.empty()) |
| continue; |
| Operation *opInst = node.op; |
| for (Value value : opInst->getResults()) { |
| for (Operation *user : value.getUsers()) { |
| // Ignore users outside of the block. |
| if (block.getParent()->findAncestorOpInRegion(*user)->getBlock() != |
| &block) |
| continue; |
| SmallVector<AffineForOp, 4> loops; |
| getAffineForIVs(*user, &loops); |
| // Find the surrounding affine.for nested immediately within the |
| // block. |
| auto *it = llvm::find_if(loops, [&](AffineForOp loop) { |
| return loop->getBlock() == █ |
| }); |
| if (it == loops.end()) |
| continue; |
| assert(forToNodeMap.count(*it) > 0 && "missing mapping"); |
| unsigned userLoopNestId = forToNodeMap[*it]; |
| addEdge(node.id, userLoopNestId, value); |
| } |
| } |
| } |
| |
| // Walk memref access lists and add graph edges between dependent nodes. |
| for (auto &memrefAndList : memrefAccesses) { |
| unsigned n = memrefAndList.second.size(); |
| Value srcMemRef = memrefAndList.first; |
| // Add edges between all dependent pairs among the node IDs on this memref. |
| for (unsigned i = 0; i < n; ++i) { |
| unsigned srcId = memrefAndList.second[i]; |
| Node *srcNode = getNode(srcId); |
| bool srcHasStoreOrFree = |
| srcNode->hasStore(srcMemRef) || srcNode->hasFree(srcMemRef); |
| for (unsigned j = i + 1; j < n; ++j) { |
| unsigned dstId = memrefAndList.second[j]; |
| Node *dstNode = getNode(dstId); |
| bool dstHasStoreOrFree = |
| dstNode->hasStore(srcMemRef) || dstNode->hasFree(srcMemRef); |
| if (srcHasStoreOrFree || dstHasStoreOrFree) { |
| // Check precise affine deps if asked for; otherwise, conservative. |
| if (!fullAffineDependences || |
| mayDependence(*srcNode, *dstNode, srcMemRef)) |
| addEdge(srcId, dstId, srcMemRef); |
| } |
| } |
| } |
| } |
| return true; |
| } |
| |
| // Returns the graph node for 'id'. |
| const Node *MemRefDependenceGraph::getNode(unsigned id) const { |
| auto it = nodes.find(id); |
| assert(it != nodes.end()); |
| return &it->second; |
| } |
| |
| // Returns the graph node for 'forOp'. |
| const Node *MemRefDependenceGraph::getForOpNode(AffineForOp forOp) const { |
| for (auto &idAndNode : nodes) |
| if (idAndNode.second.op == forOp) |
| return &idAndNode.second; |
| return nullptr; |
| } |
| |
| // Adds a node with 'op' to the graph and returns its unique identifier. |
| unsigned MemRefDependenceGraph::addNode(Operation *op) { |
| Node node(nextNodeId++, op); |
| nodes.insert({node.id, node}); |
| return node.id; |
| } |
| |
| // Remove node 'id' (and its associated edges) from graph. |
| void MemRefDependenceGraph::removeNode(unsigned id) { |
| // Remove each edge in 'inEdges[id]'. |
| if (inEdges.count(id) > 0) { |
| SmallVector<Edge, 2> oldInEdges = inEdges[id]; |
| for (auto &inEdge : oldInEdges) { |
| removeEdge(inEdge.id, id, inEdge.value); |
| } |
| } |
| // Remove each edge in 'outEdges[id]'. |
| if (outEdges.contains(id)) { |
| SmallVector<Edge, 2> oldOutEdges = outEdges[id]; |
| for (auto &outEdge : oldOutEdges) { |
| removeEdge(id, outEdge.id, outEdge.value); |
| } |
| } |
| // Erase remaining node state. |
| inEdges.erase(id); |
| outEdges.erase(id); |
| nodes.erase(id); |
| } |
| |
| // Returns true if node 'id' writes to any memref which escapes (or is an |
| // argument to) the block. Returns false otherwise. |
| bool MemRefDependenceGraph::writesToLiveInOrEscapingMemrefs(unsigned id) const { |
| const Node *node = getNode(id); |
| for (auto *storeOpInst : node->stores) { |
| auto memref = cast<AffineWriteOpInterface>(storeOpInst).getMemRef(); |
| auto *op = memref.getDefiningOp(); |
| // Return true if 'memref' is a block argument. |
| if (!op) |
| return true; |
| // Return true if any use of 'memref' does not dereference it in an affine |
| // way. |
| for (auto *user : memref.getUsers()) |
| if (!isa<AffineMapAccessInterface>(*user)) |
| return true; |
| } |
| return false; |
| } |
| |
| // Returns true iff there is an edge from node 'srcId' to node 'dstId' which |
| // is for 'value' if non-null, or for any value otherwise. |
| bool MemRefDependenceGraph::hasEdge(unsigned srcId, unsigned dstId, |
| Value value) const { |
| if (!outEdges.contains(srcId) || !inEdges.contains(dstId)) { |
| return false; |
| } |
| bool hasOutEdge = llvm::any_of(outEdges.lookup(srcId), [=](const Edge &edge) { |
| return edge.id == dstId && (!value || edge.value == value); |
| }); |
| bool hasInEdge = llvm::any_of(inEdges.lookup(dstId), [=](const Edge &edge) { |
| return edge.id == srcId && (!value || edge.value == value); |
| }); |
| return hasOutEdge && hasInEdge; |
| } |
| |
| // Adds an edge from node 'srcId' to node 'dstId' for 'value'. |
| void MemRefDependenceGraph::addEdge(unsigned srcId, unsigned dstId, |
| Value value) { |
| if (!hasEdge(srcId, dstId, value)) { |
| outEdges[srcId].push_back({dstId, value}); |
| inEdges[dstId].push_back({srcId, value}); |
| if (isa<MemRefType>(value.getType())) |
| memrefEdgeCount[value]++; |
| } |
| } |
| |
| // Removes an edge from node 'srcId' to node 'dstId' for 'value'. |
| void MemRefDependenceGraph::removeEdge(unsigned srcId, unsigned dstId, |
| Value value) { |
| assert(inEdges.count(dstId) > 0); |
| assert(outEdges.count(srcId) > 0); |
| if (isa<MemRefType>(value.getType())) { |
| assert(memrefEdgeCount.count(value) > 0); |
| memrefEdgeCount[value]--; |
| } |
| // Remove 'srcId' from 'inEdges[dstId]'. |
| for (auto *it = inEdges[dstId].begin(); it != inEdges[dstId].end(); ++it) { |
| if ((*it).id == srcId && (*it).value == value) { |
| inEdges[dstId].erase(it); |
| break; |
| } |
| } |
| // Remove 'dstId' from 'outEdges[srcId]'. |
| for (auto *it = outEdges[srcId].begin(); it != outEdges[srcId].end(); ++it) { |
| if ((*it).id == dstId && (*it).value == value) { |
| outEdges[srcId].erase(it); |
| break; |
| } |
| } |
| } |
| |
| // Returns true if there is a path in the dependence graph from node 'srcId' |
| // to node 'dstId'. Returns false otherwise. `srcId`, `dstId`, and the |
| // operations that the edges connect are expected to be from the same block. |
| bool MemRefDependenceGraph::hasDependencePath(unsigned srcId, |
| unsigned dstId) const { |
| // Worklist state is: <node-id, next-output-edge-index-to-visit> |
| SmallVector<std::pair<unsigned, unsigned>, 4> worklist; |
| worklist.push_back({srcId, 0}); |
| Operation *dstOp = getNode(dstId)->op; |
| // Run DFS traversal to see if 'dstId' is reachable from 'srcId'. |
| while (!worklist.empty()) { |
| auto &idAndIndex = worklist.back(); |
| // Return true if we have reached 'dstId'. |
| if (idAndIndex.first == dstId) |
| return true; |
| // Pop and continue if node has no out edges, or if all out edges have |
| // already been visited. |
| if (!outEdges.contains(idAndIndex.first) || |
| idAndIndex.second == outEdges.lookup(idAndIndex.first).size()) { |
| worklist.pop_back(); |
| continue; |
| } |
| // Get graph edge to traverse. |
| const Edge edge = outEdges.lookup(idAndIndex.first)[idAndIndex.second]; |
| // Increment next output edge index for 'idAndIndex'. |
| ++idAndIndex.second; |
| // Add node at 'edge.id' to the worklist. We don't need to consider |
| // nodes that are "after" dstId in the containing block; one can't have a |
| // path to `dstId` from any of those nodes. |
| bool afterDst = dstOp->isBeforeInBlock(getNode(edge.id)->op); |
| if (!afterDst && edge.id != idAndIndex.first) |
| worklist.push_back({edge.id, 0}); |
| } |
| return false; |
| } |
| |
| // Returns the input edge count for node 'id' and 'memref' from src nodes |
| // which access 'memref' with a store operation. |
| unsigned MemRefDependenceGraph::getIncomingMemRefAccesses(unsigned id, |
| Value memref) const { |
| unsigned inEdgeCount = 0; |
| for (const Edge &inEdge : inEdges.lookup(id)) { |
| if (inEdge.value == memref) { |
| const Node *srcNode = getNode(inEdge.id); |
| // Only count in-edges from 'srcNode' if 'srcNode' stores to 'memref'. |
| if (srcNode->getStoreOpCount(memref) > 0) |
| ++inEdgeCount; |
| } |
| } |
| return inEdgeCount; |
| } |
| |
| // Returns the output edge count for node 'id' and 'memref' (if non-null), |
| // otherwise returns the total output edge count from node 'id'. |
| unsigned MemRefDependenceGraph::getOutEdgeCount(unsigned id, |
| Value memref) const { |
| unsigned outEdgeCount = 0; |
| for (const auto &outEdge : outEdges.lookup(id)) |
| if (!memref || outEdge.value == memref) |
| ++outEdgeCount; |
| return outEdgeCount; |
| } |
| |
| /// Return all nodes which define SSA values used in node 'id'. |
| void MemRefDependenceGraph::gatherDefiningNodes( |
| unsigned id, DenseSet<unsigned> &definingNodes) const { |
| for (const Edge &edge : inEdges.lookup(id)) |
| // By definition of edge, if the edge value is a non-memref value, |
| // then the dependence is between a graph node which defines an SSA value |
| // and another graph node which uses the SSA value. |
| if (!isa<MemRefType>(edge.value.getType())) |
| definingNodes.insert(edge.id); |
| } |
| |
| // Computes and returns an insertion point operation, before which the |
| // fused <srcId, dstId> loop nest can be inserted while preserving |
| // dependences. Returns nullptr if no such insertion point is found. |
| Operation * |
| MemRefDependenceGraph::getFusedLoopNestInsertionPoint(unsigned srcId, |
| unsigned dstId) const { |
| if (!outEdges.contains(srcId)) |
| return getNode(dstId)->op; |
| |
| // Skip if there is any defining node of 'dstId' that depends on 'srcId'. |
| DenseSet<unsigned> definingNodes; |
| gatherDefiningNodes(dstId, definingNodes); |
| if (llvm::any_of(definingNodes, |
| [&](unsigned id) { return hasDependencePath(srcId, id); })) { |
| LDBG() << "Can't fuse: a defining op with a user in the dst " |
| << "loop has dependence from the src loop"; |
| return nullptr; |
| } |
| |
| // Build set of insts in range (srcId, dstId) which depend on 'srcId'. |
| llvm::SmallPtrSet<Operation *, 2> srcDepInsts; |
| for (auto &outEdge : outEdges.lookup(srcId)) |
| if (outEdge.id != dstId) |
| srcDepInsts.insert(getNode(outEdge.id)->op); |
| |
| // Build set of insts in range (srcId, dstId) on which 'dstId' depends. |
| llvm::SmallPtrSet<Operation *, 2> dstDepInsts; |
| for (auto &inEdge : inEdges.lookup(dstId)) |
| if (inEdge.id != srcId) |
| dstDepInsts.insert(getNode(inEdge.id)->op); |
| |
| Operation *srcNodeInst = getNode(srcId)->op; |
| Operation *dstNodeInst = getNode(dstId)->op; |
| |
| // Computing insertion point: |
| // *) Walk all operation positions in Block operation list in the |
| // range (src, dst). For each operation 'op' visited in this search: |
| // *) Store in 'firstSrcDepPos' the first position where 'op' has a |
| // dependence edge from 'srcNode'. |
| // *) Store in 'lastDstDepPos' the last position where 'op' has a |
| // dependence edge to 'dstNode'. |
| // *) Compare 'firstSrcDepPos' and 'lastDstDepPos' to determine the |
| // operation insertion point (or return nullptr if no such insertion |
| // point exists: 'firstSrcDepPos' <= 'lastDstDepPos'). |
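| // For example, for a block [src, op0, op1, dst] where 'op1' is the only op |
| // with a dependence edge from 'src' (firstSrcDepPos = 1) and 'dst' depends |
| // only on 'op0' (lastDstDepPos = 0), firstSrcDepPos > lastDstDepPos holds, |
| // and 'op1' is returned: the fused nest can be inserted right before 'op1'. |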
| SmallVector<Operation *, 2> depInsts; |
| std::optional<unsigned> firstSrcDepPos; |
| std::optional<unsigned> lastDstDepPos; |
| unsigned pos = 0; |
| for (Block::iterator it = std::next(Block::iterator(srcNodeInst)); |
| it != Block::iterator(dstNodeInst); ++it) { |
| Operation *op = &(*it); |
| if (srcDepInsts.count(op) > 0 && firstSrcDepPos == std::nullopt) |
| firstSrcDepPos = pos; |
| if (dstDepInsts.count(op) > 0) |
| lastDstDepPos = pos; |
| depInsts.push_back(op); |
| ++pos; |
| } |
| |
| if (firstSrcDepPos.has_value()) { |
| if (lastDstDepPos.has_value()) { |
| if (*firstSrcDepPos <= *lastDstDepPos) { |
| // No valid insertion point exists which preserves dependences. |
| return nullptr; |
| } |
| } |
| // Return the insertion point at 'firstSrcDepPos'. |
| return depInsts[*firstSrcDepPos]; |
| } |
| // No dependence targets in range (or only dst deps in range); return |
| // 'dstNodeInst' as the insertion point. |
| return dstNodeInst; |
| } |
| |
| // Updates edge mappings from node 'srcId' to node 'dstId' after fusing them, |
| // taking into account that: |
| // *) if 'removeSrcId' is true, 'srcId' will be removed after fusion, |
| // *) memrefs in 'privateMemRefs' have been replaced in the node at 'dstId' |
| // by private memrefs. |
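| // For example, if 'srcId' has an in-edge from node 'n' on memref '%m' and |
| // '%m' is not in 'privateMemRefs', an edge 'n' -> 'dstId' on '%m' is added. |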
| void MemRefDependenceGraph::updateEdges(unsigned srcId, unsigned dstId, |
| const DenseSet<Value> &privateMemRefs, |
| bool removeSrcId) { |
| // For each edge in 'inEdges[srcId]': add new edge remapping to 'dstId'. |
| if (inEdges.count(srcId) > 0) { |
| SmallVector<Edge, 2> oldInEdges = inEdges[srcId]; |
| for (auto &inEdge : oldInEdges) { |
| // Add edge from 'inEdge.id' to 'dstId' if it's not a private memref. |
| if (!privateMemRefs.contains(inEdge.value)) |
| addEdge(inEdge.id, dstId, inEdge.value); |
| } |
| } |
| // For each edge in 'outEdges[srcId]': remove edge from 'srcId' to 'dstId'. |
| // If 'srcId' is going to be removed, remap all the out edges to 'dstId'. |
| if (outEdges.count(srcId) > 0) { |
| SmallVector<Edge, 2> oldOutEdges = outEdges[srcId]; |
| for (auto &outEdge : oldOutEdges) { |
| // Remove any out edges from 'srcId' to 'dstId' across memrefs. |
| if (outEdge.id == dstId) |
| removeEdge(srcId, outEdge.id, outEdge.value); |
| else if (removeSrcId) { |
| addEdge(dstId, outEdge.id, outEdge.value); |
| removeEdge(srcId, outEdge.id, outEdge.value); |
| } |
| } |
| } |
| // Remove any edges in 'inEdges[dstId]' on a memref in 'privateMemRefs' |
| // (which is being replaced by a private memref). These edges could come |
| // from nodes other than 'srcId' that weren't handled in the steps above. |
| if (inEdges.count(dstId) > 0 && !privateMemRefs.empty()) { |
| SmallVector<Edge, 2> oldInEdges = inEdges[dstId]; |
| for (auto &inEdge : oldInEdges) |
| if (privateMemRefs.count(inEdge.value) > 0) |
| removeEdge(inEdge.id, dstId, inEdge.value); |
| } |
| } |
| |
| // Update edge mappings for nodes 'sibId' and 'dstId' to reflect fusion |
| // of sibling node 'sibId' into node 'dstId'. |
| void MemRefDependenceGraph::updateEdges(unsigned sibId, unsigned dstId) { |
| // For each edge in 'inEdges[sibId]': |
| // *) Add new edge from source node 'inEdge.id' to 'dstNode'. |
| // *) Remove edge from source node 'inEdge.id' to 'sibNode'. |
| if (inEdges.count(sibId) > 0) { |
| SmallVector<Edge, 2> oldInEdges = inEdges[sibId]; |
| for (auto &inEdge : oldInEdges) { |
| addEdge(inEdge.id, dstId, inEdge.value); |
| removeEdge(inEdge.id, sibId, inEdge.value); |
| } |
| } |
| |
| // For each edge in 'outEdges[sibId]': |
| // *) Add new edge from 'dstId' to 'outEdge.id'. |
| // *) Remove edge from 'sibId' to 'outEdge.id'. |
| if (outEdges.count(sibId) > 0) { |
| SmallVector<Edge, 2> oldOutEdges = outEdges[sibId]; |
| for (auto &outEdge : oldOutEdges) { |
| addEdge(dstId, outEdge.id, outEdge.value); |
| removeEdge(sibId, outEdge.id, outEdge.value); |
| } |
| } |
| } |
| |
| // Adds the provided load, store, and free ops to the node at 'id'. |
| void MemRefDependenceGraph::addToNode(unsigned id, ArrayRef<Operation *> loads, |
| ArrayRef<Operation *> stores, |
| ArrayRef<Operation *> memrefLoads, |
| ArrayRef<Operation *> memrefStores, |
| ArrayRef<Operation *> memrefFrees) { |
| Node *node = getNode(id); |
| llvm::append_range(node->loads, loads); |
| llvm::append_range(node->stores, stores); |
| llvm::append_range(node->memrefLoads, memrefLoads); |
| llvm::append_range(node->memrefStores, memrefStores); |
| llvm::append_range(node->memrefFrees, memrefFrees); |
| } |
| |
| void MemRefDependenceGraph::clearNodeLoadAndStores(unsigned id) { |
| Node *node = getNode(id); |
| node->loads.clear(); |
| node->stores.clear(); |
| } |
| |
| // Calls 'callback' for each input edge incident to node 'id' which carries a |
| // memref dependence. |
| void MemRefDependenceGraph::forEachMemRefInputEdge( |
| unsigned id, const std::function<void(Edge)> &callback) { |
| if (inEdges.count(id) > 0) |
| forEachMemRefEdge(inEdges.at(id), callback); |
| } |
| |
| // Calls 'callback' for each output edge from node 'id' which carries a |
| // memref dependence. |
| void MemRefDependenceGraph::forEachMemRefOutputEdge( |
| unsigned id, const std::function<void(Edge)> &callback) { |
| if (outEdges.count(id) > 0) |
| forEachMemRefEdge(outEdges.at(id), callback); |
| } |
| |
| // Calls 'callback' for each edge in 'edges' which carries a memref |
| // dependence. |
| void MemRefDependenceGraph::forEachMemRefEdge( |
| ArrayRef<Edge> edges, const std::function<void(Edge)> &callback) { |
| for (const auto &edge : edges) { |
| // Skip if 'edge' is not a memref dependence edge. |
| if (!isa<MemRefType>(edge.value.getType())) |
| continue; |
| assert(nodes.count(edge.id) > 0); |
| // Visit current input edge 'edge'. |
| callback(edge); |
| } |
| } |
| |
| void MemRefDependenceGraph::print(raw_ostream &os) const { |
| os << "\nMemRefDependenceGraph\n"; |
| os << "\nNodes:\n"; |
| for (const auto &idAndNode : nodes) { |
| os << "Node: " << idAndNode.first << "\n"; |
| auto it = inEdges.find(idAndNode.first); |
| if (it != inEdges.end()) { |
| for (const auto &e : it->second) |
| os << " InEdge: " << e.id << " " << e.value << "\n"; |
| } |
| it = outEdges.find(idAndNode.first); |
| if (it != outEdges.end()) { |
| for (const auto &e : it->second) |
| os << " OutEdge: " << e.id << " " << e.value << "\n"; |
| } |
| } |
| } |
| |
| void mlir::affine::getAffineForIVs(Operation &op, |
| SmallVectorImpl<AffineForOp> *loops) { |
| auto *currOp = op.getParentOp(); |
| // Traverse up the hierarchy collecting all 'affine.for' operations while |
| // skipping over 'affine.if' operations. |
| while (currOp && !currOp->hasTrait<OpTrait::AffineScope>()) { |
| if (auto currAffineForOp = dyn_cast<AffineForOp>(currOp)) |
| loops->push_back(currAffineForOp); |
| currOp = currOp->getParentOp(); |
| } |
| std::reverse(loops->begin(), loops->end()); |
| } |
| |
| void mlir::affine::getEnclosingAffineOps(Operation &op, |
| SmallVectorImpl<Operation *> *ops) { |
| ops->clear(); |
| Operation *currOp = op.getParentOp(); |
| |
| // Traverse up the hierarchy collecting all `affine.for`, `affine.if`, and |
| // `affine.parallel` operations. |
| while (currOp && !currOp->hasTrait<OpTrait::AffineScope>()) { |
| if (isa<AffineIfOp, AffineForOp, AffineParallelOp>(currOp)) |
| ops->push_back(currOp); |
| currOp = currOp->getParentOp(); |
| } |
| std::reverse(ops->begin(), ops->end()); |
| } |
| |
| // Populates 'cst' with FlatAffineValueConstraints which represent the |
| // original domain of the loop bounds that define 'ivs'. |
| LogicalResult ComputationSliceState::getSourceAsConstraints( |
| FlatAffineValueConstraints &cst) const { |
| assert(!ivs.empty() && "Cannot have a slice without its IVs"); |
| cst = FlatAffineValueConstraints(/*numDims=*/ivs.size(), /*numSymbols=*/0, |
| /*numLocals=*/0, ivs); |
| for (Value iv : ivs) { |
| AffineForOp loop = getForInductionVarOwner(iv); |
| assert(loop && "Expected affine for"); |
| if (failed(cst.addAffineForOpDomain(loop))) |
| return failure(); |
| } |
| return success(); |
| } |
| |
| // Populates 'cst' with FlatAffineValueConstraints which represent slice bounds. |
| LogicalResult |
| ComputationSliceState::getAsConstraints(FlatAffineValueConstraints *cst) const { |
| assert(!lbOperands.empty()); |
| // Adds src 'ivs' as dimension variables in 'cst'. |
| unsigned numDims = ivs.size(); |
| // Adds operands (dst ivs and symbols) as symbols in 'cst'. |
| unsigned numSymbols = lbOperands[0].size(); |
| |
| // 'values' holds the 'ivs' followed by 'lbOperands[0]'. |
| SmallVector<Value, 4> values(ivs); |
| values.append(lbOperands[0].begin(), lbOperands[0].end()); |
| *cst = FlatAffineValueConstraints(numDims, numSymbols, 0, values); |
| |
| // Add loop bound constraints for values which are loop IVs of the |
| // destination of fusion and equality constraints for symbols which are |
| // constants. |
| for (unsigned i = numDims, end = values.size(); i < end; ++i) { |
| Value value = values[i]; |
| assert(cst->containsVar(value) && "value expected to be present"); |
| if (isValidSymbol(value)) { |
| // Check if the symbol is a constant. |
| if (std::optional<int64_t> cOp = getConstantIntValue(value)) |
| cst->addBound(BoundType::EQ, value, cOp.value()); |
| } else if (auto loop = getForInductionVarOwner(value)) { |
| if (failed(cst->addAffineForOpDomain(loop))) |
| return failure(); |
| } |
| } |
| |
| // Add slice bounds on 'ivs' using maps 'lbs'/'ubs' with 'lbOperands[0]'. |
| LogicalResult ret = cst->addSliceBounds(ivs, lbs, ubs, lbOperands[0]); |
| assert(succeeded(ret) && |
| "should not fail as we never have semi-affine slice maps"); |
| (void)ret; |
| return success(); |
| } |
| |
| // Clears the bound maps and their operand state. |
| void ComputationSliceState::clearBounds() { |
| lbs.clear(); |
| ubs.clear(); |
| lbOperands.clear(); |
| ubOperands.clear(); |
| } |
| |
| void ComputationSliceState::dump() const { |
| llvm::errs() << "\tIVs:\n"; |
| for (Value iv : ivs) |
| llvm::errs() << "\t\t" << iv << "\n"; |
| |
| llvm::errs() << "\tLBs:\n"; |
| for (auto en : llvm::enumerate(lbs)) { |
| llvm::errs() << "\t\t" << en.value() << "\n"; |
| llvm::errs() << "\t\tOperands:\n"; |
| for (Value lbOp : lbOperands[en.index()]) |
| llvm::errs() << "\t\t\t" << lbOp << "\n"; |
| } |
| |
| llvm::errs() << "\tUBs:\n"; |
| for (auto en : llvm::enumerate(ubs)) { |
| llvm::errs() << "\t\t" << en.value() << "\n"; |
| llvm::errs() << "\t\tOperands:\n"; |
| for (Value ubOp : ubOperands[en.index()]) |
| llvm::errs() << "\t\t\t" << ubOp << "\n"; |
| } |
| } |
| |
| /// Fast check to determine if the computation slice is maximal. Returns |
| /// true if each slice dimension maps to an existing dst dimension and both |
| /// the src and the dst loops for those dimensions have the same bounds. |
| /// Returns false if the src and the dst loops don't have the same bounds. |
| /// Returns std::nullopt if none of the above can be proven. |
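| /// For example, a slice dimension with lbs[i] = (d0) -> (d0) and |
| /// ubs[i] = (d0) -> (d0 + 1) spans a single iteration mapped to the dst loop |
| /// IV bound to d0; the fast check then compares the constant bounds and the |
| /// steps of that dst loop with those of the src loop owning ivs[i]. |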
| std::optional<bool> ComputationSliceState::isSliceMaximalFastCheck() const { |
| assert(lbs.size() == ubs.size() && !lbs.empty() && !ivs.empty() && |
| "Unexpected number of lbs, ubs and ivs in slice"); |
| |
| for (unsigned i = 0, end = lbs.size(); i < end; ++i) { |
| AffineMap lbMap = lbs[i]; |
| AffineMap ubMap = ubs[i]; |
| |
| // Check if this slice is just an equality along this dimension. |
| if (!lbMap || !ubMap || lbMap.getNumResults() != 1 || |
| ubMap.getNumResults() != 1 || |
| lbMap.getResult(0) + 1 != ubMap.getResult(0) || |
| // The condition above will be true for maps describing a single |
| // iteration (e.g., lbMap.getResult(0) = 0, ubMap.getResult(0) = 1). |
| // Make sure we skip those cases by checking that the lb result is not |
| // just a constant. |
| isa<AffineConstantExpr>(lbMap.getResult(0))) |
| return std::nullopt; |
| |
| // Limited support: we expect the lb result to be just a loop dimension for |
| // now. |
| AffineDimExpr result = dyn_cast<AffineDimExpr>(lbMap.getResult(0)); |
| if (!result) |
| return std::nullopt; |
| |
| // Retrieve dst loop bounds. |
| AffineForOp dstLoop = |
| getForInductionVarOwner(lbOperands[i][result.getPosition()]); |
| if (!dstLoop) |
| return std::nullopt; |
| AffineMap dstLbMap = dstLoop.getLowerBoundMap(); |
| AffineMap dstUbMap = dstLoop.getUpperBoundMap(); |
| |
| // Retrieve src loop bounds. |
| AffineForOp srcLoop = getForInductionVarOwner(ivs[i]); |
| assert(srcLoop && "Expected affine for"); |
| AffineMap srcLbMap = srcLoop.getLowerBoundMap(); |
| AffineMap srcUbMap = srcLoop.getUpperBoundMap(); |
| |
| // Limited support: we expect simple src and dst loops with a single |
| // constant component per bound for now. |
| if (srcLbMap.getNumResults() != 1 || srcUbMap.getNumResults() != 1 || |
| dstLbMap.getNumResults() != 1 || dstUbMap.getNumResults() != 1) |
| return std::nullopt; |
| |
| AffineExpr srcLbResult = srcLbMap.getResult(0); |
| AffineExpr dstLbResult = dstLbMap.getResult(0); |
| AffineExpr srcUbResult = srcUbMap.getResult(0); |
| AffineExpr dstUbResult = dstUbMap.getResult(0); |
| if (!isa<AffineConstantExpr>(srcLbResult) || |
| !isa<AffineConstantExpr>(srcUbResult) || |
| !isa<AffineConstantExpr>(dstLbResult) || |
| !isa<AffineConstantExpr>(dstUbResult)) |
| return std::nullopt; |
| |
| // Check if src and dst loop bounds are the same. If not, we can guarantee |
| // that the slice is not maximal. |
| if (srcLbResult != dstLbResult || srcUbResult != dstUbResult || |
| srcLoop.getStep() != dstLoop.getStep()) |
| return false; |
| } |
| |
| return true; |
| } |
| |
| /// Returns true if it is deterministically verified that the original |
| /// iteration space of the slice is contained within the new iteration space |
| /// that is created after fusing 'this' slice into its destination. |
| std::optional<bool> ComputationSliceState::isSliceValid() const { |
| // Fast check to determine if the slice is valid. If the following conditions |
| // are verified to be true, slice is declared valid by the fast check: |
| // 1. Each slice loop is a single iteration loop bound in terms of a single |
| // destination loop IV. |
| // 2. Loop bounds of the destination loop IV (from above) and those of the |
| // source loop IV are exactly the same. |
| // If the fast check is inconclusive or false, we proceed with a more |
| // expensive analysis. |
| // TODO: Store the result of the fast check, as it might be used again in |
| // `canRemoveSrcNodeAfterFusion`. |
| std::optional<bool> isValidFastCheck = isSliceMaximalFastCheck(); |
| if (isValidFastCheck && *isValidFastCheck) |
| return true; |
| |
| // Create constraints for the source loop nest from which the slice is computed. |
| FlatAffineValueConstraints srcConstraints; |
| // TODO: Store the source's domain to avoid computation at each depth. |
| if (failed(getSourceAsConstraints(srcConstraints))) { |
| LDBG() << "Unable to compute source's domain"; |
| return std::nullopt; |
| } |
| // As the set difference utility currently cannot handle symbols in its |
| // operands, validity of the slice cannot be determined. |
| if (srcConstraints.getNumSymbolVars() > 0) { |
| LDBG() << "Cannot handle symbols in source domain"; |
| return std::nullopt; |
| } |
| // TODO: Handle local vars in the source domains while using the 'projectOut' |
| // utility below. Currently, aligning is not done assuming that there will be |
| // no local vars in the source domain. |
| if (srcConstraints.getNumLocalVars() != 0) { |
| LDBG() << "Cannot handle locals in source domain"; |
| return std::nullopt; |
| } |
| |
| // Create constraints for the slice loop nest that would be created if the |
| // fusion succeeds. |
| FlatAffineValueConstraints sliceConstraints; |
| if (failed(getAsConstraints(&sliceConstraints))) { |
| LDBG() << "Unable to compute slice's domain"; |
| return std::nullopt; |
| } |
| |
| // Project out every dimension other than the 'ivs' to express the slice's |
| // domain completely in terms of the source's IVs. |
| sliceConstraints.projectOut(ivs.size(), |
| sliceConstraints.getNumVars() - ivs.size()); |
| |
| LDBG() << "Domain of the source of the slice:\n" |
| << "Source constraints:" << srcConstraints |
| << "\nDomain of the slice if this fusion succeeds " |
| << "(expressed in terms of its source's IVs):\n" |
| << "Slice constraints:" << sliceConstraints; |
| |
| // TODO: Store 'srcSet' to avoid recalculating for each depth. |
| PresburgerSet srcSet(srcConstraints); |
| PresburgerSet sliceSet(sliceConstraints); |
| PresburgerSet diffSet = sliceSet.subtract(srcSet); |
| |
| if (!diffSet.isIntegerEmpty()) { |
| LDBG() << "Incorrect slice"; |
| return false; |
| } |
| return true; |
| } |
| |
| /// Returns true if the computation slice encloses all the iterations of the |
| /// sliced loop nest. Returns false if it does not. Returns std::nullopt if it |
| /// cannot determine if the slice is maximal or not. |
| std::optional<bool> ComputationSliceState::isMaximal() const { |
| // Fast check to determine if the computation slice is maximal. If the result |
| // is inconclusive, we proceed with a more expensive analysis. |
| std::optional<bool> isMaximalFastCheck = isSliceMaximalFastCheck(); |
| if (isMaximalFastCheck) |
| return isMaximalFastCheck; |
| |
| // Create constraints for the src loop nest being sliced. |
| FlatAffineValueConstraints srcConstraints(/*numDims=*/ivs.size(), |
| /*numSymbols=*/0, |
| /*numLocals=*/0, ivs); |
| for (Value iv : ivs) { |
| AffineForOp loop = getForInductionVarOwner(iv); |
| assert(loop && "Expected affine for"); |
| if (failed(srcConstraints.addAffineForOpDomain(loop))) |
| return std::nullopt; |
| } |
| |
| // Create constraints for the slice using the dst loop nest information. We |
| // retrieve existing dst loops from the lbOperands. |
| SmallVector<Value> consumerIVs; |
| for (Value lbOp : lbOperands[0]) |
| if (getForInductionVarOwner(lbOp)) |
| consumerIVs.push_back(lbOp); |
| |
| // Add empty IV Values for those new loops that are not equalities and, |
| // therefore, are not yet materialized in the IR. |
| for (int i = consumerIVs.size(), end = ivs.size(); i < end; ++i) |
| consumerIVs.push_back(Value()); |
| |
| FlatAffineValueConstraints sliceConstraints(/*numDims=*/consumerIVs.size(), |
| /*numSymbols=*/0, |
| /*numLocals=*/0, consumerIVs); |
| |
| if (failed(sliceConstraints.addDomainFromSliceMaps(lbs, ubs, lbOperands[0]))) |
| return std::nullopt; |
| |
| if (srcConstraints.getNumDimVars() != sliceConstraints.getNumDimVars()) |
| // Constraint dims are different. The integer set difference can't be |
| // computed so we don't know if the slice is maximal. |
| return std::nullopt; |
| |
| // Compute the difference between the src loop nest and the slice integer |
| // sets. |
| PresburgerSet srcSet(srcConstraints); |
| PresburgerSet sliceSet(sliceConstraints); |
| PresburgerSet diffSet = srcSet.subtract(sliceSet); |
| return diffSet.isIntegerEmpty(); |
| } |
| |
| unsigned MemRefRegion::getRank() const { |
| return cast<MemRefType>(memref.getType()).getRank(); |
| } |
| |
| std::optional<int64_t> MemRefRegion::getConstantBoundingSizeAndShape( |
| SmallVectorImpl<int64_t> *shape, SmallVectorImpl<AffineMap> *lbs) const { |
| auto memRefType = cast<MemRefType>(memref.getType()); |
| MLIRContext *context = memref.getContext(); |
| unsigned rank = memRefType.getRank(); |
| if (shape) |
| shape->reserve(rank); |
| |
| assert(rank == cst.getNumDimVars() && "inconsistent memref region"); |
| |
| // Use a copy of the region constraints that has upper/lower bounds for each |
| // memref dimension with static size added to guard against potential |
| // over-approximation from projection or union bounding box. We may not add |
| // these on the region itself since they might just be redundant constraints |
| // that will need non-trivial means to eliminate. |
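| // For example, for a region on a memref<16xf32> whose projected constraints |
| // over-approximate the accessed extent to 0 <= d0 <= 31, the added bounds |
| // 0 <= d0 <= 15 clamp the computed shape to the memref's footprint. |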
| FlatLinearValueConstraints cstWithShapeBounds(cst); |
| for (unsigned r = 0; r < rank; r++) { |
| cstWithShapeBounds.addBound(BoundType::LB, r, 0); |
| int64_t dimSize = memRefType.getDimSize(r); |
| if (ShapedType::isDynamic(dimSize)) |
| continue; |
| cstWithShapeBounds.addBound(BoundType::UB, r, dimSize - 1); |
| } |
| |
| // Find a constant upper bound on the extent of this memref region along |
| // each dimension. |
| int64_t numElements = 1; |
| int64_t diffConstant; |
| for (unsigned d = 0; d < rank; d++) { |
| AffineMap lb; |
| std::optional<int64_t> diff = |
| cstWithShapeBounds.getConstantBoundOnDimSize(context, d, &lb); |
| if (diff.has_value()) { |
| diffConstant = *diff; |
| assert(diffConstant >= 0 && "dim size bound cannot be negative"); |
| } else { |
| // If no constant bound is found, then it can always be bounded by the |
| // memref's dim size if the latter has a constant size along this dim. |
| auto dimSize = memRefType.getDimSize(d); |
| if (ShapedType::isDynamic(dimSize)) |
| return std::nullopt; |
| diffConstant = dimSize; |
| // Lower bound becomes 0. |
| lb = AffineMap::get(/*dimCount=*/0, cstWithShapeBounds.getNumSymbolVars(), |
| /*result=*/getAffineConstantExpr(0, context)); |
| } |
| numElements *= diffConstant; |
| // Populate outputs if available. |
| if (lbs) |
| lbs->push_back(lb); |
| if (shape) |
| shape->push_back(diffConstant); |
| } |
| return numElements; |
| } |
| |
| void MemRefRegion::getLowerAndUpperBound(unsigned pos, AffineMap &lbMap, |
| AffineMap &ubMap) const { |
| assert(pos < cst.getNumDimVars() && "invalid position"); |
| auto memRefType = cast<MemRefType>(memref.getType()); |
| unsigned rank = memRefType.getRank(); |
| |
| assert(rank == cst.getNumDimVars() && "inconsistent memref region"); |
| |
| auto boundPairs = cst.getLowerAndUpperBound( |
| pos, /*offset=*/0, /*num=*/rank, cst.getNumDimAndSymbolVars(), |
| /*localExprs=*/{}, memRefType.getContext()); |
| lbMap = boundPairs.first; |
| ubMap = boundPairs.second; |
| assert(lbMap && "lower bound for a region must exist"); |
| assert(ubMap && "upper bound for a region must exist"); |
| assert(lbMap.getNumInputs() == cst.getNumDimAndSymbolVars() - rank); |
| assert(ubMap.getNumInputs() == cst.getNumDimAndSymbolVars() - rank); |
| } |
| |
| LogicalResult MemRefRegion::unionBoundingBox(const MemRefRegion &other) { |
| assert(memref == other.memref); |
| return cst.unionBoundingBox(*other.getConstraints()); |
| } |
| |
| /// Computes the memory region accessed by this memref access `op`, with the |
| /// region represented as constraints symbolic/parametric in the 'loopDepth' |
| /// loops surrounding `op` and any additional function symbols. |
| // For example, the memref region for this load operation at loopDepth = 1 will |
| // be as below: |
| // |
| // affine.for %i = 0 to 32 { |
| // affine.for %ii = %i to (d0) -> (d0 + 8) (%i) { |
| // load %A[%ii] |
| // } |
| // } |
| // |
| // region: {memref = %A, write = false, {%i <= m0 <= %i + 7} } |
| // The last field is a 2-d FlatAffineValueConstraints symbolic in %i. |
| // |
| // TODO: extend this to any other memref dereferencing ops |
| // (dma_start, dma_wait). |
| LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth, |
| const ComputationSliceState *sliceState, |
| bool addMemRefDimBounds, bool dropLocalVars, |
| bool dropOuterIvs) { |
| assert((isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) && |
| "affine read/write op expected"); |
| |
| MemRefAccess access(op); |
| memref = access.memref; |
| write = access.isStore(); |
| |
| unsigned rank = access.getRank(); |
| |
| LDBG() << "MemRefRegion::compute: " << *op << " depth: " << loopDepth; |
| |
| // 0-d memrefs. |
| if (rank == 0) { |
| SmallVector<Value, 4> ivs; |
| getAffineIVs(*op, ivs); |
| assert(loopDepth <= ivs.size() && "invalid 'loopDepth'"); |
| // The first 'loopDepth' IVs are symbols for this region. |
| ivs.resize(loopDepth); |
| // A 0-d memref has a 0-d region. |
| cst = FlatAffineValueConstraints(rank, loopDepth, /*numLocals=*/0, ivs); |
| return success(); |
| } |
| |
| // Build the constraints for this region. |
| AffineValueMap accessValueMap; |
| access.getAccessMap(&accessValueMap); |
| AffineMap accessMap = accessValueMap.getAffineMap(); |
| |
| unsigned numDims = accessMap.getNumDims(); |
| unsigned numSymbols = accessMap.getNumSymbols(); |
| unsigned numOperands = accessValueMap.getNumOperands(); |
| // Merge operands with slice operands. |
| SmallVector<Value, 4> operands; |
| operands.resize(numOperands); |
| for (unsigned i = 0; i < numOperands; ++i) |
| operands[i] = accessValueMap.getOperand(i); |
| |
| if (sliceState != nullptr) { |
| operands.reserve(operands.size() + sliceState->lbOperands[0].size()); |
| // Append slice operands to 'operands' as symbols. |
| for (auto extraOperand : sliceState->lbOperands[0]) { |
| if (!llvm::is_contained(operands, extraOperand)) { |
| operands.push_back(extraOperand); |
| numSymbols++; |
| } |
| } |
| } |
| // We'll first associate the dims and symbols of the access map to the dims |
| // and symbols resp. of cst. This will change below once cst is |
| // fully constructed. |
| cst = FlatAffineValueConstraints(numDims, numSymbols, 0, operands); |
| |
| // Add inequalities for loop lower/upper bounds, and equality constraints |
| // for any operands that are constant-valued symbols. |
| for (unsigned i = 0; i < numDims + numSymbols; ++i) { |
| auto operand = operands[i]; |
| if (auto affineFor = getForInductionVarOwner(operand)) { |
| // Note that cst can now have more dimensions than accessMap if the |
| // bounds expressions involve outer loops or other symbols. |
| // TODO: rewrite this to use getInstIndexSet; this way |
| // conditionals will be handled when the latter supports it. |
| if (failed(cst.addAffineForOpDomain(affineFor))) |
| return failure(); |
| } else if (auto parallelOp = getAffineParallelInductionVarOwner(operand)) { |
| if (failed(cst.addAffineParallelOpDomain(parallelOp))) |
| return failure(); |
| } else if (isValidSymbol(operand)) { |
| // Check if the symbol is a constant. |
| Value symbol = operand; |
| if (auto constVal = getConstantIntValue(symbol)) |
| cst.addBound(BoundType::EQ, symbol, constVal.value()); |
| } else { |
| LDBG() << "unknown affine dimensional value"; |
| return failure(); |
| } |
| } |
| |
| // Add lower/upper bounds on loop IVs using bounds from 'sliceState'. |
| if (sliceState != nullptr) { |
| // Add dim and symbol slice operands. |
| for (auto operand : sliceState->lbOperands[0]) { |
| if (failed(cst.addInductionVarOrTerminalSymbol(operand))) |
| return failure(); |
| } |
| // Add upper/lower bounds from 'sliceState' to 'cst'. |
| LogicalResult ret = |
| cst.addSliceBounds(sliceState->ivs, sliceState->lbs, sliceState->ubs, |
| sliceState->lbOperands[0]); |
| assert(succeeded(ret) && |
| "should not fail as we never have semi-affine slice maps"); |
| (void)ret; |
| } |
| |
| // Add access function equalities to connect loop IVs to data dimensions. |
| if (failed(cst.composeMap(&accessValueMap))) { |
| op->emitError("getMemRefRegion: compose affine map failed"); |
| LDBG() << "Access map: " << accessValueMap.getAffineMap(); |
| return failure(); |
| } |
| |
| // Set all variables appearing after the first 'rank' variables as |
| // symbolic variables - so that the ones corresponding to the memref |
| // dimensions are the dimensional variables for the memref region. |
| cst.setDimSymbolSeparation(cst.getNumDimAndSymbolVars() - rank); |
| |
| // Eliminate any loop IVs other than the outermost 'loopDepth' IVs, on which |
| // this memref region is symbolic. |
| SmallVector<Value, 4> enclosingIVs; |
| getAffineIVs(*op, enclosingIVs); |
| assert(loopDepth <= enclosingIVs.size() && "invalid loop depth"); |
| enclosingIVs.resize(loopDepth); |
| SmallVector<Value, 4> vars; |
| cst.getValues(cst.getNumDimVars(), cst.getNumDimAndSymbolVars(), &vars); |
| for (auto en : llvm::enumerate(vars)) { |
| if ((isAffineInductionVar(en.value())) && |
| !llvm::is_contained(enclosingIVs, en.value())) { |
| if (dropOuterIvs) { |
| cst.projectOut(en.value()); |
| } else { |
| unsigned varPosition; |
| cst.findVar(en.value(), &varPosition); |
| auto varKind = cst.getVarKindAt(varPosition); |
| varPosition -= cst.getNumDimVars(); |
| cst.convertToLocal(varKind, varPosition, varPosition + 1); |
| } |
| } |
| } |
| |
| // Project out any local variables (these would have been added for any |
| // mod/divs) if specified. |
| if (dropLocalVars) |
| cst.projectOut(cst.getNumDimAndSymbolVars(), cst.getNumLocalVars()); |
| |
| // Constant fold any symbolic variables. |
| cst.constantFoldVarRange(/*pos=*/cst.getNumDimVars(), |
| /*num=*/cst.getNumSymbolVars()); |
| |
| assert(cst.getNumDimVars() == rank && "unexpected MemRefRegion format"); |
| |
| // Add upper/lower bounds for each memref dimension with static size |
| // to guard against potential over-approximation from projection. |
| // TODO: Support dynamic memref dimensions. |
| if (addMemRefDimBounds) { |
| auto memRefType = cast<MemRefType>(memref.getType()); |
| for (unsigned r = 0; r < rank; r++) { |
| cst.addBound(BoundType::LB, /*pos=*/r, /*value=*/0); |
| if (memRefType.isDynamicDim(r)) |
| continue; |
| cst.addBound(BoundType::UB, /*pos=*/r, memRefType.getDimSize(r) - 1); |
| } |
| } |
| cst.removeTrivialRedundancy(); |
| |
| LDBG() << "Memory region: " << cst; |
| return success(); |
| } |
| |
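| /// Returns the size in bytes of a memref element if it is of int/float or |
| /// vector-of-int/float type, and std::nullopt otherwise. For example, an |
| /// element of type vector<4xf32> has a size of divideCeil(4 * 32, 8) = 16 |
| /// bytes. |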
| std::optional<int64_t> |
| mlir::affine::getMemRefIntOrFloatEltSizeInBytes(MemRefType memRefType) { |
| auto elementType = memRefType.getElementType(); |
| |
| unsigned sizeInBits; |
| if (elementType.isIntOrFloat()) { |
| sizeInBits = elementType.getIntOrFloatBitWidth(); |
| } else if (auto vectorType = dyn_cast<VectorType>(elementType)) { |
| if (vectorType.getElementType().isIntOrFloat()) |
| sizeInBits = |
| vectorType.getElementTypeBitWidth() * vectorType.getNumElements(); |
| else |
| return std::nullopt; |
| } else { |
| return std::nullopt; |
| } |
| return llvm::divideCeil(sizeInBits, 8); |
| } |
| |
| // Returns the size of this memref region in bytes. |
| std::optional<int64_t> MemRefRegion::getRegionSize() { |
| auto memRefType = cast<MemRefType>(memref.getType()); |
| |
| if (!memRefType.getLayout().isIdentity()) {
| LDBG() << "Non-identity layout map not yet supported";
| return std::nullopt;
| }
| |
| // Compute the extents of the buffer. |
| std::optional<int64_t> numElements = getConstantBoundingSizeAndShape(); |
| if (!numElements) { |
| LDBG() << "Dynamic shapes not yet supported"; |
| return std::nullopt; |
| } |
| auto eltSize = getMemRefIntOrFloatEltSizeInBytes(memRefType); |
| if (!eltSize) |
| return std::nullopt; |
| return *eltSize * *numElements; |
| } |
| |
| /// Returns the size of the memref data in bytes if the memref is statically
| /// shaped, std::nullopt otherwise. If the memref's element type is a vector,
| /// the size of the vector is taken into account as well.
| // TODO: improve/complete this when we have target data. |
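| // For example (illustrative): memref<4x8xf32> yields 4 * 8 * 4 = 128 bytes,
| // and memref<4x8xvector<4xf32>> yields 4 * 8 * 16 = 512 bytes.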
| std::optional<uint64_t> |
| mlir::affine::getIntOrFloatMemRefSizeInBytes(MemRefType memRefType) { |
| if (!memRefType.hasStaticShape()) |
| return std::nullopt; |
| auto elementType = memRefType.getElementType(); |
| if (!elementType.isIntOrFloat() && !isa<VectorType>(elementType)) |
| return std::nullopt; |
| |
| auto sizeInBytes = getMemRefIntOrFloatEltSizeInBytes(memRefType); |
| if (!sizeInBytes) |
| return std::nullopt; |
| for (unsigned i = 0, e = memRefType.getRank(); i < e; i++) { |
| sizeInBytes = *sizeInBytes * memRefType.getDimSize(i); |
| } |
| return sizeInBytes; |
| } |
| |
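| /// Verifies that the access performed by `loadOrStoreOp` stays within the
| /// statically known bounds of its memref. For example (illustrative), the
| /// load below accesses indices 3..12 of a memref whose dimension #1 has size
| /// 10, so it is reported out of (upper) bounds:
| ///
| ///   affine.for %i = 0 to 10 {
| ///     %v = affine.load %A[%i + 3] : memref<10xf32>
| ///   }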
| template <typename LoadOrStoreOp> |
| LogicalResult mlir::affine::boundCheckLoadOrStoreOp(LoadOrStoreOp loadOrStoreOp, |
| bool emitError) { |
| static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface, |
| AffineWriteOpInterface>::value, |
| "argument should be either a AffineReadOpInterface or a " |
| "AffineWriteOpInterface"); |
| |
| Operation *op = loadOrStoreOp.getOperation(); |
| MemRefRegion region(op->getLoc()); |
| if (failed(region.compute(op, /*loopDepth=*/0, /*sliceState=*/nullptr, |
| /*addMemRefDimBounds=*/false))) |
| return success(); |
| |
| LDBG() << "Memory region: " << region.getConstraints(); |
| |
| bool outOfBounds = false; |
| unsigned rank = loadOrStoreOp.getMemRefType().getRank(); |
| |
| // For each dimension, check for out of bounds. |
| for (unsigned r = 0; r < rank; r++) { |
| // Intersect the memory region with a constraint capturing an out of bounds
| // access (both past the upper bound and below the lower bound), and check
| // whether the resulting constraint system is feasible. If it is, there is
| // at least one point out of bounds.
| int64_t dimSize = loadOrStoreOp.getMemRefType().getDimSize(r);
| // TODO: handle dynamic dim sizes.
| if (ShapedType::isDynamic(dimSize))
| continue;
|
| // Check for overflow: d_i >= memref dim size.
| FlatAffineValueConstraints ucst(*region.getConstraints());
| ucst.addBound(BoundType::LB, r, dimSize);
| bool outOfUpperBound = !ucst.isEmpty();
| if (outOfUpperBound && emitError) {
| loadOrStoreOp.emitOpError()
| << "memref out of upper bound access along dimension #" << (r + 1);
| }
|
| // Check for a negative index: d_i <= -1.
| FlatAffineValueConstraints lcst(*region.getConstraints());
| lcst.addBound(BoundType::UB, r, -1);
| bool outOfLowerBound = !lcst.isEmpty();
| if (outOfLowerBound && emitError) {
| loadOrStoreOp.emitOpError()
| << "memref out of lower bound access along dimension #" << (r + 1);
| }
| outOfBounds |= outOfUpperBound || outOfLowerBound;
| } |
| return failure(outOfBounds); |
| } |
| |
| // Explicitly instantiate the template so that the compiler emits the
| // definitions we need.
| template LogicalResult |
| mlir::affine::boundCheckLoadOrStoreOp(AffineReadOpInterface loadOp, |
| bool emitError); |
| template LogicalResult |
| mlir::affine::boundCheckLoadOrStoreOp(AffineWriteOpInterface storeOp, |
| bool emitError); |
| |
| // Returns in 'positions' the Block positions of 'op' in each ancestor
| // Block from the Block containing 'op', stopping at 'limitBlock'.
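| // For example (illustrative): if 'op' is the second operation in a block
| // whose parent op is the first operation in 'limitBlock', 'positions' is
| // {0, 1}, ordered from outermost to innermost.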
| static void findInstPosition(Operation *op, Block *limitBlock, |
| SmallVectorImpl<unsigned> *positions) { |
| Block *block = op->getBlock(); |
| while (block != limitBlock) { |
| // FIXME: This algorithm is unnecessarily O(n) and should be improved to not |
| // rely on linear scans. |
| int instPosInBlock = std::distance(block->begin(), op->getIterator()); |
| positions->push_back(instPosInBlock); |
| op = block->getParentOp(); |
| block = op->getBlock(); |
| } |
| std::reverse(positions->begin(), positions->end()); |
| } |
| |
| // Returns the Operation in a possibly nested set of Blocks, where the |
| // position of the operation is represented by 'positions', which has a |
| // Block position for each level of nesting. |
| static Operation *getInstAtPosition(ArrayRef<unsigned> positions, |
| unsigned level, Block *block) { |
| unsigned i = 0; |
| for (auto &op : *block) { |
| if (i != positions[level]) { |
| ++i; |
| continue; |
| } |
| if (level == positions.size() - 1) |
| return &op; |
| if (auto childAffineForOp = dyn_cast<AffineForOp>(op)) |
| return getInstAtPosition(positions, level + 1, |
| childAffineForOp.getBody()); |
| |
| for (auto ®ion : op.getRegions()) { |
| for (auto &b : region) |
| if (auto *ret = getInstAtPosition(positions, level + 1, &b)) |
| return ret; |
| } |
| return nullptr; |
| } |
| return nullptr; |
| } |
| |
| // Adds loop IV bounds to 'cst' for loop IVs not found in 'ivs'. |
| static LogicalResult addMissingLoopIVBounds(SmallPtrSet<Value, 8> &ivs, |
| FlatAffineValueConstraints *cst) { |
| for (unsigned i = 0, e = cst->getNumDimVars(); i < e; ++i) { |
| auto value = cst->getValue(i); |
| if (ivs.count(value) == 0) { |
| assert(isAffineForInductionVar(value)); |
| auto loop = getForInductionVarOwner(value); |
| if (failed(cst->addAffineForOpDomain(loop))) |
| return failure(); |
| } |
| } |
| return success(); |
| } |
| |
| /// Returns the innermost common loop depth for the set of operations in 'ops'. |
| // TODO: Move this to LoopUtils. |
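| // For example (illustrative): if two ops share surrounding loops %i and %j
| // but then sit in different inner loops, the innermost common loop depth is
| // 2, and 'surroundingLoops' holds the loops of %i and %j.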
| unsigned mlir::affine::getInnermostCommonLoopDepth( |
| ArrayRef<Operation *> ops, SmallVectorImpl<AffineForOp> *surroundingLoops) { |
| unsigned numOps = ops.size(); |
| assert(numOps > 0 && "Expected at least one operation"); |
| |
| std::vector<SmallVector<AffineForOp, 4>> loops(numOps); |
| unsigned loopDepthLimit = std::numeric_limits<unsigned>::max(); |
| for (unsigned i = 0; i < numOps; ++i) { |
| getAffineForIVs(*ops[i], &loops[i]); |
| loopDepthLimit = |
| std::min(loopDepthLimit, static_cast<unsigned>(loops[i].size())); |
| } |
| |
| unsigned loopDepth = 0; |
| for (unsigned d = 0; d < loopDepthLimit; ++d) { |
| unsigned i; |
| for (i = 1; i < numOps; ++i) { |
| if (loops[i - 1][d] != loops[i][d]) |
| return loopDepth; |
| } |
| if (surroundingLoops) |
| surroundingLoops->push_back(loops[i - 1][d]); |
| ++loopDepth; |
| } |
| return loopDepth; |
| } |
| |
| /// Computes in 'sliceUnion' the union of all slice bounds computed at
| /// 'loopDepth' between all dependent pairs of ops in 'opsA' and 'opsB', and
| /// then verifies that the union is valid. Returns
| /// 'SliceComputationResult::Success' if the union was computed correctly, and
| /// an appropriate failure otherwise.
| SliceComputationResult |
| mlir::affine::computeSliceUnion(ArrayRef<Operation *> opsA, |
| ArrayRef<Operation *> opsB, unsigned loopDepth, |
| unsigned numCommonLoops, bool isBackwardSlice, |
| ComputationSliceState *sliceUnion) { |
| // Compute the union of slice bounds between all pairs in 'opsA' and |
| // 'opsB' in 'sliceUnionCst'. |
| FlatAffineValueConstraints sliceUnionCst; |
| assert(sliceUnionCst.getNumDimAndSymbolVars() == 0); |
| std::vector<std::pair<Operation *, Operation *>> dependentOpPairs; |
| MemRefAccess srcAccess; |
| MemRefAccess dstAccess; |
| for (Operation *a : opsA) { |
| srcAccess = MemRefAccess(a); |
| for (Operation *b : opsB) { |
| dstAccess = MemRefAccess(b); |
| if (srcAccess.memref != dstAccess.memref) |
| continue; |
| // Check if 'loopDepth' exceeds nesting depth of src/dst ops. |
| if ((!isBackwardSlice && loopDepth > getNestingDepth(a)) || |
| (isBackwardSlice && loopDepth > getNestingDepth(b))) { |
| LDBG() << "Invalid loop depth"; |
| return SliceComputationResult::GenericFailure; |
| } |
| |
| bool readReadAccesses = isa<AffineReadOpInterface>(srcAccess.opInst) && |
| isa<AffineReadOpInterface>(dstAccess.opInst); |
| FlatAffineValueConstraints dependenceConstraints; |
| // Check dependence between 'srcAccess' and 'dstAccess'. |
| DependenceResult result = checkMemrefAccessDependence( |
| srcAccess, dstAccess, /*loopDepth=*/numCommonLoops + 1, |
| &dependenceConstraints, /*dependenceComponents=*/nullptr, |
| /*allowRAR=*/readReadAccesses); |
| if (result.value == DependenceResult::Failure) { |
| LDBG() << "Dependence check failed"; |
| return SliceComputationResult::GenericFailure; |
| } |
| if (result.value == DependenceResult::NoDependence) |
| continue; |
| dependentOpPairs.emplace_back(a, b); |
| |
| // Compute slice bounds for 'srcAccess' and 'dstAccess'. |
| ComputationSliceState tmpSliceState; |
| getComputationSliceState(a, b, dependenceConstraints, loopDepth, |
| isBackwardSlice, &tmpSliceState); |
| |
| if (sliceUnionCst.getNumDimAndSymbolVars() == 0) { |
| // Initialize 'sliceUnionCst' with the bounds computed in previous step. |
| if (failed(tmpSliceState.getAsConstraints(&sliceUnionCst))) { |
| LDBG() << "Unable to compute slice bound constraints"; |
| return SliceComputationResult::GenericFailure; |
| } |
| assert(sliceUnionCst.getNumDimAndSymbolVars() > 0); |
| continue; |
| } |
| |
| // Compute constraints for 'tmpSliceState' in 'tmpSliceCst'. |
| FlatAffineValueConstraints tmpSliceCst; |
| if (failed(tmpSliceState.getAsConstraints(&tmpSliceCst))) { |
| LDBG() << "Unable to compute slice bound constraints"; |
| return SliceComputationResult::GenericFailure; |
| } |
| |
| // Align coordinate spaces of 'sliceUnionCst' and 'tmpSliceCst' if needed. |
| if (!sliceUnionCst.areVarsAlignedWithOther(tmpSliceCst)) {
| // Pre-constraint var alignment: record loop IVs used in each constraint
| // system. |
| SmallPtrSet<Value, 8> sliceUnionIVs; |
| for (unsigned k = 0, l = sliceUnionCst.getNumDimVars(); k < l; ++k) |
| sliceUnionIVs.insert(sliceUnionCst.getValue(k)); |
| SmallPtrSet<Value, 8> tmpSliceIVs; |
| for (unsigned k = 0, l = tmpSliceCst.getNumDimVars(); k < l; ++k) |
| tmpSliceIVs.insert(tmpSliceCst.getValue(k)); |
| |
| sliceUnionCst.mergeAndAlignVarsWithOther(/*offset=*/0, &tmpSliceCst); |
| |
| // Post-constraint var alignment: add loop IV bounds missing after
| // var alignment to the constraint systems. This can occur if one constraint
| // system uses a loop IV that is not used by the other. The call
| // to unionBoundingBox below expects constraints for each loop IV, even
| // if they are the unsliced full loop bounds added here.
| if (failed(addMissingLoopIVBounds(sliceUnionIVs, &sliceUnionCst))) |
| return SliceComputationResult::GenericFailure; |
| if (failed(addMissingLoopIVBounds(tmpSliceIVs, &tmpSliceCst))) |
| return SliceComputationResult::GenericFailure; |
| } |
| // Compute union bounding box of 'sliceUnionCst' and 'tmpSliceCst'. |
| if (sliceUnionCst.getNumLocalVars() > 0 || |
| tmpSliceCst.getNumLocalVars() > 0 || |
| failed(sliceUnionCst.unionBoundingBox(tmpSliceCst))) { |
| LDBG() << "Unable to compute union bounding box of slice bounds"; |
| return SliceComputationResult::GenericFailure; |
| } |
| } |
| } |
| |
| // Empty union. |
| if (sliceUnionCst.getNumDimAndSymbolVars() == 0) { |
| LDBG() << "empty slice union - unexpected"; |
| return SliceComputationResult::GenericFailure; |
| } |
| |
| // Gather loops surrounding ops from loop nest where slice will be inserted. |
| SmallVector<Operation *, 4> ops; |
| for (auto &dep : dependentOpPairs) { |
| ops.push_back(isBackwardSlice ? dep.second : dep.first); |
| } |
| SmallVector<AffineForOp, 4> surroundingLoops; |
| unsigned innermostCommonLoopDepth = |
| getInnermostCommonLoopDepth(ops, &surroundingLoops); |
| if (loopDepth > innermostCommonLoopDepth) { |
| LDBG() << "Exceeds max loop depth"; |
| return SliceComputationResult::GenericFailure; |
| } |
| |
| // Store 'numSliceLoopIVs' before converting dst loop IVs to dims. |
| unsigned numSliceLoopIVs = sliceUnionCst.getNumDimVars(); |
| |
| // Convert any dst loop IVs which are symbol variables to dim variables. |
| sliceUnionCst.convertLoopIVSymbolsToDims(); |
| sliceUnion->clearBounds(); |
| sliceUnion->lbs.resize(numSliceLoopIVs, AffineMap()); |
| sliceUnion->ubs.resize(numSliceLoopIVs, AffineMap()); |
| |
| // Get slice bounds from slice union constraints 'sliceUnionCst'. |
| sliceUnionCst.getSliceBounds(/*offset=*/0, numSliceLoopIVs, |
| opsA[0]->getContext(), &sliceUnion->lbs, |
| &sliceUnion->ubs); |
| |
| // Add slice bound operands of union. |
| SmallVector<Value, 4> sliceBoundOperands; |
| sliceUnionCst.getValues(numSliceLoopIVs, |
| sliceUnionCst.getNumDimAndSymbolVars(), |
| &sliceBoundOperands); |
| |
| // Copy src loop IVs from 'sliceUnionCst' to 'sliceUnion'. |
| sliceUnion->ivs.clear(); |
| sliceUnionCst.getValues(0, numSliceLoopIVs, &sliceUnion->ivs); |
| |
| // Set the loop nest insertion point: block start at 'loopDepth' for backward
| // slices, and block end for forward slices.
| sliceUnion->insertPoint = |
| isBackwardSlice |
| ? surroundingLoops[loopDepth - 1].getBody()->begin() |
| : std::prev(surroundingLoops[loopDepth - 1].getBody()->end()); |
| |
| // Give each bound its own copy of 'sliceBoundOperands' for subsequent |
| // canonicalization. |
| sliceUnion->lbOperands.resize(numSliceLoopIVs, sliceBoundOperands); |
| sliceUnion->ubOperands.resize(numSliceLoopIVs, sliceBoundOperands); |
| |
| // Check whether the computed slice is valid. Return success only if the
| // slice is verified to be valid; otherwise return the appropriate failure.
| std::optional<bool> isSliceValid = sliceUnion->isSliceValid(); |
| if (!isSliceValid) { |
| LDBG() << "Cannot determine if the slice is valid"; |
| return SliceComputationResult::GenericFailure; |
| } |
| if (!*isSliceValid) |
| return SliceComputationResult::IncorrectSliceFailure; |
| |
| return SliceComputationResult::Success; |
| } |
| |
| // TODO: extend this to handle multiple result maps. |
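| // For example (illustrative): with lbMap = (d0) -> (d0) and
| // ubMap = (d0) -> (d0 + 4), the difference simplifies to the constant 4.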
| static std::optional<uint64_t> getConstDifference(AffineMap lbMap, |
| AffineMap ubMap) { |
| assert(lbMap.getNumResults() == 1 && "expected single result bound map"); |
| assert(ubMap.getNumResults() == 1 && "expected single result bound map"); |
| assert(lbMap.getNumDims() == ubMap.getNumDims()); |
| assert(lbMap.getNumSymbols() == ubMap.getNumSymbols()); |
| AffineExpr lbExpr(lbMap.getResult(0)); |
| AffineExpr ubExpr(ubMap.getResult(0)); |
| auto loopSpanExpr = simplifyAffineExpr(ubExpr - lbExpr, lbMap.getNumDims(), |
| lbMap.getNumSymbols()); |
| auto cExpr = dyn_cast<AffineConstantExpr>(loopSpanExpr); |
| if (!cExpr) |
| return std::nullopt; |
| return cExpr.getValue(); |
| } |
| |
| // Builds a map 'tripCountMap' from AffineForOp to constant trip count for the
| // loop nest surrounding the slice, as represented by the slice loop bounds in
| // 'slice'. Returns true on success, false otherwise (if a non-constant trip
| // count was encountered).
| // TODO: Make this work with non-unit step loops. |
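| // For example (illustrative): a sliced unit-step loop with bounds
| // lb = (d0) -> (d0) and ub = (d0) -> (d0 + 4) is mapped to a trip count of 4,
| // while an unsliced loop contributes its full constant trip count, if known.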
| bool mlir::affine::buildSliceTripCountMap( |
| const ComputationSliceState &slice, |
| llvm::SmallDenseMap<Operation *, uint64_t, 8> *tripCountMap) { |
| unsigned numSrcLoopIVs = slice.ivs.size(); |
| // Populate map from AffineForOp -> trip count |
| for (unsigned i = 0; i < numSrcLoopIVs; ++i) { |
| AffineForOp forOp = getForInductionVarOwner(slice.ivs[i]); |
| auto *op = forOp.getOperation(); |
| AffineMap lbMap = slice.lbs[i]; |
| AffineMap ubMap = slice.ubs[i]; |
| // If the lower or upper bound maps are null or provide no results, the
| // source loop was not sliced at all, and the entire loop becomes part of
| // the slice.
| if (!lbMap || lbMap.getNumResults() == 0 || !ubMap || |
| ubMap.getNumResults() == 0) { |
| // The iteration of src loop IV 'i' was not sliced. Use full loop bounds. |
| if (forOp.hasConstantLowerBound() && forOp.hasConstantUpperBound()) { |
| (*tripCountMap)[op] = |
| forOp.getConstantUpperBound() - forOp.getConstantLowerBound(); |
| continue; |
| } |
| std::optional<uint64_t> maybeConstTripCount = getConstantTripCount(forOp); |
| if (maybeConstTripCount.has_value()) { |
| (*tripCountMap)[op] = *maybeConstTripCount; |
| continue; |
| } |
| return false; |
| } |
| std::optional<uint64_t> tripCount = getConstDifference(lbMap, ubMap); |
| // Slice bounds are created with a constant ub - lb difference. |
| if (!tripCount.has_value()) |
| return false; |
| (*tripCountMap)[op] = *tripCount; |
| } |
| return true; |
| } |
| |
| // Return the number of iterations in the given slice. |
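| // For example (illustrative): trip counts {loop0: 4, loop1: 8} yield
| // 4 * 8 = 32 iterations.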
| uint64_t mlir::affine::getSliceIterationCount( |
| const llvm::SmallDenseMap<Operation *, uint64_t, 8> &sliceTripCountMap) { |
| uint64_t iterCount = 1; |
| for (const auto &count : sliceTripCountMap) { |
| iterCount *= count.second; |
| } |
| return iterCount; |
| } |
| |
| const char *const kSliceFusionBarrierAttrName = "slice_fusion_barrier"; |
| // Computes slice bounds by projecting out any loop IVs from
| // 'dependenceConstraints' at depth greater than 'loopDepth', and computes
| // slice bounds in 'sliceState' which represent one loop nest's IVs in terms of
| // the other loop nest's IVs, symbols, and constants (using 'isBackwardSlice').
| void mlir::affine::getComputationSliceState( |
| Operation *depSourceOp, Operation *depSinkOp, |
| const FlatAffineValueConstraints &dependenceConstraints, unsigned loopDepth, |
| bool isBackwardSlice, ComputationSliceState *sliceState) { |
| // Get loop nest surrounding src operation. |
| SmallVector<AffineForOp, 4> srcLoopIVs; |
| getAffineForIVs(*depSourceOp, &srcLoopIVs); |
| unsigned numSrcLoopIVs = srcLoopIVs.size(); |
| |
| // Get loop nest surrounding dst operation. |
| SmallVector<AffineForOp, 4> dstLoopIVs; |
| getAffineForIVs(*depSinkOp, &dstLoopIVs); |
| unsigned numDstLoopIVs = dstLoopIVs.size(); |
| |
| assert((!isBackwardSlice && loopDepth <= numSrcLoopIVs) || |
| (isBackwardSlice && loopDepth <= numDstLoopIVs)); |
| |
| // Project out dimensions other than those up to 'loopDepth'. |
| unsigned pos = isBackwardSlice ? numSrcLoopIVs + loopDepth : loopDepth; |
| unsigned num = |
| isBackwardSlice ? numDstLoopIVs - loopDepth : numSrcLoopIVs - loopDepth; |
| FlatAffineValueConstraints sliceCst(dependenceConstraints); |
| sliceCst.projectOut(pos, num); |
| |
| // Add slice loop IV values to 'sliceState'. |
| unsigned offset = isBackwardSlice ? 0 : loopDepth; |
| unsigned numSliceLoopIVs = isBackwardSlice ? numSrcLoopIVs : numDstLoopIVs; |
| sliceCst.getValues(offset, offset + numSliceLoopIVs, &sliceState->ivs); |
| |
| // Set up lower/upper bound affine maps for the slice. |
| sliceState->lbs.resize(numSliceLoopIVs, AffineMap()); |
| sliceState->ubs.resize(numSliceLoopIVs, AffineMap()); |
| |
| // Get bounds for slice IVs in terms of other IVs, symbols, and constants. |
| sliceCst.getSliceBounds(offset, numSliceLoopIVs, depSourceOp->getContext(), |
| &sliceState->lbs, &sliceState->ubs); |
| |
| // Set up bound operands for the slice's lower and upper bounds. |
| SmallVector<Value, 4> sliceBoundOperands; |
| unsigned numDimsAndSymbols = sliceCst.getNumDimAndSymbolVars(); |
| for (unsigned i = 0; i < numDimsAndSymbols; ++i) { |
| if (i < offset || i >= offset + numSliceLoopIVs) |
| sliceBoundOperands.push_back(sliceCst.getValue(i)); |
| } |
| |
| // Give each bound its own copy of 'sliceBoundOperands' for subsequent |
| // canonicalization. |
| sliceState->lbOperands.resize(numSliceLoopIVs, sliceBoundOperands); |
| sliceState->ubOperands.resize(numSliceLoopIVs, sliceBoundOperands); |
| |
| // Set the insertion point: block start at 'loopDepth' in the dst loop nest
| // for backward slices, and block end in the src loop nest for forward slices.
| sliceState->insertPoint = |
| isBackwardSlice ? dstLoopIVs[loopDepth - 1].getBody()->begin() |
| : std::prev(srcLoopIVs[loopDepth - 1].getBody()->end()); |
| |
| llvm::SmallDenseSet<Value, 8> sequentialLoops; |
| if (isa<AffineReadOpInterface>(depSourceOp) && |
| isa<AffineReadOpInterface>(depSinkOp)) { |
| // For read-read access pairs, clear any slice bounds on sequential loops. |
| // Get sequential loops in loop nest rooted at 'srcLoopIVs[0]'. |
| getSequentialLoops(isBackwardSlice ? srcLoopIVs[0] : dstLoopIVs[0], |
| &sequentialLoops); |
| } |
| auto getSliceLoop = [&](unsigned i) { |
| return isBackwardSlice ? srcLoopIVs[i] : dstLoopIVs[i]; |
| }; |
| auto isInnermostInsertion = [&]() { |
| return (isBackwardSlice ? loopDepth >= srcLoopIVs.size() |
| : loopDepth >= dstLoopIVs.size()); |
| }; |
| llvm::SmallDenseMap<Operation *, uint64_t, 8> sliceTripCountMap; |
| auto srcIsUnitSlice = [&]() { |
| return (buildSliceTripCountMap(*sliceState, &sliceTripCountMap) && |
| (getSliceIterationCount(sliceTripCountMap) == 1)); |
| }; |
| // Clear all sliced loop bounds beginning at the first sequential loop or the
| // first loop with a slice fusion barrier attribute.
| for (unsigned i = 0; i < numSliceLoopIVs; ++i) { |
| Value iv = getSliceLoop(i).getInductionVar(); |
| if (sequentialLoops.count(iv) == 0 && |
| getSliceLoop(i)->getAttr(kSliceFusionBarrierAttrName) == nullptr) |
| continue; |
| // Skip resetting the bounds of a reduction loop inserted in the destination
| // loop nest when all of the following conditions hold:
| // 1. The slice has a single trip count.
| // 2. The loop bounds of the source and destination match.
| // 3. The slice is being inserted at the innermost insertion point.
| std::optional<bool> isMaximal = sliceState->isMaximal(); |
| if (isLoopParallelAndContainsReduction(getSliceLoop(i)) && |
| isInnermostInsertion() && srcIsUnitSlice() && isMaximal && *isMaximal) |
| continue; |
| for (unsigned j = i; j < numSliceLoopIVs; ++j) { |
| sliceState->lbs[j] = AffineMap(); |
| sliceState->ubs[j] = AffineMap(); |
| } |
| break; |
| } |
| } |
| |
| /// Creates a computation slice of the loop nest surrounding 'srcOpInst', |
| /// updates the slice loop bounds with any non-null bound maps specified in |
| /// 'sliceState', and inserts this slice into the loop nest surrounding |
| /// 'dstOpInst' at loop depth 'dstLoopDepth'. |
| // TODO: extend the slicing utility to compute slices that aren't necessarily a
| // one-to-one relation between the source and destination. The relation between
| // the source and destination could be many-to-many in general.
| // TODO: the slice computation is incorrect in the cases |
| // where the dependence from the source to the destination does not cover the |
| // entire destination index set. Subtract out the dependent destination |
| // iterations from destination index set and check for emptiness --- this is one |
| // solution. |
| AffineForOp mlir::affine::insertBackwardComputationSlice( |
| Operation *srcOpInst, Operation *dstOpInst, unsigned dstLoopDepth, |
| ComputationSliceState *sliceState) { |
| // Get loop nest surrounding src operation. |
| SmallVector<AffineForOp, 4> srcLoopIVs; |
| getAffineForIVs(*srcOpInst, &srcLoopIVs); |
| unsigned numSrcLoopIVs = srcLoopIVs.size(); |
| |
| // Get loop nest surrounding dst operation. |
| SmallVector<AffineForOp, 4> dstLoopIVs; |
| getAffineForIVs(*dstOpInst, &dstLoopIVs); |
| unsigned dstLoopIVsSize = dstLoopIVs.size(); |
| if (dstLoopDepth > dstLoopIVsSize) { |
| dstOpInst->emitError("invalid destination loop depth"); |
| return AffineForOp(); |
| } |
| |
| // Find the op block positions of 'srcOpInst' within 'srcLoopIVs'. |
| SmallVector<unsigned, 4> positions; |
| // TODO: This code is incorrect since srcLoopIVs can be 0-d. |
| findInstPosition(srcOpInst, srcLoopIVs[0]->getBlock(), &positions); |
| |
| // Clone the src loop nest and insert it at the beginning of the operation
| // block of the loop at 'dstLoopDepth' in 'dstLoopIVs'.
| auto dstAffineForOp = dstLoopIVs[dstLoopDepth - 1]; |
| OpBuilder b(dstAffineForOp.getBody(), dstAffineForOp.getBody()->begin()); |
| auto sliceLoopNest = |
| cast<AffineForOp>(b.clone(*srcLoopIVs[0].getOperation())); |
| |
| Operation *sliceInst = |
| getInstAtPosition(positions, /*level=*/0, sliceLoopNest.getBody()); |
| // Get loop nest surrounding 'sliceInst'. |
| SmallVector<AffineForOp, 4> sliceSurroundingLoops; |
| getAffineForIVs(*sliceInst, &sliceSurroundingLoops); |
| |
| // Sanity check. |
| unsigned sliceSurroundingLoopsSize = sliceSurroundingLoops.size(); |
| (void)sliceSurroundingLoopsSize; |
| assert(dstLoopDepth + numSrcLoopIVs >= sliceSurroundingLoopsSize); |
| unsigned sliceLoopLimit = dstLoopDepth + numSrcLoopIVs; |
| (void)sliceLoopLimit; |
| assert(sliceLoopLimit >= sliceSurroundingLoopsSize); |
| |
| // Update loop bounds for loops in 'sliceLoopNest'. |
| for (unsigned i = 0; i < numSrcLoopIVs; ++i) { |
| auto forOp = sliceSurroundingLoops[dstLoopDepth + i]; |
| if (AffineMap lbMap = sliceState->lbs[i]) |
| forOp.setLowerBound(sliceState->lbOperands[i], lbMap); |
| if (AffineMap ubMap = sliceState->ubs[i]) |
| forOp.setUpperBound(sliceState->ubOperands[i], ubMap); |
| } |
| return sliceLoopNest; |
| } |
| |
| // Constructs a MemRefAccess, populating it with the memref, the access
| // indices, and the op from 'memOp', which must be an affine read or write op.
| MemRefAccess::MemRefAccess(Operation *memOp) { |
| if (auto loadOp = dyn_cast<AffineReadOpInterface>(memOp)) { |
| memref = loadOp.getMemRef(); |
| opInst = memOp; |
| llvm::append_range(indices, loadOp.getMapOperands()); |
| } else { |
| assert(isa<AffineWriteOpInterface>(memOp) && |
| "Affine read/write op expected"); |
| auto storeOp = cast<AffineWriteOpInterface>(memOp); |
| opInst = memOp; |
| memref = storeOp.getMemRef(); |
| llvm::append_range(indices, storeOp.getMapOperands()); |
| } |
| } |
| |
| unsigned MemRefAccess::getRank() const { |
| return cast<MemRefType>(memref.getType()).getRank(); |
| } |
| |
| bool MemRefAccess::isStore() const { |
| return isa<AffineWriteOpInterface>(opInst); |
| } |
| |
| /// Returns the nesting depth of this statement, i.e., the number of loops |
| /// surrounding this statement. |
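| /// For example (illustrative): an op nested inside one 'affine.for' and a
| /// two-dimensional 'affine.parallel' has a nesting depth of 3.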
| unsigned mlir::affine::getNestingDepth(Operation *op) { |
| Operation *currOp = op; |
| unsigned depth = 0; |
| while ((currOp = currOp->getParentOp())) { |
| if (isa<AffineForOp>(currOp)) |
| depth++; |
| if (auto parOp = dyn_cast<AffineParallelOp>(currOp)) |
| depth += parOp.getNumDims(); |
| } |
| return depth; |
| } |
| |
| /// Equal if both affine accesses are provably equivalent (at compile |
| /// time) when considering the memref, the affine maps and their respective |
| /// operands. The equality of access functions + operands is checked by |
| /// subtracting fully composed value maps, and then simplifying the difference |
| /// using the expression flattener. |
| /// TODO: this does not account for aliasing of memrefs. |
| bool MemRefAccess::operator==(const MemRefAccess &rhs) const { |
| if (memref != rhs.memref) |
| return false; |
| |
| AffineValueMap thisMap, rhsMap;
| getAccessMap(&thisMap); |
| rhs.getAccessMap(&rhsMap); |
| return thisMap == rhsMap; |
| } |
| |
| void mlir::affine::getAffineIVs(Operation &op, SmallVectorImpl<Value> &ivs) { |
| auto *currOp = op.getParentOp();
| // Traverse up the hierarchy collecting all 'affine.for' and 'affine.parallel'
| // operations, while skipping over other region-holding ops such as
| // 'affine.if'.
| while (currOp) {
| if (auto currAffineForOp = dyn_cast<AffineForOp>(currOp))
| ivs.push_back(currAffineForOp.getInductionVar());
| else if (auto parOp = dyn_cast<AffineParallelOp>(currOp)) |
| llvm::append_range(ivs, parOp.getIVs()); |
| currOp = currOp->getParentOp(); |
| } |
| std::reverse(ivs.begin(), ivs.end()); |
| } |
| |
| /// Returns the number of surrounding loops common to both 'a' and 'b', where
| /// the surrounding loops of each op are listed from outermost to innermost.
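| /// For example (illustrative): if 'a' is nested under loops (%i, %j, %k) and
| /// 'b' under (%i, %j, %l), they have two common surrounding loops.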
| unsigned mlir::affine::getNumCommonSurroundingLoops(Operation &a, |
| Operation &b) { |
| SmallVector<Value, 4> loopsA, loopsB; |
| getAffineIVs(a, loopsA); |
| getAffineIVs(b, loopsB); |
| |
| unsigned minNumLoops = std::min(loopsA.size(), loopsB.size()); |
| unsigned numCommonLoops = 0; |
| for (unsigned i = 0; i < minNumLoops; ++i) { |
| if (loopsA[i] != loopsB[i]) |
| break; |
| ++numCommonLoops; |
| } |
| return numCommonLoops; |
| } |
| |
| static std::optional<int64_t> getMemoryFootprintBytes(Block &block, |
| Block::iterator start, |
| Block::iterator end, |
| int memorySpace) { |
| SmallDenseMap<Value, std::unique_ptr<MemRefRegion>, 4> regions; |
| |
| // Walk the given block range to gather all memory regions.
| auto result = block.walk(start, end, [&](Operation *opInst) -> WalkResult { |
| if (!isa<AffineReadOpInterface, AffineWriteOpInterface>(opInst)) { |
| // Neither load nor a store op. |
| return WalkResult::advance(); |
| } |
| |
| // Compute the memref region symbolic in any IVs enclosing this block. |
| auto region = std::make_unique<MemRefRegion>(opInst->getLoc()); |
| if (failed( |
| region->compute(opInst, |
| /*loopDepth=*/getNestingDepth(&*block.begin())))) { |
| LDBG() << "Error obtaining memory region"; |
| opInst->emitError("error obtaining memory region"); |
| return failure(); |
| } |
| |
| auto [it, inserted] = regions.try_emplace(region->memref); |
| if (inserted) { |
| it->second = std::move(region); |
| } else if (failed(it->second->unionBoundingBox(*region))) { |
| LDBG() << "getMemoryFootprintBytes: unable to perform a union on a " |
| "memory region"; |
| opInst->emitWarning( |
| "getMemoryFootprintBytes: unable to perform a union on a memory " |
| "region"); |
| return failure(); |
| } |
| return WalkResult::advance(); |
| }); |
| if (result.wasInterrupted()) |
| return std::nullopt; |
| |
| int64_t totalSizeInBytes = 0; |
| for (const auto ®ion : regions) { |
| std::optional<int64_t> size = region.second->getRegionSize(); |
| if (!size.has_value()) |
| return std::nullopt; |
| totalSizeInBytes += *size; |
| } |
| return totalSizeInBytes; |
| } |
| |
| std::optional<int64_t> mlir::affine::getMemoryFootprintBytes(AffineForOp forOp, |
| int memorySpace) { |
| auto *forInst = forOp.getOperation(); |
| return ::getMemoryFootprintBytes( |
| *forInst->getBlock(), Block::iterator(forInst), |
| std::next(Block::iterator(forInst)), memorySpace); |
| } |
| |
| /// Returns whether a loop is parallel and contains a reduction loop. |
| bool mlir::affine::isLoopParallelAndContainsReduction(AffineForOp forOp) { |
| SmallVector<LoopReduction> reductions; |
| if (!isLoopParallel(forOp, &reductions)) |
| return false; |
| return !reductions.empty(); |
| } |
| |
| /// Returns in 'sequentialLoops' all sequential loops in loop nest rooted |
| /// at 'forOp'. |
| void mlir::affine::getSequentialLoops( |
| AffineForOp forOp, llvm::SmallDenseSet<Value, 8> *sequentialLoops) { |
| forOp->walk([&](Operation *op) { |
| if (auto innerFor = dyn_cast<AffineForOp>(op)) |
| if (!isLoopParallel(innerFor)) |
| sequentialLoops->insert(innerFor.getInductionVar()); |
| }); |
| } |
| |
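| /// Simplifies the given integer set. For example (illustrative), the set
| ///   (d0) : (d0 >= 0, -d0 + 10 >= 0, d0 - 20 >= 0)
| /// is infeasible and is simplified to the canonical empty set.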
| IntegerSet mlir::affine::simplifyIntegerSet(IntegerSet set) { |
| FlatAffineValueConstraints fac(set); |
| if (fac.isEmpty()) |
| return IntegerSet::getEmptySet(set.getNumDims(), set.getNumSymbols(), |
| set.getContext()); |
| fac.removeTrivialRedundancy(); |
| |
| auto simplifiedSet = fac.getAsIntegerSet(set.getContext()); |
| assert(simplifiedSet && "guaranteed to succeed while roundtripping"); |
| return simplifiedSet; |
| } |
| |
| static void unpackOptionalValues(ArrayRef<std::optional<Value>> source, |
| SmallVector<Value> &target) { |
| target = |
| llvm::to_vector<4>(llvm::map_range(source, [](std::optional<Value> val) { |
| return val.has_value() ? *val : Value(); |
| })); |
| } |
| |
| /// Bound an identifier `pos` in a given FlatAffineValueConstraints with |
| /// constraints drawn from an affine map. Before adding the constraint, the |
| /// dimensions/symbols of the affine map are aligned with `constraints`. |
| /// `operands` are the SSA Value operands used with the affine map. |
| /// Note: This function adds a new symbol column to the `constraints` for each |
| /// dimension/symbol that exists in the affine map but not in `constraints`. |
| static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints, |
| BoundType type, unsigned pos, |
| AffineMap map, ValueRange operands) { |
| SmallVector<Value> dims, syms, newSyms; |
| unpackOptionalValues(constraints.getMaybeValues(VarKind::SetDim), dims); |
| unpackOptionalValues(constraints.getMaybeValues(VarKind::Symbol), syms); |
| |
| AffineMap alignedMap = |
| alignAffineMapWithValues(map, operands, dims, syms, &newSyms); |
| for (unsigned i = syms.size(); i < newSyms.size(); ++i) |
| constraints.appendSymbolVar(newSyms[i]); |
| return constraints.addBound(type, pos, alignedMap); |
| } |
| |
| /// Add `val` to each result of `map`. |
| static AffineMap addConstToResults(AffineMap map, int64_t val) { |
| SmallVector<AffineExpr> newResults; |
| for (AffineExpr r : map.getResults()) |
| newResults.push_back(r + val); |
| return AffineMap::get(map.getNumDims(), map.getNumSymbols(), newResults, |
| map.getContext()); |
| } |
| |
| // Attempt to simplify the given min/max operation by proving that its value is |
| // bounded by the same lower and upper bound. |
| // |
| // Bounds are computed by FlatAffineValueConstraints. Invariants required for |
| // finding/proving bounds should be supplied via `constraints`. |
| // |
| // 1. Add dimensions for `op` and `opBound` (lower or upper bound of `op`). |
| // 2. Compute an upper bound of `op` (in case of `isMin`) or a lower bound (in |
| // case of `!isMin`) and bind it to `opBound`. SSA values that are used in |
| // `op` but are not part of `constraints`, are added as extra symbols. |
| // 3. For each result of `op`: Add the result as a dimension `r_i`. Prove that:
| //    * If `isMin`: r_i >= opBound
| //    * If `!isMin`: r_i <= opBound
| //    If this is the case, ub(op) == lb(op).
| // 4. Replace `op` with `opBound`. |
| // |
| // In summary, the following constraints are added throughout this function. |
| // Note: `invar` are dimensions added by the caller to express the invariants. |
| // (Showing only the case where `isMin`.) |
| // |
| // invar | op | opBound | r_i | extra syms... | const | eq/ineq |
| // ------+-------+---------+-----+---------------+-------+------------------- |
| // (various eq./ineq. constraining `invar`, added by the caller) |
| // ... | 0 | 0 | 0 | 0 | ... | ... |
| // ------+-------+---------+-----+---------------+-------+------------------- |
| // (various ineq. constraining `op` in terms of `op` operands (`invar` and |
| // extra `op` operands "extra syms" that are not in `invar`)). |
| // ... | -1 | 0 | 0 | ... | ... | >= 0 |
| // ------+-------+---------+-----+---------------+-------+------------------- |
| // (set `opBound` to `op` upper bound in terms of `invar` and "extra syms") |
| // ... | 0 | -1 | 0 | ... | ... | = 0 |
| // ------+-------+---------+-----+---------------+-------+------------------- |
| // (for each `op` map result r_i: set r_i to corresponding map result, |
| // prove that r_i >= minOpUb via contradiction) |
| // ... | 0 | 0 | -1 | ... | ... | = 0 |
| // 0 | 0 | 1 | -1 | 0 | -1 | >= 0 |
| // |
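| // For example (illustrative): given the invariant 0 <= %i <= 50 in
| // `constraints`, the op
| //
| //   %m = affine.min affine_map<(d0) -> (d0, 100)>(%i)
| //
| // is provably equal to its first result, and simplifies to the value map
| // (d0) -> (d0) applied to %i.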
| FailureOr<AffineValueMap> mlir::affine::simplifyConstrainedMinMaxOp( |
| Operation *op, FlatAffineValueConstraints constraints) { |
| bool isMin = isa<AffineMinOp>(op); |
| assert((isMin || isa<AffineMaxOp>(op)) && "expect AffineMin/MaxOp"); |
| MLIRContext *ctx = op->getContext(); |
| Builder builder(ctx); |
| AffineMap map = |
| isMin ? cast<AffineMinOp>(op).getMap() : cast<AffineMaxOp>(op).getMap(); |
| ValueRange operands = op->getOperands(); |
| unsigned numResults = map.getNumResults(); |
| |
| // Add a few extra dimensions. |
| unsigned dimOp = constraints.appendDimVar(); // `op` |
| unsigned dimOpBound = constraints.appendDimVar(); // `op` lower/upper bound |
| unsigned resultDimStart = constraints.appendDimVar(/*num=*/numResults); |
| |
| // Add an inequality for each result expr_i of map: |
| // isMin: op <= expr_i, !isMin: op >= expr_i |
| auto boundType = isMin ? BoundType::UB : BoundType::LB; |
| // Upper bounds are exclusive, so add 1. (`affine.min` ops are inclusive.) |
| AffineMap mapLbUb = isMin ? addConstToResults(map, 1) : map; |
| if (failed( |
| alignAndAddBound(constraints, boundType, dimOp, mapLbUb, operands))) |
| return failure(); |
| |
| // Try to compute a lower/upper bound for op, expressed in terms of the other |
| // `dims` and extra symbols. |
| SmallVector<AffineMap> opLb(1), opUb(1); |
| constraints.getSliceBounds(dimOp, 1, ctx, &opLb, &opUb); |
| AffineMap sliceBound = isMin ? opUb[0] : opLb[0]; |
| // TODO: `getSliceBounds` may return multiple bounds at the moment. This is |
| // a TODO of `getSliceBounds` and not handled here. |
| if (!sliceBound || sliceBound.getNumResults() != 1) |
| return failure(); // No or multiple bounds found. |
| // Recover the inclusive UB in the case of an `affine.min`. |
| AffineMap boundMap = isMin ? addConstToResults(sliceBound, -1) : sliceBound; |
| |
| // Add an equality: Set dimOpBound to computed bound. |
| // Add back dimension for op. (Was removed by `getSliceBounds`.) |
| AffineMap alignedBoundMap = boundMap.shiftDims(/*shift=*/1, /*offset=*/dimOp); |
| if (failed(constraints.addBound(BoundType::EQ, dimOpBound, alignedBoundMap))) |
| return failure(); |
| |
| // If the constraint system is empty, there is an inconsistency. (E.g., this |
| // can happen if loop lb > ub.) |
| if (constraints.isEmpty()) |
| return failure(); |
| |
| // In the case of `isMin` (the `!isMin` case is analogous):
| // Prove that each result of `map` has a lower bound that is equal to (or |
| // greater than) the upper bound of `op` (`dimOpBound`). In that case, `op` |
| // can be replaced with the bound. I.e., prove that for each result |
| // expr_i (represented by dimension r_i): |
| // |
| // r_i >= opBound |
| // |
| // To prove this inequality, add its negation to the constraint set and prove |
| // that the constraint set is empty. |
| for (unsigned i = resultDimStart; i < resultDimStart + numResults; ++i) { |
| FlatAffineValueConstraints newConstr(constraints); |
| |
| // Add an equality: r_i = expr_i |
| // Note: These equalities could have been added earlier and used to express |
| // minOp <= expr_i. However, then we run the risk that `getSliceBounds` |
| // computes minOpUb in terms of r_i dims, which is not desired. |
| if (failed(alignAndAddBound(newConstr, BoundType::EQ, i, |
| map.getSubMap({i - resultDimStart}), operands))) |
| return failure(); |
| |
| // If `isMin`: Add inequality: r_i < opBound |
| // equiv.: opBound - r_i - 1 >= 0 |
| // If `!isMin`: Add inequality: r_i > opBound |
| // equiv.: -opBound + r_i - 1 >= 0 |
| SmallVector<int64_t> ineq(newConstr.getNumCols(), 0); |
| ineq[dimOpBound] = isMin ? 1 : -1; |
| ineq[i] = isMin ? -1 : 1; |
| ineq[newConstr.getNumCols() - 1] = -1; |
| newConstr.addInequality(ineq); |
| if (!newConstr.isEmpty()) |
| return failure(); |
| } |
| |
| // Lower and upper bound of `op` are equal. Replace `op` with its bound.
| AffineMap newMap = alignedBoundMap; |
| SmallVector<Value> newOperands; |
| unpackOptionalValues(constraints.getMaybeValues(), newOperands); |
| // If dims/symbols have known constant values, use those in order to simplify |
| // the affine map further. |
| for (int64_t i = 0, e = constraints.getNumDimAndSymbolVars(); i < e; ++i) { |
| // Skip unused operands and operands that are already constants. |
| if (!newOperands[i] || getConstantIntValue(newOperands[i])) |
| continue; |
| if (auto bound = constraints.getConstantBound64(BoundType::EQ, i)) { |
| AffineExpr expr = |
| i < newMap.getNumDims() |
| ? builder.getAffineDimExpr(i) |
| : builder.getAffineSymbolExpr(i - newMap.getNumDims()); |
| newMap = newMap.replace(expr, builder.getAffineConstantExpr(*bound), |
| newMap.getNumDims(), newMap.getNumSymbols()); |
| } |
| } |
| affine::canonicalizeMapAndOperands(&newMap, &newOperands); |
| return AffineValueMap(newMap, newOperands); |
| } |
| |
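| /// Returns the innermost block that is a common ancestor of both `a` and `b`
| /// within their affine analysis scope, or nullptr if the two ops are not in
| /// the same affine scope.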
| Block *mlir::affine::findInnermostCommonBlockInScope(Operation *a, |
| Operation *b) { |
| Region *aScope = getAffineAnalysisScope(a); |
| Region *bScope = getAffineAnalysisScope(b); |
| if (aScope != bScope) |
| return nullptr; |
| |
| // Get the block ancestry of `op`, stopping at the affine scope `aScope`, and
| // store it in `ancestry`.
| auto getBlockAncestry = [&](Operation *op, |
| SmallVectorImpl<Block *> &ancestry) { |
| Operation *curOp = op; |
| do { |
| ancestry.push_back(curOp->getBlock()); |
| if (curOp->getParentRegion() == aScope) |
| break; |
| curOp = curOp->getParentOp(); |
| } while (curOp); |
| assert(curOp && "can't reach root op without passing through affine scope"); |
| std::reverse(ancestry.begin(), ancestry.end()); |
| }; |
| |
| SmallVector<Block *, 4> aAncestors, bAncestors; |
| getBlockAncestry(a, aAncestors); |
| getBlockAncestry(b, bAncestors); |
| assert(!aAncestors.empty() && !bAncestors.empty() && |
| "at least one Block ancestor expected"); |
| |
| Block *innermostCommonBlock = nullptr; |
| for (unsigned i = 0, e = aAncestors.size(), f = bAncestors.size();
| i < e && i < f; ++i) {
| if (aAncestors[i] != bAncestors[i])
| break;
| innermostCommonBlock = aAncestors[i];
| } |
| return innermostCommonBlock; |
| } |