| //===- BufferDeallocation.cpp - the impl for buffer deallocation ----------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements logic for computing correct alloc and dealloc positions. |
| // Furthermore, buffer deallocation also adds required new clone operations to |
| // ensure that all buffers are deallocated. The main class is the |
| // BufferDeallocationPass class that implements the underlying algorithm. In |
| // order to put allocations and deallocations at safe positions, it is |
| // significantly important to put them into the correct blocks. However, the |
| // liveness analysis does not pay attention to aliases, which can occur due to |
| // branches (and their associated block arguments) in general. For this purpose, |
| // BufferDeallocation firstly finds all possible aliases for a single value |
| // (using the BufferViewFlowAnalysis class). Consider the following example: |
| // |
| // ^bb0(%arg0): |
| // cond_br %cond, ^bb1, ^bb2 |
| // ^bb1: |
| // br ^exit(%arg0) |
| // ^bb2: |
| // %new_value = ... |
| // br ^exit(%new_value) |
| // ^exit(%arg1): |
| // return %arg1; |
| // |
| // We should place the dealloc for %new_value in exit. However, we have to free |
| // the buffer in the same block, because it cannot be freed in the post |
| // dominator. However, this requires a new clone buffer for %arg1 that will |
| // contain the actual contents. Using the class BufferViewFlowAnalysis, we |
| // will find out that %new_value has a potential alias %arg1. In order to find |
| // the dealloc position we have to find all potential aliases, iterate over |
| // their uses and find the common post-dominator block (note that additional |
| // clones and buffers remove potential aliases and will influence the placement |
| // of the deallocs). In all cases, the computed block can be safely used to free |
| // the %new_value buffer (may be exit or bb2) as it will die and we can use |
| // liveness information to determine the exact operation after which we have to |
| // insert the dealloc. However, the algorithm supports introducing clone buffers |
| // and placing deallocs in safe locations to ensure that all buffers will be |
| // freed in the end. |
| // |
| // TODO: |
| // The current implementation does not support explicit-control-flow loops and |
| // the resulting code will be invalid with respect to program semantics. |
| // However, structured control-flow loops are fully supported. Furthermore, it |
| // doesn't accept functions which return buffers already. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "PassDetail.h" |
| |
| #include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h" |
| #include "mlir/Dialect/Bufferization/IR/Bufferization.h" |
| #include "mlir/Dialect/Bufferization/Transforms/Passes.h" |
| #include "mlir/Dialect/MemRef/IR/MemRef.h" |
| #include "mlir/Transforms/BufferUtils.h" |
| #include "llvm/ADT/SetOperations.h" |
| |
| using namespace mlir; |
| |
| /// Walks over all immediate return-like terminators in the given region. |
| static LogicalResult |
| walkReturnOperations(Region *region, |
| std::function<LogicalResult(Operation *)> func) { |
| for (Block &block : *region) { |
| Operation *terminator = block.getTerminator(); |
| // Skip non region-return-like terminators. |
| if (isRegionReturnLike(terminator)) { |
| if (failed(func(terminator))) |
| return failure(); |
| } |
| } |
| return success(); |
| } |
| |
| /// Checks if all operations in a given region that have at least one attached |
| /// region implement the RegionBranchOpInterface. This is not required in edge |
| /// cases, where we have a single attached region and the parent operation has |
| /// no results. |
| static bool validateSupportedControlFlow(Region ®ion) { |
| bool success = true; |
| region.walk([&success](Operation *operation) { |
| auto regions = operation->getRegions(); |
| // Walk over all operations in a region and check if the operation has at |
| // least one region and implements the RegionBranchOpInterface. If there |
| // is an operation that does not fulfill this condition, we cannot apply |
| // the deallocation steps. Furthermore, we accept cases, where we have a |
| // region that returns no results, since, in that case, the intra-region |
| // control flow does not affect the transformation. |
| size_t size = regions.size(); |
| if (((size == 1 && !operation->getResults().empty()) || size > 1) && |
| !dyn_cast<RegionBranchOpInterface>(operation)) { |
| operation->emitError("All operations with attached regions need to " |
| "implement the RegionBranchOpInterface."); |
| success = false; |
| } |
| }); |
| return success; |
| } |
| |
| namespace { |
| |
| //===----------------------------------------------------------------------===// |
| // Backedges analysis |
| //===----------------------------------------------------------------------===// |
| |
| /// A straight-forward program analysis which detects loop backedges induced by |
| /// explicit control flow. |
| class Backedges { |
| public: |
| using BlockSetT = SmallPtrSet<Block *, 16>; |
| using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>; |
| |
| public: |
| /// Constructs a new backedges analysis using the op provided. |
| Backedges(Operation *op) { recurse(op); } |
| |
| /// Returns the number of backedges formed by explicit control flow. |
| size_t size() const { return edgeSet.size(); } |
| |
| /// Returns the start iterator to loop over all backedges. |
| BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); } |
| |
| /// Returns the end iterator to loop over all backedges. |
| BackedgeSetT::const_iterator end() const { return edgeSet.end(); } |
| |
| private: |
| /// Enters the current block and inserts a backedge into the `edgeSet` if we |
| /// have already visited the current block. The inserted edge links the given |
| /// `predecessor` with the `current` block. |
| bool enter(Block ¤t, Block *predecessor) { |
| bool inserted = visited.insert(¤t).second; |
| if (!inserted) |
| edgeSet.insert(std::make_pair(predecessor, ¤t)); |
| return inserted; |
| } |
| |
| /// Leaves the current block. |
| void exit(Block ¤t) { visited.erase(¤t); } |
| |
| /// Recurses into the given operation while taking all attached regions into |
| /// account. |
| void recurse(Operation *op) { |
| Block *current = op->getBlock(); |
| // If the current op implements the `BranchOpInterface`, there can be |
| // cycles in the scope of all successor blocks. |
| if (isa<BranchOpInterface>(op)) { |
| for (Block *succ : current->getSuccessors()) |
| recurse(*succ, current); |
| } |
| // Recurse into all distinct regions and check for explicit control-flow |
| // loops. |
| for (Region ®ion : op->getRegions()) { |
| if (!region.empty()) |
| recurse(region.front(), current); |
| } |
| } |
| |
| /// Recurses into explicit control-flow structures that are given by |
| /// the successor relation defined on the block level. |
| void recurse(Block &block, Block *predecessor) { |
| // Try to enter the current block. If this is not possible, we are |
| // currently processing this block and can safely return here. |
| if (!enter(block, predecessor)) |
| return; |
| |
| // Recurse into all operations and successor blocks. |
| for (Operation &op : block.getOperations()) |
| recurse(&op); |
| |
| // Leave the current block. |
| exit(block); |
| } |
| |
| /// Stores all blocks that are currently visited and on the processing stack. |
| BlockSetT visited; |
| |
| /// Stores all backedges in the format (source, target). |
| BackedgeSetT edgeSet; |
| }; |
| |
| //===----------------------------------------------------------------------===// |
| // BufferDeallocation |
| //===----------------------------------------------------------------------===// |
| |
| /// The buffer deallocation transformation which ensures that all allocs in the |
| /// program have a corresponding de-allocation. As a side-effect, it might also |
| /// introduce clones that in turn leads to additional deallocations. |
| class BufferDeallocation : public BufferPlacementTransformationBase { |
| public: |
| using AliasAllocationMapT = |
| llvm::DenseMap<Value, bufferization::AllocationOpInterface>; |
| |
| BufferDeallocation(Operation *op) |
| : BufferPlacementTransformationBase(op), dominators(op), |
| postDominators(op) {} |
| |
| /// Checks if all allocation operations either provide an already existing |
| /// deallocation operation or implement the AllocationOpInterface. In |
| /// addition, this method initializes the internal alias to |
| /// AllocationOpInterface mapping in order to get compatible |
| /// AllocationOpInterface implementations for aliases. |
| LogicalResult prepare() { |
| for (const BufferPlacementAllocs::AllocEntry &entry : allocs) { |
| // Get the defining allocation operation. |
| Value alloc = std::get<0>(entry); |
| auto allocationInterface = |
| alloc.getDefiningOp<bufferization::AllocationOpInterface>(); |
| // If there is no existing deallocation operation and no implementation of |
| // the AllocationOpInterface, we cannot apply the BufferDeallocation pass. |
| if (!std::get<1>(entry) && !allocationInterface) { |
| return alloc.getDefiningOp()->emitError( |
| "Allocation is not deallocated explicitly nor does the operation " |
| "implement the AllocationOpInterface."); |
| } |
| |
| // Register the current allocation interface implementation. |
| aliasToAllocations[alloc] = allocationInterface; |
| |
| // Get the alias information for the current allocation node. |
| llvm::for_each(aliases.resolve(alloc), [&](Value alias) { |
| // TODO: check for incompatible implementations of the |
| // AllocationOpInterface. This could be realized by promoting the |
| // AllocationOpInterface to a DialectInterface. |
| aliasToAllocations[alias] = allocationInterface; |
| }); |
| } |
| return success(); |
| } |
| |
| /// Performs the actual placement/creation of all temporary clone and dealloc |
| /// nodes. |
| LogicalResult deallocate() { |
| // Add additional clones that are required. |
| if (failed(introduceClones())) |
| return failure(); |
| |
| // Place deallocations for all allocation entries. |
| return placeDeallocs(); |
| } |
| |
| private: |
| /// Introduces required clone operations to avoid memory leaks. |
| LogicalResult introduceClones() { |
| // Initialize the set of values that require a dedicated memory free |
| // operation since their operands cannot be safely deallocated in a post |
| // dominator. |
| SmallPtrSet<Value, 8> valuesToFree; |
| llvm::SmallDenseSet<std::tuple<Value, Block *>> visitedValues; |
| SmallVector<std::tuple<Value, Block *>, 8> toProcess; |
| |
| // Check dominance relation for proper dominance properties. If the given |
| // value node does not dominate an alias, we will have to create a clone in |
| // order to free all buffers that can potentially leak into a post |
| // dominator. |
| auto findUnsafeValues = [&](Value source, Block *definingBlock) { |
| auto it = aliases.find(source); |
| if (it == aliases.end()) |
| return; |
| for (Value value : it->second) { |
| if (valuesToFree.count(value) > 0) |
| continue; |
| Block *parentBlock = value.getParentBlock(); |
| // Check whether we have to free this particular block argument or |
| // generic value. We have to free the current alias if it is either |
| // defined in a non-dominated block or it is defined in the same block |
| // but the current value is not dominated by the source value. |
| if (!dominators.dominates(definingBlock, parentBlock) || |
| (definingBlock == parentBlock && value.isa<BlockArgument>())) { |
| toProcess.emplace_back(value, parentBlock); |
| valuesToFree.insert(value); |
| } else if (visitedValues.insert(std::make_tuple(value, definingBlock)) |
| .second) |
| toProcess.emplace_back(value, definingBlock); |
| } |
| }; |
| |
| // Detect possibly unsafe aliases starting from all allocations. |
| for (BufferPlacementAllocs::AllocEntry &entry : allocs) { |
| Value allocValue = std::get<0>(entry); |
| findUnsafeValues(allocValue, allocValue.getDefiningOp()->getBlock()); |
| } |
| // Try to find block arguments that require an explicit free operation |
| // until we reach a fix point. |
| while (!toProcess.empty()) { |
| auto current = toProcess.pop_back_val(); |
| findUnsafeValues(std::get<0>(current), std::get<1>(current)); |
| } |
| |
| // Update buffer aliases to ensure that we free all buffers and block |
| // arguments at the correct locations. |
| aliases.remove(valuesToFree); |
| |
| // Add new allocs and additional clone operations. |
| for (Value value : valuesToFree) { |
| if (failed(value.isa<BlockArgument>() |
| ? introduceBlockArgCopy(value.cast<BlockArgument>()) |
| : introduceValueCopyForRegionResult(value))) |
| return failure(); |
| |
| // Register the value to require a final dealloc. Note that we do not have |
| // to assign a block here since we do not want to move the allocation node |
| // to another location. |
| allocs.registerAlloc(std::make_tuple(value, nullptr)); |
| } |
| return success(); |
| } |
| |
| /// Introduces temporary clones in all predecessors and copies the source |
| /// values into the newly allocated buffers. |
| LogicalResult introduceBlockArgCopy(BlockArgument blockArg) { |
| // Allocate a buffer for the current block argument in the block of |
| // the associated value (which will be a predecessor block by |
| // definition). |
| Block *block = blockArg.getOwner(); |
| for (auto it = block->pred_begin(), e = block->pred_end(); it != e; ++it) { |
| // Get the terminator and the value that will be passed to our |
| // argument. |
| Operation *terminator = (*it)->getTerminator(); |
| auto branchInterface = cast<BranchOpInterface>(terminator); |
| // Query the associated source value. |
| Value sourceValue = |
| branchInterface.getSuccessorOperands(it.getSuccessorIndex()) |
| .getValue()[blockArg.getArgNumber()]; |
| // Wire new clone and successor operand. |
| auto mutableOperands = |
| branchInterface.getMutableSuccessorOperands(it.getSuccessorIndex()); |
| if (!mutableOperands) { |
| terminator->emitError() << "terminators with immutable successor " |
| "operands are not supported"; |
| continue; |
| } |
| // Create a new clone at the current location of the terminator. |
| auto clone = introduceCloneBuffers(sourceValue, terminator); |
| if (failed(clone)) |
| return failure(); |
| mutableOperands.getValue() |
| .slice(blockArg.getArgNumber(), 1) |
| .assign(*clone); |
| } |
| |
| // Check whether the block argument has implicitly defined predecessors via |
| // the RegionBranchOpInterface. This can be the case if the current block |
| // argument belongs to the first block in a region and the parent operation |
| // implements the RegionBranchOpInterface. |
| Region *argRegion = block->getParent(); |
| Operation *parentOp = argRegion->getParentOp(); |
| RegionBranchOpInterface regionInterface; |
| if (!argRegion || &argRegion->front() != block || |
| !(regionInterface = dyn_cast<RegionBranchOpInterface>(parentOp))) |
| return success(); |
| |
| if (failed(introduceClonesForRegionSuccessors( |
| regionInterface, argRegion->getParentOp()->getRegions(), blockArg, |
| [&](RegionSuccessor &successorRegion) { |
| // Find a predecessor of our argRegion. |
| return successorRegion.getSuccessor() == argRegion; |
| }))) |
| return failure(); |
| |
| // Check whether the block argument belongs to an entry region of the |
| // parent operation. In this case, we have to introduce an additional clone |
| // for buffer that is passed to the argument. |
| SmallVector<RegionSuccessor, 2> successorRegions; |
| regionInterface.getSuccessorRegions(/*index=*/llvm::None, successorRegions); |
| auto *it = |
| llvm::find_if(successorRegions, [&](RegionSuccessor &successorRegion) { |
| return successorRegion.getSuccessor() == argRegion; |
| }); |
| if (it == successorRegions.end()) |
| return success(); |
| |
| // Determine the actual operand to introduce a clone for and rewire the |
| // operand to point to the clone instead. |
| Value operand = |
| regionInterface.getSuccessorEntryOperands(argRegion->getRegionNumber()) |
| [llvm::find(it->getSuccessorInputs(), blockArg).getIndex()]; |
| auto clone = introduceCloneBuffers(operand, parentOp); |
| if (failed(clone)) |
| return failure(); |
| |
| auto op = llvm::find(parentOp->getOperands(), operand); |
| assert(op != parentOp->getOperands().end() && |
| "parentOp does not contain operand"); |
| parentOp->setOperand(op.getIndex(), *clone); |
| return success(); |
| } |
| |
| /// Introduces temporary clones in front of all associated nested-region |
| /// terminators and copies the source values into the newly allocated buffers. |
| LogicalResult introduceValueCopyForRegionResult(Value value) { |
| // Get the actual result index in the scope of the parent terminator. |
| Operation *operation = value.getDefiningOp(); |
| auto regionInterface = cast<RegionBranchOpInterface>(operation); |
| // Filter successors that return to the parent operation. |
| auto regionPredicate = [&](RegionSuccessor &successorRegion) { |
| // If the RegionSuccessor has no associated successor, it will return to |
| // its parent operation. |
| return !successorRegion.getSuccessor(); |
| }; |
| // Introduce a clone for all region "results" that are returned to the |
| // parent operation. This is required since the parent's result value has |
| // been considered critical. Therefore, the algorithm assumes that a clone |
| // of a previously allocated buffer is returned by the operation (like in |
| // the case of a block argument). |
| return introduceClonesForRegionSuccessors( |
| regionInterface, operation->getRegions(), value, regionPredicate); |
| } |
| |
| /// Introduces buffer clones for all terminators in the given regions. The |
| /// regionPredicate is applied to every successor region in order to restrict |
| /// the clones to specific regions. |
| template <typename TPredicate> |
| LogicalResult introduceClonesForRegionSuccessors( |
| RegionBranchOpInterface regionInterface, MutableArrayRef<Region> regions, |
| Value argValue, const TPredicate ®ionPredicate) { |
| for (Region ®ion : regions) { |
| // Query the regionInterface to get all successor regions of the current |
| // one. |
| SmallVector<RegionSuccessor, 2> successorRegions; |
| regionInterface.getSuccessorRegions(region.getRegionNumber(), |
| successorRegions); |
| // Try to find a matching region successor. |
| RegionSuccessor *regionSuccessor = |
| llvm::find_if(successorRegions, regionPredicate); |
| if (regionSuccessor == successorRegions.end()) |
| continue; |
| // Get the operand index in the context of the current successor input |
| // bindings. |
| size_t operandIndex = |
| llvm::find(regionSuccessor->getSuccessorInputs(), argValue) |
| .getIndex(); |
| |
| // Iterate over all immediate terminator operations to introduce |
| // new buffer allocations. Thereby, the appropriate terminator operand |
| // will be adjusted to point to the newly allocated buffer instead. |
| if (failed(walkReturnOperations(®ion, [&](Operation *terminator) { |
| // Get the actual mutable operands for this terminator op. |
| auto terminatorOperands = *getMutableRegionBranchSuccessorOperands( |
| terminator, region.getRegionNumber()); |
| // Extract the source value from the current terminator. |
| // This conversion needs to exist on a separate line due to a bug in |
| // GCC conversion analysis. |
| OperandRange immutableTerminatorOperands = terminatorOperands; |
| Value sourceValue = immutableTerminatorOperands[operandIndex]; |
| // Create a new clone at the current location of the terminator. |
| auto clone = introduceCloneBuffers(sourceValue, terminator); |
| if (failed(clone)) |
| return failure(); |
| // Wire clone and terminator operand. |
| terminatorOperands.slice(operandIndex, 1).assign(*clone); |
| return success(); |
| }))) |
| return failure(); |
| } |
| return success(); |
| } |
| |
| /// Creates a new memory allocation for the given source value and clones |
| /// its content into the newly allocated buffer. The terminator operation is |
| /// used to insert the clone operation at the right place. |
| FailureOr<Value> introduceCloneBuffers(Value sourceValue, |
| Operation *terminator) { |
| // Avoid multiple clones of the same source value. This can happen in the |
| // presence of loops when a branch acts as a backedge while also having |
| // another successor that returns to its parent operation. Note: that |
| // copying copied buffers can introduce memory leaks since the invariant of |
| // BufferDeallocation assumes that a buffer will be only cloned once into a |
| // temporary buffer. Hence, the construction of clone chains introduces |
| // additional allocations that are not tracked automatically by the |
| // algorithm. |
| if (clonedValues.contains(sourceValue)) |
| return sourceValue; |
| // Create a new clone operation that copies the contents of the old |
| // buffer to the new one. |
| auto clone = buildClone(terminator, sourceValue); |
| if (succeeded(clone)) { |
| // Remember the clone of original source value. |
| clonedValues.insert(*clone); |
| } |
| return clone; |
| } |
| |
| /// Finds correct dealloc positions according to the algorithm described at |
| /// the top of the file for all alloc nodes and block arguments that can be |
| /// handled by this analysis. |
| LogicalResult placeDeallocs() { |
| // Move or insert deallocs using the previously computed information. |
| // These deallocations will be linked to their associated allocation nodes |
| // since they don't have any aliases that can (potentially) increase their |
| // liveness. |
| for (const BufferPlacementAllocs::AllocEntry &entry : allocs) { |
| Value alloc = std::get<0>(entry); |
| auto aliasesSet = aliases.resolve(alloc); |
| assert(!aliasesSet.empty() && "must contain at least one alias"); |
| |
| // Determine the actual block to place the dealloc and get liveness |
| // information. |
| Block *placementBlock = |
| findCommonDominator(alloc, aliasesSet, postDominators); |
| const LivenessBlockInfo *livenessInfo = |
| liveness.getLiveness(placementBlock); |
| |
| // We have to ensure that the dealloc will be after the last use of all |
| // aliases of the given value. We first assume that there are no uses in |
| // the placementBlock and that we can safely place the dealloc at the |
| // beginning. |
| Operation *endOperation = &placementBlock->front(); |
| |
| // Iterate over all aliases and ensure that the endOperation will point |
| // to the last operation of all potential aliases in the placementBlock. |
| for (Value alias : aliasesSet) { |
| // Ensure that the start operation is at least the defining operation of |
| // the current alias to avoid invalid placement of deallocs for aliases |
| // without any uses. |
| Operation *beforeOp = endOperation; |
| if (alias.getDefiningOp() && |
| !(beforeOp = placementBlock->findAncestorOpInBlock( |
| *alias.getDefiningOp()))) |
| continue; |
| |
| Operation *aliasEndOperation = |
| livenessInfo->getEndOperation(alias, beforeOp); |
| // Check whether the aliasEndOperation lies in the desired block and |
| // whether it is behind the current endOperation. If yes, this will be |
| // the new endOperation. |
| if (aliasEndOperation->getBlock() == placementBlock && |
| endOperation->isBeforeInBlock(aliasEndOperation)) |
| endOperation = aliasEndOperation; |
| } |
| // endOperation is the last operation behind which we can safely store |
| // the dealloc taking all potential aliases into account. |
| |
| // If there is an existing dealloc, move it to the right place. |
| Operation *deallocOperation = std::get<1>(entry); |
| if (deallocOperation) { |
| deallocOperation->moveAfter(endOperation); |
| } else { |
| // If the Dealloc position is at the terminator operation of the |
| // block, then the value should escape from a deallocation. |
| Operation *nextOp = endOperation->getNextNode(); |
| if (!nextOp) |
| continue; |
| // If there is no dealloc node, insert one in the right place. |
| if (failed(buildDealloc(nextOp, alloc))) |
| return failure(); |
| } |
| } |
| return success(); |
| } |
| |
| /// Builds a deallocation operation compatible with the given allocation |
| /// value. If there is no registered AllocationOpInterface implementation for |
| /// the given value (e.g. in the case of a function parameter), this method |
| /// builds a memref::DeallocOp. |
| LogicalResult buildDealloc(Operation *op, Value alloc) { |
| OpBuilder builder(op); |
| auto it = aliasToAllocations.find(alloc); |
| if (it != aliasToAllocations.end()) { |
| // Call the allocation op interface to build a supported and |
| // compatible deallocation operation. |
| auto dealloc = it->second.buildDealloc(builder, alloc); |
| if (!dealloc) |
| return op->emitError() |
| << "allocations without compatible deallocations are " |
| "not supported"; |
| } else { |
| // Build a "default" DeallocOp for unknown allocation sources. |
| builder.create<memref::DeallocOp>(alloc.getLoc(), alloc); |
| } |
| return success(); |
| } |
| |
| /// Builds a clone operation compatible with the given allocation value. If |
| /// there is no registered AllocationOpInterface implementation for the given |
| /// value (e.g. in the case of a function parameter), this method builds a |
| /// bufferization::CloneOp. |
| FailureOr<Value> buildClone(Operation *op, Value alloc) { |
| OpBuilder builder(op); |
| auto it = aliasToAllocations.find(alloc); |
| if (it != aliasToAllocations.end()) { |
| // Call the allocation op interface to build a supported and |
| // compatible clone operation. |
| auto clone = it->second.buildClone(builder, alloc); |
| if (clone) |
| return *clone; |
| return (LogicalResult)(op->emitError() |
| << "allocations without compatible clone ops " |
| "are not supported"); |
| } |
| // Build a "default" CloneOp for unknown allocation sources. |
| return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc) |
| .getResult(); |
| } |
| |
| /// The dominator info to find the appropriate start operation to move the |
| /// allocs. |
| DominanceInfo dominators; |
| |
| /// The post dominator info to move the dependent allocs in the right |
| /// position. |
| PostDominanceInfo postDominators; |
| |
| /// Stores already cloned buffers to avoid additional clones of clones. |
| ValueSetT clonedValues; |
| |
| /// Maps aliases to their source allocation interfaces (inverse mapping). |
| AliasAllocationMapT aliasToAllocations; |
| }; |
| |
| //===----------------------------------------------------------------------===// |
| // BufferDeallocationPass |
| //===----------------------------------------------------------------------===// |
| |
| struct DefaultAllocationInterface |
| : public bufferization::AllocationOpInterface::ExternalModel< |
| DefaultAllocationInterface, memref::AllocOp> { |
| static Optional<Operation *> buildDealloc(OpBuilder &builder, Value alloc) { |
| return builder.create<memref::DeallocOp>(alloc.getLoc(), alloc) |
| .getOperation(); |
| } |
| static Optional<Value> buildClone(OpBuilder &builder, Value alloc) { |
| return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc) |
| .getResult(); |
| } |
| }; |
| |
| /// The actual buffer deallocation pass that inserts and moves dealloc nodes |
| /// into the right positions. Furthermore, it inserts additional clones if |
| /// necessary. It uses the algorithm described at the top of the file. |
| struct BufferDeallocationPass : BufferDeallocationBase<BufferDeallocationPass> { |
| void getDependentDialects(DialectRegistry ®istry) const override { |
| registry.insert<bufferization::BufferizationDialect>(); |
| registry.insert<memref::MemRefDialect>(); |
| registry.addOpInterface<memref::AllocOp, DefaultAllocationInterface>(); |
| } |
| |
| void runOnFunction() override { |
| // Ensure that there are supported loops only. |
| FuncOp func = getFunction(); |
| Backedges backedges(func); |
| if (backedges.size()) { |
| func.emitError("Only structured control-flow loops are supported."); |
| return signalPassFailure(); |
| } |
| |
| // Check that the control flow structures are supported. |
| if (!validateSupportedControlFlow(func.getRegion())) |
| return signalPassFailure(); |
| |
| // Gather all required allocation nodes and prepare the deallocation phase. |
| BufferDeallocation deallocation(func); |
| |
| // Check for supported AllocationOpInterface implementations and prepare the |
| // internal deallocation pass. |
| if (failed(deallocation.prepare())) |
| return signalPassFailure(); |
| |
| // Place all required temporary clone and dealloc nodes. |
| if (failed(deallocation.deallocate())) |
| return signalPassFailure(); |
| } |
| }; |
| |
| } // end anonymous namespace |
| |
| //===----------------------------------------------------------------------===// |
| // BufferDeallocationPass construction |
| //===----------------------------------------------------------------------===// |
| |
| std::unique_ptr<Pass> mlir::bufferization::createBufferDeallocationPass() { |
| return std::make_unique<BufferDeallocationPass>(); |
| } |