mlir/lib/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation.cpp - llvm-project - Git at Google

 //===- OwnershipBasedBufferDeallocation.cpp - impl. for buffer dealloc. ---===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This file implements logic for computing correct `bufferization.dealloc`
 // positions. Furthermore, buffer deallocation also adds required new clone
 // operations to ensure that memrefs returned by functions never alias an
 // argument.
 //
 // TODO:
 // The current implementation does not support explicit-control-flow loops and
 // the resulting code will be invalid with respect to program semantics.
 // However, structured control-flow loops are fully supported.
 //
 //===----------------------------------------------------------------------===//

 #include "mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
 #include "mlir/Dialect/Bufferization/Transforms/Passes.h"
 #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/SCF/IR/SCF.h"
 #include "mlir/IR/Iterators.h"
 #include "mlir/Interfaces/ControlFlowInterfaces.h"

 namespace mlir {
 namespace bufferization {
 #define GEN_PASS_DEF_OWNERSHIPBASEDBUFFERDEALLOCATIONPASS
 #include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc"
 } // namespace bufferization
 } // namespace mlir

 using namespace mlir;
 using namespace mlir::bufferization;

 //===----------------------------------------------------------------------===//
 // Helpers
 //===----------------------------------------------------------------------===//

 static Value buildBoolValue(OpBuilder &builder, Location loc, bool value) {
   return arith::ConstantOp::create(builder, loc, builder.getBoolAttr(value));
 }

 static bool isMemref(Value v) { return isa<BaseMemRefType>(v.getType()); }

 /// Return "true" if the given op is guaranteed to have neither "Allocate" nor
 /// "Free" side effects.
 static bool hasNeitherAllocateNorFreeSideEffect(Operation *op) {
   return !mightHaveEffect<MemoryEffects::Allocate>(op) &&
          !mightHaveEffect<MemoryEffects::Free>(op);
 }

 /// Return "true" if the given op has buffer semantics. I.e., it has buffer
 /// operands, buffer results and/or buffer region entry block arguments.
 static bool hasBufferSemantics(Operation *op) {
   if (llvm::any_of(op->getOperands(), isMemref) ||
       llvm::any_of(op->getResults(), isMemref))
     return true;
   for (Region &region : op->getRegions())
     if (!region.empty())
       if (llvm::any_of(region.front().getArguments(), isMemref))
         return true;
   return false;
 }

 //===----------------------------------------------------------------------===//
 // Backedges analysis
 //===----------------------------------------------------------------------===//

 namespace {

 /// A straight-forward program analysis which detects loop backedges induced by
 /// explicit control flow.
 class Backedges {
 public:
   using BlockSetT = SmallPtrSet<Block *, 16>;
   using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>;

 public:
   /// Constructs a new backedges analysis using the op provided.
   Backedges(Operation *op) { recurse(op); }

   /// Returns the number of backedges formed by explicit control flow.
   size_t size() const { return edgeSet.size(); }

   /// Returns the start iterator to loop over all backedges.
   BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }

   /// Returns the end iterator to loop over all backedges.
   BackedgeSetT::const_iterator end() const { return edgeSet.end(); }

 private:
   /// Enters the current block and inserts a backedge into the `edgeSet` if we
   /// have already visited the current block. The inserted edge links the given
   /// `predecessor` with the `current` block.
   bool enter(Block &current, Block *predecessor) {
     bool inserted = visited.insert(&current).second;
     if (!inserted)
       edgeSet.insert(std::make_pair(predecessor, &current));
     return inserted;
   }

   /// Leaves the current block.
   void exit(Block &current) { visited.erase(&current); }

   /// Recurses into the given operation while taking all attached regions into
   /// account.
   void recurse(Operation *op) {
     Block *current = op->getBlock();
     // If the current op implements the `BranchOpInterface`, there can be
     // cycles in the scope of all successor blocks.
     if (isa<BranchOpInterface>(op)) {
       for (Block *succ : current->getSuccessors())
         recurse(*succ, current);
     }
     // Recurse into all distinct regions and check for explicit control-flow
     // loops.
     for (Region &region : op->getRegions()) {
       if (!region.empty())
         recurse(region.front(), current);
     }
   }

   /// Recurses into explicit control-flow structures that are given by
   /// the successor relation defined on the block level.
   void recurse(Block &block, Block *predecessor) {
     // Try to enter the current block. If this is not possible, we are
     // currently processing this block and can safely return here.
     if (!enter(block, predecessor))
       return;

     // Recurse into all operations and successor blocks.
     for (Operation &op : block.getOperations())
       recurse(&op);

     // Leave the current block.
     exit(block);
   }

   /// Stores all blocks that are currently visited and on the processing stack.
   BlockSetT visited;

   /// Stores all backedges in the format (source, target).
   BackedgeSetT edgeSet;
 };

 } // namespace

 //===----------------------------------------------------------------------===//
 // BufferDeallocation
 //===----------------------------------------------------------------------===//

 namespace {
 /// The buffer deallocation transformation which ensures that all allocs in the
 /// program have a corresponding de-allocation.
 class BufferDeallocation {
 public:
   BufferDeallocation(Operation *op, DeallocationOptions options,
                      SymbolTableCollection &symbolTables)
       : state(op, symbolTables), options(options) {}

   /// Performs the actual placement/creation of all dealloc operations.
   LogicalResult deallocate(FunctionOpInterface op);

 private:
   /// The base case for the recursive template below.
   template <typename... T>
   typename std::enable_if<sizeof...(T) == 0, FailureOr<Operation *>>::type
   handleOp(Operation *op) {
     return op;
   }

   /// Applies all the handlers of the interfaces in the template list
   /// implemented by 'op'. In particular, if an operation implements more than
   /// one of the interfaces in the template list, all the associated handlers
   /// will be applied to the operation in the same order as the template list
   /// specifies. If a handler reports a failure or removes the operation without
   /// replacement (indicated by returning 'nullptr'), no further handlers are
   /// applied and the return value is propagated to the caller of 'handleOp'.
   ///
   /// The interface handlers job is to update the deallocation state, most
   /// importantly the ownership map and list of memrefs to potentially be
   /// deallocated per block, but also to insert `bufferization.dealloc`
   /// operations where needed. Obviously, no MemRefs that may be used at a later
   /// point in the control-flow may be deallocated and the ownership map has to
   /// be updated to reflect potential ownership changes caused by the dealloc
   /// operation (e.g., if two interfaces on the same op insert a dealloc
   /// operation each, the second one should query the ownership map and use them
   /// as deallocation condition such that MemRefs already deallocated in the
   /// first dealloc operation are not deallocated a second time (double-free)).
   /// Note that currently only the interfaces on terminators may insert dealloc
   /// operations and it is verified as a precondition that a terminator op must
   /// implement exactly one of the interfaces handling dealloc insertion.
   ///
   /// The return value of the 'handleInterface' functions should be a
   /// FailureOr<Operation *> indicating whether there was a failure or otherwise
   /// returning the operation itself or a replacement operation.
   ///
   /// Note: The difference compared to `TypeSwitch` is that all
   /// matching cases are applied instead of just the first match.
   template <typename InterfaceT, typename... InterfacesU>
   FailureOr<Operation *> handleOp(Operation *op) {
     Operation *next = op;
     if (auto concreteOp = dyn_cast<InterfaceT>(op)) {
       FailureOr<Operation *> result = handleInterface(concreteOp);
       if (failed(result))
         return failure();
       next = *result;
     }
     if (!next)
       return FailureOr<Operation *>(nullptr);
     return handleOp<InterfacesU...>(next);
   }

   /// Apply all supported interface handlers to the given op.
   FailureOr<Operation *> handleAllInterfaces(Operation *op) {
     if (auto deallocOpInterface = dyn_cast<BufferDeallocationOpInterface>(op))
       return deallocOpInterface.process(state, options);

     if (failed(verifyOperationPreconditions(op)))
       return failure();

     return handleOp<MemoryEffectOpInterface, RegionBranchOpInterface,
                     CallOpInterface, BranchOpInterface,
                     RegionBranchTerminatorOpInterface>(op);
   }

   /// Make sure that for each forwarded MemRef value, an ownership indicator
   /// `i1` value is forwarded as well such that the successor block knows
   /// whether the MemRef has to be deallocated.
   ///
   /// Example:
   /// ```
   /// ^bb1:
   ///   <more ops...>
   ///   cf.br ^bb2(<forward-to-bb2>)
   /// ```
   /// becomes
   /// ```
   /// // let (m, c) = getMemrefsAndConditionsToDeallocate(bb1)
   /// // let r = getMemrefsToRetain(bb1, bb2, <forward-to-bb2>)
   /// ^bb1:
   ///   <more ops...>
   ///   o = bufferization.dealloc m if c retain r
   ///   // replace ownership(r) with o element-wise
   ///   cf.br ^bb2(<forward-to-bb2>, o)
   /// ```
   FailureOr<Operation *> handleInterface(BranchOpInterface op);

   /// Add an ownership indicator for every forwarding MemRef operand and result.
   /// Nested regions never take ownership of MemRefs owned by a parent region
   /// (neither via forwarding operand nor when captured implicitly when the
   /// region is not isolated from above). Ownerships will only be passed to peer
   /// regions (when an operation has multiple regions, such as scf.while), or to
   /// parent regions.
   /// Note that the block arguments in the nested region are currently handled
   /// centrally in the 'dealloc' function, but better interface support could
   /// allow us to do this here for the nested region specifically to reduce the
   /// amount of assumptions we make on the structure of ops implementing this
   /// interface.
   ///
   /// Example:
   /// ```
   /// %ret = scf.for %i = %c0 to %c10 step %c1 iter_args(%m = %memref) {
   ///   <more ops...>
   ///   scf.yield %m : memref<2xi32>, i1
   /// }
   /// ```
   /// becomes
   /// ```
   /// %ret:2 = scf.for %i = %c0 to %c10 step %c1
   ///     iter_args(%m = %memref, %own = %false) {
   ///   <more ops...>
   ///   // Note that the scf.yield is handled by the
   ///   // RegionBranchTerminatorOpInterface (not this handler)
   ///   // let o = getMemrefWithUniqueOwnership(%own)
   ///   scf.yield %m, o : memref<2xi32>, i1
   /// }
   /// ```
   FailureOr<Operation *> handleInterface(RegionBranchOpInterface op);

   /// If the private-function-dynamic-ownership pass option is enabled and the
   /// called function is private, additional results are added for each MemRef
   /// result to pass the dynamic ownership indicator along. Otherwise, updates
   /// the ownership map and list of memrefs to be deallocated according to the
   /// function boundary ABI, i.e., assume ownership of all returned MemRefs.
   ///
   /// Example (assume `private-function-dynamic-ownership` is enabled):
   /// ```
   /// func.func @f(%arg0: memref<2xi32>) -> memref<2xi32> {...}
   /// func.func private @g(%arg0: memref<2xi32>) -> memref<2xi32> {...}
   ///
   /// %ret_f = func.call @f(%memref) : (memref<2xi32>) -> memref<2xi32>
   /// %ret_g = func.call @g(%memref) : (memref<2xi32>) -> memref<2xi32>
   /// ```
   /// becomes
   /// ```
   /// func.func @f(%arg0: memref<2xi32>) -> memref<2xi32> {...}
   /// func.func private @g(%arg0: memref<2xi32>) -> (memref<2xi32>, i1) {...}
   ///
   /// %ret_f = func.call @f(%memref) : (memref<2xi32>) -> memref<2xi32>
   /// // set ownership(%ret_f) := true
   /// // remember to deallocate %ret_f
   ///
   /// %ret_g:2 = func.call @g(%memref) : (memref<2xi32>) -> (memref<2xi32>, i1)
   /// // set ownership(%ret_g#0) := %ret_g#1
   /// // remember to deallocate %ret_g if it comes with ownership
   /// ```
   FailureOr<Operation *> handleInterface(CallOpInterface op);

   /// Takes care of allocation and free side-effects. It collects allocated
   /// MemRefs that we have to add to manually deallocate, but also removes
   /// values again that are already deallocated before the end of the block. It
   /// also updates the ownership map accordingly.
   ///
   /// Example:
   /// ```
   /// %alloc = memref.alloc()
   /// %alloca = memref.alloca()
   /// ```
   /// becomes
   /// ```
   /// %alloc = memref.alloc()
   /// %alloca = memref.alloca()
   /// // set ownership(alloc) := true
   /// // set ownership(alloca) := false
   /// // remember to deallocate %alloc
   /// ```
   FailureOr<Operation *> handleInterface(MemoryEffectOpInterface op);

   /// Takes care that the function boundary ABI is adhered to if the parent
   /// operation implements FunctionOpInterface, inserting a
   /// `bufferization.clone` if necessary, and inserts the
   /// `bufferization.dealloc` operation according to the ops operands.
   ///
   /// Example:
   /// ```
   /// ^bb1:
   ///   <more ops...>
   ///   func.return <return-vals>
   /// ```
   /// becomes
   /// ```
   /// // let (m, c) = getMemrefsAndConditionsToDeallocate(bb1)
   /// // let r = getMemrefsToRetain(bb1, nullptr, <return-vals>)
   /// ^bb1:
   ///   <more ops...>
   ///   o = bufferization.dealloc m if c retain r
   ///   func.return <return-vals>
   ///     (if !isFunctionWithoutDynamicOwnership: append o)
   /// ```
   FailureOr<Operation *> handleInterface(RegionBranchTerminatorOpInterface op);

   /// Construct a new operation which is exactly the same as the passed 'op'
   /// except that the OpResults list is appended by new results of the passed
   /// 'types'.
   /// TODO: ideally, this would be implemented using an OpInterface because it
   /// is used to append function results, loop iter_args, etc. and thus makes
   /// some assumptions that the variadic list of those is at the end of the
   /// OpResults range.
   Operation *appendOpResults(Operation *op, ArrayRef<Type> types);

   /// A convenience template for the generic 'appendOpResults' function above to
   /// avoid manual casting of the result.
   template <typename OpTy>
   OpTy appendOpResults(OpTy op, ArrayRef<Type> types) {
     return cast<OpTy>(appendOpResults(op.getOperation(), types));
   }

   /// Performs deallocation of a single basic block. This is a private function
   /// because some internal data structures have to be set up beforehand and
   /// this function has to be called on blocks in a region in dominance order.
   LogicalResult deallocate(Block *block);

   /// After all relevant interfaces of an operation have been processed by the
   /// 'handleInterface' functions, this function sets the ownership of operation
   /// results that have not been set yet by the 'handleInterface' functions. It
   /// generally assumes that each result can alias with every operand of the
   /// operation, if there are MemRef typed results but no MemRef operands it
   /// assigns 'false' as ownership. This happens, e.g., for the
   /// memref.get_global operation. It would also be possible to query some alias
   /// analysis to get more precise ownerships, however, the analysis would have
   /// to be updated according to the IR modifications this pass performs (e.g.,
   /// re-building operations to have more result values, inserting clone
   /// operations, etc.).
   void populateRemainingOwnerships(Operation *op);

   /// Given an SSA value of MemRef type, returns the same of a new SSA value
   /// which has 'Unique' ownership where the ownership indicator is guaranteed
   /// to be always 'true'.
   Value materializeMemrefWithGuaranteedOwnership(OpBuilder &builder,
                                                  Value memref, Block *block);

   /// Returns whether the given operation implements FunctionOpInterface, has
   /// private visibility, and the private-function-dynamic-ownership pass option
   /// is enabled.
   bool isFunctionWithoutDynamicOwnership(Operation *op);

   /// Given an SSA value of MemRef type, this function queries the
   /// BufferDeallocationOpInterface of the defining operation of 'memref' for a
   /// materialized ownership indicator for 'memref'.  If the op does not
   /// implement the interface or if the block for which the materialized value
   /// is requested does not match the block in which 'memref' is defined, the
   /// default implementation in
   /// `DeallocationState::getMemrefWithUniqueOwnership` is queried instead.
   std::pair<Value, Value>
   materializeUniqueOwnership(OpBuilder &builder, Value memref, Block *block);

   /// Checks all the preconditions for operations implementing the
   /// FunctionOpInterface that have to hold for the deallocation to be
   /// applicable:
   /// (1) Checks that there are not explicit control flow loops.
   static LogicalResult verifyFunctionPreconditions(FunctionOpInterface op);

   /// Checks all the preconditions for operations inside the region of
   /// operations implementing the FunctionOpInterface that have to hold for the
   /// deallocation to be applicable:
   /// (1) Checks if all operations that have at least one attached region
   /// implement the RegionBranchOpInterface. This is not required in edge cases,
   /// where we have a single attached region and the parent operation has no
   /// results.
   /// (2) Checks that no deallocations already exist. Especially deallocations
   /// in nested regions are not properly supported yet since this requires
   /// ownership of the memref to be transferred to the nested region, which does
   /// not happen by default.  This constrained can be lifted in the future.
   /// (3) Checks that terminators with more than one successor except
   /// `cf.cond_br` are not present and that either BranchOpInterface or
   /// RegionBranchTerminatorOpInterface is implemented.
   static LogicalResult verifyOperationPreconditions(Operation *op);

   /// When the 'private-function-dynamic-ownership' pass option is enabled,
   /// additional `i1` return values are added for each MemRef result in the
   /// function signature. This function takes care of updating the
   /// `function_type` attribute of the function according to the actually
   /// returned values from the terminators.
   static LogicalResult updateFunctionSignature(FunctionOpInterface op);

 private:
   ///  Collects all analysis state and including liveness, caches, ownerships of
   ///  already processed values and operations, and the MemRefs that have to be
   ///  deallocated at the end of each block.
   DeallocationState state;

   /// Collects all pass options in a single place.
   DeallocationOptions options;
 };

 } // namespace

 //===----------------------------------------------------------------------===//
 // BufferDeallocation Implementation
 //===----------------------------------------------------------------------===//

 std::pair<Value, Value>
 BufferDeallocation::materializeUniqueOwnership(OpBuilder &builder, Value memref,
                                                Block *block) {
   // The interface can only materialize ownership indicators in the same block
   // as the defining op.
   if (memref.getParentBlock() != block)
     return state.getMemrefWithUniqueOwnership(builder, memref, block);

   Operation *owner = memref.getDefiningOp();
   if (!owner)
     owner = memref.getParentBlock()->getParentOp();

   // If the op implements the interface, query it for a materialized ownership
   // value.
   if (auto deallocOpInterface = dyn_cast<BufferDeallocationOpInterface>(owner))
     return deallocOpInterface.materializeUniqueOwnershipForMemref(
         state, options, builder, memref);

   // Otherwise use the default implementation.
   return state.getMemrefWithUniqueOwnership(builder, memref, block);
 }

 LogicalResult
 BufferDeallocation::verifyFunctionPreconditions(FunctionOpInterface op) {
   // (1) Ensure that there are supported loops only (no explicit control flow
   // loops).
   Backedges backedges(op);
   if (backedges.size()) {
     op->emitError("Only structured control-flow loops are supported.");
     return failure();
   }

   return success();
 }

 LogicalResult BufferDeallocation::verifyOperationPreconditions(Operation *op) {
   // We do not care about ops that do not operate on buffers and have no
   // Allocate/Free side effect.
   if (!hasBufferSemantics(op) && hasNeitherAllocateNorFreeSideEffect(op))
     return success();

   // (1) The pass does not work properly when deallocations are already present.
   // Alternatively, we could also remove all deallocations as a pre-pass.
   if (isa<DeallocOp>(op))
     return op->emitError(
         "No deallocation operations must be present when running this pass!");

   // (2) Memory side effects of unregistered ops are unknown. In particular, we
   // do not know whether an unregistered op allocates memory or not.
   // - Ops with recursive memory effects are allowed. All nested ops in the
   //   regions of `op` will be analyzed separately.
   // - Call ops are allowed even though they typically do not implement the
   //   MemoryEffectOpInterface. They usually do not have side effects apart
   //   from the callee, which will be analyzed separately. (This is similar to
   //   "recursive memory effects".)
   if (hasUnknownEffects(op) && !isa<CallOpInterface>(op))
     return op->emitError(
         "ops with unknown memory side effects are not supported");

   // (3) Check that the control flow structures are supported.
   auto regions = op->getRegions();
   // Check that if the operation has at
   // least one region it implements the RegionBranchOpInterface. If there
   // is an operation that does not fulfill this condition, we cannot apply
   // the deallocation steps. Furthermore, we accept cases, where we have a
   // region that returns no results, since, in that case, the intra-region
   // control flow does not affect the transformation.
   size_t size = regions.size();
   if (((size == 1 && !op->getResults().empty()) || size > 1) &&
       !dyn_cast<RegionBranchOpInterface>(op)) {
     return op->emitError("All operations with attached regions need to "
                          "implement the RegionBranchOpInterface.");
   }

   // (3) Check that terminators with more than one successor except `cf.cond_br`
   // are not present and that either BranchOpInterface or
   // RegionBranchTerminatorOpInterface is implemented.
   if (op->hasTrait<OpTrait::NoTerminator>())
     return op->emitError("NoTerminator trait is not supported");

   if (op->hasTrait<OpTrait::IsTerminator>()) {
     // Either one of those interfaces has to be implemented on terminators, but
     // not both.
     if (!isa<BranchOpInterface, RegionBranchTerminatorOpInterface>(op) ||
         (isa<BranchOpInterface>(op) &&
          isa<RegionBranchTerminatorOpInterface>(op)))

       return op->emitError(
           "Terminators must implement either BranchOpInterface or "
           "RegionBranchTerminatorOpInterface (but not both)!");

     // We only support terminators with 0 or 1 successors for now and
     // special-case the conditional branch op.
     if (op->getSuccessors().size() > 1)

       return op->emitError("Terminators with more than one successor "
                            "are not supported!");
   }

   return success();
 }

 LogicalResult
 BufferDeallocation::updateFunctionSignature(FunctionOpInterface op) {
   SmallVector<TypeRange> returnOperandTypes(llvm::map_range(
       op.getFunctionBody().getOps<RegionBranchTerminatorOpInterface>(),
       [](RegionBranchTerminatorOpInterface op) {
         return op.getSuccessorOperands(RegionBranchPoint::parent()).getTypes();
       }));
   if (!llvm::all_equal(returnOperandTypes))
     return op->emitError(
         "there are multiple return operations with different operand types");

   TypeRange resultTypes = op.getResultTypes();
   // Check if we found a return operation because that doesn't necessarily
   // always have to be the case, e.g., consider a function with one block that
   // has a cf.br at the end branching to itself again (i.e., an infinite loop).
   // In that case we don't want to crash but just not update the return types.
   if (!returnOperandTypes.empty())
     resultTypes = returnOperandTypes[0];

   op.setFunctionTypeAttr(TypeAttr::get(FunctionType::get(
       op->getContext(), op.getFunctionBody().front().getArgumentTypes(),
       resultTypes)));

   return success();
 }

 LogicalResult BufferDeallocation::deallocate(FunctionOpInterface op) {
   // Stop and emit a proper error message if we don't support the input IR.
   if (failed(verifyFunctionPreconditions(op)))
     return failure();

   // Process the function block by block.
   auto result = op->walk<WalkOrder::PostOrder, ForwardDominanceIterator<>>(
       [&](Block *block) {
         if (failed(deallocate(block)))
           return WalkResult::interrupt();
         return WalkResult::advance();
       });
   if (result.wasInterrupted())
     return failure();

   // Update the function signature if the function is private, dynamic ownership
   // is enabled, and the function has memrefs as arguments or results.
   return updateFunctionSignature(op);
 }

 LogicalResult BufferDeallocation::deallocate(Block *block) {
   OpBuilder builder = OpBuilder::atBlockBegin(block);

   // Compute liveness transfers of ownership to this block.
   SmallVector<Value> liveMemrefs;
   state.getLiveMemrefsIn(block, liveMemrefs);
   for (auto li : liveMemrefs) {
     // Ownership of implicitly captured memrefs from other regions is never
     // taken, but ownership of memrefs in the same region (but different block)
     // is taken.
     if (li.getParentRegion() == block->getParent()) {
       state.updateOwnership(li, state.getOwnership(li, li.getParentBlock()),
                             block);
       state.addMemrefToDeallocate(li, block);
       continue;
     }

     if (li.getParentRegion()->isProperAncestor(block->getParent())) {
       Value falseVal = buildBoolValue(builder, li.getLoc(), false);
       state.updateOwnership(li, falseVal, block);
     }
   }

   for (unsigned i = 0, e = block->getNumArguments(); i < e; ++i) {
     BlockArgument arg = block->getArgument(i);
     if (!isMemref(arg))
       continue;

     // Adhere to function boundary ABI: no ownership of function argument
     // MemRefs is taken.
     if (isa<FunctionOpInterface>(block->getParentOp()) &&
         block->isEntryBlock()) {
       Value newArg = buildBoolValue(builder, arg.getLoc(), false);
       state.updateOwnership(arg, newArg);
       state.addMemrefToDeallocate(arg, block);
       continue;
     }

     // Pass MemRef ownerships along via `i1` values.
     Value newArg = block->addArgument(builder.getI1Type(), arg.getLoc());
     state.updateOwnership(arg, newArg);
     state.addMemrefToDeallocate(arg, block);
   }

   // For each operation in the block, handle the interfaces that affect aliasing
   // and ownership of memrefs.
   for (Operation &op : llvm::make_early_inc_range(*block)) {
     FailureOr<Operation *> result = handleAllInterfaces(&op);
     if (failed(result))
       return failure();
     if (!*result)
       continue;

     populateRemainingOwnerships(*result);
   }

   // TODO: if block has no terminator, handle dealloc insertion here.
   return success();
 }

 Operation *BufferDeallocation::appendOpResults(Operation *op,
                                                ArrayRef<Type> types) {
   SmallVector<Type> newTypes(op->getResultTypes());
   newTypes.append(types.begin(), types.end());
   auto *newOp = Operation::create(op->getLoc(), op->getName(), newTypes,
                                   op->getOperands(), op->getAttrDictionary(),
                                   op->getPropertiesStorage(),
                                   op->getSuccessors(), op->getNumRegions());
   for (auto [oldRegion, newRegion] :
        llvm::zip(op->getRegions(), newOp->getRegions()))
     newRegion.takeBody(oldRegion);

   OpBuilder(op).insert(newOp);
   op->replaceAllUsesWith(newOp->getResults().take_front(op->getNumResults()));
   op->erase();

   return newOp;
 }

 FailureOr<Operation *>
 BufferDeallocation::handleInterface(RegionBranchOpInterface op) {
   OpBuilder builder = OpBuilder::atBlockBegin(op->getBlock());

   // TODO: the RegionBranchOpInterface does not provide all the necessary
   // methods to perform this transformation without additional assumptions on
   // the structure. In particular, that
   // * additional values to be passed to the next region can be added to the end
   //   of the operand list, the end of the block argument list, and the end of
   //   the result value list. However, it seems to be the general guideline for
   //   operations implementing this interface to follow this structure.
   // * and that the block arguments and result values match the forwarded
   //   operands one-to-one (i.e., that there are no other values appended to the
   //   front).
   // These assumptions are satisfied by the `scf.if`, `scf.for`, and `scf.while`
   // operations.

   SmallVector<RegionSuccessor> regions;
   op.getSuccessorRegions(RegionBranchPoint::parent(), regions);
   assert(!regions.empty() && "Must have at least one successor region");
   SmallVector<Value> entryOperands(
       op.getEntrySuccessorOperands(regions.front()));
   unsigned numMemrefOperands = llvm::count_if(entryOperands, isMemref);

   // No ownership is acquired for any MemRefs that are passed to the region from
   // the outside.
   Value falseVal = buildBoolValue(builder, op.getLoc(), false);
   op->insertOperands(op->getNumOperands(),
                      SmallVector<Value>(numMemrefOperands, falseVal));

   int counter = op->getNumResults();
   unsigned numMemrefResults = llvm::count_if(op->getResults(), isMemref);
   SmallVector<Type> ownershipResults(numMemrefResults, builder.getI1Type());
   RegionBranchOpInterface newOp = appendOpResults(op, ownershipResults);

   for (auto result : llvm::make_filter_range(newOp->getResults(), isMemref)) {
     state.updateOwnership(result, newOp->getResult(counter++));
     state.addMemrefToDeallocate(result, newOp->getBlock());
   }

   return newOp.getOperation();
 }

 Value BufferDeallocation::materializeMemrefWithGuaranteedOwnership(
     OpBuilder &builder, Value memref, Block *block) {
   // First, make sure we at least have 'Unique' ownership already.
   std::pair<Value, Value> newMemrefAndOnwership =
       materializeUniqueOwnership(builder, memref, block);
   Value newMemref = newMemrefAndOnwership.first;
   Value condition = newMemrefAndOnwership.second;

   // Avoid inserting additional IR if ownership is already guaranteed. In
   // particular, this is already the case when we had 'Unknown' ownership
   // initially and a clone was inserted to get to 'Unique' ownership.
   if (matchPattern(condition, m_One()))
     return newMemref;

   // Insert a runtime check and only clone if we still don't have ownership at
   // runtime.
   Value maybeClone = scf::IfOp::create(
                          builder, memref.getLoc(), condition,
                          [&](OpBuilder &builder, Location loc) {
                            scf::YieldOp::create(builder, loc, newMemref);
                          },
                          [&](OpBuilder &builder, Location loc) {
                            Value clone = bufferization::CloneOp::create(
                                builder, loc, newMemref);
                            scf::YieldOp::create(builder, loc, clone);
                          })
                          .getResult(0);
   Value trueVal = buildBoolValue(builder, memref.getLoc(), true);
   state.updateOwnership(maybeClone, trueVal);
   state.addMemrefToDeallocate(maybeClone, maybeClone.getParentBlock());
   return maybeClone;
 }

 FailureOr<Operation *>
 BufferDeallocation::handleInterface(BranchOpInterface op) {
   if (op->getNumSuccessors() > 1)
     return op->emitError("BranchOpInterface operations with multiple "
                          "successors are not supported yet");

   if (op->getNumSuccessors() != 1)
     return emitError(op.getLoc(),
                      "only BranchOpInterface operations with exactly "
                      "one successor are supported yet");

   if (op.getSuccessorOperands(0).getProducedOperandCount() > 0)
     return op.emitError("produced operands are not supported");

   // Collect the values to deallocate and retain and use them to create the
   // dealloc operation.
   Block *block = op->getBlock();
   OpBuilder builder(op);
   SmallVector<Value> memrefs, conditions, toRetain;
   if (failed(state.getMemrefsAndConditionsToDeallocate(
           builder, op.getLoc(), block, memrefs, conditions)))
     return failure();

   OperandRange forwardedOperands =
       op.getSuccessorOperands(0).getForwardedOperands();
   state.getMemrefsToRetain(block, op->getSuccessor(0), forwardedOperands,
                            toRetain);

   auto deallocOp = bufferization::DeallocOp::create(
       builder, op.getLoc(), memrefs, conditions, toRetain);

   // We want to replace the current ownership of the retained values with the
   // result values of the dealloc operation as they are always unique.
   state.resetOwnerships(deallocOp.getRetained(), block);
   for (auto [retained, ownership] :
        llvm::zip(deallocOp.getRetained(), deallocOp.getUpdatedConditions())) {
     state.updateOwnership(retained, ownership, block);
   }

   unsigned numAdditionalReturns = llvm::count_if(forwardedOperands, isMemref);
   SmallVector<Value> newOperands(forwardedOperands);
   auto additionalConditions =
       deallocOp.getUpdatedConditions().take_front(numAdditionalReturns);
   newOperands.append(additionalConditions.begin(), additionalConditions.end());
   op.getSuccessorOperands(0).getMutableForwardedOperands().assign(newOperands);

   return op.getOperation();
 }

 FailureOr<Operation *> BufferDeallocation::handleInterface(CallOpInterface op) {
   OpBuilder builder(op);

   // Lookup the function operation and check if it has private visibility. If
   // the function is referenced by SSA value instead of a Symbol, it's assumed
   // to be public. (And we cannot easily change the type of the SSA value
   // anyway.)
   Operation *funcOp = op.resolveCallableInTable(state.getSymbolTable());
   bool isPrivate = false;
   if (auto symbol = dyn_cast_or_null<SymbolOpInterface>(funcOp))
     isPrivate = symbol.isPrivate() && !symbol.isDeclaration();

   // If the private-function-dynamic-ownership option is enabled and we are
   // calling a private function, we need to add an additional `i1` result for
   // each MemRef result to dynamically pass the current ownership indicator
   // rather than adhering to the function boundary ABI.
   if (options.privateFuncDynamicOwnership && isPrivate) {
     unsigned numMemrefs = llvm::count_if(op->getResults(), isMemref);
     SmallVector<Type> ownershipTypesToAppend(numMemrefs, builder.getI1Type());
     unsigned ownershipCounter = op->getNumResults();
     op = appendOpResults(op, ownershipTypesToAppend);

     for (auto result : llvm::make_filter_range(op->getResults(), isMemref)) {
       state.updateOwnership(result, op->getResult(ownershipCounter++));
       state.addMemrefToDeallocate(result, result.getParentBlock());
     }

     return op.getOperation();
   }

   // According to the function boundary ABI we are guaranteed to get ownership
   // of all MemRefs returned by the function. Thus we set ownership to constant
   // 'true' and remember to deallocate it.
   Value trueVal = buildBoolValue(builder, op.getLoc(), true);
   for (auto result : llvm::make_filter_range(op->getResults(), isMemref)) {
     state.updateOwnership(result, trueVal);
     state.addMemrefToDeallocate(result, result.getParentBlock());
   }

   return op.getOperation();
 }

 FailureOr<Operation *>
 BufferDeallocation::handleInterface(MemoryEffectOpInterface op) {
   auto *block = op->getBlock();
   OpBuilder builder = OpBuilder::atBlockBegin(block);

   for (auto operand : llvm::make_filter_range(op->getOperands(), isMemref)) {
     if (op.getEffectOnValue<MemoryEffects::Free>(operand).has_value()) {
       // The bufferization.manual_deallocation attribute can be attached to ops
       // with an allocation and/or deallocation side effect. It indicates that
       // the op is under a "manual deallocation" scheme. Deallocation ops are
       // usually forbidden in the input IR (not supported by the buffer
       // deallocation pass). However, if they are under manual deallocation,
       // they can be safely ignored by the buffer deallocation pass.
       if (!op->hasAttr(BufferizationDialect::kManualDeallocation))
         return op->emitError(
             "memory free side-effect on MemRef value not supported!");

       // Buffers that were allocated under "manual deallocation" may be
       // manually deallocated. We insert a runtime assertion to cover certain
       // cases of invalid IR where an automatically managed buffer allocation
       // is manually deallocated. This is not a bulletproof check!
       OpBuilder::InsertionGuard g(builder);
       builder.setInsertionPoint(op);
       Ownership ownership = state.getOwnership(operand, block);
       if (ownership.isUnique()) {
         Value ownershipInverted = arith::XOrIOp::create(
             builder, op.getLoc(), ownership.getIndicator(),
             buildBoolValue(builder, op.getLoc(), true));
         cf::AssertOp::create(builder, op.getLoc(), ownershipInverted,
                              "expected that the block does not have ownership");
       }
     }
   }

   for (auto res : llvm::make_filter_range(op->getResults(), isMemref)) {
     auto allocEffect = op.getEffectOnValue<MemoryEffects::Allocate>(res);
     if (allocEffect.has_value()) {
       if (isa<SideEffects::AutomaticAllocationScopeResource>(
               allocEffect->getResource())) {
         // Make sure that the ownership of auto-managed allocations is set to
         // false. This is important for operations that have at least one memref
         // typed operand. E.g., consider an operation like `bufferization.clone`
         // that lowers to a `memref.alloca + memref.copy` instead of a
         // `memref.alloc`. If we wouldn't set the ownership of the result here,
         // the default ownership population in `populateRemainingOwnerships`
         // would assume aliasing with the MemRef operand.
         state.resetOwnerships(res, block);
         state.updateOwnership(res, buildBoolValue(builder, op.getLoc(), false));
         continue;
       }

       if (op->hasAttr(BufferizationDialect::kManualDeallocation)) {
         // This allocation will be deallocated manually. Assign an ownership of
         // "false", so that it will never be deallocated by the buffer
         // deallocation pass.
         state.resetOwnerships(res, block);
         state.updateOwnership(res, buildBoolValue(builder, op.getLoc(), false));
         continue;
       }

       state.updateOwnership(res, buildBoolValue(builder, op.getLoc(), true));
       state.addMemrefToDeallocate(res, block);
     }
   }

   return op.getOperation();
 }

 FailureOr<Operation *>
 BufferDeallocation::handleInterface(RegionBranchTerminatorOpInterface op) {
   OpBuilder builder(op);

   // If this is a return operation of a function that is not private or the
   // dynamic function boundary ownership is disabled, we need to return memref
   // values for which we have guaranteed ownership to pass on to adhere to the
   // function boundary ABI.
   bool funcWithoutDynamicOwnership =
       isFunctionWithoutDynamicOwnership(op->getParentOp());
   if (funcWithoutDynamicOwnership) {
     for (OpOperand &val : op->getOpOperands()) {
       if (!isMemref(val.get()))
         continue;

       val.set(materializeMemrefWithGuaranteedOwnership(builder, val.get(),
                                                        op->getBlock()));
     }
   }

   // TODO: getSuccessorRegions is not implemented by all operations we care
   // about, but we would need to check how many successors there are and under
   // which condition they are taken, etc.

   MutableOperandRange operands =
       op.getMutableSuccessorOperands(RegionBranchPoint::parent());

   SmallVector<Value> updatedOwnerships;
   auto result = deallocation_impl::insertDeallocOpForReturnLike(
       state, op, operands.getAsOperandRange(), updatedOwnerships);
   if (failed(result) || !*result)
     return result;

   // Add an additional operand for every MemRef for the ownership indicator.
   if (!funcWithoutDynamicOwnership) {
     SmallVector<Value> newOperands{operands.getAsOperandRange()};
     newOperands.append(updatedOwnerships.begin(), updatedOwnerships.end());
     operands.assign(newOperands);
   }

   return op.getOperation();
 }

 bool BufferDeallocation::isFunctionWithoutDynamicOwnership(Operation *op) {
   auto funcOp = dyn_cast<FunctionOpInterface>(op);
   return funcOp && (!options.privateFuncDynamicOwnership ||
                     !funcOp.isPrivate() || funcOp.isExternal());
 }

 void BufferDeallocation::populateRemainingOwnerships(Operation *op) {
   for (auto res : op->getResults()) {
     if (!isMemref(res))
       continue;
     if (!state.getOwnership(res, op->getBlock()).isUninitialized())
       continue;

     // The op does not allocate memory, otherwise, it would have been assigned
     // an ownership during `handleInterface`. Assume the result may alias with
     // any memref operand and thus combine all their ownerships.
     for (auto operand : op->getOperands()) {
       if (!isMemref(operand))
         continue;

       state.updateOwnership(
           res, state.getOwnership(operand, operand.getParentBlock()),
           op->getBlock());
     }

     // If the ownership value is still uninitialized (e.g., because the op has
     // no memref operands), assume that no ownership is taken. E.g., this is the
     // case for "memref.get_global".
     //
     // Note: This can lead to memory leaks if memory side effects are not
     // properly specified on the op.
     if (state.getOwnership(res, op->getBlock()).isUninitialized()) {
       OpBuilder builder(op);
       state.updateOwnership(res, buildBoolValue(builder, op->getLoc(), false));
     }
   }
 }

 //===----------------------------------------------------------------------===//
 // OwnershipBasedBufferDeallocationPass
 //===----------------------------------------------------------------------===//

 namespace {

 /// The actual buffer deallocation pass that inserts and moves dealloc nodes
 /// into the right positions. Furthermore, it inserts additional clones if
 /// necessary. It uses the algorithm described at the top of the file.
 struct OwnershipBasedBufferDeallocationPass
     : public bufferization::impl::OwnershipBasedBufferDeallocationPassBase<
           OwnershipBasedBufferDeallocationPass> {
   using Base::Base;

   void runOnOperation() override {
     DeallocationOptions options;
     options.privateFuncDynamicOwnership = privateFuncDynamicOwnership;

     mlir::SymbolTableCollection symbolTables;

     auto status = getOperation()->walk([&](func::FuncOp func) {
       if (func.isExternal())
         return WalkResult::skip();

       if (failed(deallocateBuffersOwnershipBased(func, options, symbolTables)))
         return WalkResult::interrupt();

       return WalkResult::advance();
     });
     if (status.wasInterrupted())
       signalPassFailure();
   }
 };

 } // namespace

 //===----------------------------------------------------------------------===//
 // Implement bufferization API
 //===----------------------------------------------------------------------===//

 LogicalResult bufferization::deallocateBuffersOwnershipBased(
     FunctionOpInterface op, DeallocationOptions options,
     SymbolTableCollection &symbolTables) {
   // Gather all required allocation nodes and prepare the deallocation phase.
   BufferDeallocation deallocation(op, options, symbolTables);

   // Place all required temporary clone and dealloc nodes.
   return deallocation.deallocate(op);
 }