mlir/lib/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.cpp - llvm-project - Git at Google

 //===- SCFInterfaceImpl.cpp - SCF Impl. of BufferizableOpInterface --------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/Operation.h"

 namespace mlir {
 namespace linalg {
 namespace comprehensive_bufferize {
 namespace scf_ext {

 struct ExecuteRegionOpInterface
     : public BufferizableOpInterface::ExternalModel<ExecuteRegionOpInterface,
                                                     scf::ExecuteRegionOp> {
   SmallVector<OpOperand *> getAliasingOpOperand(Operation *op,
                                                 OpResult opResult) const {
     // ExecuteRegionOps do not have tensor OpOperands. The yielded value can be
     // any SSA value that is in scope. To allow for use-def chain traversal
     // through ExecuteRegionOps in the analysis, the corresponding yield value
     // is considered to be aliasing with the result.
     auto executeRegionOp = cast<scf::ExecuteRegionOp>(op);
     size_t resultNum = std::distance(op->getOpResults().begin(),
                                      llvm::find(op->getOpResults(), opResult));
     assert(executeRegionOp.region().getBlocks().size() == 1 &&
            "expected exactly 1 block");
     auto yieldOp = dyn_cast<scf::YieldOp>(
         executeRegionOp.region().front().getTerminator());
     assert(yieldOp && "expected scf.yield terminator in scf.execute_region");
     return {&yieldOp->getOpOperand(resultNum)};
   }

   bool mustBufferizeInPlace(Operation *op, OpResult opResult) const {
     // ExecuteRegionOp results always bufferize in-place. Since they have no
     // OpOperands, they are mostly ignored by the analysis once alias sets are
     // set up.
     return true;
   }

   LogicalResult bufferize(Operation *op, OpBuilder &b,
                           BufferizationState &state) const {
     // TODO: Add bufferization support when needed. scf.execute_region should be
     // bufferized similar to scf.if.
     auto executeRegionOp = cast<scf::ExecuteRegionOp>(op);
     bool hasTensorReturnType = any_of(
         op->getResultTypes(), [](Type t) { return t.isa<TensorType>(); });
     if (hasTensorReturnType)
       return op->emitError(
           "scf.execute_region with tensor result not supported");
     return comprehensive_bufferize::bufferize(&executeRegionOp.region(), state);
   }
 };

 struct IfOpInterface
     : public BufferizableOpInterface::ExternalModel<IfOpInterface, scf::IfOp> {
   SmallVector<OpOperand *> getAliasingOpOperand(Operation *op,
                                                 OpResult opResult) const {
     // IfOps do not have tensor OpOperands. The yielded value can be any SSA
     // value that is in scope. To allow for use-def chain traversal through
     // IfOps in the analysis, both corresponding yield values from the then/else
     // branches are considered to be aliasing with the result.
     auto ifOp = cast<scf::IfOp>(op);
     size_t resultNum = std::distance(op->getOpResults().begin(),
                                      llvm::find(op->getOpResults(), opResult));
     return {&ifOp.thenYield()->getOpOperand(resultNum),
             &ifOp.elseYield()->getOpOperand(resultNum)};
   }

   // TODO: For better bufferization results, this could return `true` only if
   // there is a memory write in one (or both) of the branches. Since this is not
   // allowed at the moment, we should never encounter scf.ifs that yield
   // unmodified tensors. Such scf.yield ops could just fold away.
   bool isMemoryWrite(Operation *op, OpResult opResult) const {
     // IfOp results are always considered memory writes in the analysis. This
     // design decision simplifies the analysis considerably. E.g., consider the
     // following test case:
     //
     // %0 = "some_writing_op" : tensor<?xf32>
     // %r = scf.if %c -> (tensor<?xf32>) {
     //   scf.yield %0
     // } else {
     //   %1 = "another_writing_op"(%0) : tensor<?xf32>
     // }
     // "some_reading_op"(%r)
     //
     // "another_writing_op" in the above example should be able to bufferize
     // inplace in the absence of another read of %0. However, if the scf.if op
     // would not be considered a "write", the analysis would detect the
     // following conflict:
     //
     // * read = some_reading_op
     // * lastWrite = %0  (Note: The last write of %r would be a set: {%0, %1}.)
     // * conflictingWrite = %1
     //
     // For more details, check the "scf.IfOp" section of the design document.
     return true;
   }

   bool mustBufferizeInPlace(Operation *op, OpResult opResult) const {
     // IfOp results always bufferize in-place. Since they have no OpOperands,
     // they are mostly ignored by the analysis once alias sets are set up.
     return true;
   }

   LogicalResult bufferize(Operation *op, OpBuilder &b,
                           BufferizationState &state) const {
     auto ifOp = cast<scf::IfOp>(op);

     // Bufferize then/else blocks.
     if (failed(comprehensive_bufferize::bufferize(ifOp.thenBlock(), state)))
       return failure();
     if (failed(comprehensive_bufferize::bufferize(ifOp.elseBlock(), state)))
       return failure();

     for (OpResult opResult : ifOp->getResults()) {
       if (!opResult.getType().isa<TensorType>())
         continue;
       // TODO: Atm we bail on unranked TensorType because we don't know how to
       // alloc an UnrankedMemRefType + its underlying ranked MemRefType.
       assert(opResult.getType().isa<RankedTensorType>() &&
              "unsupported unranked tensor");

       Value resultBuffer = getResultBuffer(b, opResult, state);
       if (!resultBuffer)
         return failure();

       state.aliasInfo.createAliasInfoEntry(resultBuffer);
       state.mapBuffer(opResult, resultBuffer);
     }

     return success();
   }
 };

 struct ForOpInterface
     : public BufferizableOpInterface::ExternalModel<ForOpInterface,
                                                     scf::ForOp> {
   bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const {
     // scf::ForOp alone doesn't bufferize to a memory read, one of the uses of
     // its matching bbArg may.
     auto forOp = cast<scf::ForOp>(op);
     return isValueRead(forOp.getRegionIterArgForOpOperand(opOperand));
   }

   bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const {
     // Tensor iter_args of scf::ForOps are always considered as a write. This is
     // to simplify the analysis.
     // TODO: Consider doing sth. like isValueWritten.
     return true;
   }

   SmallVector<OpOperand *> getAliasingOpOperand(Operation *op,
                                                 OpResult opResult) const {
     auto forOp = cast<scf::ForOp>(op);
     return {&forOp.getIterOpOperands()[opResult.getResultNumber()]};
   }

   OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const {
     auto forOp = cast<scf::ForOp>(op);
     if (!opOperand.get().getType().isa<RankedTensorType>())
       return OpResult();
     return forOp.getResultForOpOperand(opOperand);
   }

   BufferRelation bufferRelation(Operation *op, OpOperand &opOperand) const {
     return BufferRelation::Equivalent;
   }

   bool isWritable(Operation *op, Value value) const {
     // Interestingly, scf::ForOp's bbArg can **always** be viewed
     // inplace from the perspective of ops nested under:
     //   1. Either the matching iter operand is not bufferized inplace and an
     //      alloc + optional copy makes the bbArg itself inplaceable.
     //   2. Or the matching iter operand is bufferized inplace and bbArg just
     //      bufferizes to that too.
     return true;
   }

   LogicalResult bufferize(Operation *op, OpBuilder &b,
                           BufferizationState &state) const {
     auto forOp = cast<scf::ForOp>(op);

     // Take a guard before anything else.
     OpBuilder::InsertionGuard g(b);

     for (OpResult opResult : forOp->getResults()) {
       if (!opResult.getType().isa<TensorType>())
         continue;
       // TODO: Atm we bail on unranked TensorType because we don't know how to
       // alloc an UnrankedMemRefType + its underlying ranked MemRefType.
       assert(opResult.getType().isa<RankedTensorType>() &&
              "unsupported unranked tensor");

       // TODO: More general: Matching bbArg does not bufferize to a read.
       Value resultBuffer = getResultBuffer(b, opResult, state);
       if (!resultBuffer)
         return failure();

       OpOperand &opOperand = forOp.getOpOperandForResult(opResult);
       BlockArgument bbArg = forOp.getRegionIterArgForOpOperand(opOperand);
       state.aliasInfo.createAliasInfoEntry(resultBuffer);
       state.aliasInfo.insertNewBufferEquivalence(bbArg, resultBuffer);
       state.mapBuffer(bbArg, resultBuffer);
       state.mapBuffer(opResult, resultBuffer);
     }

     // Bufferize loop body.
     if (failed(comprehensive_bufferize::bufferize(&forOp.region(), state)))
       return failure();

     // Finish bufferizing scf::ForOp.
     auto yieldOp = cast<scf::YieldOp>(&forOp.region().front().back());
     for (OpOperand &operand : yieldOp->getOpOperands()) {
       auto tensorType = operand.get().getType().dyn_cast<TensorType>();
       if (!tensorType)
         continue;

       OpOperand &forOperand = forOp.getOpOperandForResult(
           forOp->getResult(operand.getOperandNumber()));
       auto bbArg = forOp.getRegionIterArgForOpOperand(forOperand);
       Value yieldedBuffer = state.lookupBuffer(operand.get());
       Value bbArgBuffer = state.lookupBuffer(bbArg);
       if (!state.aliasInfo.areEquivalentBufferizedValues(yieldedBuffer,
                                                          bbArgBuffer)) {
         // TODO: this could get resolved with copies but it can also turn into
         // swaps so we need to be careful about order of copies.
         return yieldOp->emitError()
                << "Yield operand #" << operand.getOperandNumber()
                << " does not bufferize to an equivalent buffer to the matching"
                << " enclosing scf::for operand";
       }

       // Buffers are equivalent so the work is already done and we just yield
       // the bbArg so that it later canonicalizes away.
       operand.set(bbArg);
     }
     return success();
   }
 };

 struct YieldOpInterface
     : public BufferizableOpInterface::ExternalModel<YieldOpInterface,
                                                     scf::YieldOp> {
   bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const {
     return true;
   }

   bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const {
     return false;
   }

   OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const {
     return OpResult();
   }

   BufferRelation bufferRelation(Operation *op, OpOperand &opOperand) const {
     return BufferRelation::Equivalent;
   }

   LogicalResult bufferize(Operation *op, OpBuilder &b,
                           BufferizationState &state) const {
     auto yieldOp = cast<scf::YieldOp>(op);
     if (!isa<scf::ExecuteRegionOp, scf::IfOp, scf::ForOp>(
             yieldOp->getParentOp()))
       return yieldOp->emitError("unsupported scf::YieldOp parent");
     return success();
   }
 };

 } // namespace scf_ext
 } // namespace comprehensive_bufferize
 } // namespace linalg
 } // namespace mlir

 void mlir::linalg::comprehensive_bufferize::scf_ext::
     registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry) {
   registry.addOpInterface<scf::ExecuteRegionOp,
                           scf_ext::ExecuteRegionOpInterface>();
   registry.addOpInterface<scf::ForOp, scf_ext::ForOpInterface>();
   registry.addOpInterface<scf::IfOp, scf_ext::IfOpInterface>();
   registry.addOpInterface<scf::YieldOp, scf_ext::YieldOpInterface>();
   registry.addOpInterface<scf::ParallelOp,
                           AllocationHoistingBarrierOnly<scf::ParallelOp>>();
 }
	//===- SCFInterfaceImpl.cpp - SCF Impl. of BufferizableOpInterface --------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//

	#include "mlir/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.h"
	#include "mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h"
	#include "mlir/Dialect/SCF/SCF.h"
	#include "mlir/IR/Dialect.h"
	#include "mlir/IR/Operation.h"

	namespace mlir {
	namespace linalg {
	namespace comprehensive_bufferize {
	namespace scf_ext {

	struct ExecuteRegionOpInterface
	: public BufferizableOpInterface::ExternalModel<ExecuteRegionOpInterface,
	scf::ExecuteRegionOp> {
	SmallVector<OpOperand > getAliasingOpOperand(Operation op,
	OpResult opResult) const {
	// ExecuteRegionOps do not have tensor OpOperands. The yielded value can be
	// any SSA value that is in scope. To allow for use-def chain traversal
	// through ExecuteRegionOps in the analysis, the corresponding yield value
	// is considered to be aliasing with the result.
	auto executeRegionOp = cast<scf::ExecuteRegionOp>(op);
	size_t resultNum = std::distance(op->getOpResults().begin(),
	llvm::find(op->getOpResults(), opResult));
	assert(executeRegionOp.region().getBlocks().size() == 1 &&
	"expected exactly 1 block");
	auto yieldOp = dyn_cast<scf::YieldOp>(
	executeRegionOp.region().front().getTerminator());
	assert(yieldOp && "expected scf.yield terminator in scf.execute_region");
	return {&yieldOp->getOpOperand(resultNum)};
	}

	bool mustBufferizeInPlace(Operation *op, OpResult opResult) const {
	// ExecuteRegionOp results always bufferize in-place. Since they have no
	// OpOperands, they are mostly ignored by the analysis once alias sets are
	// set up.
	return true;
	}

	LogicalResult bufferize(Operation *op, OpBuilder &b,
	BufferizationState &state) const {
	// TODO: Add bufferization support when needed. scf.execute_region should be
	// bufferized similar to scf.if.
	auto executeRegionOp = cast<scf::ExecuteRegionOp>(op);
	bool hasTensorReturnType = any_of(
	op->getResultTypes(), [](Type t) { return t.isa<TensorType>(); });
	if (hasTensorReturnType)
	return op->emitError(
	"scf.execute_region with tensor result not supported");
	return comprehensive_bufferize::bufferize(&executeRegionOp.region(), state);
	}
	};

	struct IfOpInterface
	: public BufferizableOpInterface::ExternalModel<IfOpInterface, scf::IfOp> {
	SmallVector<OpOperand > getAliasingOpOperand(Operation op,
	OpResult opResult) const {
	// IfOps do not have tensor OpOperands. The yielded value can be any SSA
	// value that is in scope. To allow for use-def chain traversal through
	// IfOps in the analysis, both corresponding yield values from the then/else
	// branches are considered to be aliasing with the result.
	auto ifOp = cast<scf::IfOp>(op);
	size_t resultNum = std::distance(op->getOpResults().begin(),
	llvm::find(op->getOpResults(), opResult));
	return {&ifOp.thenYield()->getOpOperand(resultNum),
	&ifOp.elseYield()->getOpOperand(resultNum)};
	}

	// TODO: For better bufferization results, this could return `true` only if
	// there is a memory write in one (or both) of the branches. Since this is not
	// allowed at the moment, we should never encounter scf.ifs that yield
	// unmodified tensors. Such scf.yield ops could just fold away.
	bool isMemoryWrite(Operation *op, OpResult opResult) const {
	// IfOp results are always considered memory writes in the analysis. This
	// design decision simplifies the analysis considerably. E.g., consider the
	// following test case:
	//
	// %0 = "some_writing_op" : tensor<?xf32>
	// %r = scf.if %c -> (tensor<?xf32>) {
	// scf.yield %0
	// } else {
	// %1 = "another_writing_op"(%0) : tensor<?xf32>
	// }
	// "some_reading_op"(%r)
	//
	// "another_writing_op" in the above example should be able to bufferize
	// inplace in the absence of another read of %0. However, if the scf.if op
	// would not be considered a "write", the analysis would detect the
	// following conflict:
	//
	// * read = some_reading_op
	// * lastWrite = %0 (Note: The last write of %r would be a set: {%0, %1}.)
	// * conflictingWrite = %1
	//
	// For more details, check the "scf.IfOp" section of the design document.
	return true;
	}

	bool mustBufferizeInPlace(Operation *op, OpResult opResult) const {
	// IfOp results always bufferize in-place. Since they have no OpOperands,
	// they are mostly ignored by the analysis once alias sets are set up.
	return true;
	}

	LogicalResult bufferize(Operation *op, OpBuilder &b,
	BufferizationState &state) const {
	auto ifOp = cast<scf::IfOp>(op);

	// Bufferize then/else blocks.
	if (failed(comprehensive_bufferize::bufferize(ifOp.thenBlock(), state)))
	return failure();
	if (failed(comprehensive_bufferize::bufferize(ifOp.elseBlock(), state)))
	return failure();

	for (OpResult opResult : ifOp->getResults()) {
	if (!opResult.getType().isa<TensorType>())
	continue;
	// TODO: Atm we bail on unranked TensorType because we don't know how to
	// alloc an UnrankedMemRefType + its underlying ranked MemRefType.
	assert(opResult.getType().isa<RankedTensorType>() &&
	"unsupported unranked tensor");

	Value resultBuffer = getResultBuffer(b, opResult, state);
	if (!resultBuffer)
	return failure();

	state.aliasInfo.createAliasInfoEntry(resultBuffer);
	state.mapBuffer(opResult, resultBuffer);
	}

	return success();
	}
	};

	struct ForOpInterface
	: public BufferizableOpInterface::ExternalModel<ForOpInterface,
	scf::ForOp> {
	bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const {
	// scf::ForOp alone doesn't bufferize to a memory read, one of the uses of
	// its matching bbArg may.
	auto forOp = cast<scf::ForOp>(op);
	return isValueRead(forOp.getRegionIterArgForOpOperand(opOperand));
	}

	bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const {
	// Tensor iter_args of scf::ForOps are always considered as a write. This is
	// to simplify the analysis.
	// TODO: Consider doing sth. like isValueWritten.
	return true;
	}

	SmallVector<OpOperand > getAliasingOpOperand(Operation op,
	OpResult opResult) const {
	auto forOp = cast<scf::ForOp>(op);
	return {&forOp.getIterOpOperands()[opResult.getResultNumber()]};
	}

	OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const {
	auto forOp = cast<scf::ForOp>(op);
	if (!opOperand.get().getType().isa<RankedTensorType>())
	return OpResult();
	return forOp.getResultForOpOperand(opOperand);
	}

	BufferRelation bufferRelation(Operation *op, OpOperand &opOperand) const {
	return BufferRelation::Equivalent;
	}

	bool isWritable(Operation *op, Value value) const {
	// Interestingly, scf::ForOp's bbArg can always be viewed
	// inplace from the perspective of ops nested under:
	// 1. Either the matching iter operand is not bufferized inplace and an
	// alloc + optional copy makes the bbArg itself inplaceable.
	// 2. Or the matching iter operand is bufferized inplace and bbArg just
	// bufferizes to that too.
	return true;
	}

	LogicalResult bufferize(Operation *op, OpBuilder &b,
	BufferizationState &state) const {
	auto forOp = cast<scf::ForOp>(op);

	// Take a guard before anything else.
	OpBuilder::InsertionGuard g(b);

	for (OpResult opResult : forOp->getResults()) {
	if (!opResult.getType().isa<TensorType>())
	continue;
	// TODO: Atm we bail on unranked TensorType because we don't know how to
	// alloc an UnrankedMemRefType + its underlying ranked MemRefType.
	assert(opResult.getType().isa<RankedTensorType>() &&
	"unsupported unranked tensor");

	// TODO: More general: Matching bbArg does not bufferize to a read.
	Value resultBuffer = getResultBuffer(b, opResult, state);
	if (!resultBuffer)
	return failure();

	OpOperand &opOperand = forOp.getOpOperandForResult(opResult);
	BlockArgument bbArg = forOp.getRegionIterArgForOpOperand(opOperand);
	state.aliasInfo.createAliasInfoEntry(resultBuffer);
	state.aliasInfo.insertNewBufferEquivalence(bbArg, resultBuffer);
	state.mapBuffer(bbArg, resultBuffer);
	state.mapBuffer(opResult, resultBuffer);
	}

	// Bufferize loop body.
	if (failed(comprehensive_bufferize::bufferize(&forOp.region(), state)))
	return failure();

	// Finish bufferizing scf::ForOp.
	auto yieldOp = cast<scf::YieldOp>(&forOp.region().front().back());
	for (OpOperand &operand : yieldOp->getOpOperands()) {
	auto tensorType = operand.get().getType().dyn_cast<TensorType>();
	if (!tensorType)
	continue;

	OpOperand &forOperand = forOp.getOpOperandForResult(
	forOp->getResult(operand.getOperandNumber()));
	auto bbArg = forOp.getRegionIterArgForOpOperand(forOperand);
	Value yieldedBuffer = state.lookupBuffer(operand.get());
	Value bbArgBuffer = state.lookupBuffer(bbArg);
	if (!state.aliasInfo.areEquivalentBufferizedValues(yieldedBuffer,
	bbArgBuffer)) {
	// TODO: this could get resolved with copies but it can also turn into
	// swaps so we need to be careful about order of copies.
	return yieldOp->emitError()
	<< "Yield operand #" << operand.getOperandNumber()
	<< " does not bufferize to an equivalent buffer to the matching"
	<< " enclosing scf::for operand";
	}

	// Buffers are equivalent so the work is already done and we just yield
	// the bbArg so that it later canonicalizes away.
	operand.set(bbArg);
	}
	return success();
	}
	};

	struct YieldOpInterface
	: public BufferizableOpInterface::ExternalModel<YieldOpInterface,
	scf::YieldOp> {
	bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const {
	return true;
	}

	bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const {
	return false;
	}

	OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const {
	return OpResult();
	}

	BufferRelation bufferRelation(Operation *op, OpOperand &opOperand) const {
	return BufferRelation::Equivalent;
	}

	LogicalResult bufferize(Operation *op, OpBuilder &b,
	BufferizationState &state) const {
	auto yieldOp = cast<scf::YieldOp>(op);
	if (!isa<scf::ExecuteRegionOp, scf::IfOp, scf::ForOp>(
	yieldOp->getParentOp()))
	return yieldOp->emitError("unsupported scf::YieldOp parent");
	return success();
	}
	};

	} // namespace scf_ext
	} // namespace comprehensive_bufferize
	} // namespace linalg
	} // namespace mlir

	void mlir::linalg::comprehensive_bufferize::scf_ext::
	registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry) {
	registry.addOpInterface<scf::ExecuteRegionOp,
	scf_ext::ExecuteRegionOpInterface>();
	registry.addOpInterface<scf::ForOp, scf_ext::ForOpInterface>();
	registry.addOpInterface<scf::IfOp, scf_ext::IfOpInterface>();
	registry.addOpInterface<scf::YieldOp, scf_ext::YieldOpInterface>();
	registry.addOpInterface<scf::ParallelOp,
	AllocationHoistingBarrierOnly<scf::ParallelOp>>();
	}