// mlir/lib/Dialect/MemRef/Transforms/FlattenMemRefs.cpp - llvm-project - Git at Google

 //===----- FlattenMemRefs.cpp - MemRef ops flattener pass  ----------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This file contains patterns for flattening multi-rank memref-related ops
 // into 1-d memref ops.
 //
 //===----------------------------------------------------------------------===//

 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/MemRef/Transforms/Passes.h"
 #include "mlir/Dialect/MemRef/Transforms/Transforms.h"
 #include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
 #include "mlir/Dialect/Utils/IndexingUtils.h"
 #include "mlir/Dialect/Utils/StaticValueUtils.h"
 #include "mlir/Dialect/Vector/IR/VectorOps.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/OpDefinition.h"
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 #include "llvm/ADT/TypeSwitch.h"

 namespace mlir {
 namespace memref {
 #define GEN_PASS_DEF_FLATTENMEMREFSPASS
 #include "mlir/Dialect/MemRef/Transforms/Passes.h.inc"
 } // namespace memref
 } // namespace mlir

 using namespace mlir;

 /// Materializes `in` as an SSA value. A fold result that already holds a
 /// `Value` is returned unchanged; otherwise it is expected to hold an
 /// `IntegerAttr`, for which an `arith` index constant is built with
 /// `rewriter` at `loc`.
 static Value getValueFromOpFoldResult(OpBuilder &rewriter, Location loc,
                                       OpFoldResult in) {
   // Already an SSA value: nothing to build.
   if (auto value = dyn_cast<Value>(in))
     return value;
   // Otherwise the payload must be an integer attribute.
   int64_t constant = cast<IntegerAttr>(cast<Attribute>(in)).getInt();
   return arith::ConstantIndexOp::create(rewriter, loc, constant);
 }

 /// Returns a collapsed memref and the linearized index to access the element
 /// at the specified indices.
 ///
 /// `source` must be a value of MemRefType whose element type is an integer or
 /// float (its bit width is queried). The first element of the result is a
 /// `memref.reinterpret_cast` of `source` to a 1-D memref with unit stride;
 /// the second is the linearized index as an SSA value.
 ///
 /// The ops are created in a fixed order (metadata extraction, linearization,
 /// reinterpret cast, then the index value) so that the emitted IR does not
 /// depend on the compiler's argument evaluation order.
 static std::pair<Value, Value> getFlattenMemrefAndOffset(OpBuilder &rewriter,
                                                          Location loc,
                                                          Value source,
                                                          ValueRange indices) {
   auto sourceType = cast<MemRefType>(source.getType());

   // The strides/offset are only queried to validate the layout; the actual
   // values come from the extracted strided metadata below.
   int64_t sourceOffset;
   SmallVector<int64_t, 4> sourceStrides;
   [[maybe_unused]] LogicalResult hasStridedLayout =
       sourceType.getStridesAndOffset(sourceStrides, sourceOffset);
   assert(succeeded(hasStridedLayout) &&
          "expected a memref whose strides and offset can be computed");

   memref::ExtractStridedMetadataOp stridedMetadata =
       memref::ExtractStridedMetadataOp::create(rewriter, loc, source);

   // Source and destination bit widths are equal: this is a pure shape
   // flattening, not an element-type change.
   unsigned typeBit = sourceType.getElementType().getIntOrFloatBitWidth();
   auto [linearizedInfo, linearizedIndices] =
       memref::getLinearizedMemRefOffsetAndSize(
           rewriter, loc, typeBit, typeBit,
           stridedMetadata.getConstifiedMixedOffset(),
           stridedMetadata.getConstifiedMixedSizes(),
           stridedMetadata.getConstifiedMixedStrides(),
           getAsOpFoldResult(indices));

   // Build the two results as separate statements: function arguments have
   // an unspecified evaluation order, which would otherwise make the relative
   // order of the created ops compiler-dependent.
   Value flatMemref = memref::ReinterpretCastOp::create(
       rewriter, loc, source,
       /* offset = */ linearizedInfo.linearizedOffset,
       /* shapes = */
       ArrayRef<OpFoldResult>{linearizedInfo.linearizedSize},
       /* strides = */
       ArrayRef<OpFoldResult>{rewriter.getIndexAttr(1)});
   Value linearIndex =
       getValueFromOpFoldResult(rewriter, loc, linearizedIndices);
   return {flatMemref, linearIndex};
 }

 /// Returns true when `val`, which must be of MemRefType, has rank greater
 /// than one and therefore is a candidate for flattening.
 static bool needFlattening(Value val) {
   return cast<MemRefType>(val.getType()).getRank() > 1;
 }

 /// Returns true when the layout of `val` (which must be of MemRefType) is
 /// either the identity layout or a StridedLayoutAttr.
 static bool checkLayout(Value val) {
   auto layout = cast<MemRefType>(val.getType()).getLayout();
   if (layout.isIdentity())
     return true;
   return isa<StridedLayoutAttr>(layout);
 }

 namespace {
 /// Returns the memref operand or result that `op` works on: `getMemref()`
 /// for the memref load/store/alloc/alloca ops and `getBase()` for the vector
 /// load/store, masked and transfer ops. Any other op yields the `nullptr`
 /// default.
 static Value getTargetMemref(Operation *op) {
   return llvm::TypeSwitch<Operation *, Value>(op)
       .Case<memref::LoadOp, memref::StoreOp, memref::AllocaOp,
             memref::AllocOp>(
           [](auto memrefOp) { return memrefOp.getMemref(); })
       .Case<vector::LoadOp, vector::StoreOp, vector::MaskedLoadOp,
             vector::MaskedStoreOp, vector::TransferReadOp,
             vector::TransferWriteOp>(
           [](auto vectorOp) { return vectorOp.getBase(); })
       .Default(nullptr);
 }

 template <typename T>
 static void replaceOp(T op, PatternRewriter &rewriter, Value flatMemref,
                       Value offset) {
   Location loc = op->getLoc();
   llvm::TypeSwitch<Operation *>(op.getOperation())
       .Case([&](memref::LoadOp op) {
         auto newLoad =
             memref::LoadOp::create(rewriter, loc, op->getResultTypes(),
                                    flatMemref, ValueRange{offset});
         newLoad->setAttrs(op->getAttrs());
         rewriter.replaceOp(op, newLoad.getResult());
       })
       .Case([&](memref::StoreOp op) {
         auto newStore =
             memref::StoreOp::create(rewriter, loc, op->getOperands().front(),
                                     flatMemref, ValueRange{offset});
         newStore->setAttrs(op->getAttrs());
         rewriter.replaceOp(op, newStore);
       })
       .Case([&](vector::LoadOp op) {
         auto newLoad =
             vector::LoadOp::create(rewriter, loc, op->getResultTypes(),
                                    flatMemref, ValueRange{offset});
         newLoad->setAttrs(op->getAttrs());
         rewriter.replaceOp(op, newLoad.getResult());
       })
       .Case([&](vector::StoreOp op) {
         auto newStore =
             vector::StoreOp::create(rewriter, loc, op->getOperands().front(),
                                     flatMemref, ValueRange{offset});
         newStore->setAttrs(op->getAttrs());
         rewriter.replaceOp(op, newStore);
       })
       .Case([&](vector::MaskedLoadOp op) {
         auto newMaskedLoad = vector::MaskedLoadOp::create(
             rewriter, loc, op.getType(), flatMemref, ValueRange{offset},
             op.getMask(), op.getPassThru());
         newMaskedLoad->setAttrs(op->getAttrs());
         rewriter.replaceOp(op, newMaskedLoad.getResult());
       })
       .Case([&](vector::MaskedStoreOp op) {
         auto newMaskedStore = vector::MaskedStoreOp::create(
             rewriter, loc, flatMemref, ValueRange{offset}, op.getMask(),
             op.getValueToStore());
         newMaskedStore->setAttrs(op->getAttrs());
         rewriter.replaceOp(op, newMaskedStore);
       })
       .Case([&](vector::TransferReadOp op) {
         auto newTransferRead = vector::TransferReadOp::create(
             rewriter, loc, op.getType(), flatMemref, ValueRange{offset},
             op.getPadding());
         rewriter.replaceOp(op, newTransferRead.getResult());
       })
       .Case([&](vector::TransferWriteOp op) {
         auto newTransferWrite = vector::TransferWriteOp::create(
             rewriter, loc, op.getVector(), flatMemref, ValueRange{offset});
         rewriter.replaceOp(op, newTransferWrite);
       })
       .Default([&](auto op) {
         op->emitOpError("unimplemented: do not know how to replace op.");
       });
 }

 /// Returns the index operands of `op` (whatever `op.getIndices()` yields) as
 /// a ValueRange.
 template <typename T>
 static ValueRange getIndices(T op) {
   ValueRange accessIndices = op.getIndices();
   return accessIndices;
 }

 /// Checks the op-specific preconditions for flattening `op`.
 ///
 /// vector.transfer_read/write are accepted only when their permutation map
 /// is an identity or minor identity and every `in_bounds` entry is true; a
 /// rejection is reported through `rewriter.notifyMatchFailure`. All other op
 /// kinds are accepted unconditionally.
 template <typename T>
 static LogicalResult canBeFlattened(T op, PatternRewriter &rewriter) {
   return llvm::TypeSwitch<Operation *, LogicalResult>(op.getOperation())
       .template Case<vector::TransferReadOp, vector::TransferWriteOp>(
           [&](auto transferOp) {
             // Requirement 1: identity or minor identity permutation map.
             auto map = transferOp.getPermutationMap();
             const bool mapSupported =
                 map.isIdentity() || map.isMinorIdentity();
             if (!mapSupported)
               return rewriter.notifyMatchFailure(
                   transferOp, "only identity permutation map is supported");

             // Requirement 2: every dimension is marked in-bounds.
             mlir::ArrayAttr inBounds = transferOp.getInBounds();
             const bool allInBounds =
                 llvm::all_of(inBounds, [](Attribute attr) {
                   return cast<BoolAttr>(attr).getValue();
                 });
             if (!allInBounds)
               return rewriter.notifyMatchFailure(
                   transferOp, "only inbounds are supported");

             return success();
           })
       .Default([](Operation *) { return success(); });
 }

 // Pattern for memref::AllocOp and memref::AllocaOp.
 //
 // The "source" memref for these ops IS the op's own result, so the generic
 // MemRefRewritePattern cannot be used: getFlattenMemrefAndOffset would insert
 // ExtractStridedMetadataOp and ReinterpretCastOp that use op.result BEFORE op
 // in the block. After replaceOpWithNewOp the original result is RAUW'd to the
 // new ReinterpretCastOp, leaving the earlier ops with forward references
 // (domination violations) caught by MLIR_ENABLE_EXPENSIVE_PATTERN_API_CHECKS.
 //
 // Instead, sizes and strides are computed from the op's operands and type
 // (which all dominate the op), avoiding any reference to op.result until the
 // final replaceOpWithNewOp.
 template <typename AllocLikeOp>
 struct AllocLikeFlattenPattern : public OpRewritePattern<AllocLikeOp> {
   using OpRewritePattern<AllocLikeOp>::OpRewritePattern;

   /// Replaces a multi-rank alloc-like op by a 1-D allocation of the same
   /// element type and memory space, followed by a `memref.reinterpret_cast`
   /// back to the original type. Fails to match for rank <= 1, unsupported
   /// layouts, non int/float element types, and dynamic offsets or strides.
   LogicalResult matchAndRewrite(AllocLikeOp op,
                                 PatternRewriter &rewriter) const override {
     Value allocated = op.getMemref();
     if (!(needFlattening(allocated) && checkLayout(allocated)))
       return failure();

     auto resultType = cast<MemRefType>(op.getType());
     Type elementType = resultType.getElementType();
     if (!elementType.isIntOrFloat())
       return failure();
     unsigned bitWidth = elementType.getIntOrFloatBitWidth();
     Location loc = op->getLoc();

     SmallVector<OpFoldResult> sizes = op.getMixedSizes();

     // The layout must be fully static: both the offset and every stride.
     int64_t offset;
     SmallVector<int64_t> strideValues;
     if (failed(resultType.getStridesAndOffset(strideValues, offset)))
       return failure();
     if (offset == ShapedType::kDynamic)
       return rewriter.notifyMatchFailure(op, "dynamic offset not supported");
     if (llvm::any_of(strideValues, [](int64_t stride) {
           return stride == ShapedType::kDynamic;
         }))
       return rewriter.notifyMatchFailure(op,
                                          "dynamic stride cannot be computed");
     SmallVector<OpFoldResult> strides;
     strides.reserve(strideValues.size());
     for (int64_t stride : strideValues)
       strides.push_back(rewriter.getIndexAttr(stride));

     // Linearized extent derived from sizes and strides only; nothing built
     // here refers to the result of `op`. The linearized offset half of the
     // returned pair is not needed.
     memref::LinearizedMemRefInfo linearizedInfo =
         memref::getLinearizedMemRefOffsetAndSize(
             rewriter, loc, bitWidth, bitWidth, rewriter.getIndexAttr(0), sizes,
             strides)
             .first;

     // With a non-zero offset the buffer has to span
     // [0, offset + linearizedExtent), so the offset is added to the extent.
     OpFoldResult flatSize = linearizedInfo.linearizedSize;
     if (offset != 0) {
       AffineExpr extentSym;
       bindSymbols(rewriter.getContext(), extentSym);
       flatSize = affine::makeComposedFoldedAffineApply(
           rewriter, loc, extentSym + offset, {flatSize});
     }

     // The flat dimension is static only when the size folded to an integer
     // attribute; otherwise it stays dynamic.
     int64_t flatExtent = ShapedType::kDynamic;
     if (auto sizeAttr = dyn_cast<Attribute>(flatSize)) {
       if (auto intAttr = dyn_cast<IntegerAttr>(sizeAttr))
         flatExtent = intAttr.getInt();
     }

     auto flatType =
         MemRefType::get({flatExtent}, elementType,
                         StridedLayoutAttr::get(rewriter.getContext(), 0, {1}),
                         resultType.getMemorySpace());

     // A dynamic flat dimension needs its size passed as an operand.
     SmallVector<Value, 1> dynamicSizes;
     if (flatExtent == ShapedType::kDynamic)
       dynamicSizes.push_back(
           getValueFromOpFoldResult(rewriter, loc, flatSize));

     auto flatAlloc = AllocLikeOp::create(rewriter, loc, flatType, dynamicSizes,
                                          op.getAlignmentAttr());
     rewriter.replaceOpWithNewOp<memref::ReinterpretCastOp>(
         op, resultType, flatAlloc, rewriter.getIndexAttr(offset), sizes,
         strides);
     return success();
   }
 };

 /// Rewrites a memory access op `T` on a multi-rank memref into the same kind
 /// of access on a flattened 1-D view of that memref, using a single
 /// linearized index.
 ///
 /// The pattern does not match when:
 ///  - `canBeFlattened` rejects the op,
 ///  - the accessed value is missing or is not a memref (the vector transfer
 ///    ops may operate on tensors),
 ///  - the memref has rank <= 1 or an unsupported layout,
 ///  - the element type is neither integer nor float, since linearization
 ///    needs its bit width.
 template <typename T>
 struct MemRefRewritePattern : public OpRewritePattern<T> {
   using OpRewritePattern<T>::OpRewritePattern;
   LogicalResult matchAndRewrite(T op,
                                 PatternRewriter &rewriter) const override {
     LogicalResult canFlatten = canBeFlattened(op, rewriter);
     if (failed(canFlatten))
       return canFlatten;

     // Bail out before any cast<MemRefType>: the helpers below assert on
     // non-memref values.
     Value memref = getTargetMemref(op);
     auto memrefType =
         memref ? dyn_cast<MemRefType>(memref.getType()) : MemRefType();
     if (!memrefType)
       return rewriter.notifyMatchFailure(op, "expected a memref to flatten");

     if (!needFlattening(memref) || !checkLayout(memref))
       return failure();

     // getFlattenMemrefAndOffset queries the element bit width, which is only
     // defined for integer and float types.
     if (!memrefType.getElementType().isIntOrFloat())
       return rewriter.notifyMatchFailure(
           op, "only integer or float element types are supported");

     auto &&[flatMemref, offset] = getFlattenMemrefAndOffset(
         rewriter, op->getLoc(), memref, getIndices<T>(op));
     replaceOp<T>(op, rewriter, flatMemref, offset);
     return success();
   }
 };

 /// Pass that greedily applies every memref flattening pattern to the
 /// operation it runs on and signals failure if the greedy driver fails.
 struct FlattenMemrefsPass
     : public mlir::memref::impl::FlattenMemrefsPassBase<FlattenMemrefsPass> {
   using Base::Base;

   /// Declares the dialects whose ops the patterns may create.
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<affine::AffineDialect>();
     registry.insert<arith::ArithDialect>();
     registry.insert<memref::MemRefDialect>();
     registry.insert<vector::VectorDialect>();
   }

   void runOnOperation() override {
     RewritePatternSet flattenPatterns(&getContext());
     memref::populateFlattenMemrefsPatterns(flattenPatterns);

     LogicalResult status =
         applyPatternsGreedily(getOperation(), std::move(flattenPatterns));
     if (failed(status))
       signalPassFailure();
   }
 };

 } // namespace

 /// Adds the flattening patterns for the vector dialect memory access ops
 /// (load/store, transfer read/write, masked load/store) to `patterns`.
 void memref::populateFlattenVectorOpsOnMemrefPatterns(
     RewritePatternSet &patterns) {
   MLIRContext *context = patterns.getContext();
   patterns.add<MemRefRewritePattern<vector::LoadOp>,
                MemRefRewritePattern<vector::StoreOp>,
                MemRefRewritePattern<vector::TransferReadOp>,
                MemRefRewritePattern<vector::TransferWriteOp>,
                MemRefRewritePattern<vector::MaskedLoadOp>,
                MemRefRewritePattern<vector::MaskedStoreOp>>(context);
 }

 /// Adds the flattening patterns for the memref dialect ops (load, store,
 /// alloc, alloca) to `patterns`.
 void memref::populateFlattenMemrefOpsPatterns(RewritePatternSet &patterns) {
   MLIRContext *context = patterns.getContext();
   patterns.add<MemRefRewritePattern<memref::LoadOp>,
                MemRefRewritePattern<memref::StoreOp>,
                AllocLikeFlattenPattern<memref::AllocOp>,
                AllocLikeFlattenPattern<memref::AllocaOp>>(context);
 }

 /// Adds all flattening patterns to `patterns`: first the memref dialect
 /// ones, then the vector dialect ones.
 void memref::populateFlattenMemrefsPatterns(RewritePatternSet &patterns) {
   memref::populateFlattenMemrefOpsPatterns(patterns);
   memref::populateFlattenVectorOpsOnMemrefPatterns(patterns);
 }
	//===----- FlattenMemRefs.cpp - MemRef ops flattener pass ----------------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	// This file contains patterns for flattening multi-rank memref-related ops
	// into 1-d memref ops.
	//
	//===----------------------------------------------------------------------===//

	#include "mlir/Dialect/Affine/IR/AffineOps.h"
	#include "mlir/Dialect/Arith/IR/Arith.h"
	#include "mlir/Dialect/MemRef/IR/MemRef.h"
	#include "mlir/Dialect/MemRef/Transforms/Passes.h"
	#include "mlir/Dialect/MemRef/Transforms/Transforms.h"
	#include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
	#include "mlir/Dialect/Utils/IndexingUtils.h"
	#include "mlir/Dialect/Utils/StaticValueUtils.h"
	#include "mlir/Dialect/Vector/IR/VectorOps.h"
	#include "mlir/IR/Attributes.h"
	#include "mlir/IR/Builders.h"
	#include "mlir/IR/BuiltinTypes.h"
	#include "mlir/IR/OpDefinition.h"
	#include "mlir/IR/PatternMatch.h"
	#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
	#include "llvm/ADT/TypeSwitch.h"

	namespace mlir {
	namespace memref {
	#define GEN_PASS_DEF_FLATTENMEMREFSPASS
	#include "mlir/Dialect/MemRef/Transforms/Passes.h.inc"
	} // namespace memref
	} // namespace mlir

	using namespace mlir;

	/// Materializes `in` as an SSA value. A fold result that already holds a
	/// `Value` is returned unchanged; otherwise it is expected to hold an
	/// `IntegerAttr`, for which an `arith` index constant is built with
	/// `rewriter` at `loc`.
	static Value getValueFromOpFoldResult(OpBuilder &rewriter, Location loc,
	                                      OpFoldResult in) {
	  // Already an SSA value: nothing to build.
	  if (auto value = dyn_cast<Value>(in))
	    return value;
	  // Otherwise the payload must be an integer attribute.
	  int64_t constant = cast<IntegerAttr>(cast<Attribute>(in)).getInt();
	  return arith::ConstantIndexOp::create(rewriter, loc, constant);
	}

	/// Returns a collapsed memref and the linearized index to access the element
	/// at the specified indices.
	///
	/// `source` must be a value of MemRefType whose element type is an integer or
	/// float (its bit width is queried). The first element of the result is a
	/// `memref.reinterpret_cast` of `source` to a 1-D memref with unit stride;
	/// the second is the linearized index as an SSA value.
	///
	/// The ops are created in a fixed order (metadata extraction, linearization,
	/// reinterpret cast, then the index value) so that the emitted IR does not
	/// depend on the compiler's argument evaluation order.
	static std::pair<Value, Value> getFlattenMemrefAndOffset(OpBuilder &rewriter,
	                                                         Location loc,
	                                                         Value source,
	                                                         ValueRange indices) {
	  auto sourceType = cast<MemRefType>(source.getType());

	  // The strides/offset are only queried to validate the layout; the actual
	  // values come from the extracted strided metadata below.
	  int64_t sourceOffset;
	  SmallVector<int64_t, 4> sourceStrides;
	  [[maybe_unused]] LogicalResult hasStridedLayout =
	      sourceType.getStridesAndOffset(sourceStrides, sourceOffset);
	  assert(succeeded(hasStridedLayout) &&
	         "expected a memref whose strides and offset can be computed");

	  memref::ExtractStridedMetadataOp stridedMetadata =
	      memref::ExtractStridedMetadataOp::create(rewriter, loc, source);

	  // Source and destination bit widths are equal: this is a pure shape
	  // flattening, not an element-type change.
	  unsigned typeBit = sourceType.getElementType().getIntOrFloatBitWidth();
	  auto [linearizedInfo, linearizedIndices] =
	      memref::getLinearizedMemRefOffsetAndSize(
	          rewriter, loc, typeBit, typeBit,
	          stridedMetadata.getConstifiedMixedOffset(),
	          stridedMetadata.getConstifiedMixedSizes(),
	          stridedMetadata.getConstifiedMixedStrides(),
	          getAsOpFoldResult(indices));

	  // Build the two results as separate statements: function arguments have
	  // an unspecified evaluation order, which would otherwise make the relative
	  // order of the created ops compiler-dependent.
	  Value flatMemref = memref::ReinterpretCastOp::create(
	      rewriter, loc, source,
	      /* offset = */ linearizedInfo.linearizedOffset,
	      /* shapes = */
	      ArrayRef<OpFoldResult>{linearizedInfo.linearizedSize},
	      /* strides = */
	      ArrayRef<OpFoldResult>{rewriter.getIndexAttr(1)});
	  Value linearIndex =
	      getValueFromOpFoldResult(rewriter, loc, linearizedIndices);
	  return {flatMemref, linearIndex};
	}

	/// Returns true when `val`, which must be of MemRefType, has rank greater
	/// than one and therefore is a candidate for flattening.
	static bool needFlattening(Value val) {
	  return cast<MemRefType>(val.getType()).getRank() > 1;
	}

	/// Returns true when the layout of `val` (which must be of MemRefType) is
	/// either the identity layout or a StridedLayoutAttr.
	static bool checkLayout(Value val) {
	  auto type = cast<MemRefType>(val.getType());
	  return type.getLayout().isIdentity() ||
	         isa<StridedLayoutAttr>(type.getLayout());
	}

	namespace {
	/// Returns the memref operand or result that `op` works on: `getMemref()`
	/// for the memref load/store/alloc/alloca ops and `getBase()` for the vector
	/// load/store, masked and transfer ops. Any other op yields the `nullptr`
	/// default.
	static Value getTargetMemref(Operation *op) {
	  return llvm::TypeSwitch<Operation *, Value>(op)
	      .Case<memref::LoadOp, memref::StoreOp, memref::AllocaOp,
	            memref::AllocOp>(
	          [](auto memrefOp) { return memrefOp.getMemref(); })
	      .Case<vector::LoadOp, vector::StoreOp, vector::MaskedLoadOp,
	            vector::MaskedStoreOp, vector::TransferReadOp,
	            vector::TransferWriteOp>(
	          [](auto vectorOp) { return vectorOp.getBase(); })
	      .Default(nullptr);
	}

	template <typename T>
	static void replaceOp(T op, PatternRewriter &rewriter, Value flatMemref,
	Value offset) {
	Location loc = op->getLoc();
	llvm::TypeSwitch<Operation *>(op.getOperation())
	.Case([&](memref::LoadOp op) {
	auto newLoad =
	memref::LoadOp::create(rewriter, loc, op->getResultTypes(),
	flatMemref, ValueRange{offset});
	newLoad->setAttrs(op->getAttrs());
	rewriter.replaceOp(op, newLoad.getResult());
	})
	.Case([&](memref::StoreOp op) {
	auto newStore =
	memref::StoreOp::create(rewriter, loc, op->getOperands().front(),
	flatMemref, ValueRange{offset});
	newStore->setAttrs(op->getAttrs());
	rewriter.replaceOp(op, newStore);
	})
	.Case([&](vector::LoadOp op) {
	auto newLoad =
	vector::LoadOp::create(rewriter, loc, op->getResultTypes(),
	flatMemref, ValueRange{offset});
	newLoad->setAttrs(op->getAttrs());
	rewriter.replaceOp(op, newLoad.getResult());
	})
	.Case([&](vector::StoreOp op) {
	auto newStore =
	vector::StoreOp::create(rewriter, loc, op->getOperands().front(),
	flatMemref, ValueRange{offset});
	newStore->setAttrs(op->getAttrs());
	rewriter.replaceOp(op, newStore);
	})
	.Case([&](vector::MaskedLoadOp op) {
	auto newMaskedLoad = vector::MaskedLoadOp::create(
	rewriter, loc, op.getType(), flatMemref, ValueRange{offset},
	op.getMask(), op.getPassThru());
	newMaskedLoad->setAttrs(op->getAttrs());
	rewriter.replaceOp(op, newMaskedLoad.getResult());
	})
	.Case([&](vector::MaskedStoreOp op) {
	auto newMaskedStore = vector::MaskedStoreOp::create(
	rewriter, loc, flatMemref, ValueRange{offset}, op.getMask(),
	op.getValueToStore());
	newMaskedStore->setAttrs(op->getAttrs());
	rewriter.replaceOp(op, newMaskedStore);
	})
	.Case([&](vector::TransferReadOp op) {
	auto newTransferRead = vector::TransferReadOp::create(
	rewriter, loc, op.getType(), flatMemref, ValueRange{offset},
	op.getPadding());
	rewriter.replaceOp(op, newTransferRead.getResult());
	})
	.Case([&](vector::TransferWriteOp op) {
	auto newTransferWrite = vector::TransferWriteOp::create(
	rewriter, loc, op.getVector(), flatMemref, ValueRange{offset});
	rewriter.replaceOp(op, newTransferWrite);
	})
	.Default([&](auto op) {
	op->emitOpError("unimplemented: do not know how to replace op.");
	});
	}

	/// Returns the index operands of `op` (whatever `op.getIndices()` yields) as
	/// a ValueRange.
	template <typename T>
	static ValueRange getIndices(T op) {
	  ValueRange accessIndices = op.getIndices();
	  return accessIndices;
	}

	/// Checks the op-specific preconditions for flattening `op`.
	///
	/// vector.transfer_read/write are accepted only when their permutation map
	/// is an identity or minor identity and every `in_bounds` entry is true; a
	/// rejection is reported through `rewriter.notifyMatchFailure`. All other op
	/// kinds are accepted unconditionally.
	template <typename T>
	static LogicalResult canBeFlattened(T op, PatternRewriter &rewriter) {
	  return llvm::TypeSwitch<Operation *, LogicalResult>(op.getOperation())
	      .template Case<vector::TransferReadOp, vector::TransferWriteOp>(
	          [&](auto transferOp) {
	            // Requirement 1: identity or minor identity permutation map.
	            auto map = transferOp.getPermutationMap();
	            const bool mapSupported =
	                map.isIdentity() || map.isMinorIdentity();
	            if (!mapSupported)
	              return rewriter.notifyMatchFailure(
	                  transferOp, "only identity permutation map is supported");

	            // Requirement 2: every dimension is marked in-bounds.
	            mlir::ArrayAttr inBounds = transferOp.getInBounds();
	            const bool allInBounds =
	                llvm::all_of(inBounds, [](Attribute attr) {
	                  return cast<BoolAttr>(attr).getValue();
	                });
	            if (!allInBounds)
	              return rewriter.notifyMatchFailure(
	                  transferOp, "only inbounds are supported");

	            return success();
	          })
	      .Default([](Operation *) { return success(); });
	}

	// Pattern for memref::AllocOp and memref::AllocaOp.
	//
	// The "source" memref for these ops IS the op's own result, so the generic
	// MemRefRewritePattern cannot be used: getFlattenMemrefAndOffset would insert
	// ExtractStridedMetadataOp and ReinterpretCastOp that use op.result BEFORE op
	// in the block. After replaceOpWithNewOp the original result is RAUW'd to the
	// new ReinterpretCastOp, leaving the earlier ops with forward references
	// (domination violations) caught by MLIR_ENABLE_EXPENSIVE_PATTERN_API_CHECKS.
	//
	// Instead, sizes and strides are computed from the op's operands and type
	// (which all dominate the op), avoiding any reference to op.result until the
	// final replaceOpWithNewOp.
	template <typename AllocLikeOp>
	struct AllocLikeFlattenPattern : public OpRewritePattern<AllocLikeOp> {
	  using OpRewritePattern<AllocLikeOp>::OpRewritePattern;

	  /// Replaces a multi-rank alloc-like op by a 1-D allocation of the same
	  /// element type and memory space, followed by a `memref.reinterpret_cast`
	  /// back to the original type. Fails to match for rank <= 1, unsupported
	  /// layouts, non int/float element types, and dynamic offsets or strides.
	  LogicalResult matchAndRewrite(AllocLikeOp op,
	                                PatternRewriter &rewriter) const override {
	    if (!needFlattening(op.getMemref()) || !checkLayout(op.getMemref()))
	      return failure();

	    Location loc = op->getLoc();
	    auto memrefType = cast<MemRefType>(op.getType());
	    auto elemType = memrefType.getElementType();
	    if (!elemType.isIntOrFloat())
	      return failure();
	    unsigned elemBitWidth = elemType.getIntOrFloatBitWidth();

	    SmallVector<OpFoldResult> sizes = op.getMixedSizes();

	    // The layout must be fully static: both the offset and every stride.
	    int64_t staticOffset;
	    SmallVector<int64_t> staticStrides;
	    if (failed(memrefType.getStridesAndOffset(staticStrides, staticOffset)))
	      return failure();
	    if (staticOffset == ShapedType::kDynamic)
	      return rewriter.notifyMatchFailure(op, "dynamic offset not supported");
	    SmallVector<OpFoldResult> strides;
	    strides.reserve(staticStrides.size());
	    for (int64_t stride : staticStrides) {
	      if (stride == ShapedType::kDynamic)
	        return rewriter.notifyMatchFailure(op,
	                                           "dynamic stride cannot be computed");
	      strides.push_back(rewriter.getIndexAttr(stride));
	    }

	    // Compute the linearized flat extent from sizes and strides (no SSA ops
	    // referencing op.result are created here).
	    memref::LinearizedMemRefInfo linearizedInfo;
	    OpFoldResult linearizedOffset;
	    std::tie(linearizedInfo, linearizedOffset) =
	        memref::getLinearizedMemRefOffsetAndSize(
	            rewriter, loc, elemBitWidth, elemBitWidth,
	            rewriter.getIndexAttr(0), sizes, strides);
	    (void)linearizedOffset;

	    // The total allocation must cover [0, staticOffset + linearizedExtent).
	    // When the offset is non-zero, add it to the computed extent so that the
	    // buffer is large enough for elements accessed at positions
	    // [staticOffset, staticOffset + linearizedExtent).
	    OpFoldResult flatSizeOfr = linearizedInfo.linearizedSize;
	    if (staticOffset != 0) {
	      AffineExpr s0;
	      bindSymbols(rewriter.getContext(), s0);
	      flatSizeOfr = affine::makeComposedFoldedAffineApply(
	          rewriter, loc, s0 + staticOffset, {flatSizeOfr});
	    }

	    // Build the flat 1-D MemRefType. The linearized size may be static or
	    // dynamic (OpFoldResult of either IntegerAttr or a Value).
	    int64_t flatDimSize = ShapedType::kDynamic;
	    if (auto attr = dyn_cast<Attribute>(flatSizeOfr))
	      if (auto intAttr = dyn_cast<IntegerAttr>(attr))
	        flatDimSize = intAttr.getInt();

	    auto flatMemrefType =
	        MemRefType::get({flatDimSize}, memrefType.getElementType(),
	                        StridedLayoutAttr::get(rewriter.getContext(), 0, {1}),
	                        memrefType.getMemorySpace());

	    // Collect the flat dynamic-size operand (empty for fully-static case).
	    SmallVector<Value, 1> dynSizes;
	    if (flatDimSize == ShapedType::kDynamic)
	      dynSizes.push_back(
	          getValueFromOpFoldResult(rewriter, loc, flatSizeOfr));

	    auto newOp = AllocLikeOp::create(rewriter, loc, flatMemrefType, dynSizes,
	                                     op.getAlignmentAttr());
	    rewriter.replaceOpWithNewOp<memref::ReinterpretCastOp>(
	        op, cast<MemRefType>(op.getType()), newOp,
	        rewriter.getIndexAttr(staticOffset), sizes, strides);
	    return success();
	  }
	};

	/// Rewrites a memory access op `T` on a multi-rank memref into the same kind
	/// of access on a flattened 1-D view of that memref, using a single
	/// linearized index.
	///
	/// The pattern does not match when:
	///  - `canBeFlattened` rejects the op,
	///  - the accessed value is missing or is not a memref (the vector transfer
	///    ops may operate on tensors),
	///  - the memref has rank <= 1 or an unsupported layout,
	///  - the element type is neither integer nor float, since linearization
	///    needs its bit width.
	template <typename T>
	struct MemRefRewritePattern : public OpRewritePattern<T> {
	  using OpRewritePattern<T>::OpRewritePattern;
	  LogicalResult matchAndRewrite(T op,
	                                PatternRewriter &rewriter) const override {
	    LogicalResult canFlatten = canBeFlattened(op, rewriter);
	    if (failed(canFlatten))
	      return canFlatten;

	    // Bail out before any cast<MemRefType>: the helpers below assert on
	    // non-memref values.
	    Value memref = getTargetMemref(op);
	    auto memrefType =
	        memref ? dyn_cast<MemRefType>(memref.getType()) : MemRefType();
	    if (!memrefType)
	      return rewriter.notifyMatchFailure(op, "expected a memref to flatten");

	    if (!needFlattening(memref) || !checkLayout(memref))
	      return failure();

	    // getFlattenMemrefAndOffset queries the element bit width, which is only
	    // defined for integer and float types.
	    if (!memrefType.getElementType().isIntOrFloat())
	      return rewriter.notifyMatchFailure(
	          op, "only integer or float element types are supported");

	    auto &&[flatMemref, offset] = getFlattenMemrefAndOffset(
	        rewriter, op->getLoc(), memref, getIndices<T>(op));
	    replaceOp<T>(op, rewriter, flatMemref, offset);
	    return success();
	  }
	};

	/// Pass that greedily applies every memref flattening pattern to the
	/// operation it runs on and signals failure if the greedy driver fails.
	struct FlattenMemrefsPass
	    : public mlir::memref::impl::FlattenMemrefsPassBase<FlattenMemrefsPass> {
	  using Base::Base;

	  /// Declares the dialects whose ops the patterns may create.
	  void getDependentDialects(DialectRegistry &registry) const override {
	    registry.insert<affine::AffineDialect>();
	    registry.insert<arith::ArithDialect>();
	    registry.insert<memref::MemRefDialect>();
	    registry.insert<vector::VectorDialect>();
	  }

	  void runOnOperation() override {
	    RewritePatternSet flattenPatterns(&getContext());
	    memref::populateFlattenMemrefsPatterns(flattenPatterns);

	    LogicalResult status =
	        applyPatternsGreedily(getOperation(), std::move(flattenPatterns));
	    if (failed(status))
	      signalPassFailure();
	  }
	};

	} // namespace

	/// Adds the flattening patterns for the vector dialect memory access ops
	/// (load/store, transfer read/write, masked load/store) to `patterns`.
	void memref::populateFlattenVectorOpsOnMemrefPatterns(
	    RewritePatternSet &patterns) {
	  MLIRContext *context = patterns.getContext();
	  patterns.add<MemRefRewritePattern<vector::LoadOp>,
	               MemRefRewritePattern<vector::StoreOp>,
	               MemRefRewritePattern<vector::TransferReadOp>,
	               MemRefRewritePattern<vector::TransferWriteOp>,
	               MemRefRewritePattern<vector::MaskedLoadOp>,
	               MemRefRewritePattern<vector::MaskedStoreOp>>(context);
	}

	/// Adds the flattening patterns for the memref dialect ops (load, store,
	/// alloc, alloca) to `patterns`.
	void memref::populateFlattenMemrefOpsPatterns(RewritePatternSet &patterns) {
	  MLIRContext *context = patterns.getContext();
	  patterns.add<MemRefRewritePattern<memref::LoadOp>,
	               MemRefRewritePattern<memref::StoreOp>,
	               AllocLikeFlattenPattern<memref::AllocOp>,
	               AllocLikeFlattenPattern<memref::AllocaOp>>(context);
	}

	/// Adds all flattening patterns to `patterns`: first the memref dialect
	/// ones, then the vector dialect ones.
	void memref::populateFlattenMemrefsPatterns(RewritePatternSet &patterns) {
	  memref::populateFlattenMemrefOpsPatterns(patterns);
	  memref::populateFlattenVectorOpsOnMemrefPatterns(patterns);
	}