// lib/Dialect/Vector/Utils/VectorUtils.cpp - llvm-project/mlir - Git at Google

 //===- VectorUtils.cpp - MLIR Utilities for VectorOps   ------------------===//
 //
 // Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This file implements utility methods for working with the Vector dialect.
 //
 //===----------------------------------------------------------------------===//

 #include "mlir/Dialect/Vector/Utils/VectorUtils.h"

 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/Arith/Utils/Utils.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Utils/IndexingUtils.h"
 #include "mlir/Dialect/Vector/IR/VectorOps.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/IntegerSet.h"
 #include "mlir/IR/Matchers.h"
 #include "mlir/IR/Operation.h"
 #include "mlir/IR/TypeUtilities.h"
 #include "mlir/Support/LLVM.h"

 #include "llvm/ADT/DenseSet.h"
 #include "llvm/Support/DebugLog.h"
 #include "llvm/Support/InterleavedRange.h"

 #define DEBUG_TYPE "vector-utils"

 using namespace mlir;

 /// Builds (or folds) the query for dimension `dim` of `source`.
 ///
 /// The op that gets created depends on the type of `source`: tensor-typed
 /// values (ranked or unranked) use tensor::DimOp, memref-typed values (ranked
 /// or unranked) use memref::DimOp. Passing a value of any other type is a
 /// programming error and reaches llvm_unreachable.
 Value mlir::vector::createOrFoldDimOp(OpBuilder &b, Location loc, Value source,
                                       int64_t dim) {
   Type sourceType = source.getType();
   // The two type families are disjoint, so the order of the checks does not
   // matter.
   if (isa<UnrankedTensorType, RankedTensorType>(sourceType))
     return b.createOrFold<tensor::DimOp>(loc, source, dim);
   if (isa<UnrankedMemRefType, MemRefType>(sourceType))
     return b.createOrFold<memref::DimOp>(loc, source, dim);
   llvm_unreachable("Expected MemRefType or TensorType");
 }

 /// Decides whether 'dim0' and 'dim1' swap places when only those two
 /// dimensions of the n-D transpose pattern 'transp' are considered, i.e.
 /// within the 2-D slice of the transposition formed by just these two dims.
 ///
 /// Example 1: dim0 = 0, dim1 = 2, transp = [2, 1, 0]
 ///   Returns true: restricted to dims {0, 2} the pattern is [1, 0], so the
 ///   two dims are swapped.
 ///
 /// Example 2: dim0 = 0, dim1 = 1, transp = [2, 1, 0]
 ///   Returns true: restricted to dims {0, 1} the pattern is [1, 0]. Note that
 ///   dim1 (1) stays in place in the full n-D transposition.
 ///
 /// Example 3: dim0 = 0, dim1 = 1, transp = [2, 0, 1]
 ///   Returns false: restricted to dims {0, 1} the pattern is [0, 1]. Note
 ///   that both dims do move in the full n-D transposition.
 ///
 /// `transp` must mention at least one of the two dims; otherwise the pattern
 /// is considered ill-formed and llvm_unreachable is hit.
 static bool areDimsTransposedIn2DSlice(int64_t dim0, int64_t dim1,
                                        ArrayRef<int64_t> transp) {
   // Whichever of the two dims shows up first in the permutation decides the
   // answer: dim0 first means the relative order is preserved, dim1 first means
   // the two are swapped.
   for (size_t pos = 0, numDims = transp.size(); pos != numDims; ++pos) {
     const int64_t permDim = transp[pos];
     if (permDim != dim0 && permDim != dim1)
       continue;
     // dim0 takes precedence, exactly as if it were tested first.
     return permDim != dim0;
   }

   llvm_unreachable("Ill-formed transpose pattern");
 }

 /// Checks whether `op` is effectively a 2-D transpose.
 ///
 /// This is the case when the source vector has exactly two dimensions of size
 /// greater than one and those two dimensions are swapped relative to each
 /// other by the op's permutation. On success returns the indices of these two
 /// source dimensions (in increasing order); otherwise returns failure.
 FailureOr<std::pair<int, int>>
 mlir::vector::isTranspose2DSlice(vector::TransposeOp op) {
   VectorType sourceType = op.getSourceVectorType();
   ArrayRef<int64_t> sourceShape = sourceType.getShape();

   // Collect the positions of all non-unit source dimensions.
   SmallVector<int64_t> nonUnitDims;
   for (int64_t dim = 0, rank = sourceShape.size(); dim < rank; ++dim) {
     if (sourceShape[dim] > 1)
       nonUnitDims.push_back(dim);
   }

   // Only a source with exactly two non-unit dims can be a 2-D transpose.
   if (nonUnitDims.size() != 2)
     return failure();

   // The two non-unit dims must actually be swapped with each other for one of
   // the 2-D transpose patterns to apply.
   const int64_t firstDim = nonUnitDims[0];
   const int64_t secondDim = nonUnitDims[1];
   const bool swapped =
       areDimsTransposedIn2DSlice(firstDim, secondDim, op.getPermutation());
   if (!swapped)
     return failure();

   return std::pair<int, int>(firstDim, secondDim);
 }

 /// Constructs a permutation map from memref indices to vector dimension.
 ///
 /// The implementation uses the knowledge of the mapping of enclosing loop to
 /// vector dimension. `enclosingLoopToVectorDim` carries this information as a
 /// map with:
 ///   - keys representing "vectorized enclosing loops";
 ///   - values representing the corresponding vector dimension.
 /// The algorithm traverses the "vectorized enclosing loops" and, for each of
 /// them, looks for the at-most-one index in `indices` that is *not* invariant
 /// along said loop (i.e. that varies with the loop's induction variable).
 /// There must be at most one such index: otherwise the access is not
 /// vectorizable, which is checked by an assertion.
 /// If such a varying index is found, its position is added to the
 /// permutation map at the vector dimension associated with the loop.
 /// If all indices are invariant along the loop, the entry stays at the
 /// constant 0, which corresponds to a vector broadcast along that dimension.
 ///
 /// Returns an empty AffineMap if `enclosingLoopToVectorDim` is empty,
 /// signalling that no permutation map can be constructed given
 /// `enclosingLoopToVectorDim`.
 ///
 /// Examples can be found in the documentation of `makePermutationMap`, in the
 /// header file.
 static AffineMap makePermutationMap(
     ArrayRef<Value> indices,
     const DenseMap<Operation *, unsigned> &enclosingLoopToVectorDim) {
   if (enclosingLoopToVectorDim.empty())
     return AffineMap();
   MLIRContext *context =
       enclosingLoopToVectorDim.begin()->getFirst()->getContext();
   // Every result starts out as the constant 0 (broadcast) and is overwritten
   // when a varying index is found for the corresponding vector dimension.
   SmallVector<AffineExpr> perm(enclosingLoopToVectorDim.size(),
                                getAffineConstantExpr(0, context));

   const unsigned numIndices = indices.size();
   for (const auto &entry : enclosingLoopToVectorDim) {
     Operation *loop = entry.first;
     const unsigned vectorDim = entry.second;
     assert(vectorDim < perm.size());
     auto invariants = affine::getInvariantAccesses(
         cast<affine::AffineForOp>(loop).getInductionVar(), indices);
     unsigned countInvariantIndices = 0;
     for (unsigned dim = 0; dim < numIndices; ++dim) {
       if (invariants.count(indices[dim])) {
         ++countInvariantIndices;
         continue;
       }
       // `indices[dim]` varies along `loop`: map it to the loop's vector dim.
       assert(perm[vectorDim] == getAffineConstantExpr(0, context) &&
              "permutationMap already has an entry along dim");
       perm[vectorDim] = getAffineDimExpr(dim, context);
     }
     // Either all indices are invariant (broadcast) or exactly one varies.
     assert((countInvariantIndices == numIndices ||
             countInvariantIndices == numIndices - 1) &&
            "Vectorization prerequisite violated: at most 1 index may vary "
            "wrt a vectorized loop");
     (void)countInvariantIndices;
   }
   return AffineMap::get(indices.size(), 0, perm, context);
 }

 /// Walks the chain of ancestor operations of `block`, starting at the op that
 /// owns the block, and collects every ancestor that is a `T`. The result is
 /// ordered from the closest ancestor to the farthest one.
 /// TODO: could also be implemented as a collect parents followed by a
 /// filter and made available outside this file.
 template <typename T>
 static SetVector<Operation *> getParentsOfType(Block *block) {
   SetVector<Operation *> matches;
   for (Operation *ancestor = block->getParentOp(); ancestor;
        ancestor = ancestor->getParentOp()) {
     if (!isa<T>(ancestor))
       continue;
     // Each ancestor is visited once, so it cannot be in the set already.
     assert(matches.count(ancestor) == 0 && "Already inserted");
     matches.insert(ancestor);
   }
   return matches;
 }

 /// Returns the affine::AffineForOp operations that enclose `block`, ordered
 /// from closest to farthest. Thin wrapper around `getParentsOfType`.
 static SetVector<Operation *> getEnclosingforOps(Block *block) {
   return getParentsOfType<affine::AffineForOp>(block);
 }

 /// Builds the permutation map for an access with `indices` located in the
 /// block `insertPoint`.
 ///
 /// Only the entries of `loopToVectorDim` whose loop is an affine.for that
 /// encloses `insertPoint` are taken into account; the filtered map is then
 /// handed to the file-local implementation.
 AffineMap mlir::makePermutationMap(
     Block *insertPoint, ArrayRef<Value> indices,
     const DenseMap<Operation *, unsigned> &loopToVectorDim) {
   DenseMap<Operation *, unsigned> enclosingLoopToVectorDim;
   for (Operation *loop : getEnclosingforOps(insertPoint)) {
     auto entry = loopToVectorDim.find(loop);
     // Enclosing loops that are not being vectorized are simply skipped.
     if (entry == loopToVectorDim.end())
       continue;
     enclosingLoopToVectorDim.insert(*entry);
   }
   return ::makePermutationMap(indices, enclosingLoopToVectorDim);
 }

 /// Convenience overload: builds the permutation map for `indices` using the
 /// block that contains `op` as the insertion point. Forwards to the
 /// `Block *` overload.
 AffineMap mlir::makePermutationMap(
     Operation *op, ArrayRef<Value> indices,
     const DenseMap<Operation *, unsigned> &loopToVectorDim) {
   return makePermutationMap(op->getBlock(), indices, loopToVectorDim);
 }

 /// Returns true if `op` operates on a vector type whose shape is a
 /// "super-vector" of `subVectorType`, i.e. a shape ratio between the two
 /// shapes exists according to `computeShapeRatio`.
 ///
 /// The vector type considered is:
 ///   a. the transferred vector type for ops implementing
 ///      VectorTransferOpInterface (e.g. vector.transfer_read,
 ///      vector.transfer_write);
 ///   b. the type of the single result for all other ops, provided it is a
 ///      vector type.
 /// Ops without results, ops whose single result is not a vector, and ops
 /// with more than one result yield false; in the unsupported cases an error
 /// is additionally emitted on `op`.
 bool matcher::operatesOnSuperVectorsOf(Operation &op,
                                        VectorType subVectorType) {
   /// TODO: there should be a single function for all ops to do this so we
   /// do not have to special case. Maybe a trait, or just a method, unclear atm.
   VectorType superVectorType;
   if (auto transfer = dyn_cast<VectorTransferOpInterface>(op)) {
     superVectorType = transfer.getVectorType();
   } else {
     const unsigned numResults = op.getNumResults();
     if (numResults == 0) {
       if (!isa<func::ReturnOp>(op))
         op.emitError("NYI: assuming only return operations can have 0 "
                      " results at this point");
       return false;
     }
     if (numResults != 1) {
       // Not a vector.transfer and has more than 1 result, fail hard for now
       // to wake us up when something changes.
       op.emitError("NYI: operation has more than 1 result");
       return false;
     }
     superVectorType = dyn_cast<VectorType>(op.getResult(0).getType());
     // Single result that is not a vector.
     if (!superVectorType)
       return false;
   }

   // No ratio (e.g. different ranks or non-integer divisibility) means the op
   // does not operate on a super-vector of the given sub-vector type.
   return computeShapeRatio(superVectorType.getShape(),
                            subVectorType.getShape())
       .has_value();
 }

 /// Returns true if a vector of type `vectorType` corresponds to a contiguous
 /// slice of a memref of type `memrefType`.
 ///
 /// Scalable vectors are never considered contiguous. Leading unit dimensions
 /// of the vector are ignored; for the remaining `vecRank` dimensions the
 /// trailing `vecRank` memref dimensions must be contiguous (as reported by
 /// `MemRefType::areTrailingDimsContiguous`) and all but the outermost of them
 /// must match the vector dimensions exactly.
 bool vector::isContiguousSlice(MemRefType memrefType, VectorType vectorType) {
   if (vectorType.isScalable())
     return false;

   // Strip the leading run of unit dims from the vector shape.
   ArrayRef<int64_t> trimmedVecShape = vectorType.getShape().drop_while(
       [](int64_t dimSize) { return dimSize == 1; });
   const auto vecRank = trimmedVecShape.size();

   // Nothing but unit dims: a single element is always contiguous.
   if (vecRank == 0)
     return true;

   // The trailing memref dims must be contiguous, and every vector dim except
   // the outermost one must span the corresponding memref dim entirely.
   return memrefType.areTrailingDimsContiguous(vecRank) &&
          llvm::equal(trimmedVecShape.drop_front(),
                      memrefType.getShape().take_back(vecRank).drop_front());
 }

 /// Creates an iterator over the leading dimensions of `vType` that have to
 /// be unrolled to reach `targetRank`.
 ///
 /// Unrolling stops at the first scalable dimension, which cannot be unrolled.
 /// Returns std::nullopt when the rank of `vType` is already at most
 /// `targetRank`, or when the outermost dimension is scalable so that nothing
 /// can be unrolled.
 std::optional<StaticTileOffsetRange>
 vector::createUnrollIterator(VectorType vType, int64_t targetRank) {
   if (vType.getRank() <= targetRank)
     return {};

   // Candidate dims for unrolling: everything but the trailing `targetRank`.
   auto scalableFlags = vType.getScalableDims().drop_back(targetRank);
   auto candidateShape = vType.getShape().drop_back(targetRank);

   // Number of leading fixed-size dims before the first scalable one.
   const auto *firstScalable = llvm::find(scalableFlags, true);
   auto numUnrollable = firstScalable - scalableFlags.begin();
   if (numUnrollable == 0)
     return {};

   // By construction the retained prefix contains no scalable dimension.
   scalableFlags = scalableFlags.slice(0, numUnrollable);
   assert(!llvm::is_contained(scalableFlags, true) &&
          "unexpected leading scalable dimension");

   // Iterate over the retained leading dimensions one element at a time.
   candidateShape = candidateShape.slice(0, numUnrollable);
   return StaticTileOffsetRange(candidateShape, /*unrollStep=*/1);
 }

 /// Returns the mixed (static/dynamic) sizes of the shaped value accessed by
 /// the transfer op `xfer`.
 ///
 /// `xfer` must be a vector.transfer_read or a vector.transfer_write. The
 /// sizes are queried through the tensor helpers when `hasTensorSemantics` is
 /// set and through the memref helpers otherwise; any ops needed for dynamic
 /// sizes are created with `rewriter`.
 SmallVector<OpFoldResult> vector::getMixedSizesXfer(bool hasTensorSemantics,
                                                     Operation *xfer,
                                                     RewriterBase &rewriter) {
   Location loc = xfer->getLoc();

   // Reads expose the accessed value via `getBase()`; for writes operand #1 is
   // used.
   Value base = TypeSwitch<Operation *, Value>(xfer)
                    .Case([](vector::TransferReadOp readOp) -> Value {
                      return readOp.getBase();
                    })
                    .Case([](vector::TransferWriteOp writeOp) -> Value {
                      return writeOp.getOperand(1);
                    });

   if (hasTensorSemantics)
     return tensor::getMixedSizes(rewriter, loc, base);
   return memref::getMixedSizes(rewriter, loc, base);
 }

 /// Returns true if `type` has rank greater than one and at most one scalable
 /// dimension.
 bool vector::isLinearizableVector(VectorType type) {
   // Rank-0 and rank-1 vectors are rejected.
   if (type.getRank() <= 1)
     return false;
   return type.getNumScalableDims() <= 1;
 }

 /// Determines whether a mask for xfer_read/write is trivially "all true"
 ///
 /// Given all the inputs required to generate a mask (mask sizes and shapes),
 /// and an xfer_read/write operation (indices and the source/destination
 /// shape), determines whether the corresponding mask would be trivially
 /// foldable (i.e., trivially "all true").
 ///
 /// Use this method to avoid generating spurious masks and relying on
 /// vectorization post-processing to remove them.
 ///
 /// Pre-conditions for a mask to be trivially foldable:
 ///   * The base (source/destination) shape is static.
 ///   * All indices are constant.
 ///   * All mask sizes are constant (including `arith.constant`).
 ///
 /// If the pre-conditions are met, the mask is reported as foldable only if,
 /// for every mask dimension `d`, both of the following hold:
 ///   (1) maskShape[d] <= baseShape[rankDiff + d]
 ///   (2) clamp(maskSize[d], 0, maskShape[d]) + index[d]
 ///         <= baseShape[rankDiff + d]
 ///
 /// rankDiff = rank(base) - rank(mask).
 ///
 /// This method takes a conservative view: it may return false even if the mask
 /// is technically foldable.
 ///
 /// EXAMPLE 1 (trivially foldable, all shapes match, mask sizes match the shape
 /// of the dest tensor):
 ///   %c0 = arith.constant 0 : index
 ///   %mask = vector.create_mask 5, 1
 ///   vector.mask %mask {
 ///     vector.transfer_write %vecToStore_1, %dest[%c0, %c0]
 ///       {in_bounds = [true, true]}
 ///     : vector<5x1xi32>, tensor<5x1xi32>
 ///   }
 ///
 /// EXAMPLE 2 (not trivially foldable - vector shape exceeds the tensor shape,
 /// mask is required to avoid out-of-bounds write):
 ///   %c0 = arith.constant 0 : index
 ///   %mask = vector.create_mask 5, 1
 ///   vector.mask %mask {
 ///     vector.transfer_write %vecToStore_2, %dest[%c0, %c0]
 ///      {in_bounds = [true, true]}
 ///     : vector<8x1xi32>, tensor<5x1xi32>
 ///   }
 static bool isMaskTriviallyFoldable(SmallVector<OpFoldResult> &maskSizes,
                                     SmallVector<Value> &indices,
                                     ArrayRef<int64_t> baseShape,
                                     ArrayRef<int64_t> maskShape) {
   // Masking is unavoidable in the case of dynamic shapes.
   if (ShapedType::isDynamicShape(baseShape))
     return false;

   // Gather the mask sizes that are known constants. A count mismatch with the
   // mask rank means at least one size is not constant.
   SmallVector<int64_t, 4> cstMaskSizes;
   for (OpFoldResult maskSize : maskSizes) {
     if (auto cstSize = getConstantIntValue(maskSize))
       cstMaskSizes.push_back(*cstSize);
   }
   if (cstMaskSizes.size() != maskShape.size())
     return false;

   // Same for the indices: there must be one constant index per base dim.
   SmallVector<int64_t, 4> cstIndices;
   for (Value index : indices) {
     APSInt cstIndex;
     if (matchPattern(index, m_ConstantInt(&cstIndex)))
       cstIndices.push_back(cstIndex.getSExtValue());
   }
   if (cstIndices.size() != baseShape.size())
     return false;

   // Check (1) and (2) for every mask dim. Take into account that:
   //  * The number of mask sizes will match the rank of the vector to
   //    load/store. This could be lower than the rank of the base.
   //  * Mask sizes could be larger than the corresponding mask shape (hence
   //    `clamp`).
   // TODO: The 2nd item should be rejected by the verifier.
   // NOTE(review): the index is looked up at position `dim`, not at
   // `rankDiff + dim` - confirm this is intended when rankDiff != 0.
   int64_t rankDiff = baseShape.size() - cstMaskSizes.size();
   for (size_t dim = 0, numMaskDims = cstMaskSizes.size(); dim < numMaskDims;
        ++dim) {
     const int64_t baseDimSize = baseShape[rankDiff + dim];
     // (1) The mask (i.e. vector) dim must fit into the base dim.
     if (maskShape[dim] > baseDimSize)
       return false;
     // (2) The masked region, offset by the index, must fit into the base dim.
     const int64_t effectiveMaskSize =
         std::clamp(cstMaskSizes[dim], int64_t(0), maskShape[dim]);
     if (baseDimSize < effectiveMaskSize + cstIndices[dim])
       return false;
   }

   return true;
 }

 /// Convenience overload that assembles the vector type to read from its
 /// parts: `inputVectorSizes` gives the shape, `inputScalableVecDims` the
 /// scalable flags, and the element type is taken from `source`. All other
 /// arguments are forwarded unchanged to the `VectorType` overload.
 Value vector::createReadOrMaskedRead(OpBuilder &builder, Location loc,
                                      Value source,
                                      ArrayRef<int64_t> inputVectorSizes,
                                      std::optional<Value> padValue,
                                      bool useInBoundsInsteadOfMasking,
                                      ArrayRef<bool> inputScalableVecDims) {
   Type elementType = cast<ShapedType>(source.getType()).getElementType();
   VectorType vecToReadTy =
       VectorType::get(inputVectorSizes, elementType, inputScalableVecDims);

   return createReadOrMaskedRead(builder, loc, source, vecToReadTy, padValue,
                                 useInBoundsInsteadOfMasking);
 }

 /// Creates a vector.transfer_read that reads a vector of type `vecToReadTy`
 /// from `source`, starting at index 0 in every dimension, and returns the
 /// value read.
 ///
 /// Requirements (checked with assertions):
 ///   * the shape of `vecToReadTy` contains no dynamic size;
 ///   * `source` and `vecToReadTy` have the same rank;
 ///   * if `padValue` is provided, its type is the element type of `source`.
 ///
 /// If `useInBoundsInsteadOfMasking` is set, no mask is created. Instead, a
 /// dimension is marked in-bounds only when the corresponding source dimension
 /// is static and equal to the vector dimension, and the plain read is
 /// returned.
 ///
 /// Otherwise all dimensions are marked in-bounds and the read is wrapped in a
 /// mask whose sizes are the sizes of `source`, unless that mask is found to
 /// be trivially "all true" (see `isMaskTriviallyFoldable`), in which case the
 /// plain read is returned.
 Value vector::createReadOrMaskedRead(OpBuilder &builder, Location loc,
                                      Value source,
                                      const VectorType &vecToReadTy,
                                      std::optional<Value> padValue,
                                      bool useInBoundsInsteadOfMasking) {
   // Check the vector *sizes*. Searching the scalable flags (an array of
   // bools) for kDynamic could never fire.
   assert(!llvm::is_contained(vecToReadTy.getShape(), ShapedType::kDynamic) &&
          "invalid input vector sizes");
   auto sourceShapedType = cast<ShapedType>(source.getType());
   auto sourceShape = sourceShapedType.getShape();

   int64_t vecToReadRank = vecToReadTy.getRank();
   auto vecToReadShape = vecToReadTy.getShape();

   assert(sourceShape.size() == static_cast<size_t>(vecToReadRank) &&
          "expected same ranks.");
   assert((!padValue.has_value() ||
           padValue.value().getType() == sourceShapedType.getElementType()) &&
          "expected same pad element type to match source element type");

   auto zero = arith::ConstantIndexOp::create(builder, loc, 0);
   SmallVector<bool> inBoundsVal(vecToReadRank, true);

   if (useInBoundsInsteadOfMasking) {
     // Update the inBounds attribute.
     // FIXME: This computation is too weak - it ignores the read indices.
     for (int64_t i = 0; i < vecToReadRank; ++i)
       inBoundsVal[i] = (sourceShape[i] == vecToReadShape[i]) &&
                        ShapedType::isStatic(sourceShape[i]);
   }
   // The read always starts at the origin of `source`.
   SmallVector<Value> indices(vecToReadRank, zero);
   auto transferReadOp =
       vector::TransferReadOp::create(builder, loc,
                                      /*vectorType=*/vecToReadTy,
                                      /*source=*/source,
                                      /*indices=*/indices,
                                      /*padding=*/padValue,
                                      /*inBounds=*/inBoundsVal);

   // Masking was not requested: the in_bounds attribute is all there is.
   if (useInBoundsInsteadOfMasking)
     return transferReadOp;

   // The mask sizes are the (possibly dynamic) sizes of the source.
   SmallVector<OpFoldResult> mixedSourceDims =
       isa<MemRefType>(source.getType())
           ? memref::getMixedSizes(builder, loc, source)
           : tensor::getMixedSizes(builder, loc, source);

   // Skip the mask altogether when it would be trivially "all true".
   if (isMaskTriviallyFoldable(mixedSourceDims, indices, sourceShape,
                               vecToReadShape))
     return transferReadOp;

   // The mask type has the shape (and scalability) of the vector being read,
   // with i1 elements.
   auto maskType = vecToReadTy.cloneWith(/*shape=*/{}, builder.getI1Type());
   Value mask =
       vector::CreateMaskOp::create(builder, loc, maskType, mixedSourceDims);
   return mlir::vector::maskOperation(builder, transferReadOp, mask)
       ->getResult(0);
 }

 /// Creates a vector.transfer_write of `vecToStore` into `dest` and, unless
 /// masking is disabled or unnecessary, wraps it in a mask.
 ///
 /// `writeIndices` gives the position to write to; it must either be empty, in
 /// which case all indices default to 0, or contain one index per dimension of
 /// `dest` (checked with an assertion).
 ///
 /// If `useInBoundsInsteadOfMasking` is set, no mask is created. Instead, a
 /// vector dimension is marked in-bounds only when the corresponding trailing
 /// dimension of `dest` is static and at least as large as the vector
 /// dimension, and the plain write is returned.
 ///
 /// Otherwise all dimensions are marked in-bounds and the plain write is
 /// returned when the vector shape equals the trailing dimensions of the
 /// destination shape or when the mask is found to be trivially "all true"
 /// (see `isMaskTriviallyFoldable`). In all remaining cases the write is
 /// masked and the result of `maskOperation` is returned.
 Operation *vector::createWriteOrMaskedWrite(OpBuilder &builder, Location loc,
                                             Value vecToStore, Value dest,
                                             SmallVector<Value> writeIndices,
                                             bool useInBoundsInsteadOfMasking) {

   ShapedType destType = cast<ShapedType>(dest.getType());
   int64_t destRank = destType.getRank();
   auto destShape = destType.getShape();

   VectorType vecToStoreType = cast<VectorType>(vecToStore.getType());
   int64_t vecToStoreRank = vecToStoreType.getRank();
   auto vecToStoreShape = vecToStoreType.getShape();

   // Compute the in_bounds attribute. Vector dim `i` is compared against the
   // trailing destination dim `destRank - vecToStoreRank + i`.
   SmallVector<bool> inBoundsVal(vecToStoreRank, true);
   if (useInBoundsInsteadOfMasking) {
     // Update the inBounds attribute.
     // FIXME: This computation is too weak - it ignores the write indices.
     for (unsigned i = 0; i < vecToStoreRank; i++)
       inBoundsVal[i] =
           (destShape[destRank - vecToStoreRank + i] >= vecToStoreShape[i]) &&
           ShapedType::isStatic(destShape[destRank - vecToStoreRank + i]);
   }

   // If missing, initialize the write indices to 0.
   bool useDefaultWriteIdxs = writeIndices.empty();
   assert((useDefaultWriteIdxs ||
           writeIndices.size() == static_cast<size_t>(destRank)) &&
          "Invalid number of write indices!");
   if (useDefaultWriteIdxs) {
     auto zero = arith::ConstantIndexOp::create(builder, loc, 0);
     writeIndices.assign(destRank, zero);
   }

   // Generate the xfer_write Op
   Operation *write = vector::TransferWriteOp::create(builder, loc,
                                                      /*vector=*/vecToStore,
                                                      /*dest=*/dest,
                                                      /*indices=*/writeIndices,
                                                      /*inBounds=*/inBoundsVal);

   // If masking is disabled, exit.
   if (useInBoundsInsteadOfMasking)
     return write;

   // Check if masking is needed. If not, exit.
   // NOTE(review): this shape comparison does not look at the write indices -
   // confirm that equal shapes with non-zero indices cannot occur here.
   if (llvm::equal(vecToStoreShape, destShape.take_back(vecToStoreRank)))
     return write;

   // Compute the mask and mask the write Op. The mask has the shape and
   // scalable dims of the vector to store, with i1 elements.
   auto writeMaskType = VectorType::get(vecToStoreShape, builder.getI1Type(),
                                        vecToStoreType.getScalableDims());

   SmallVector<OpFoldResult> destSizes =
       isa<MemRefType>(dest.getType())
           ? memref::getMixedSizes(builder, loc, dest)
           : tensor::getMixedSizes(builder, loc, dest);

   // Compute sizes for write-mask
   SmallVector<OpFoldResult> maskSizes;
   if (useDefaultWriteIdxs) {
     // Writing at the origin: the mask sizes are the trailing destination
     // sizes.
     maskSizes = SmallVector<OpFoldResult>(destSizes.end() - vecToStoreRank,
                                           destSizes.end());
   } else {
     // Writing at an offset: each mask size is the trailing destination size
     // minus a write index.
     // NOTE(review): the destination size is taken at `diff + idx` but the
     // write index at `idx` - confirm this is intended when diff != 0.
     size_t diff = destShape.size() - vecToStoreRank;
     for (int64_t idx = 0; idx < vecToStoreRank; idx++) {
       auto value =
           getValueOrCreateConstantIndexOp(builder, loc, destSizes[diff + idx]);
       auto neg =
           builder.createOrFold<arith::SubIOp>(loc, value, writeIndices[idx]);
       maskSizes.push_back(OpFoldResult(neg));
     }
   }

   // Skip the mask altogether when it would be trivially "all true".
   if (isMaskTriviallyFoldable(maskSizes, writeIndices, destShape,
                               vecToStoreShape))
     return write;

   Value maskForWrite =
       builder.createOrFold<vector::CreateMaskOp>(loc, writeMaskType, maskSizes);
   return mlir::vector::maskOperation(builder, write, maskForWrite);
 }

 /// Checks that `inputVectorSizes` can be used to vectorize (with masking) an
 /// iteration space with static sizes `shape`.
 ///
 /// Succeeds when both arrays have the same length, `inputVectorSizes` has no
 /// dynamic entry, and every non-dynamic entry of `shape` is less than or
 /// equal to the corresponding input vector size. The reason for a failure is
 /// reported through the debug log.
 LogicalResult
 vector::isValidMaskedInputVector(ArrayRef<int64_t> shape,
                                  ArrayRef<int64_t> inputVectorSizes) {
   LDBG() << "Iteration space static sizes:" << llvm::interleaved(shape);

   const size_t numLoops = shape.size();
   if (inputVectorSizes.size() != numLoops) {
     LDBG() << "Input vector sizes don't match the number of loops";
     return failure();
   }
   if (ShapedType::isDynamicShape(inputVectorSizes)) {
     LDBG() << "Input vector sizes can't have dynamic dimensions";
     return failure();
   }

   // Dynamic iteration-space sizes are accepted as-is; static ones must fit
   // into the requested vector size.
   for (size_t dim = 0; dim < numLoops; ++dim) {
     const int64_t staticSize = shape[dim];
     const int64_t inputSize = inputVectorSizes[dim];
     if (ShapedType::isDynamic(staticSize) || staticSize <= inputSize)
       continue;
     LDBG() << "Input vector sizes must be greater than or equal to iteration "
               "space static sizes";
     return failure();
   }
   return success();
 }

 /// Unrolls the leading dimension of a vector of rank 2 or higher into one
 /// vector.extract per element of that dimension and returns the extracted
 /// values. I.e. calling unrollVectorValue([[%v]], rewriter) such that
 ///
 ///   %v : vector<nxaxb...>
 ///
 /// will produce the following IR changes
 ///
 ///   %v0 = vector.extract %v[0] : vector<axbx...> from vector<nxaxb...>
 ///   %v1 = vector.extract %v[1] : vector<axbx...> from vector<nxaxb...>
 ///   ...
 ///   %vnminusone = vector.extract %v[n-1] : vector<axbx...> from ...
 ///
 /// and returns SmallVector<Value> r = {[[%v0]], [[%v1]], ..., [[%vnminusone]]}
 ///
 /// Fails (with a match-failure notification) for vectors of rank lower than
 /// 2 and for vectors whose leading dimension is scalable.
 FailureOr<SmallVector<Value>>
 vector::unrollVectorValue(TypedValue<VectorType> vector,
                           RewriterBase &rewriter) {
   VectorType vecTy = vector.getType();
   Location loc = vector.getLoc();
   if (vecTy.getRank() < 2)
     return rewriter.notifyMatchFailure(loc, "already 1-D");

   // Unrolling doesn't take vscale into account. Pattern is disabled for
   // vectors with leading scalable dim(s).
   const bool leadingDimIsScalable = vecTy.getScalableDims().front();
   if (leadingDimIsScalable)
     return rewriter.notifyMatchFailure(loc, "cannot unroll scalable dim");

   // One extract per position along the leading dimension.
   SmallVector<Value> slices;
   const int64_t numSlices = vecTy.getShape().front();
   for (int64_t pos = 0; pos < numSlices; ++pos) {
     Value slice = vector::ExtractOp::create(rewriter, loc, vector, pos);
     slices.push_back(slice);
   }

   return slices;
 }

 /// Replaces the single-result vector op `op` with a sequence of lower-rank
 /// values, one per element of the leading result dimension.
 ///
 /// Each sub-vector is produced by calling `unrollFn` with the rewriter, the
 /// location of `op`, the result type without its leading dimension, and the
 /// position along the leading dimension. The sub-vectors are inserted one
 /// after another into a value that starts out as poison, and the final value
 /// replaces `op`.
 ///
 /// Fails (with a match-failure notification) when the result has rank lower
 /// than 2 or a scalable leading dimension.
 LogicalResult vector::unrollVectorOp(Operation *op, PatternRewriter &rewriter,
                                      vector::UnrollVectorOpFn unrollFn) {
   assert(op->getNumResults() == 1 && "expected single result");
   Type opResultType = op->getResult(0).getType();
   assert(isa<VectorType>(opResultType) && "expected vector type");
   auto resultTy = cast<VectorType>(opResultType);
   if (resultTy.getRank() < 2)
     return rewriter.notifyMatchFailure(op, "already 1-D");

   // Unrolling doesn't take vscale into account. Pattern is disabled for
   // vectors with leading scalable dim(s).
   const bool leadingDimIsScalable = resultTy.getScalableDims().front();
   if (leadingDimIsScalable)
     return rewriter.notifyMatchFailure(op, "cannot unroll scalable dim");

   Location loc = op->getLoc();
   // Start from poison and fill in one slice of the leading dimension at a
   // time.
   Value assembled = ub::PoisonOp::create(rewriter, loc, resultTy);
   VectorType sliceTy = VectorType::Builder(resultTy).dropDim(0);

   const int64_t numSlices = resultTy.getShape().front();
   for (int64_t pos = 0; pos < numSlices; ++pos) {
     Value slice = unrollFn(rewriter, loc, sliceTy, pos);
     assembled = vector::InsertOp::create(rewriter, loc, slice, assembled, pos);
   }

   rewriter.replaceOp(op, assembled);
   return success();
 }