| //===- VectorUtils.cpp - MLIR Utilities for VectorOps ------------------===// |
| // |
| // Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements utility methods for working with the Vector dialect. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "mlir/Dialect/Vector/Utils/VectorUtils.h" |
| |
| #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h" |
| #include "mlir/Dialect/Affine/IR/AffineOps.h" |
| #include "mlir/Dialect/Arith/IR/Arith.h" |
| #include "mlir/Dialect/Func/IR/FuncOps.h" |
| #include "mlir/Dialect/MemRef/IR/MemRef.h" |
| #include "mlir/Dialect/Tensor/IR/Tensor.h" |
| #include "mlir/Dialect/Utils/IndexingUtils.h" |
| #include "mlir/Dialect/Vector/IR/VectorOps.h" |
| #include "mlir/IR/Builders.h" |
| #include "mlir/IR/IntegerSet.h" |
| #include "mlir/IR/Operation.h" |
| #include "mlir/IR/TypeUtilities.h" |
| #include "mlir/Support/LLVM.h" |
| |
| #include "llvm/ADT/DenseSet.h" |
| #include "llvm/Support/InterleavedRange.h" |
| |
| #define DEBUG_TYPE "vector-utils" |
| |
| #define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ") |
| #define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n") |
| |
| using namespace mlir; |
| |
| /// Helper function that creates a memref::DimOp or tensor::DimOp depending on |
| /// the type of `source`. |
| Value mlir::vector::createOrFoldDimOp(OpBuilder &b, Location loc, Value source, |
| int64_t dim) { |
| if (isa<UnrankedMemRefType, MemRefType>(source.getType())) |
| return b.createOrFold<memref::DimOp>(loc, source, dim); |
| if (isa<UnrankedTensorType, RankedTensorType>(source.getType())) |
| return b.createOrFold<tensor::DimOp>(loc, source, dim); |
| llvm_unreachable("Expected MemRefType or TensorType"); |
| } |
| |
| /// Given the n-D transpose pattern 'transp', return true if 'dim0' and 'dim1' |
| /// should be transposed with each other within the context of their 2D |
| /// transposition slice. |
| /// |
| /// Example 1: dim0 = 0, dim1 = 2, transp = [2, 1, 0] |
| /// Return true: dim0 and dim1 are transposed within the context of their 2D |
| /// transposition slice ([1, 0]). |
| /// |
| /// Example 2: dim0 = 0, dim1 = 1, transp = [2, 1, 0] |
| /// Return true: dim0 and dim1 are transposed within the context of their 2D |
| /// transposition slice ([1, 0]). Paradoxically, note how dim1 (1) is *not* |
| /// transposed within the full context of the transposition. |
| /// |
| /// Example 3: dim0 = 0, dim1 = 1, transp = [2, 0, 1] |
| /// Return false: dim0 and dim1 are *not* transposed within the context of |
| /// their 2D transposition slice ([0, 1]). Paradoxically, note how dim0 (0) |
| /// and dim1 (1) are transposed within the full context of the of the |
| /// transposition. |
| static bool areDimsTransposedIn2DSlice(int64_t dim0, int64_t dim1, |
| ArrayRef<int64_t> transp) { |
| // Perform a linear scan along the dimensions of the transposed pattern. If |
| // dim0 is found first, dim0 and dim1 are not transposed within the context of |
| // their 2D slice. Otherwise, 'dim1' is found first and they are transposed. |
| for (int64_t permDim : transp) { |
| if (permDim == dim0) |
| return false; |
| if (permDim == dim1) |
| return true; |
| } |
| |
| llvm_unreachable("Ill-formed transpose pattern"); |
| } |
| |
| FailureOr<std::pair<int, int>> |
| mlir::vector::isTranspose2DSlice(vector::TransposeOp op) { |
| VectorType srcType = op.getSourceVectorType(); |
| SmallVector<int64_t> srcGtOneDims; |
| for (auto [index, size] : llvm::enumerate(srcType.getShape())) |
| if (size > 1) |
| srcGtOneDims.push_back(index); |
| |
| if (srcGtOneDims.size() != 2) |
| return failure(); |
| |
| // Check whether the two source vector dimensions that are greater than one |
| // must be transposed with each other so that we can apply one of the 2-D |
| // transpose pattens. Otherwise, these patterns are not applicable. |
| if (!areDimsTransposedIn2DSlice(srcGtOneDims[0], srcGtOneDims[1], |
| op.getPermutation())) |
| return failure(); |
| |
| return std::pair<int, int>(srcGtOneDims[0], srcGtOneDims[1]); |
| } |
| |
| /// Constructs a permutation map from memref indices to vector dimension. |
| /// |
| /// The implementation uses the knowledge of the mapping of enclosing loop to |
| /// vector dimension. `enclosingLoopToVectorDim` carries this information as a |
| /// map with: |
| /// - keys representing "vectorized enclosing loops"; |
| /// - values representing the corresponding vector dimension. |
| /// The algorithm traverses "vectorized enclosing loops" and extracts the |
| /// at-most-one MemRef index that is invariant along said loop. This index is |
| /// guaranteed to be at most one by construction: otherwise the MemRef is not |
| /// vectorizable. |
| /// If this invariant index is found, it is added to the permutation_map at the |
| /// proper vector dimension. |
| /// If no index is found to be invariant, 0 is added to the permutation_map and |
| /// corresponds to a vector broadcast along that dimension. |
| /// |
| /// Returns an empty AffineMap if `enclosingLoopToVectorDim` is empty, |
| /// signalling that no permutation map can be constructed given |
| /// `enclosingLoopToVectorDim`. |
| /// |
| /// Examples can be found in the documentation of `makePermutationMap`, in the |
| /// header file. |
| static AffineMap makePermutationMap( |
| ArrayRef<Value> indices, |
| const DenseMap<Operation *, unsigned> &enclosingLoopToVectorDim) { |
| if (enclosingLoopToVectorDim.empty()) |
| return AffineMap(); |
| MLIRContext *context = |
| enclosingLoopToVectorDim.begin()->getFirst()->getContext(); |
| SmallVector<AffineExpr> perm(enclosingLoopToVectorDim.size(), |
| getAffineConstantExpr(0, context)); |
| |
| for (auto kvp : enclosingLoopToVectorDim) { |
| assert(kvp.second < perm.size()); |
| auto invariants = affine::getInvariantAccesses( |
| cast<affine::AffineForOp>(kvp.first).getInductionVar(), indices); |
| unsigned numIndices = indices.size(); |
| unsigned countInvariantIndices = 0; |
| for (unsigned dim = 0; dim < numIndices; ++dim) { |
| if (!invariants.count(indices[dim])) { |
| assert(perm[kvp.second] == getAffineConstantExpr(0, context) && |
| "permutationMap already has an entry along dim"); |
| perm[kvp.second] = getAffineDimExpr(dim, context); |
| } else { |
| ++countInvariantIndices; |
| } |
| } |
| assert((countInvariantIndices == numIndices || |
| countInvariantIndices == numIndices - 1) && |
| "Vectorization prerequisite violated: at most 1 index may be " |
| "invariant wrt a vectorized loop"); |
| (void)countInvariantIndices; |
| } |
| return AffineMap::get(indices.size(), 0, perm, context); |
| } |
| |
| /// Implementation detail that walks up the parents and records the ones with |
| /// the specified type. |
| /// TODO: could also be implemented as a collect parents followed by a |
| /// filter and made available outside this file. |
| template <typename T> |
| static SetVector<Operation *> getParentsOfType(Block *block) { |
| SetVector<Operation *> res; |
| auto *current = block->getParentOp(); |
| while (current) { |
| if ([[maybe_unused]] auto typedParent = dyn_cast<T>(current)) { |
| assert(res.count(current) == 0 && "Already inserted"); |
| res.insert(current); |
| } |
| current = current->getParentOp(); |
| } |
| return res; |
| } |
| |
| /// Returns the enclosing AffineForOp, from closest to farthest. |
| static SetVector<Operation *> getEnclosingforOps(Block *block) { |
| return getParentsOfType<affine::AffineForOp>(block); |
| } |
| |
| AffineMap mlir::makePermutationMap( |
| Block *insertPoint, ArrayRef<Value> indices, |
| const DenseMap<Operation *, unsigned> &loopToVectorDim) { |
| DenseMap<Operation *, unsigned> enclosingLoopToVectorDim; |
| auto enclosingLoops = getEnclosingforOps(insertPoint); |
| for (auto *forInst : enclosingLoops) { |
| auto it = loopToVectorDim.find(forInst); |
| if (it != loopToVectorDim.end()) { |
| enclosingLoopToVectorDim.insert(*it); |
| } |
| } |
| return ::makePermutationMap(indices, enclosingLoopToVectorDim); |
| } |
| |
| AffineMap mlir::makePermutationMap( |
| Operation *op, ArrayRef<Value> indices, |
| const DenseMap<Operation *, unsigned> &loopToVectorDim) { |
| return makePermutationMap(op->getBlock(), indices, loopToVectorDim); |
| } |
| |
| bool matcher::operatesOnSuperVectorsOf(Operation &op, |
| VectorType subVectorType) { |
| // First, extract the vector type and distinguish between: |
| // a. ops that *must* lower a super-vector (i.e. vector.transfer_read, |
| // vector.transfer_write); and |
| // b. ops that *may* lower a super-vector (all other ops). |
| // The ops that *may* lower a super-vector only do so if the super-vector to |
| // sub-vector ratio exists. The ops that *must* lower a super-vector are |
| // explicitly checked for this property. |
| /// TODO: there should be a single function for all ops to do this so we |
| /// do not have to special case. Maybe a trait, or just a method, unclear atm. |
| bool mustDivide = false; |
| (void)mustDivide; |
| VectorType superVectorType; |
| if (auto transfer = dyn_cast<VectorTransferOpInterface>(op)) { |
| superVectorType = transfer.getVectorType(); |
| mustDivide = true; |
| } else if (op.getNumResults() == 0) { |
| if (!isa<func::ReturnOp>(op)) { |
| op.emitError("NYI: assuming only return operations can have 0 " |
| " results at this point"); |
| } |
| return false; |
| } else if (op.getNumResults() == 1) { |
| if (auto v = dyn_cast<VectorType>(op.getResult(0).getType())) { |
| superVectorType = v; |
| } else { |
| // Not a vector type. |
| return false; |
| } |
| } else { |
| // Not a vector.transfer and has more than 1 result, fail hard for now to |
| // wake us up when something changes. |
| op.emitError("NYI: operation has more than 1 result"); |
| return false; |
| } |
| |
| // Get the ratio. |
| auto ratio = |
| computeShapeRatio(superVectorType.getShape(), subVectorType.getShape()); |
| |
| // Sanity check. |
| assert((ratio || !mustDivide) && |
| "vector.transfer operation in which super-vector size is not an" |
| " integer multiple of sub-vector size"); |
| |
| // This catches cases that are not strictly necessary to have multiplicity but |
| // still aren't divisible by the sub-vector shape. |
| // This could be useful information if we wanted to reshape at the level of |
| // the vector type (but we would have to look at the compute and distinguish |
| // between parallel, reduction and possibly other cases. |
| return ratio.has_value(); |
| } |
| |
| bool vector::isContiguousSlice(MemRefType memrefType, VectorType vectorType) { |
| if (vectorType.isScalable()) |
| return false; |
| |
| ArrayRef<int64_t> vectorShape = vectorType.getShape(); |
| auto vecRank = vectorType.getRank(); |
| |
| if (!memrefType.areTrailingDimsContiguous(vecRank)) |
| return false; |
| |
| // Extract the trailing dims and strides of the input memref |
| auto memrefShape = memrefType.getShape().take_back(vecRank); |
| |
| // Compare the dims of `vectorType` against `memrefType` (in reverse). |
| // In the most basic case, all dims will match. |
| auto firstNonMatchingDim = |
| std::mismatch(vectorShape.rbegin(), vectorShape.rend(), |
| memrefShape.rbegin(), memrefShape.rend()); |
| if (firstNonMatchingDim.first == vectorShape.rend()) |
| return true; |
| |
| // One non-matching dim is still fine, however the remaining leading dims of |
| // `vectorType` need to be 1. |
| SmallVector<int64_t> leadingDims(++firstNonMatchingDim.first, |
| vectorShape.rend()); |
| |
| return llvm::all_of(leadingDims, [](auto x) { return x == 1; }); |
| } |
| |
| std::optional<StaticTileOffsetRange> |
| vector::createUnrollIterator(VectorType vType, int64_t targetRank) { |
| if (vType.getRank() <= targetRank) |
| return {}; |
| // Attempt to unroll until targetRank or the first scalable dimension (which |
| // cannot be unrolled). |
| auto shapeToUnroll = vType.getShape().drop_back(targetRank); |
| auto scalableDimsToUnroll = vType.getScalableDims().drop_back(targetRank); |
| auto it = |
| std::find(scalableDimsToUnroll.begin(), scalableDimsToUnroll.end(), true); |
| auto firstScalableDim = it - scalableDimsToUnroll.begin(); |
| if (firstScalableDim == 0) |
| return {}; |
| // All scalable dimensions should be removed now. |
| scalableDimsToUnroll = scalableDimsToUnroll.slice(0, firstScalableDim); |
| assert(!llvm::is_contained(scalableDimsToUnroll, true) && |
| "unexpected leading scalable dimension"); |
| // Create an unroll iterator for leading dimensions. |
| shapeToUnroll = shapeToUnroll.slice(0, firstScalableDim); |
| return StaticTileOffsetRange(shapeToUnroll, /*unrollStep=*/1); |
| } |
| |
| SmallVector<OpFoldResult> vector::getMixedSizesXfer(bool hasTensorSemantics, |
| Operation *xfer, |
| RewriterBase &rewriter) { |
| auto loc = xfer->getLoc(); |
| |
| Value base = TypeSwitch<Operation *, Value>(xfer) |
| .Case<vector::TransferReadOp>( |
| [&](auto readOp) { return readOp.getBase(); }) |
| .Case<vector::TransferWriteOp>( |
| [&](auto writeOp) { return writeOp.getOperand(1); }); |
| |
| SmallVector<OpFoldResult> mixedSourceDims = |
| hasTensorSemantics ? tensor::getMixedSizes(rewriter, loc, base) |
| : memref::getMixedSizes(rewriter, loc, base); |
| return mixedSourceDims; |
| } |
| |
| bool vector::isLinearizableVector(VectorType type) { |
| return (type.getRank() > 1) && (type.getNumScalableDims() <= 1); |
| } |
| |
| Value vector::createReadOrMaskedRead(OpBuilder &builder, Location loc, |
| Value source, |
| ArrayRef<int64_t> inputVectorSizes, |
| Value padValue, |
| bool useInBoundsInsteadOfMasking) { |
| assert(llvm::none_of(inputVectorSizes, |
| [](int64_t s) { return s == ShapedType::kDynamic; }) && |
| "invalid input vector sizes"); |
| auto sourceShapedType = cast<ShapedType>(source.getType()); |
| auto sourceShape = sourceShapedType.getShape(); |
| assert(sourceShape.size() == inputVectorSizes.size() && |
| "expected same ranks."); |
| auto maskType = VectorType::get(inputVectorSizes, builder.getI1Type()); |
| auto vectorType = VectorType::get(inputVectorSizes, padValue.getType()); |
| assert(padValue.getType() == sourceShapedType.getElementType() && |
| "expected same pad element type to match source element type"); |
| int64_t readRank = inputVectorSizes.size(); |
| auto zero = builder.create<arith::ConstantIndexOp>(loc, 0); |
| SmallVector<bool> inBoundsVal(readRank, true); |
| if (useInBoundsInsteadOfMasking) { |
| // Update the inBounds attribute. |
| for (unsigned i = 0; i < readRank; i++) |
| inBoundsVal[i] = (sourceShape[i] == inputVectorSizes[i]) && |
| !ShapedType::isDynamic(sourceShape[i]); |
| } |
| auto transferReadOp = builder.create<vector::TransferReadOp>( |
| loc, |
| /*vectorType=*/vectorType, |
| /*source=*/source, |
| /*indices=*/SmallVector<Value>(readRank, zero), |
| /*padding=*/padValue, |
| /*inBounds=*/inBoundsVal); |
| |
| if (llvm::equal(inputVectorSizes, sourceShape) || useInBoundsInsteadOfMasking) |
| return transferReadOp; |
| SmallVector<OpFoldResult> mixedSourceDims = |
| tensor::getMixedSizes(builder, loc, source); |
| Value mask = |
| builder.create<vector::CreateMaskOp>(loc, maskType, mixedSourceDims); |
| return mlir::vector::maskOperation(builder, transferReadOp, mask) |
| ->getResult(0); |
| } |
| |
| LogicalResult |
| vector::isValidMaskedInputVector(ArrayRef<int64_t> shape, |
| ArrayRef<int64_t> inputVectorSizes) { |
| LDBG("Iteration space static sizes:" << llvm::interleaved(shape)); |
| |
| if (inputVectorSizes.size() != shape.size()) { |
| LDBG("Input vector sizes don't match the number of loops"); |
| return failure(); |
| } |
| if (ShapedType::isDynamicShape(inputVectorSizes)) { |
| LDBG("Input vector sizes can't have dynamic dimensions"); |
| return failure(); |
| } |
| if (!llvm::all_of(llvm::zip(shape, inputVectorSizes), |
| [](std::tuple<int64_t, int64_t> sizePair) { |
| int64_t staticSize = std::get<0>(sizePair); |
| int64_t inputSize = std::get<1>(sizePair); |
| return ShapedType::isDynamic(staticSize) || |
| staticSize <= inputSize; |
| })) { |
| LDBG("Input vector sizes must be greater than or equal to iteration space " |
| "static sizes"); |
| return failure(); |
| } |
| return success(); |
| } |