| //===- VectorUtils.cpp - MLIR Utilities for VectorOps ------------------===// |
| // |
| // Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements utility methods for working with the Vector dialect. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "mlir/Dialect/Vector/VectorUtils.h" |
| #include "mlir/Analysis/LoopAnalysis.h" |
| #include "mlir/Dialect/Affine/IR/AffineOps.h" |
| #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" |
| #include "mlir/Dialect/MemRef/IR/MemRef.h" |
| #include "mlir/Dialect/StandardOps/IR/Ops.h" |
| #include "mlir/Dialect/Tensor/IR/Tensor.h" |
| #include "mlir/Dialect/Vector/VectorOps.h" |
| #include "mlir/IR/Builders.h" |
| #include "mlir/IR/IntegerSet.h" |
| #include "mlir/IR/Operation.h" |
| #include "mlir/Support/LLVM.h" |
| #include "mlir/Support/MathExtras.h" |
| #include <numeric> |
| |
| #include "llvm/ADT/DenseSet.h" |
| #include "llvm/ADT/SetVector.h" |
| |
| using namespace mlir; |
| |
| /// Helper function that creates a memref::DimOp or tensor::DimOp depending on |
| /// the type of `source`. |
| Value mlir::vector::createOrFoldDimOp(OpBuilder &b, Location loc, Value source, |
| int64_t dim) { |
| if (source.getType().isa<UnrankedMemRefType, MemRefType>()) |
| return b.createOrFold<memref::DimOp>(loc, source, dim); |
| if (source.getType().isa<UnrankedTensorType, RankedTensorType>()) |
| return b.createOrFold<tensor::DimOp>(loc, source, dim); |
| llvm_unreachable("Expected MemRefType or TensorType"); |
| } |
| |
| /// Return the number of elements of basis, `0` if empty. |
| int64_t mlir::computeMaxLinearIndex(ArrayRef<int64_t> basis) { |
| if (basis.empty()) |
| return 0; |
| return std::accumulate(basis.begin(), basis.end(), 1, |
| std::multiplies<int64_t>()); |
| } |
| |
| SmallVector<int64_t, 4> mlir::computeStrides(ArrayRef<int64_t> shape, |
| ArrayRef<int64_t> sizes) { |
| int64_t rank = shape.size(); |
| // Compute the count for each dimension. |
| SmallVector<int64_t, 4> sliceDimCounts(rank); |
| for (int64_t r = 0; r < rank; ++r) |
| sliceDimCounts[r] = ceilDiv(shape[r], sizes[r]); |
| // Use that to compute the slice stride for each dimension. |
| SmallVector<int64_t, 4> sliceStrides(rank); |
| sliceStrides[rank - 1] = 1; |
| for (int64_t r = rank - 2; r >= 0; --r) |
| sliceStrides[r] = sliceStrides[r + 1] * sliceDimCounts[r + 1]; |
| return sliceStrides; |
| } |
| |
| int64_t mlir::linearize(ArrayRef<int64_t> offsets, ArrayRef<int64_t> basis) { |
| assert(offsets.size() == basis.size()); |
| int64_t linearIndex = 0; |
| for (unsigned idx = 0, e = basis.size(); idx < e; ++idx) |
| linearIndex += offsets[idx] * basis[idx]; |
| return linearIndex; |
| } |
| |
| SmallVector<int64_t, 4> mlir::delinearize(ArrayRef<int64_t> sliceStrides, |
| int64_t index) { |
| int64_t rank = sliceStrides.size(); |
| SmallVector<int64_t, 4> vectorOffsets(rank); |
| for (int64_t r = 0; r < rank; ++r) { |
| assert(sliceStrides[r] > 0); |
| vectorOffsets[r] = index / sliceStrides[r]; |
| index %= sliceStrides[r]; |
| } |
| return vectorOffsets; |
| } |
| |
| SmallVector<int64_t, 4> mlir::computeElementOffsetsFromVectorSliceOffsets( |
| ArrayRef<int64_t> sizes, ArrayRef<int64_t> vectorOffsets) { |
| SmallVector<int64_t, 4> result; |
| for (auto it : llvm::zip(vectorOffsets, sizes)) |
| result.push_back(std::get<0>(it) * std::get<1>(it)); |
| return result; |
| } |
| |
| Optional<SmallVector<int64_t, 4>> mlir::shapeRatio(ArrayRef<int64_t> superShape, |
| ArrayRef<int64_t> subShape) { |
| if (superShape.size() < subShape.size()) { |
| return Optional<SmallVector<int64_t, 4>>(); |
| } |
| |
| // Starting from the end, compute the integer divisors. |
| std::vector<int64_t> result; |
| result.reserve(superShape.size()); |
| int64_t superSize = 0, subSize = 0; |
| for (auto it : |
| llvm::zip(llvm::reverse(superShape), llvm::reverse(subShape))) { |
| std::tie(superSize, subSize) = it; |
| assert(superSize > 0 && "superSize must be > 0"); |
| assert(subSize > 0 && "subSize must be > 0"); |
| |
| // If integral division does not occur, return and let the caller decide. |
| if (superSize % subSize != 0) |
| return None; |
| result.push_back(superSize / subSize); |
| } |
| |
| // At this point we computed the ratio (in reverse) for the common |
| // size. Fill with the remaining entries from the super-vector shape (still in |
| // reverse). |
| int commonSize = subShape.size(); |
| std::copy(superShape.rbegin() + commonSize, superShape.rend(), |
| std::back_inserter(result)); |
| |
| assert(result.size() == superShape.size() && |
| "super to sub shape ratio is not of the same size as the super rank"); |
| |
| // Reverse again to get it back in the proper order and return. |
| return SmallVector<int64_t, 4>{result.rbegin(), result.rend()}; |
| } |
| |
| Optional<SmallVector<int64_t, 4>> mlir::shapeRatio(VectorType superVectorType, |
| VectorType subVectorType) { |
| assert(superVectorType.getElementType() == subVectorType.getElementType() && |
| "vector types must be of the same elemental type"); |
| return shapeRatio(superVectorType.getShape(), subVectorType.getShape()); |
| } |
| |
| /// Constructs a permutation map from memref indices to vector dimension. |
| /// |
| /// The implementation uses the knowledge of the mapping of enclosing loop to |
| /// vector dimension. `enclosingLoopToVectorDim` carries this information as a |
| /// map with: |
| /// - keys representing "vectorized enclosing loops"; |
| /// - values representing the corresponding vector dimension. |
| /// The algorithm traverses "vectorized enclosing loops" and extracts the |
| /// at-most-one MemRef index that is invariant along said loop. This index is |
| /// guaranteed to be at most one by construction: otherwise the MemRef is not |
| /// vectorizable. |
| /// If this invariant index is found, it is added to the permutation_map at the |
| /// proper vector dimension. |
| /// If no index is found to be invariant, 0 is added to the permutation_map and |
| /// corresponds to a vector broadcast along that dimension. |
| /// |
| /// Returns an empty AffineMap if `enclosingLoopToVectorDim` is empty, |
| /// signalling that no permutation map can be constructed given |
| /// `enclosingLoopToVectorDim`. |
| /// |
| /// Examples can be found in the documentation of `makePermutationMap`, in the |
| /// header file. |
| static AffineMap makePermutationMap( |
| ArrayRef<Value> indices, |
| const DenseMap<Operation *, unsigned> &enclosingLoopToVectorDim) { |
| if (enclosingLoopToVectorDim.empty()) |
| return AffineMap(); |
| MLIRContext *context = |
| enclosingLoopToVectorDim.begin()->getFirst()->getContext(); |
| SmallVector<AffineExpr, 4> perm(enclosingLoopToVectorDim.size(), |
| getAffineConstantExpr(0, context)); |
| |
| for (auto kvp : enclosingLoopToVectorDim) { |
| assert(kvp.second < perm.size()); |
| auto invariants = getInvariantAccesses( |
| cast<AffineForOp>(kvp.first).getInductionVar(), indices); |
| unsigned numIndices = indices.size(); |
| unsigned countInvariantIndices = 0; |
| for (unsigned dim = 0; dim < numIndices; ++dim) { |
| if (!invariants.count(indices[dim])) { |
| assert(perm[kvp.second] == getAffineConstantExpr(0, context) && |
| "permutationMap already has an entry along dim"); |
| perm[kvp.second] = getAffineDimExpr(dim, context); |
| } else { |
| ++countInvariantIndices; |
| } |
| } |
| assert((countInvariantIndices == numIndices || |
| countInvariantIndices == numIndices - 1) && |
| "Vectorization prerequisite violated: at most 1 index may be " |
| "invariant wrt a vectorized loop"); |
| } |
| return AffineMap::get(indices.size(), 0, perm, context); |
| } |
| |
| /// Implementation detail that walks up the parents and records the ones with |
| /// the specified type. |
| /// TODO: could also be implemented as a collect parents followed by a |
| /// filter and made available outside this file. |
| template <typename T> |
| static SetVector<Operation *> getParentsOfType(Block *block) { |
| SetVector<Operation *> res; |
| auto *current = block->getParentOp(); |
| while (current) { |
| if (auto typedParent = dyn_cast<T>(current)) { |
| assert(res.count(current) == 0 && "Already inserted"); |
| res.insert(current); |
| } |
| current = current->getParentOp(); |
| } |
| return res; |
| } |
| |
| /// Returns the enclosing AffineForOp, from closest to farthest. |
| static SetVector<Operation *> getEnclosingforOps(Block *block) { |
| return getParentsOfType<AffineForOp>(block); |
| } |
| |
| AffineMap mlir::makePermutationMap( |
| Block *insertPoint, ArrayRef<Value> indices, |
| const DenseMap<Operation *, unsigned> &loopToVectorDim) { |
| DenseMap<Operation *, unsigned> enclosingLoopToVectorDim; |
| auto enclosingLoops = getEnclosingforOps(insertPoint); |
| for (auto *forInst : enclosingLoops) { |
| auto it = loopToVectorDim.find(forInst); |
| if (it != loopToVectorDim.end()) { |
| enclosingLoopToVectorDim.insert(*it); |
| } |
| } |
| return ::makePermutationMap(indices, enclosingLoopToVectorDim); |
| } |
| |
| AffineMap mlir::makePermutationMap( |
| Operation *op, ArrayRef<Value> indices, |
| const DenseMap<Operation *, unsigned> &loopToVectorDim) { |
| return makePermutationMap(op->getBlock(), indices, loopToVectorDim); |
| } |
| |
| AffineMap mlir::getTransferMinorIdentityMap(ShapedType shapedType, |
| VectorType vectorType) { |
| int64_t elementVectorRank = 0; |
| VectorType elementVectorType = |
| shapedType.getElementType().dyn_cast<VectorType>(); |
| if (elementVectorType) |
| elementVectorRank += elementVectorType.getRank(); |
| // 0-d transfers are to/from tensor<t>/memref<t> and vector<1xt>. |
| // TODO: replace once we have 0-d vectors. |
| if (shapedType.getRank() == 0 && |
| vectorType.getShape() == ArrayRef<int64_t>{1}) |
| return AffineMap::get( |
| /*numDims=*/0, /*numSymbols=*/0, |
| getAffineConstantExpr(0, shapedType.getContext())); |
| return AffineMap::getMinorIdentityMap( |
| shapedType.getRank(), vectorType.getRank() - elementVectorRank, |
| shapedType.getContext()); |
| } |
| |
| bool matcher::operatesOnSuperVectorsOf(Operation &op, |
| VectorType subVectorType) { |
| // First, extract the vector type and distinguish between: |
| // a. ops that *must* lower a super-vector (i.e. vector.transfer_read, |
| // vector.transfer_write); and |
| // b. ops that *may* lower a super-vector (all other ops). |
| // The ops that *may* lower a super-vector only do so if the super-vector to |
| // sub-vector ratio exists. The ops that *must* lower a super-vector are |
| // explicitly checked for this property. |
| /// TODO: there should be a single function for all ops to do this so we |
| /// do not have to special case. Maybe a trait, or just a method, unclear atm. |
| bool mustDivide = false; |
| (void)mustDivide; |
| VectorType superVectorType; |
| if (auto transfer = dyn_cast<VectorTransferOpInterface>(op)) { |
| superVectorType = transfer.getVectorType(); |
| mustDivide = true; |
| } else if (op.getNumResults() == 0) { |
| if (!isa<ReturnOp>(op)) { |
| op.emitError("NYI: assuming only return operations can have 0 " |
| " results at this point"); |
| } |
| return false; |
| } else if (op.getNumResults() == 1) { |
| if (auto v = op.getResult(0).getType().dyn_cast<VectorType>()) { |
| superVectorType = v; |
| } else { |
| // Not a vector type. |
| return false; |
| } |
| } else { |
| // Not a vector.transfer and has more than 1 result, fail hard for now to |
| // wake us up when something changes. |
| op.emitError("NYI: operation has more than 1 result"); |
| return false; |
| } |
| |
| // Get the ratio. |
| auto ratio = shapeRatio(superVectorType, subVectorType); |
| |
| // Sanity check. |
| assert((ratio.hasValue() || !mustDivide) && |
| "vector.transfer operation in which super-vector size is not an" |
| " integer multiple of sub-vector size"); |
| |
| // This catches cases that are not strictly necessary to have multiplicity but |
| // still aren't divisible by the sub-vector shape. |
| // This could be useful information if we wanted to reshape at the level of |
| // the vector type (but we would have to look at the compute and distinguish |
| // between parallel, reduction and possibly other cases. |
| if (!ratio.hasValue()) { |
| return false; |
| } |
| |
| return true; |
| } |
| |
| bool mlir::isDisjointTransferIndices(VectorTransferOpInterface transferA, |
| VectorTransferOpInterface transferB) { |
| // For simplicity only look at transfer of same type. |
| if (transferA.getVectorType() != transferB.getVectorType()) |
| return false; |
| unsigned rankOffset = transferA.getLeadingShapedRank(); |
| for (unsigned i = 0, e = transferA.indices().size(); i < e; i++) { |
| auto indexA = transferA.indices()[i].getDefiningOp<arith::ConstantOp>(); |
| auto indexB = transferB.indices()[i].getDefiningOp<arith::ConstantOp>(); |
| // If any of the indices are dynamic we cannot prove anything. |
| if (!indexA || !indexB) |
| continue; |
| |
| if (i < rankOffset) { |
| // For leading dimensions, if we can prove that index are different we |
| // know we are accessing disjoint slices. |
| if (indexA.getValue().cast<IntegerAttr>().getInt() != |
| indexB.getValue().cast<IntegerAttr>().getInt()) |
| return true; |
| } else { |
| // For this dimension, we slice a part of the memref we need to make sure |
| // the intervals accessed don't overlap. |
| int64_t distance = |
| std::abs(indexA.getValue().cast<IntegerAttr>().getInt() - |
| indexB.getValue().cast<IntegerAttr>().getInt()); |
| if (distance >= transferA.getVectorType().getDimSize(i - rankOffset)) |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| bool mlir::isDisjointTransferSet(VectorTransferOpInterface transferA, |
| VectorTransferOpInterface transferB) { |
| if (transferA.source() != transferB.source()) |
| return false; |
| return isDisjointTransferIndices(transferA, transferB); |
| } |
| |
| bool mlir::checkSameValueRAW(vector::TransferWriteOp defWrite, |
| vector::TransferReadOp read) { |
| return !defWrite.hasOutOfBoundsDim() && !defWrite.mask() && !read.mask() && |
| defWrite.indices() == read.indices() && |
| defWrite.getVectorType() == read.getVectorType() && |
| defWrite.permutation_map() == read.permutation_map(); |
| } |
| |
| bool mlir::checkSameValueWAW(vector::TransferWriteOp write, |
| vector::TransferWriteOp priorWrite) { |
| return priorWrite.indices() == write.indices() && |
| priorWrite.mask() == write.mask() && |
| priorWrite.getVectorType() == write.getVectorType() && |
| priorWrite.permutation_map() == write.permutation_map(); |
| } |
| |
| SmallVector<int64_t, 4> mlir::getI64SubArray(ArrayAttr arrayAttr, |
| unsigned dropFront, |
| unsigned dropBack) { |
| assert(arrayAttr.size() > dropFront + dropBack && "Out of bounds"); |
| auto range = arrayAttr.getAsRange<IntegerAttr>(); |
| SmallVector<int64_t, 4> res; |
| res.reserve(arrayAttr.size() - dropFront - dropBack); |
| for (auto it = range.begin() + dropFront, eit = range.end() - dropBack; |
| it != eit; ++it) |
| res.push_back((*it).getValue().getSExtValue()); |
| return res; |
| } |