| //===- Utils.h - General analysis utilities ---------------------*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This header file defines prototypes for various transformation utilities for |
| // memref's and non-loop IR structures. These are not passes by themselves but |
| // are used either by passes, optimization sequences, or in turn by other |
| // transformation utilities. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef MLIR_ANALYSIS_UTILS_H |
| #define MLIR_ANALYSIS_UTILS_H |
| |
| #include "mlir/Analysis/AffineStructures.h" |
| #include "mlir/IR/AffineMap.h" |
| #include "mlir/IR/Block.h" |
| #include "mlir/IR/Location.h" |
| #include "mlir/Support/LLVM.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include <memory> |
| |
| namespace mlir { |
| |
| class AffineForOp; |
| class Block; |
| class Location; |
| struct MemRefAccess; |
| class Operation; |
| class Value; |
| |
| /// Populates 'loops' with IVs of the loops surrounding 'op' ordered from |
| /// the outermost 'affine.for' operation to the innermost one. |
| // TODO: handle 'affine.if' ops. |
| void getLoopIVs(Operation &op, SmallVectorImpl<AffineForOp> *loops); |
| |
| /// Populates 'ops' with IVs of the loops surrounding `op`, along with |
| /// `affine.if` operations interleaved between these loops, ordered from the |
| /// outermost `affine.for` or `affine.if` operation to the innermost one. |
| void getEnclosingAffineForAndIfOps(Operation &op, |
| SmallVectorImpl<Operation *> *ops); |
| |
| /// Returns the nesting depth of this operation, i.e., the number of loops |
| /// surrounding this operation. |
| unsigned getNestingDepth(Operation *op); |
| |
| /// Returns whether a loop is a parallel loop and contains a reduction loop. |
| bool isLoopParallelAndContainsReduction(AffineForOp forOp); |
| |
| /// Returns in 'sequentialLoops' all sequential loops in loop nest rooted |
| /// at 'forOp'. |
| void getSequentialLoops(AffineForOp forOp, |
| llvm::SmallDenseSet<Value, 8> *sequentialLoops); |
| |
| /// Enumerates different result statuses of slice computation by |
| /// `computeSliceUnion` |
| // TODO: Identify and add different kinds of failures during slice computation. |
| struct SliceComputationResult { |
| enum ResultEnum { |
| Success, |
| IncorrectSliceFailure, // Slice is computed, but it is incorrect. |
| GenericFailure, // Unable to compute src loop computation slice. |
| } value; |
| SliceComputationResult(ResultEnum v) : value(v) {} |
| }; |
| |
| /// ComputationSliceState aggregates loop IVs, loop bound AffineMaps and their |
| /// associated operands for a set of loops within a loop nest (typically the |
| /// set of loops surrounding a store operation). Loop bound AffineMaps which |
| /// are non-null represent slices of that loop's iteration space. |
| struct ComputationSliceState { |
| // List of sliced loop IVs (ordered from outermost to innermost). |
| // EX: 'ivs[i]' has lower bound 'lbs[i]' and upper bound 'ubs[i]'. |
| SmallVector<Value, 4> ivs; |
| // List of lower bound AffineMaps. |
| SmallVector<AffineMap, 4> lbs; |
| // List of upper bound AffineMaps. |
| SmallVector<AffineMap, 4> ubs; |
| // List of lower bound operands (lbOperands[i] are used by 'lbs[i]'). |
| std::vector<SmallVector<Value, 4>> lbOperands; |
| // List of upper bound operands (ubOperands[i] are used by 'ubs[i]'). |
| std::vector<SmallVector<Value, 4>> ubOperands; |
| // Slice loop nest insertion point in target loop nest. |
| Block::iterator insertPoint; |
| // Adds to 'cst' with constraints which represent the slice bounds on 'ivs' |
| // in 'this'. Specifically, the values in 'ivs' are added to 'cst' as dim |
| // identifiers and the values in 'lb/ubOperands' are added as symbols. |
| // Constraints are added for all loop IV bounds (dim or symbol), and |
| // constraints are added for slice bounds in 'lbs'/'ubs'. |
| // Returns failure if we cannot add loop bounds because of unsupported cases. |
| LogicalResult getAsConstraints(FlatAffineValueConstraints *cst); |
| |
| /// Adds to 'cst' constraints which represent the original loop bounds on |
| /// 'ivs' in 'this'. This corresponds to the original domain of the loop nest |
| /// from which the slice is being computed. Returns failure if we cannot add |
| /// loop bounds because of unsupported cases. |
| LogicalResult getSourceAsConstraints(FlatAffineValueConstraints &cst); |
| |
| // Clears all bounds and operands in slice state. |
| void clearBounds(); |
| |
| /// Returns true if the computation slice is empty. |
| bool isEmpty() const { return ivs.empty(); } |
| |
| /// Returns true if the computation slice encloses all the iterations of the |
| /// sliced loop nest. Returns false if it does not. Returns llvm::None if it |
| /// cannot determine if the slice is maximal or not. |
| // TODO: Cache 'isMaximal' so that we don't recompute it when the slice |
| // information hasn't changed. |
| Optional<bool> isMaximal() const; |
| |
| /// Checks the validity of the slice computed. This is done using the |
| /// following steps: |
| /// 1. Get the new domain of the slice that would be created if fusion |
| /// succeeds. This domain gets constructed with source loop IVS and |
| /// destination loop IVS as dimensions. |
| /// 2. Project out the dimensions of the destination loop from the domain |
| /// above calculated in step(1) to express it purely in terms of the source |
| /// loop IVs. |
| /// 3. Calculate a set difference between the iterations of the new domain and |
| /// the original domain of the source loop. |
| /// If this difference is empty, the slice is declared to be valid. Otherwise, |
| /// return false as it implies that the effective fusion results in at least |
| /// one iteration of the slice that was not originally in the source's domain. |
| /// If the validity cannot be determined, returns llvm:None. |
| Optional<bool> isSliceValid(); |
| |
| void dump() const; |
| |
| private: |
| /// Fast check to determine if the computation slice is maximal. Returns true |
| /// if each slice dimension maps to an existing dst dimension and both the src |
| /// and the dst loops for those dimensions have the same bounds. Returns false |
| /// if both the src and the dst loops don't have the same bounds. Returns |
| /// llvm::None if none of the above can be proven. |
| Optional<bool> isSliceMaximalFastCheck() const; |
| }; |
| |
| /// Computes the computation slice loop bounds for one loop nest as affine maps |
| /// of the other loop nest's IVs and symbols, using 'dependenceConstraints' |
| /// computed between 'depSourceAccess' and 'depSinkAccess'. |
| /// If 'isBackwardSlice' is true, a backwards slice is computed in which the |
| /// slice bounds of loop nest surrounding 'depSourceAccess' are computed in |
| /// terms of loop IVs and symbols of the loop nest surrounding 'depSinkAccess' |
| /// at 'loopDepth'. |
| /// If 'isBackwardSlice' is false, a forward slice is computed in which the |
| /// slice bounds of loop nest surrounding 'depSinkAccess' are computed in terms |
| /// of loop IVs and symbols of the loop nest surrounding 'depSourceAccess' at |
| /// 'loopDepth'. |
| /// The slice loop bounds and associated operands are returned in 'sliceState'. |
| // |
| // Backward slice example: |
| // |
| // affine.for %i0 = 0 to 10 { |
| // affine.store %cst, %0[%i0] : memref<100xf32> // 'depSourceAccess' |
| // } |
| // affine.for %i1 = 0 to 10 { |
| // %v = affine.load %0[%i1] : memref<100xf32> // 'depSinkAccess' |
| // } |
| // |
| // // Backward computation slice of loop nest '%i0'. |
| // affine.for %i0 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 1)(%i1) { |
| // affine.store %cst, %0[%i0] : memref<100xf32> // 'depSourceAccess' |
| // } |
| // |
| // Forward slice example: |
| // |
| // affine.for %i0 = 0 to 10 { |
| // affine.store %cst, %0[%i0] : memref<100xf32> // 'depSourceAccess' |
| // } |
| // affine.for %i1 = 0 to 10 { |
| // %v = affine.load %0[%i1] : memref<100xf32> // 'depSinkAccess' |
| // } |
| // |
| // // Forward computation slice of loop nest '%i1'. |
| // affine.for %i1 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 1)(%i0) { |
| // %v = affine.load %0[%i1] : memref<100xf32> // 'depSinkAccess' |
| // } |
| // |
| void getComputationSliceState(Operation *depSourceOp, Operation *depSinkOp, |
| FlatAffineValueConstraints *dependenceConstraints, |
| unsigned loopDepth, bool isBackwardSlice, |
| ComputationSliceState *sliceState); |
| |
| /// Return the number of iterations for the `slicetripCountMap` provided. |
| uint64_t getSliceIterationCount( |
| const llvm::SmallDenseMap<Operation *, uint64_t, 8> &sliceTripCountMap); |
| |
| /// Builds a map 'tripCountMap' from AffineForOp to constant trip count for |
| /// loop nest surrounding represented by slice loop bounds in 'slice'. Returns |
| /// true on success, false otherwise (if a non-constant trip count was |
| /// encountered). |
| bool buildSliceTripCountMap( |
| const ComputationSliceState &slice, |
| llvm::SmallDenseMap<Operation *, uint64_t, 8> *tripCountMap); |
| |
| /// Computes in 'sliceUnion' the union of all slice bounds computed at |
| /// 'loopDepth' between all dependent pairs of ops in 'opsA' and 'opsB', and |
| /// then verifies if it is valid. The parameter 'numCommonLoops' is the number |
| /// of loops common to the operations in 'opsA' and 'opsB'. If 'isBackwardSlice' |
| /// is true, computes slice bounds for loop nest surrounding ops in 'opsA', as a |
| /// function of IVs and symbols of loop nest surrounding ops in 'opsB' at |
| /// 'loopDepth'. If 'isBackwardSlice' is false, computes slice bounds for loop |
| /// nest surrounding ops in 'opsB', as a function of IVs and symbols of loop |
| /// nest surrounding ops in 'opsA' at 'loopDepth'. Returns |
| /// 'SliceComputationResult::Success' if union was computed correctly, an |
| /// appropriate 'failure' otherwise. |
| // TODO: Change this API to take 'forOpA'/'forOpB'. |
| SliceComputationResult |
| computeSliceUnion(ArrayRef<Operation *> opsA, ArrayRef<Operation *> opsB, |
| unsigned loopDepth, unsigned numCommonLoops, |
| bool isBackwardSlice, ComputationSliceState *sliceUnion); |
| |
| /// Creates a clone of the computation contained in the loop nest surrounding |
| /// 'srcOpInst', slices the iteration space of src loop based on slice bounds |
| /// in 'sliceState', and inserts the computation slice at the beginning of the |
| /// operation block of the loop at 'dstLoopDepth' in the loop nest surrounding |
| /// 'dstOpInst'. Returns the top-level loop of the computation slice on |
| /// success, returns nullptr otherwise. |
| // Loop depth is a crucial optimization choice that determines where to |
| // materialize the results of the backward slice - presenting a trade-off b/w |
| // storage and redundant computation in several cases. |
| // TODO: Support computation slices with common surrounding loops. |
| AffineForOp insertBackwardComputationSlice(Operation *srcOpInst, |
| Operation *dstOpInst, |
| unsigned dstLoopDepth, |
| ComputationSliceState *sliceState); |
| |
| /// A region of a memref's data space; this is typically constructed by |
| /// analyzing load/store op's on this memref and the index space of loops |
| /// surrounding such op's. |
| // For example, the memref region for a load operation at loop depth = 1: |
| // |
| // affine.for %i = 0 to 32 { |
| // affine.for %ii = %i to (d0) -> (d0 + 8) (%i) { |
| // affine.load %A[%ii] |
| // } |
| // } |
| // |
| // Region: {memref = %A, write = false, {%i <= m0 <= %i + 7} } |
| // The last field is a 2-d FlatAffineValueConstraints symbolic in %i. |
| // |
| struct MemRefRegion { |
| explicit MemRefRegion(Location loc) : loc(loc) {} |
| |
| /// Computes the memory region accessed by this memref with the region |
| /// represented as constraints symbolic/parametric in 'loopDepth' loops |
| /// surrounding opInst. The computed region's 'cst' field has exactly as many |
| /// dimensional identifiers as the rank of the memref, and *potentially* |
| /// additional symbolic identifiers which could include any of the loop IVs |
| /// surrounding opInst up until 'loopDepth' and another additional Function |
| /// symbols involved with the access (for eg., those appear in affine.apply's, |
| /// loop bounds, etc.). If 'sliceState' is non-null, operands from |
| /// 'sliceState' are added as symbols, and the following constraints are added |
| /// to the system: |
| /// *) Inequality constraints which represent loop bounds for 'sliceState' |
| /// operands which are loop IVS (these represent the destination loop IVs |
| /// of the slice, and are added as symbols to MemRefRegion's constraint |
| /// system). |
| /// *) Inequality constraints for the slice bounds in 'sliceState', which |
| /// represent the bounds on the loop IVs in this constraint system w.r.t |
| /// to slice operands (which correspond to symbols). |
| /// If 'addMemRefDimBounds' is true, constant upper/lower bounds |
| /// [0, memref.getDimSize(i)) are added for each MemRef dimension 'i'. |
| /// |
| /// For example, the memref region for this operation at loopDepth = 1 will |
| /// be: |
| /// |
| /// affine.for %i = 0 to 32 { |
| /// affine.for %ii = %i to (d0) -> (d0 + 8) (%i) { |
| /// load %A[%ii] |
| /// } |
| /// } |
| /// |
| /// {memref = %A, write = false, {%i <= m0 <= %i + 7} } |
| /// The last field is a 2-d FlatAffineValueConstraints symbolic in %i. |
| /// |
| LogicalResult compute(Operation *op, unsigned loopDepth, |
| const ComputationSliceState *sliceState = nullptr, |
| bool addMemRefDimBounds = true); |
| |
| FlatAffineValueConstraints *getConstraints() { return &cst; } |
| const FlatAffineValueConstraints *getConstraints() const { return &cst; } |
| bool isWrite() const { return write; } |
| void setWrite(bool flag) { write = flag; } |
| |
| /// Returns a constant upper bound on the number of elements in this region if |
| /// bounded by a known constant (always possible for static shapes), None |
| /// otherwise. Note that the symbols of the region are treated specially, |
| /// i.e., the returned bounding constant holds for *any given* value of the |
| /// symbol identifiers. The 'shape' vector is set to the corresponding |
| /// dimension-wise bounds major to minor. The number of elements and all the |
| /// dimension-wise bounds are guaranteed to be non-negative. We use int64_t |
| /// instead of uint64_t since index types can be at most int64_t. `lbs` are |
| /// set to the lower bounds for each of the rank dimensions, and lbDivisors |
| /// contains the corresponding denominators for floorDivs. |
| Optional<int64_t> getConstantBoundingSizeAndShape( |
| SmallVectorImpl<int64_t> *shape = nullptr, |
| std::vector<SmallVector<int64_t, 4>> *lbs = nullptr, |
| SmallVectorImpl<int64_t> *lbDivisors = nullptr) const; |
| |
| /// Gets the lower and upper bound map for the dimensional identifier at |
| /// `pos`. |
| void getLowerAndUpperBound(unsigned pos, AffineMap &lbMap, |
| AffineMap &ubMap) const; |
| |
| /// A wrapper around FlatAffineValueConstraints::getConstantBoundOnDimSize(). |
| /// 'pos' corresponds to the position of the memref shape's dimension (major |
| /// to minor) which matches 1:1 with the dimensional identifier positions in |
| /// 'cst'. |
| Optional<int64_t> |
| getConstantBoundOnDimSize(unsigned pos, |
| SmallVectorImpl<int64_t> *lb = nullptr, |
| int64_t *lbFloorDivisor = nullptr) const { |
| assert(pos < getRank() && "invalid position"); |
| return cst.getConstantBoundOnDimSize(pos, lb); |
| } |
| |
| /// Returns the size of this MemRefRegion in bytes. |
| Optional<int64_t> getRegionSize(); |
| |
| // Wrapper around FlatAffineValueConstraints::unionBoundingBox. |
| LogicalResult unionBoundingBox(const MemRefRegion &other); |
| |
| /// Returns the rank of the memref that this region corresponds to. |
| unsigned getRank() const; |
| |
| /// Memref that this region corresponds to. |
| Value memref; |
| |
| /// Read or write. |
| bool write; |
| |
| /// If there is more than one load/store op associated with the region, the |
| /// location information would correspond to one of those op's. |
| Location loc; |
| |
| /// Region (data space) of the memref accessed. This set will thus have at |
| /// least as many dimensional identifiers as the shape dimensionality of the |
| /// memref, and these are the leading dimensions of the set appearing in that |
| /// order (major to minor / outermost to innermost). There may be additional |
| /// identifiers since getMemRefRegion() is called with a specific loop depth, |
| /// and thus the region is symbolic in the outer surrounding loops at that |
| /// depth. |
| // TODO: Replace this to exploit HyperRectangularSet. |
| FlatAffineValueConstraints cst; |
| }; |
| |
| /// Returns the size of memref data in bytes if it's statically shaped, None |
| /// otherwise. |
| Optional<uint64_t> getMemRefSizeInBytes(MemRefType memRefType); |
| |
| /// Checks a load or store op for an out of bound access; returns failure if the |
| /// access is out of bounds along any of the dimensions, success otherwise. |
| /// Emits a diagnostic error (with location information) if emitError is true. |
| template <typename LoadOrStoreOpPointer> |
| LogicalResult boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp, |
| bool emitError = true); |
| |
| /// Returns the number of surrounding loops common to both A and B. |
| unsigned getNumCommonSurroundingLoops(Operation &A, Operation &B); |
| |
| /// Gets the memory footprint of all data touched in the specified memory space |
| /// in bytes; if the memory space is unspecified, considers all memory spaces. |
| Optional<int64_t> getMemoryFootprintBytes(AffineForOp forOp, |
| int memorySpace = -1); |
| |
| /// Simplify the integer set by simplifying the underlying affine expressions by |
| /// flattening and some simple inference. Also, drop any duplicate constraints. |
| /// Returns the simplified integer set. This method runs in time linear in the |
| /// number of constraints. |
| IntegerSet simplifyIntegerSet(IntegerSet set); |
| |
| /// Returns the innermost common loop depth for the set of operations in 'ops'. |
| unsigned getInnermostCommonLoopDepth( |
| ArrayRef<Operation *> ops, |
| SmallVectorImpl<AffineForOp> *surroundingLoops = nullptr); |
| |
| } // end namespace mlir |
| |
| #endif // MLIR_ANALYSIS_UTILS_H |