| //===- Utils.h - Affine dialect utilities -----------------------*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This header file declares a set of utilities for the affine dialect ops. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef MLIR_DIALECT_AFFINE_UTILS_H |
| #define MLIR_DIALECT_AFFINE_UTILS_H |
| |
| #include "mlir/Analysis/AffineAnalysis.h" |
| #include "mlir/IR/AffineExpr.h" |
| #include "mlir/Support/LLVM.h" |
| #include "llvm/ADT/DenseMap.h" |
| #include "llvm/ADT/SmallVector.h" |
| |
| namespace mlir { |
| |
| class AffineForOp; |
| class AffineIfOp; |
| class AffineParallelOp; |
| struct LogicalResult; |
| struct LoopReduction; |
| class Operation; |
| |
| using ReductionLoopMap = DenseMap<Operation *, SmallVector<LoopReduction, 2>>; |
| |
| /// Replaces a parallel affine.for op with a 1-d affine.parallel op. `forOp`'s |
| /// body is taken by the affine.parallel op and the former is erased. |
| /// (mlir::isLoopParallel can be used to detect a parallel affine.for op.) The |
| /// reductions specified in `parallelReductions` are also parallelized. |
| /// Parallelization will fail in the presence of loop iteration arguments that |
| /// are not listed in `parallelReductions`. |
| LogicalResult |
| affineParallelize(AffineForOp forOp, |
| ArrayRef<LoopReduction> parallelReductions = {}); |
| |
| /// Hoists out affine.if/else to as high as possible, i.e., past all invariant |
| /// affine.fors/parallel's. Returns success if any hoisting happened; `folded` is |
| /// set to true if the op was folded or erased. This hoisting could lead to |
| /// significant code expansion in some cases. |
| LogicalResult hoistAffineIfOp(AffineIfOp ifOp, bool *folded = nullptr); |
| |
| /// Holds parameters to perform n-D vectorization on a single loop nest. |
| /// For example, for the following loop nest: |
| /// |
| /// func @vec2d(%in: memref<64x128x512xf32>, %out: memref<64x128x512xf32>) { |
| /// affine.for %i0 = 0 to 64 { |
| /// affine.for %i1 = 0 to 128 { |
| /// affine.for %i2 = 0 to 512 { |
| /// %ld = affine.load %in[%i0, %i1, %i2] : memref<64x128x512xf32> |
| /// affine.store %ld, %out[%i0, %i1, %i2] : memref<64x128x512xf32> |
| /// } |
| /// } |
| /// } |
| /// return |
| /// } |
| /// |
| /// and VectorizationStrategy = 'vectorSizes = {8, 4}', 'loopToVectorDim = |
| /// {{i1->0}, {i2->1}}', SuperVectorizer will generate: |
| /// |
| /// func @vec2d(%arg0: memref<64x128x512xf32>, %arg1: memref<64x128x512xf32>) { |
| /// affine.for %arg2 = 0 to 64 { |
| /// affine.for %arg3 = 0 to 128 step 8 { |
| /// affine.for %arg4 = 0 to 512 step 4 { |
| /// %cst = arith.constant 0.000000e+00 : f32 |
| /// %0 = vector.transfer_read %arg0[%arg2, %arg3, %arg4], %cst : ... |
| /// vector.transfer_write %0, %arg1[%arg2, %arg3, %arg4] : ... |
| /// } |
| /// } |
| /// } |
| /// return |
| /// } |
| // TODO: Hoist to a VectorizationStrategy.cpp when appropriate. |
| struct VectorizationStrategy { |
| /// Vectorization factors to apply to each target vector dimension. |
| /// Each factor will be applied to a different loop. |
| SmallVector<int64_t, 8> vectorSizes; |
| /// Maps each AffineForOp vectorization candidate with its vector dimension. |
| /// The candidate will be vectorized using the vectorization factor in |
| /// 'vectorSizes' for that dimension. |
| DenseMap<Operation *, unsigned> loopToVectorDim; |
| /// Maps loops that implement vectorizable reductions to the corresponding |
| /// reduction descriptors. |
| ReductionLoopMap reductionLoops; |
| }; |
| |
| /// Vectorizes affine loops in 'loops' using the n-D vectorization factors in |
| /// 'vectorSizes'. By default, each vectorization factor is applied |
| /// inner-to-outer to the loops of each loop nest. 'fastestVaryingPattern' can |
| /// be optionally used to provide a different loop vectorization order. |
| /// If `reductionLoops` is not empty, the given reduction loops may be |
| /// vectorized along the reduction dimension. |
| /// TODO: Vectorizing reductions is supported only for 1-D vectorization. |
| void vectorizeAffineLoops( |
| Operation *parentOp, |
| llvm::DenseSet<Operation *, DenseMapInfo<Operation *>> &loops, |
| ArrayRef<int64_t> vectorSizes, ArrayRef<int64_t> fastestVaryingPattern, |
| const ReductionLoopMap &reductionLoops = ReductionLoopMap()); |
| |
| /// External utility to vectorize affine loops from a single loop nest using an |
| /// n-D vectorization strategy (see doc in VectorizationStrategy definition). |
| /// Loops are provided in a 2D vector container. The first dimension represents |
| /// the nesting level relative to the loops to be vectorized. The second |
| /// dimension contains the loops. This means that: |
| /// a) every loop in 'loops[i]' must have a parent loop in 'loops[i-1]', |
| /// b) a loop in 'loops[i]' may or may not have a child loop in 'loops[i+1]'. |
| /// |
| /// For example, for the following loop nest: |
| /// |
| /// func @vec2d(%in0: memref<64x128x512xf32>, %in1: memref<64x128x128xf32>, |
| /// %out0: memref<64x128x512xf32>, |
| /// %out1: memref<64x128x128xf32>) { |
| /// affine.for %i0 = 0 to 64 { |
| /// affine.for %i1 = 0 to 128 { |
| /// affine.for %i2 = 0 to 512 { |
| /// %ld = affine.load %in0[%i0, %i1, %i2] : memref<64x128x512xf32> |
| /// affine.store %ld, %out0[%i0, %i1, %i2] : memref<64x128x512xf32> |
| /// } |
| /// affine.for %i3 = 0 to 128 { |
| /// %ld = affine.load %in1[%i0, %i1, %i3] : memref<64x128x128xf32> |
| /// affine.store %ld, %out1[%i0, %i1, %i3] : memref<64x128x128xf32> |
| /// } |
| /// } |
| /// } |
| /// return |
| /// } |
| /// |
| /// loops = {{%i0}, {%i2, %i3}}, to vectorize the outermost and the two |
| /// innermost loops; |
| /// loops = {{%i1}, {%i2, %i3}}, to vectorize the middle and the two innermost |
| /// loops; |
| /// loops = {{%i2}}, to vectorize only the first innermost loop; |
| /// loops = {{%i3}}, to vectorize only the second innermost loop; |
| /// loops = {{%i1}}, to vectorize only the middle loop. |
| LogicalResult |
| vectorizeAffineLoopNest(std::vector<SmallVector<AffineForOp, 2>> &loops, |
| const VectorizationStrategy &strategy); |
| |
| /// Normalize an affine.parallel op so that lower bounds are 0 and steps are 1. |
| /// As currently implemented, this transformation cannot fail and will return |
| /// early if the op is already in a normalized form. |
| void normalizeAffineParallel(AffineParallelOp op); |
| |
| /// Normalize an affine.for op. If the affine.for op has only a single iteration |
| /// only then it is simply promoted, else it is normalized in the traditional |
| /// way, by converting the lower bound to zero and loop step to one. The upper |
| /// bound is set to the trip count of the loop. For now, original loops must |
| /// have lower bound with a single result only. There is no such restriction on |
| /// upper bounds. |
| void normalizeAffineFor(AffineForOp op); |
| |
| /// Traverse `e` and return an AffineExpr where all occurrences of `dim` have |
| /// been replaced by either: |
| /// - `min` if `positivePath` is true when we reach an occurrence of `dim` |
| /// - `max` if `positivePath` is false when we reach an occurrence of `dim` |
| /// `positivePath` is negated each time we hit a multiplicative or divisive |
| /// binary op with a constant negative coefficient. |
| AffineExpr substWithMin(AffineExpr e, AffineExpr dim, AffineExpr min, |
| AffineExpr max, bool positivePath = true); |
| |
| } // namespace mlir |
| |
| #endif // MLIR_DIALECT_AFFINE_UTILS_H |