| //===- Utils.cpp ---- Utilities for affine dialect transformation ---------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements miscellaneous transformation utilities for the Affine |
| // dialect. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "mlir/Dialect/Affine/Utils.h" |
| #include "mlir/Analysis/Utils.h" |
| #include "mlir/Dialect/Affine/IR/AffineOps.h" |
| #include "mlir/Dialect/Affine/IR/AffineValueMap.h" |
| #include "mlir/IR/BlockAndValueMapping.h" |
| #include "mlir/IR/IntegerSet.h" |
| #include "mlir/Transforms/GreedyPatternRewriteDriver.h" |
| #include "mlir/Transforms/LoopUtils.h" |
| |
| using namespace mlir; |
| |
| /// Promotes the `then` or the `else` block of `ifOp` (depending on whether |
| /// `elseBlock` is false or true) into `ifOp`'s containing block, and discards |
| /// the rest of the op. |
| static void promoteIfBlock(AffineIfOp ifOp, bool elseBlock) { |
| if (elseBlock) |
| assert(ifOp.hasElse() && "else block expected"); |
| |
| Block *destBlock = ifOp->getBlock(); |
| Block *srcBlock = elseBlock ? ifOp.getElseBlock() : ifOp.getThenBlock(); |
| destBlock->getOperations().splice( |
| Block::iterator(ifOp), srcBlock->getOperations(), srcBlock->begin(), |
| std::prev(srcBlock->end())); |
| ifOp.erase(); |
| } |
| |
| /// Returns the outermost affine.for/parallel op that the `ifOp` is invariant |
| /// on. The `ifOp` could be hoisted and placed right before such an operation. |
| /// This method assumes that the ifOp has been canonicalized (to be correct and |
| /// effective). |
| static Operation *getOutermostInvariantForOp(AffineIfOp ifOp) { |
| // Walk up the parents past all for op that this conditional is invariant on. |
| auto ifOperands = ifOp.getOperands(); |
| auto *res = ifOp.getOperation(); |
| while (!isa<FuncOp>(res->getParentOp())) { |
| auto *parentOp = res->getParentOp(); |
| if (auto forOp = dyn_cast<AffineForOp>(parentOp)) { |
| if (llvm::is_contained(ifOperands, forOp.getInductionVar())) |
| break; |
| } else if (auto parallelOp = dyn_cast<AffineParallelOp>(parentOp)) { |
| for (auto iv : parallelOp.getIVs()) |
| if (llvm::is_contained(ifOperands, iv)) |
| break; |
| } else if (!isa<AffineIfOp>(parentOp)) { |
| // Won't walk up past anything other than affine.for/if ops. |
| break; |
| } |
| // You can always hoist up past any affine.if ops. |
| res = parentOp; |
| } |
| return res; |
| } |
| |
/// A helper for the mechanics of mlir::hoistAffineIfOp. Hoists `ifOp` just over
/// `hoistOverOp`. Returns the new hoisted op if any hoisting happened,
/// otherwise the same `ifOp`. The hoisted 'if' gets both a 'then' version and
/// an 'else' version of the subtree rooted at `hoistOverOp`, with the original
/// conditional resolved one way in each copy.
static AffineIfOp hoistAffineIfOp(AffineIfOp ifOp, Operation *hoistOverOp) {
  // No hoisting to do.
  if (hoistOverOp == ifOp)
    return ifOp;

  // Create the hoisted 'if' first. Then, clone the op we are hoisting over for
  // the else block. Then drop the else block of the original 'if' in the 'then'
  // branch while promoting its then block, and analogously drop the 'then'
  // block of the original 'if' from the 'else' branch while promoting its else
  // block.
  BlockAndValueMapping operandMap;
  OpBuilder b(hoistOverOp);
  // The hoisted 'if' reuses the original condition set/operands; since the
  // operands are invariant on `hoistOverOp` (checked by the caller), they
  // still dominate the new insertion point.
  auto hoistedIfOp = b.create<AffineIfOp>(ifOp.getLoc(), ifOp.getIntegerSet(),
                                          ifOp.getOperands(),
                                          /*elseBlock=*/true);

  // Create a clone of hoistOverOp to use for the else branch of the hoisted
  // conditional. The else block may get optimized away if empty.
  Operation *hoistOverOpClone = nullptr;
  // We use this unique name to identify/find `ifOp`'s clone in the else
  // version.
  StringAttr idForIfOp = b.getStringAttr("__mlir_if_hoisting");
  operandMap.clear();
  b.setInsertionPointAfter(hoistOverOp);
  // We'll set an attribute to identify this op in a clone of this sub-tree.
  // The attribute must be set *before* cloning so the clone carries it too.
  ifOp->setAttr(idForIfOp, b.getBoolAttr(true));
  hoistOverOpClone = b.clone(*hoistOverOp, operandMap);

  // Promote the 'then' block of the original affine.if in the then version.
  promoteIfBlock(ifOp, /*elseBlock=*/false);

  // Move the then version to the hoisted if op's 'then' block.
  auto *thenBlock = hoistedIfOp.getThenBlock();
  thenBlock->getOperations().splice(thenBlock->begin(),
                                    hoistOverOp->getBlock()->getOperations(),
                                    Block::iterator(hoistOverOp));

  // Find the clone of the original affine.if op in the else version.
  AffineIfOp ifCloneInElse;
  hoistOverOpClone->walk([&](AffineIfOp ifClone) {
    if (!ifClone->getAttr(idForIfOp))
      return WalkResult::advance();
    ifCloneInElse = ifClone;
    return WalkResult::interrupt();
  });
  assert(ifCloneInElse && "if op clone should exist");
  // For the else block, promote the else block of the original 'if' if it had
  // one; otherwise, the op itself is to be erased.
  if (!ifCloneInElse.hasElse())
    ifCloneInElse.erase();
  else
    promoteIfBlock(ifCloneInElse, /*elseBlock=*/true);

  // Move the else version into the else block of the hoisted if op.
  auto *elseBlock = hoistedIfOp.getElseBlock();
  elseBlock->getOperations().splice(
      elseBlock->begin(), hoistOverOpClone->getBlock()->getOperations(),
      Block::iterator(hoistOverOpClone));

  return hoistedIfOp;
}
| |
/// Replaces `forOp` with an equivalent 1-D affine.parallel op, carrying over
/// the loop's iter args as reductions described by `parallelReductions`.
/// Fails (without modifying the IR) if any iter arg is not covered by a
/// reduction descriptor.
LogicalResult
mlir::affineParallelize(AffineForOp forOp,
                        ArrayRef<LoopReduction> parallelReductions) {
  // Fail early if there are iter arguments that are not reductions.
  unsigned numReductions = parallelReductions.size();
  if (numReductions != forOp.getNumIterOperands())
    return failure();

  Location loc = forOp.getLoc();
  OpBuilder outsideBuilder(forOp);
  AffineMap lowerBoundMap = forOp.getLowerBoundMap();
  ValueRange lowerBoundOperands = forOp.getLowerBoundOperands();
  AffineMap upperBoundMap = forOp.getUpperBoundMap();
  ValueRange upperBoundOperands = forOp.getUpperBoundOperands();

  // Creating empty 1-D affine.parallel op.
  auto reducedValues = llvm::to_vector<4>(llvm::map_range(
      parallelReductions, [](const LoopReduction &red) { return red.value; }));
  auto reductionKinds = llvm::to_vector<4>(llvm::map_range(
      parallelReductions, [](const LoopReduction &red) { return red.kind; }));
  AffineParallelOp newPloop = outsideBuilder.create<AffineParallelOp>(
      loc, ValueRange(reducedValues).getTypes(), reductionKinds,
      llvm::makeArrayRef(lowerBoundMap), lowerBoundOperands,
      llvm::makeArrayRef(upperBoundMap), upperBoundOperands,
      llvm::makeArrayRef(forOp.getStep()));
  // Steal the body of the old affine for op.
  newPloop.region().takeBody(forOp.region());
  Operation *yieldOp = &newPloop.getBody()->back();

  // Handle the initial values of reductions because the parallel loop always
  // starts from the neutral value.
  SmallVector<Value> newResults;
  newResults.reserve(numReductions);
  for (unsigned i = 0; i < numReductions; ++i) {
    Value init = forOp.getIterOperands()[i];
    // This works because we are only handling single-op reductions at the
    // moment. A switch on reduction kind or a mechanism to collect operations
    // participating in the reduction will be necessary for multi-op reductions.
    Operation *reductionOp = yieldOp->getOperand(i).getDefiningOp();
    assert(reductionOp && "yielded value is expected to be produced by an op");
    // Move the reduction op outside the loop (just before the parallel op's
    // insertion point) and make it combine the init value with the loop's
    // reduced result instead.
    outsideBuilder.getInsertionBlock()->getOperations().splice(
        outsideBuilder.getInsertionPoint(), newPloop.getBody()->getOperations(),
        reductionOp);
    reductionOp->setOperands({init, newPloop->getResult(i)});
    forOp->getResult(i).replaceAllUsesWith(reductionOp->getResult(0));
  }

  // Update the loop terminator to yield reduced values bypassing the reduction
  // operation itself (now moved outside of the loop) and erase the block
  // arguments that correspond to reductions. Note that the loop always has one
  // "main" induction variable when coming from a non-parallel for.
  unsigned numIVs = 1;
  yieldOp->setOperands(reducedValues);
  newPloop.getBody()->eraseArguments(
      llvm::to_vector<4>(llvm::seq<unsigned>(numIVs, numReductions + numIVs)));

  forOp.erase();
  return success();
}
| |
| // Returns success if any hoisting happened. |
| LogicalResult mlir::hoistAffineIfOp(AffineIfOp ifOp, bool *folded) { |
| // Bail out early if the ifOp returns a result. TODO: Consider how to |
| // properly support this case. |
| if (ifOp.getNumResults() != 0) |
| return failure(); |
| |
| // Apply canonicalization patterns and folding - this is necessary for the |
| // hoisting check to be correct (operands should be composed), and to be more |
| // effective (no unused operands). Since the pattern rewriter's folding is |
| // entangled with application of patterns, we may fold/end up erasing the op, |
| // in which case we return with `folded` being set. |
| RewritePatternSet patterns(ifOp.getContext()); |
| AffineIfOp::getCanonicalizationPatterns(patterns, ifOp.getContext()); |
| bool erased; |
| FrozenRewritePatternSet frozenPatterns(std::move(patterns)); |
| (void)applyOpPatternsAndFold(ifOp, frozenPatterns, &erased); |
| if (erased) { |
| if (folded) |
| *folded = true; |
| return failure(); |
| } |
| if (folded) |
| *folded = false; |
| |
| // The folding above should have ensured this, but the affine.if's |
| // canonicalization is missing composition of affine.applys into it. |
| assert(llvm::all_of(ifOp.getOperands(), |
| [](Value v) { |
| return isTopLevelValue(v) || isForInductionVar(v); |
| }) && |
| "operands not composed"); |
| |
| // We are going hoist as high as possible. |
| // TODO: this could be customized in the future. |
| auto *hoistOverOp = getOutermostInvariantForOp(ifOp); |
| |
| AffineIfOp hoistedIfOp = ::hoistAffineIfOp(ifOp, hoistOverOp); |
| // Nothing to hoist over. |
| if (hoistedIfOp == ifOp) |
| return failure(); |
| |
| // Canonicalize to remove dead else blocks (happens whenever an 'if' moves up |
| // a sequence of affine.fors that are all perfectly nested). |
| (void)applyPatternsAndFoldGreedily( |
| hoistedIfOp->getParentWithTrait<OpTrait::IsIsolatedFromAbove>(), |
| frozenPatterns); |
| |
| return success(); |
| } |
| |
| // Return the min expr after replacing the given dim. |
| AffineExpr mlir::substWithMin(AffineExpr e, AffineExpr dim, AffineExpr min, |
| AffineExpr max, bool positivePath) { |
| if (e == dim) |
| return positivePath ? min : max; |
| if (auto bin = e.dyn_cast<AffineBinaryOpExpr>()) { |
| AffineExpr lhs = bin.getLHS(); |
| AffineExpr rhs = bin.getRHS(); |
| if (bin.getKind() == mlir::AffineExprKind::Add) |
| return substWithMin(lhs, dim, min, max, positivePath) + |
| substWithMin(rhs, dim, min, max, positivePath); |
| |
| auto c1 = bin.getLHS().dyn_cast<AffineConstantExpr>(); |
| auto c2 = bin.getRHS().dyn_cast<AffineConstantExpr>(); |
| if (c1 && c1.getValue() < 0) |
| return getAffineBinaryOpExpr( |
| bin.getKind(), c1, substWithMin(rhs, dim, min, max, !positivePath)); |
| if (c2 && c2.getValue() < 0) |
| return getAffineBinaryOpExpr( |
| bin.getKind(), substWithMin(lhs, dim, min, max, !positivePath), c2); |
| return getAffineBinaryOpExpr( |
| bin.getKind(), substWithMin(lhs, dim, min, max, positivePath), |
| substWithMin(rhs, dim, min, max, positivePath)); |
| } |
| return e; |
| } |
| |
/// Normalizes `op` so that all its lower bounds are zero and all steps are
/// one. Original IV values are recovered inside the body via affine.apply ops
/// ('lb + iv * step'). Loops with min/max bounds are left untouched.
void mlir::normalizeAffineParallel(AffineParallelOp op) {
  // Loops with min/max in bounds are not normalized at the moment.
  if (op.hasMinMaxBounds())
    return;

  AffineMap lbMap = op.lowerBoundsMap();
  SmallVector<int64_t, 8> steps = op.getSteps();
  // No need to do any work if the parallel op is already normalized.
  bool isAlreadyNormalized =
      llvm::all_of(llvm::zip(steps, lbMap.getResults()), [](auto tuple) {
        int64_t step = std::get<0>(tuple);
        auto lbExpr =
            std::get<1>(tuple).template dyn_cast<AffineConstantExpr>();
        return lbExpr && lbExpr.getValue() == 0 && step == 1;
      });
  if (isAlreadyNormalized)
    return;

  // ranges = ub - lb, computed per dimension as an affine value map.
  AffineValueMap ranges;
  AffineValueMap::difference(op.getUpperBoundsValueMap(),
                             op.getLowerBoundsValueMap(), &ranges);
  auto builder = OpBuilder::atBlockBegin(op.getBody());
  auto zeroExpr = builder.getAffineConstantExpr(0);
  SmallVector<AffineExpr, 8> lbExprs;
  SmallVector<AffineExpr, 8> ubExprs;
  for (unsigned i = 0, e = steps.size(); i < e; ++i) {
    int64_t step = steps[i];

    // Adjust the lower bound to be 0.
    lbExprs.push_back(zeroExpr);

    // Adjust the upper bound expression: 'range / step'.
    AffineExpr ubExpr = ranges.getResult(i).ceilDiv(step);
    ubExprs.push_back(ubExpr);

    // Adjust the corresponding IV: 'lb + i * step'. The new IV is appended as
    // an extra dim (at position nDims) to the lower bound map's dims.
    BlockArgument iv = op.getBody()->getArgument(i);
    AffineExpr lbExpr = lbMap.getResult(i);
    unsigned nDims = lbMap.getNumDims();
    auto expr = lbExpr + builder.getAffineDimExpr(nDims) * step;
    auto map = AffineMap::get(/*dimCount=*/nDims + 1,
                              /*symbolCount=*/lbMap.getNumSymbols(), expr);

    // Use an 'affine.apply' op that will be simplified later in subsequent
    // canonicalizations.
    OperandRange lbOperands = op.getLowerBoundsOperands();
    OperandRange dimOperands = lbOperands.take_front(nDims);
    OperandRange symbolOperands = lbOperands.drop_front(nDims);
    SmallVector<Value, 8> applyOperands{dimOperands};
    applyOperands.push_back(iv);
    applyOperands.append(symbolOperands.begin(), symbolOperands.end());
    auto apply = builder.create<AffineApplyOp>(op.getLoc(), map, applyOperands);
    // Redirect all uses of the IV to the apply, except the apply itself
    // (which consumes the raw IV as an operand).
    iv.replaceAllUsesExcept(apply, apply);
  }

  SmallVector<int64_t, 8> newSteps(op.getNumDims(), 1);
  op.setSteps(newSteps);
  auto newLowerMap = AffineMap::get(
      /*dimCount=*/0, /*symbolCount=*/0, lbExprs, op.getContext());
  op.setLowerBounds({}, newLowerMap);
  auto newUpperMap = AffineMap::get(ranges.getNumDims(), ranges.getNumSymbols(),
                                    ubExprs, op.getContext());
  op.setUpperBounds(ranges.getOperands(), newUpperMap);
}
| |
/// Normalizes affine.for ops. If the affine.for op has only a single iteration
/// only then it is simply promoted, else it is normalized in the traditional
/// way, by converting the lower bound to zero and loop step to one. The upper
/// bound is set to the trip count of the loop. For now, original loops must
/// have lower bound with a single result only. There is no such restriction on
/// upper bounds. The original IV value is recovered inside the body via an
/// affine.apply of 'iv * step + lb'.
void mlir::normalizeAffineFor(AffineForOp op) {
  if (succeeded(promoteIfSingleIteration(op)))
    return;

  // Check if the forop is already normalized.
  if (op.hasConstantLowerBound() && (op.getConstantLowerBound() == 0) &&
      (op.getStep() == 1))
    return;

  // Check if the lower bound has a single result only. Loops with a max lower
  // bound can't be normalized without additional support like
  // affine.execute_region's. If the lower bound does not have a single result
  // then skip this op.
  if (op.getLowerBoundMap().getNumResults() != 1)
    return;

  Location loc = op.getLoc();
  OpBuilder opBuilder(op);
  int64_t origLoopStep = op.getStep();

  // Calculate upperBound for normalized loop. The new upper bound map takes
  // the ub's operands followed by the lb's operands, with dims of both before
  // symbols of both (matching the dim/symbol counts of newUbMap below).
  SmallVector<Value, 4> ubOperands;
  AffineBound lb = op.getLowerBound();
  AffineBound ub = op.getUpperBound();
  ubOperands.reserve(ub.getNumOperands() + lb.getNumOperands());
  AffineMap origLbMap = lb.getMap();
  AffineMap origUbMap = ub.getMap();

  // Add dimension operands from upper/lower bound.
  for (unsigned j = 0, e = origUbMap.getNumDims(); j < e; ++j)
    ubOperands.push_back(ub.getOperand(j));
  for (unsigned j = 0, e = origLbMap.getNumDims(); j < e; ++j)
    ubOperands.push_back(lb.getOperand(j));

  // Add symbol operands from upper/lower bound.
  for (unsigned j = 0, e = origUbMap.getNumSymbols(); j < e; ++j)
    ubOperands.push_back(ub.getOperand(origUbMap.getNumDims() + j));
  for (unsigned j = 0, e = origLbMap.getNumSymbols(); j < e; ++j)
    ubOperands.push_back(lb.getOperand(origLbMap.getNumDims() + j));

  // Add original result expressions from lower/upper bound map.
  SmallVector<AffineExpr, 1> origLbExprs(origLbMap.getResults().begin(),
                                         origLbMap.getResults().end());
  SmallVector<AffineExpr, 2> origUbExprs(origUbMap.getResults().begin(),
                                         origUbMap.getResults().end());
  SmallVector<AffineExpr, 4> newUbExprs;

  // The original upperBound can have more than one result. For the new
  // upperBound of this loop, take difference of all possible combinations of
  // the ub results and lb result and ceildiv with the loop step. For e.g.,
  //
  // affine.for %i1 = 0 to min affine_map<(d0)[] -> (d0 + 32, 1024)>(%i0)
  // will have an upperBound map as,
  // affine_map<(d0)[] -> (((d0 + 32) - 0) ceildiv 1, (1024 - 0) ceildiv
  // 1)>(%i0)
  //
  // Insert all combinations of upper/lower bound results.
  for (unsigned i = 0, e = origUbExprs.size(); i < e; ++i) {
    newUbExprs.push_back(
        (origUbExprs[i] - origLbExprs[0]).ceilDiv(origLoopStep));
  }

  // Construct newUbMap.
  AffineMap newUbMap =
      AffineMap::get(origLbMap.getNumDims() + origUbMap.getNumDims(),
                     origLbMap.getNumSymbols() + origUbMap.getNumSymbols(),
                     newUbExprs, opBuilder.getContext());

  // Normalize the loop.
  op.setUpperBound(ubOperands, newUbMap);
  op.setLowerBound({}, opBuilder.getConstantAffineMap(0));
  op.setStep(1);

  // Calculate the Value of new loopIV. Create affine.apply for the value of
  // the loopIV in normalized loop.
  opBuilder.setInsertionPointToStart(op.getBody());
  // The apply's operands are the lb's dim operands, then the (new) IV as an
  // extra dim, then the lb's symbol operands.
  SmallVector<Value, 4> lbOperands(lb.getOperands().begin(),
                                   lb.getOperands().begin() +
                                       lb.getMap().getNumDims());
  // Add an extra dim operand for loopIV.
  lbOperands.push_back(op.getInductionVar());
  // Add symbol operands from lower bound.
  for (unsigned j = 0, e = origLbMap.getNumSymbols(); j < e; ++j)
    lbOperands.push_back(lb.getOperand(origLbMap.getNumDims() + j));

  AffineExpr origIVExpr = opBuilder.getAffineDimExpr(lb.getMap().getNumDims());
  AffineExpr newIVExpr = origIVExpr * origLoopStep + origLbMap.getResult(0);
  AffineMap ivMap = AffineMap::get(origLbMap.getNumDims() + 1,
                                   origLbMap.getNumSymbols(), newIVExpr);
  // Replace all uses of the original IV with the recovered value, except the
  // apply op itself, which must keep consuming the raw (normalized) IV.
  Operation *newIV = opBuilder.create<AffineApplyOp>(loc, ivMap, lbOperands);
  op.getInductionVar().replaceAllUsesExcept(newIV->getResult(0), newIV);
}