| //===- StackArrays.cpp ----------------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "flang/Optimizer/Builder/FIRBuilder.h" |
| #include "flang/Optimizer/Builder/LowLevelIntrinsics.h" |
| #include "flang/Optimizer/Dialect/FIRAttr.h" |
| #include "flang/Optimizer/Dialect/FIRDialect.h" |
| #include "flang/Optimizer/Dialect/FIROps.h" |
| #include "flang/Optimizer/Dialect/FIRType.h" |
| #include "flang/Optimizer/Dialect/Support/FIRContext.h" |
| #include "flang/Optimizer/Transforms/Passes.h" |
| #include "mlir/Analysis/DataFlow/ConstantPropagationAnalysis.h" |
| #include "mlir/Analysis/DataFlow/DeadCodeAnalysis.h" |
| #include "mlir/Analysis/DataFlow/DenseAnalysis.h" |
| #include "mlir/Analysis/DataFlowFramework.h" |
| #include "mlir/Dialect/Func/IR/FuncOps.h" |
| #include "mlir/Dialect/OpenMP/OpenMPDialect.h" |
| #include "mlir/IR/Builders.h" |
| #include "mlir/IR/Diagnostics.h" |
| #include "mlir/IR/Value.h" |
| #include "mlir/Interfaces/LoopLikeInterface.h" |
| #include "mlir/Pass/Pass.h" |
| #include "mlir/Support/LogicalResult.h" |
| #include "mlir/Transforms/GreedyPatternRewriteDriver.h" |
| #include "mlir/Transforms/Passes.h" |
| #include "llvm/ADT/DenseMap.h" |
| #include "llvm/ADT/DenseSet.h" |
| #include "llvm/ADT/PointerUnion.h" |
| #include "llvm/Support/Casting.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include <optional> |
| |
| namespace fir { |
| #define GEN_PASS_DEF_STACKARRAYS |
| #include "flang/Optimizer/Transforms/Passes.h.inc" |
| } // namespace fir |
| |
| #define DEBUG_TYPE "stack-arrays" |
| |
| static llvm::cl::opt<std::size_t> maxAllocsPerFunc( |
| "stack-arrays-max-allocs", |
| llvm::cl::desc("The maximum number of heap allocations to consider in one " |
| "function before skipping (to save compilation time). Set " |
| "to 0 for no limit."), |
| llvm::cl::init(1000), llvm::cl::Hidden); |
| |
| namespace { |
| |
| /// The state of an SSA value at each program point |
| enum class AllocationState { |
| /// This means that the allocation state of a variable cannot be determined |
| /// at this program point, e.g. because one route through a conditional freed |
| /// the variable and the other route didn't. |
| /// This asserts a known-unknown: different from the unknown-unknown of having |
| /// no AllocationState stored for a particular SSA value |
| Unknown, |
| /// Means this SSA value was allocated on the heap in this function and has |
| /// now been freed |
| Freed, |
| /// Means this SSA value was allocated on the heap in this function and is a |
| /// candidate for moving to the stack |
| Allocated, |
| }; |
| |
| /// Stores where an alloca should be inserted. If the PointerUnion is an |
| /// Operation the alloca should be inserted /after/ the operation. If it is a |
| /// block, the alloca can be placed anywhere in that block. |
| class InsertionPoint { |
| llvm::PointerUnion<mlir::Operation *, mlir::Block *> location; |
| bool saveRestoreStack; |
| |
| /// Get contained pointer type or nullptr |
| template <class T> |
| T *tryGetPtr() const { |
| if (location.is<T *>()) |
| return location.get<T *>(); |
| return nullptr; |
| } |
| |
| public: |
| template <class T> |
| InsertionPoint(T *ptr, bool saveRestoreStack = false) |
| : location(ptr), saveRestoreStack{saveRestoreStack} {} |
| InsertionPoint(std::nullptr_t null) |
| : location(null), saveRestoreStack{false} {} |
| |
| /// Get contained operation, or nullptr |
| mlir::Operation *tryGetOperation() const { |
| return tryGetPtr<mlir::Operation>(); |
| } |
| |
| /// Get contained block, or nullptr |
| mlir::Block *tryGetBlock() const { return tryGetPtr<mlir::Block>(); } |
| |
| /// Get whether the stack should be saved/restored. If yes, an llvm.stacksave |
| /// intrinsic should be added before the alloca, and an llvm.stackrestore |
| /// intrinsic should be added where the freemem is |
| bool shouldSaveRestoreStack() const { return saveRestoreStack; } |
| |
| operator bool() const { return tryGetOperation() || tryGetBlock(); } |
| |
| bool operator==(const InsertionPoint &rhs) const { |
| return (location == rhs.location) && |
| (saveRestoreStack == rhs.saveRestoreStack); |
| } |
| |
| bool operator!=(const InsertionPoint &rhs) const { return !(*this == rhs); } |
| }; |
| |
| /// Maps SSA values to their AllocationState at a particular program point. |
| /// Also caches the insertion points for the new alloca operations |
| class LatticePoint : public mlir::dataflow::AbstractDenseLattice { |
| // Maps all values we are interested in to states |
| llvm::SmallDenseMap<mlir::Value, AllocationState, 1> stateMap; |
| |
| public: |
| MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(LatticePoint) |
| using AbstractDenseLattice::AbstractDenseLattice; |
| |
| bool operator==(const LatticePoint &rhs) const { |
| return stateMap == rhs.stateMap; |
| } |
| |
| /// Join the lattice accross control-flow edges |
| mlir::ChangeResult join(const AbstractDenseLattice &lattice) override; |
| |
| void print(llvm::raw_ostream &os) const override; |
| |
| /// Clear all modifications |
| mlir::ChangeResult reset(); |
| |
| /// Set the state of an SSA value |
| mlir::ChangeResult set(mlir::Value value, AllocationState state); |
| |
| /// Get fir.allocmem ops which were allocated in this function and always |
| /// freed before the function returns, plus whre to insert replacement |
| /// fir.alloca ops |
| void appendFreedValues(llvm::DenseSet<mlir::Value> &out) const; |
| |
| std::optional<AllocationState> get(mlir::Value val) const; |
| }; |
| |
| class AllocationAnalysis |
| : public mlir::dataflow::DenseForwardDataFlowAnalysis<LatticePoint> { |
| public: |
| using DenseForwardDataFlowAnalysis::DenseForwardDataFlowAnalysis; |
| |
| void visitOperation(mlir::Operation *op, const LatticePoint &before, |
| LatticePoint *after) override; |
| |
| /// At an entry point, the last modifications of all memory resources are |
| /// yet to be determined |
| void setToEntryState(LatticePoint *lattice) override; |
| |
| protected: |
| /// Visit control flow operations and decide whether to call visitOperation |
| /// to apply the transfer function |
| void processOperation(mlir::Operation *op) override; |
| }; |
| |
| /// Drives analysis to find candidate fir.allocmem operations which could be |
| /// moved to the stack. Intended to be used with mlir::Pass::getAnalysis |
| class StackArraysAnalysisWrapper { |
| public: |
| MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(StackArraysAnalysisWrapper) |
| |
| // Maps fir.allocmem -> place to insert alloca |
| using AllocMemMap = llvm::DenseMap<mlir::Operation *, InsertionPoint>; |
| |
| StackArraysAnalysisWrapper(mlir::Operation *op) {} |
| |
| // returns nullptr if analysis failed |
| const AllocMemMap *getCandidateOps(mlir::Operation *func); |
| |
| private: |
| llvm::DenseMap<mlir::Operation *, AllocMemMap> funcMaps; |
| |
| mlir::LogicalResult analyseFunction(mlir::Operation *func); |
| }; |
| |
| /// Converts a fir.allocmem to a fir.alloca |
| class AllocMemConversion : public mlir::OpRewritePattern<fir::AllocMemOp> { |
| public: |
| explicit AllocMemConversion( |
| mlir::MLIRContext *ctx, |
| const StackArraysAnalysisWrapper::AllocMemMap &candidateOps) |
| : OpRewritePattern(ctx), candidateOps{candidateOps} {} |
| |
| mlir::LogicalResult |
| matchAndRewrite(fir::AllocMemOp allocmem, |
| mlir::PatternRewriter &rewriter) const override; |
| |
| /// Determine where to insert the alloca operation. The returned value should |
| /// be checked to see if it is inside a loop |
| static InsertionPoint findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc); |
| |
| private: |
| /// Handle to the DFA (already run) |
| const StackArraysAnalysisWrapper::AllocMemMap &candidateOps; |
| |
| /// If we failed to find an insertion point not inside a loop, see if it would |
| /// be safe to use an llvm.stacksave/llvm.stackrestore inside the loop |
| static InsertionPoint findAllocaLoopInsertionPoint(fir::AllocMemOp &oldAlloc); |
| |
| /// Returns the alloca if it was successfully inserted, otherwise {} |
| std::optional<fir::AllocaOp> |
| insertAlloca(fir::AllocMemOp &oldAlloc, |
| mlir::PatternRewriter &rewriter) const; |
| |
| /// Inserts a stacksave before oldAlloc and a stackrestore after each freemem |
| void insertStackSaveRestore(fir::AllocMemOp &oldAlloc, |
| mlir::PatternRewriter &rewriter) const; |
| }; |
| |
| class StackArraysPass : public fir::impl::StackArraysBase<StackArraysPass> { |
| public: |
| StackArraysPass() = default; |
| StackArraysPass(const StackArraysPass &pass); |
| |
| llvm::StringRef getDescription() const override; |
| |
| void runOnOperation() override; |
| void runOnFunc(mlir::Operation *func); |
| |
| private: |
| Statistic runCount{this, "stackArraysRunCount", |
| "Number of heap allocations moved to the stack"}; |
| }; |
| |
| } // namespace |
| |
| static void print(llvm::raw_ostream &os, AllocationState state) { |
| switch (state) { |
| case AllocationState::Unknown: |
| os << "Unknown"; |
| break; |
| case AllocationState::Freed: |
| os << "Freed"; |
| break; |
| case AllocationState::Allocated: |
| os << "Allocated"; |
| break; |
| } |
| } |
| |
| /// Join two AllocationStates for the same value coming from different CFG |
| /// blocks |
| static AllocationState join(AllocationState lhs, AllocationState rhs) { |
| // | Allocated | Freed | Unknown |
| // ========= | ========= | ========= | ========= |
| // Allocated | Allocated | Unknown | Unknown |
| // Freed | Unknown | Freed | Unknown |
| // Unknown | Unknown | Unknown | Unknown |
| if (lhs == rhs) |
| return lhs; |
| return AllocationState::Unknown; |
| } |
| |
| mlir::ChangeResult LatticePoint::join(const AbstractDenseLattice &lattice) { |
| const auto &rhs = static_cast<const LatticePoint &>(lattice); |
| mlir::ChangeResult changed = mlir::ChangeResult::NoChange; |
| |
| // add everything from rhs to map, handling cases where values are in both |
| for (const auto &[value, rhsState] : rhs.stateMap) { |
| auto it = stateMap.find(value); |
| if (it != stateMap.end()) { |
| // value is present in both maps |
| AllocationState myState = it->second; |
| AllocationState newState = ::join(myState, rhsState); |
| if (newState != myState) { |
| changed = mlir::ChangeResult::Change; |
| it->getSecond() = newState; |
| } |
| } else { |
| // value not present in current map: add it |
| stateMap.insert({value, rhsState}); |
| changed = mlir::ChangeResult::Change; |
| } |
| } |
| |
| return changed; |
| } |
| |
| void LatticePoint::print(llvm::raw_ostream &os) const { |
| for (const auto &[value, state] : stateMap) { |
| os << value << ": "; |
| ::print(os, state); |
| } |
| } |
| |
| mlir::ChangeResult LatticePoint::reset() { |
| if (stateMap.empty()) |
| return mlir::ChangeResult::NoChange; |
| stateMap.clear(); |
| return mlir::ChangeResult::Change; |
| } |
| |
| mlir::ChangeResult LatticePoint::set(mlir::Value value, AllocationState state) { |
| if (stateMap.count(value)) { |
| // already in map |
| AllocationState &oldState = stateMap[value]; |
| if (oldState != state) { |
| stateMap[value] = state; |
| return mlir::ChangeResult::Change; |
| } |
| return mlir::ChangeResult::NoChange; |
| } |
| stateMap.insert({value, state}); |
| return mlir::ChangeResult::Change; |
| } |
| |
| /// Get values which were allocated in this function and always freed before |
| /// the function returns |
| void LatticePoint::appendFreedValues(llvm::DenseSet<mlir::Value> &out) const { |
| for (auto &[value, state] : stateMap) { |
| if (state == AllocationState::Freed) |
| out.insert(value); |
| } |
| } |
| |
| std::optional<AllocationState> LatticePoint::get(mlir::Value val) const { |
| auto it = stateMap.find(val); |
| if (it == stateMap.end()) |
| return {}; |
| return it->second; |
| } |
| |
| void AllocationAnalysis::visitOperation(mlir::Operation *op, |
| const LatticePoint &before, |
| LatticePoint *after) { |
| LLVM_DEBUG(llvm::dbgs() << "StackArrays: Visiting operation: " << *op |
| << "\n"); |
| LLVM_DEBUG(llvm::dbgs() << "--Lattice in: " << before << "\n"); |
| |
| // propagate before -> after |
| mlir::ChangeResult changed = after->join(before); |
| |
| if (auto allocmem = mlir::dyn_cast<fir::AllocMemOp>(op)) { |
| assert(op->getNumResults() == 1 && "fir.allocmem has one result"); |
| auto attr = op->getAttrOfType<fir::MustBeHeapAttr>( |
| fir::MustBeHeapAttr::getAttrName()); |
| if (attr && attr.getValue()) { |
| LLVM_DEBUG(llvm::dbgs() << "--Found fir.must_be_heap: skipping\n"); |
| // skip allocation marked not to be moved |
| return; |
| } |
| |
| auto retTy = allocmem.getAllocatedType(); |
| if (!mlir::isa<fir::SequenceType>(retTy)) { |
| LLVM_DEBUG(llvm::dbgs() |
| << "--Allocation is not for an array: skipping\n"); |
| return; |
| } |
| |
| mlir::Value result = op->getResult(0); |
| changed |= after->set(result, AllocationState::Allocated); |
| } else if (mlir::isa<fir::FreeMemOp>(op)) { |
| assert(op->getNumOperands() == 1 && "fir.freemem has one operand"); |
| mlir::Value operand = op->getOperand(0); |
| std::optional<AllocationState> operandState = before.get(operand); |
| if (operandState && *operandState == AllocationState::Allocated) { |
| // don't tag things not allocated in this function as freed, so that we |
| // don't think they are candidates for moving to the stack |
| changed |= after->set(operand, AllocationState::Freed); |
| } |
| } else if (mlir::isa<fir::ResultOp>(op)) { |
| mlir::Operation *parent = op->getParentOp(); |
| LatticePoint *parentLattice = getLattice(parent); |
| assert(parentLattice); |
| mlir::ChangeResult parentChanged = parentLattice->join(*after); |
| propagateIfChanged(parentLattice, parentChanged); |
| } |
| |
| // we pass lattices straight through fir.call because called functions should |
| // not deallocate flang-generated array temporaries |
| |
| LLVM_DEBUG(llvm::dbgs() << "--Lattice out: " << *after << "\n"); |
| propagateIfChanged(after, changed); |
| } |
| |
| void AllocationAnalysis::setToEntryState(LatticePoint *lattice) { |
| propagateIfChanged(lattice, lattice->reset()); |
| } |
| |
| /// Mostly a copy of AbstractDenseLattice::processOperation - the difference |
| /// being that call operations are passed through to the transfer function |
| void AllocationAnalysis::processOperation(mlir::Operation *op) { |
| // If the containing block is not executable, bail out. |
| if (!getOrCreateFor<mlir::dataflow::Executable>(op, op->getBlock())->isLive()) |
| return; |
| |
| // Get the dense lattice to update |
| mlir::dataflow::AbstractDenseLattice *after = getLattice(op); |
| |
| // If this op implements region control-flow, then control-flow dictates its |
| // transfer function. |
| if (auto branch = mlir::dyn_cast<mlir::RegionBranchOpInterface>(op)) |
| return visitRegionBranchOperation(op, branch, after); |
| |
| // pass call operations through to the transfer function |
| |
| // Get the dense state before the execution of the op. |
| const mlir::dataflow::AbstractDenseLattice *before; |
| if (mlir::Operation *prev = op->getPrevNode()) |
| before = getLatticeFor(op, prev); |
| else |
| before = getLatticeFor(op, op->getBlock()); |
| |
| /// Invoke the operation transfer function |
| visitOperationImpl(op, *before, after); |
| } |
| |
| mlir::LogicalResult |
| StackArraysAnalysisWrapper::analyseFunction(mlir::Operation *func) { |
| assert(mlir::isa<mlir::func::FuncOp>(func)); |
| size_t nAllocs = 0; |
| func->walk([&nAllocs](fir::AllocMemOp) { nAllocs++; }); |
| // don't bother with the analysis if there are no heap allocations |
| if (nAllocs == 0) |
| return mlir::success(); |
| if ((maxAllocsPerFunc != 0) && (nAllocs > maxAllocsPerFunc)) { |
| LLVM_DEBUG(llvm::dbgs() << "Skipping stack arrays for function with " |
| << nAllocs << " heap allocations"); |
| return mlir::success(); |
| } |
| |
| mlir::DataFlowSolver solver; |
| // constant propagation is required for dead code analysis, dead code analysis |
| // is required to mark blocks live (required for mlir dense dfa) |
| solver.load<mlir::dataflow::SparseConstantPropagation>(); |
| solver.load<mlir::dataflow::DeadCodeAnalysis>(); |
| |
| auto [it, inserted] = funcMaps.try_emplace(func); |
| AllocMemMap &candidateOps = it->second; |
| |
| solver.load<AllocationAnalysis>(); |
| if (failed(solver.initializeAndRun(func))) { |
| llvm::errs() << "DataFlowSolver failed!"; |
| return mlir::failure(); |
| } |
| |
| LatticePoint point{func}; |
| auto joinOperationLattice = [&](mlir::Operation *op) { |
| const LatticePoint *lattice = solver.lookupState<LatticePoint>(op); |
| // there will be no lattice for an unreachable block |
| if (lattice) |
| (void)point.join(*lattice); |
| }; |
| func->walk([&](mlir::func::ReturnOp child) { joinOperationLattice(child); }); |
| func->walk([&](fir::UnreachableOp child) { joinOperationLattice(child); }); |
| llvm::DenseSet<mlir::Value> freedValues; |
| point.appendFreedValues(freedValues); |
| |
| // We only replace allocations which are definately freed on all routes |
| // through the function because otherwise the allocation may have an intende |
| // lifetime longer than the current stack frame (e.g. a heap allocation which |
| // is then freed by another function). |
| for (mlir::Value freedValue : freedValues) { |
| fir::AllocMemOp allocmem = freedValue.getDefiningOp<fir::AllocMemOp>(); |
| InsertionPoint insertionPoint = |
| AllocMemConversion::findAllocaInsertionPoint(allocmem); |
| if (insertionPoint) |
| candidateOps.insert({allocmem, insertionPoint}); |
| } |
| |
| LLVM_DEBUG(for (auto [allocMemOp, _] |
| : candidateOps) { |
| llvm::dbgs() << "StackArrays: Found candidate op: " << *allocMemOp << '\n'; |
| }); |
| return mlir::success(); |
| } |
| |
| const StackArraysAnalysisWrapper::AllocMemMap * |
| StackArraysAnalysisWrapper::getCandidateOps(mlir::Operation *func) { |
| if (!funcMaps.contains(func)) |
| if (mlir::failed(analyseFunction(func))) |
| return nullptr; |
| return &funcMaps[func]; |
| } |
| |
| /// Restore the old allocation type exected by existing code |
| static mlir::Value convertAllocationType(mlir::PatternRewriter &rewriter, |
| const mlir::Location &loc, |
| mlir::Value heap, mlir::Value stack) { |
| mlir::Type heapTy = heap.getType(); |
| mlir::Type stackTy = stack.getType(); |
| |
| if (heapTy == stackTy) |
| return stack; |
| |
| fir::HeapType firHeapTy = mlir::cast<fir::HeapType>(heapTy); |
| LLVM_ATTRIBUTE_UNUSED fir::ReferenceType firRefTy = |
| mlir::cast<fir::ReferenceType>(stackTy); |
| assert(firHeapTy.getElementType() == firRefTy.getElementType() && |
| "Allocations must have the same type"); |
| |
| auto insertionPoint = rewriter.saveInsertionPoint(); |
| rewriter.setInsertionPointAfter(stack.getDefiningOp()); |
| mlir::Value conv = |
| rewriter.create<fir::ConvertOp>(loc, firHeapTy, stack).getResult(); |
| rewriter.restoreInsertionPoint(insertionPoint); |
| return conv; |
| } |
| |
| mlir::LogicalResult |
| AllocMemConversion::matchAndRewrite(fir::AllocMemOp allocmem, |
| mlir::PatternRewriter &rewriter) const { |
| auto oldInsertionPt = rewriter.saveInsertionPoint(); |
| // add alloca operation |
| std::optional<fir::AllocaOp> alloca = insertAlloca(allocmem, rewriter); |
| rewriter.restoreInsertionPoint(oldInsertionPt); |
| if (!alloca) |
| return mlir::failure(); |
| |
| // remove freemem operations |
| llvm::SmallVector<mlir::Operation *> erases; |
| for (mlir::Operation *user : allocmem.getOperation()->getUsers()) |
| if (mlir::isa<fir::FreeMemOp>(user)) |
| erases.push_back(user); |
| // now we are done iterating the users, it is safe to mutate them |
| for (mlir::Operation *erase : erases) |
| rewriter.eraseOp(erase); |
| |
| // replace references to heap allocation with references to stack allocation |
| mlir::Value newValue = convertAllocationType( |
| rewriter, allocmem.getLoc(), allocmem.getResult(), alloca->getResult()); |
| rewriter.replaceAllUsesWith(allocmem.getResult(), newValue); |
| |
| // remove allocmem operation |
| rewriter.eraseOp(allocmem.getOperation()); |
| |
| return mlir::success(); |
| } |
| |
| static bool isInLoop(mlir::Block *block) { |
| return mlir::LoopLikeOpInterface::blockIsInLoop(block); |
| } |
| |
| static bool isInLoop(mlir::Operation *op) { |
| return isInLoop(op->getBlock()) || |
| op->getParentOfType<mlir::LoopLikeOpInterface>(); |
| } |
| |
| InsertionPoint |
| AllocMemConversion::findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc) { |
| // Ideally the alloca should be inserted at the end of the function entry |
| // block so that we do not allocate stack space in a loop. However, |
| // the operands to the alloca may not be available that early, so insert it |
| // after the last operand becomes available |
| // If the old allocmem op was in an openmp region then it should not be moved |
| // outside of that |
| LLVM_DEBUG(llvm::dbgs() << "StackArrays: findAllocaInsertionPoint: " |
| << oldAlloc << "\n"); |
| |
| // check that an Operation or Block we are about to return is not in a loop |
| auto checkReturn = [&](auto *point) -> InsertionPoint { |
| if (isInLoop(point)) { |
| mlir::Operation *oldAllocOp = oldAlloc.getOperation(); |
| if (isInLoop(oldAllocOp)) { |
| // where we want to put it is in a loop, and even the old location is in |
| // a loop. Give up. |
| return findAllocaLoopInsertionPoint(oldAlloc); |
| } |
| return {oldAllocOp}; |
| } |
| return {point}; |
| }; |
| |
| auto oldOmpRegion = |
| oldAlloc->getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>(); |
| |
| // Find when the last operand value becomes available |
| mlir::Block *operandsBlock = nullptr; |
| mlir::Operation *lastOperand = nullptr; |
| for (mlir::Value operand : oldAlloc.getOperands()) { |
| LLVM_DEBUG(llvm::dbgs() << "--considering operand " << operand << "\n"); |
| mlir::Operation *op = operand.getDefiningOp(); |
| if (!op) |
| return checkReturn(oldAlloc.getOperation()); |
| if (!operandsBlock) |
| operandsBlock = op->getBlock(); |
| else if (operandsBlock != op->getBlock()) { |
| LLVM_DEBUG(llvm::dbgs() |
| << "----operand declared in a different block!\n"); |
| // Operation::isBeforeInBlock requires the operations to be in the same |
| // block. The best we can do is the location of the allocmem. |
| return checkReturn(oldAlloc.getOperation()); |
| } |
| if (!lastOperand || lastOperand->isBeforeInBlock(op)) |
| lastOperand = op; |
| } |
| |
| if (lastOperand) { |
| // there were value operands to the allocmem so insert after the last one |
| LLVM_DEBUG(llvm::dbgs() |
| << "--Placing after last operand: " << *lastOperand << "\n"); |
| // check we aren't moving out of an omp region |
| auto lastOpOmpRegion = |
| lastOperand->getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>(); |
| if (lastOpOmpRegion == oldOmpRegion) |
| return checkReturn(lastOperand); |
| // Presumably this happened because the operands became ready before the |
| // start of this openmp region. (lastOpOmpRegion != oldOmpRegion) should |
| // imply that oldOmpRegion comes after lastOpOmpRegion. |
| return checkReturn(oldOmpRegion.getAllocaBlock()); |
| } |
| |
| // There were no value operands to the allocmem so we are safe to insert it |
| // as early as we want |
| |
| // handle openmp case |
| if (oldOmpRegion) |
| return checkReturn(oldOmpRegion.getAllocaBlock()); |
| |
| // fall back to the function entry block |
| mlir::func::FuncOp func = oldAlloc->getParentOfType<mlir::func::FuncOp>(); |
| assert(func && "This analysis is run on func.func"); |
| mlir::Block &entryBlock = func.getBlocks().front(); |
| LLVM_DEBUG(llvm::dbgs() << "--Placing at the start of func entry block\n"); |
| return checkReturn(&entryBlock); |
| } |
| |
| InsertionPoint |
| AllocMemConversion::findAllocaLoopInsertionPoint(fir::AllocMemOp &oldAlloc) { |
| mlir::Operation *oldAllocOp = oldAlloc; |
| // This is only called as a last resort. We should try to insert at the |
| // location of the old allocation, which is inside of a loop, using |
| // llvm.stacksave/llvm.stackrestore |
| |
| // find freemem ops |
| llvm::SmallVector<mlir::Operation *, 1> freeOps; |
| for (mlir::Operation *user : oldAllocOp->getUsers()) |
| if (mlir::isa<fir::FreeMemOp>(user)) |
| freeOps.push_back(user); |
| assert(freeOps.size() && "DFA should only return freed memory"); |
| |
| // Don't attempt to reason about a stacksave/stackrestore between different |
| // blocks |
| for (mlir::Operation *free : freeOps) |
| if (free->getBlock() != oldAllocOp->getBlock()) |
| return {nullptr}; |
| |
| // Check that there aren't any other stack allocations in between the |
| // stack save and stack restore |
| // note: for flang generated temporaries there should only be one free op |
| for (mlir::Operation *free : freeOps) { |
| for (mlir::Operation *op = oldAlloc; op && op != free; |
| op = op->getNextNode()) { |
| if (mlir::isa<fir::AllocaOp>(op)) |
| return {nullptr}; |
| } |
| } |
| |
| return InsertionPoint{oldAllocOp, /*shouldStackSaveRestore=*/true}; |
| } |
| |
| std::optional<fir::AllocaOp> |
| AllocMemConversion::insertAlloca(fir::AllocMemOp &oldAlloc, |
| mlir::PatternRewriter &rewriter) const { |
| auto it = candidateOps.find(oldAlloc.getOperation()); |
| if (it == candidateOps.end()) |
| return {}; |
| InsertionPoint insertionPoint = it->second; |
| if (!insertionPoint) |
| return {}; |
| |
| if (insertionPoint.shouldSaveRestoreStack()) |
| insertStackSaveRestore(oldAlloc, rewriter); |
| |
| mlir::Location loc = oldAlloc.getLoc(); |
| mlir::Type varTy = oldAlloc.getInType(); |
| if (mlir::Operation *op = insertionPoint.tryGetOperation()) { |
| rewriter.setInsertionPointAfter(op); |
| } else { |
| mlir::Block *block = insertionPoint.tryGetBlock(); |
| assert(block && "There must be a valid insertion point"); |
| rewriter.setInsertionPointToStart(block); |
| } |
| |
| auto unpackName = [](std::optional<llvm::StringRef> opt) -> llvm::StringRef { |
| if (opt) |
| return *opt; |
| return {}; |
| }; |
| |
| llvm::StringRef uniqName = unpackName(oldAlloc.getUniqName()); |
| llvm::StringRef bindcName = unpackName(oldAlloc.getBindcName()); |
| return rewriter.create<fir::AllocaOp>(loc, varTy, uniqName, bindcName, |
| oldAlloc.getTypeparams(), |
| oldAlloc.getShape()); |
| } |
| |
| void AllocMemConversion::insertStackSaveRestore( |
| fir::AllocMemOp &oldAlloc, mlir::PatternRewriter &rewriter) const { |
| auto oldPoint = rewriter.saveInsertionPoint(); |
| auto mod = oldAlloc->getParentOfType<mlir::ModuleOp>(); |
| fir::FirOpBuilder builder{rewriter, mod}; |
| |
| mlir::func::FuncOp stackSaveFn = fir::factory::getLlvmStackSave(builder); |
| mlir::SymbolRefAttr stackSaveSym = |
| builder.getSymbolRefAttr(stackSaveFn.getName()); |
| |
| builder.setInsertionPoint(oldAlloc); |
| mlir::Value sp = |
| builder |
| .create<fir::CallOp>(oldAlloc.getLoc(), |
| stackSaveFn.getFunctionType().getResults(), |
| stackSaveSym, mlir::ValueRange{}) |
| .getResult(0); |
| |
| mlir::func::FuncOp stackRestoreFn = |
| fir::factory::getLlvmStackRestore(builder); |
| mlir::SymbolRefAttr stackRestoreSym = |
| builder.getSymbolRefAttr(stackRestoreFn.getName()); |
| |
| for (mlir::Operation *user : oldAlloc->getUsers()) { |
| if (mlir::isa<fir::FreeMemOp>(user)) { |
| builder.setInsertionPoint(user); |
| builder.create<fir::CallOp>(user->getLoc(), |
| stackRestoreFn.getFunctionType().getResults(), |
| stackRestoreSym, mlir::ValueRange{sp}); |
| } |
| } |
| |
| rewriter.restoreInsertionPoint(oldPoint); |
| } |
| |
| StackArraysPass::StackArraysPass(const StackArraysPass &pass) |
| : fir::impl::StackArraysBase<StackArraysPass>(pass) {} |
| |
| llvm::StringRef StackArraysPass::getDescription() const { |
| return "Move heap allocated array temporaries to the stack"; |
| } |
| |
| void StackArraysPass::runOnOperation() { |
| mlir::ModuleOp mod = getOperation(); |
| |
| mod.walk([this](mlir::func::FuncOp func) { runOnFunc(func); }); |
| } |
| |
| void StackArraysPass::runOnFunc(mlir::Operation *func) { |
| assert(mlir::isa<mlir::func::FuncOp>(func)); |
| |
| auto &analysis = getAnalysis<StackArraysAnalysisWrapper>(); |
| const StackArraysAnalysisWrapper::AllocMemMap *candidateOps = |
| analysis.getCandidateOps(func); |
| if (!candidateOps) { |
| signalPassFailure(); |
| return; |
| } |
| |
| if (candidateOps->empty()) |
| return; |
| runCount += candidateOps->size(); |
| |
| llvm::SmallVector<mlir::Operation *> opsToConvert; |
| opsToConvert.reserve(candidateOps->size()); |
| for (auto [op, _] : *candidateOps) |
| opsToConvert.push_back(op); |
| |
| mlir::MLIRContext &context = getContext(); |
| mlir::RewritePatternSet patterns(&context); |
| mlir::GreedyRewriteConfig config; |
| // prevent the pattern driver form merging blocks |
| config.enableRegionSimplification = false; |
| |
| patterns.insert<AllocMemConversion>(&context, *candidateOps); |
| if (mlir::failed(mlir::applyOpPatternsAndFold(opsToConvert, |
| std::move(patterns), config))) { |
| mlir::emitError(func->getLoc(), "error in stack arrays optimization\n"); |
| signalPassFailure(); |
| } |
| } |