blob: 87a71b6041dbd2741763b07fd54aa2c9b772689c [file] [log] [blame]
//===- LoopFusionUtils.h - Loop fusion utilities ----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This header file defines prototypes for various loop fusion utility
// methods: these are not passes by themselves but are used either by passes,
// optimization sequences, or in turn by other transformation utilities.
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_TRANSFORMS_LOOP_FUSION_UTILS_H
#define MLIR_TRANSFORMS_LOOP_FUSION_UTILS_H
#include "mlir/IR/Value.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
namespace mlir {
class AffineForOp;
struct ComputationSliceState;
class Operation;
// TODO: Extend this module to include utility functions for querying fusion
// cost/storage reduction, and for performing the loop fusion transformation.
struct FusionResult {
enum ResultEnum {
Success,
FailPrecondition, // Failed precondition for fusion. (e.g. same block).
FailBlockDependence, // Fusion would violate another dependence in block.
FailFusionDependence, // Fusion would reverse dependences between loops.
FailComputationSlice, // Unable to compute src loop computation slice.
FailIncorrectSlice, // Slice is computed, but it is incorrect.
} value;
FusionResult(ResultEnum v) : value(v) {}
};
/// Describes the fusion strategy to be used in the Affine loop fusion
/// utilities. Currently, it is used to specialized the loop fusion utilities
/// with the assumptions made in the AffineLoopFusion pass for producer-consumer
/// and sibling fusion, while sharing a single implementation. The latter
/// strategies are also limited to scenarios where a single memref is involved
/// in the producer-consume or sibling relationship between the candidate
/// loops. We use 'memref' to keep track of such a memref.
// TODO: Remove 'memref' when we support more generic scenarios.
// TODO: Generalize utilities so that producer-consumer and sibling fusion
// strategies can be used without the assumptions made in the AffineLoopFusion
// pass.
class FusionStrategy {
public:
enum StrategyEnum {
// Generic loop fusion: Arbitrary loops are considered for fusion. No
// assumptions about a specific fusion strategy from AffineLoopFusion pass
// are made.
// TODO: Generic fusion is not fully implemented by fusion utilities yet.
// It should only be used for testing.
Generic,
// Producer-consumer fusion: Only loops with a producer-consumer
// memref dependence are considered for fusion. Currently, assumptions from
// the producer-consumer fusion implementation in AffineLoopFusion pass are
// made. See pass for specific details.
ProducerConsumer,
// Sibling fusion: Only sibling loops with no producer-consumer memref
// dependences are considered for fusion. Memref reuse is taken into account
// for profitability. Currently, assumptions from the sibling fusion
// implementation in AffineLoopFusion pass are made. See pass for specific
// details.
Sibling
};
/// Construct a generic or producer-consumer fusion strategy.
FusionStrategy(StrategyEnum strategy) : strategy(strategy) {
assert(strategy != Sibling &&
"Sibling fusion strategy requires a specific memref");
}
/// Construct a sibling fusion strategy targeting 'memref'. This construct
/// should only be used for sibling fusion.
FusionStrategy(Value memref) : strategy(Sibling), memref(memref) {}
/// Returns the fusion strategy.
StrategyEnum getStrategy() const { return strategy; };
/// Returns the memref attached to this sibling fusion strategy.
Value getSiblingFusionMemRef() const {
assert(strategy == Sibling && "Memref is only valid for sibling fusion");
return memref;
}
private:
/// Fusion strategy.
StrategyEnum strategy;
/// Target memref for this fusion transformation. Only used for sibling
/// fusion.
Value memref;
};
/// Checks the feasibility of fusing the loop nest rooted at 'srcForOp' into the
/// loop nest rooted at 'dstForOp' at 'dstLoopDepth'. Returns FusionResult
/// 'Success' if fusion of the src/dst loop nests is feasible (i.e. they are
/// in the same block and dependences would not be violated). Otherwise
/// returns a FusionResult explaining why fusion is not feasible.
/// NOTE: This function is not feature complete and should only be used in
/// testing.
/// TODO: Update comments when this function is fully implemented.
FusionResult
canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp, unsigned dstLoopDepth,
ComputationSliceState *srcSlice,
FusionStrategy fusionStrategy = FusionStrategy::Generic);
/// Fuses 'srcForOp' into 'dstForOp' with destination loop block insertion
/// point and source slice loop bounds specified in 'srcSlice'.
/// `isInnermostSiblingInsertionFusion` enables cleanup of `srcForOp that is a
/// single-iteration reduction loop being sibling-fused into a 'dstForOp'.
void fuseLoops(AffineForOp srcForOp, AffineForOp dstForOp,
const ComputationSliceState &srcSlice,
bool isInnermostSiblingInsertionFusion = false);
/// LoopNestStats aggregates various per-loop statistics (eg. loop trip count
/// and operation count) for a loop nest up until (and including) the innermost
/// loop body.
struct LoopNestStats {
/// Map from AffineForOp to immediate child AffineForOps in its loop body.
DenseMap<Operation *, SmallVector<AffineForOp, 2>> loopMap;
/// Map from AffineForOp to count of operations in its loop body.
DenseMap<Operation *, uint64_t> opCountMap;
/// Map from AffineForOp to its constant trip count.
DenseMap<Operation *, uint64_t> tripCountMap;
};
/// Collect loop nest statistics (eg. loop trip count and operation count)
/// in 'stats' for loop nest rooted at 'forOp'. Returns true on success,
/// returns false otherwise.
// TODO: Consider moving this to LoopUtils.
bool getLoopNestStats(AffineForOp forOp, LoopNestStats *stats);
/// Computes the total cost of the loop nest rooted at 'forOp' using 'stats'.
/// Currently, the total cost is computed by counting the total operation
/// instance count (i.e. total number of operations in the loop body * loop
/// trip count) for the entire loop nest.
// TODO: Improve this cost model.
int64_t getComputeCost(AffineForOp forOp, LoopNestStats &stats);
/// Computes and returns in 'computeCost', the total compute cost of fusing the
/// 'slice' of the loop nest rooted at 'srcForOp' into 'dstForOp'. Currently,
/// the total cost is computed by counting the total operation instance count
/// (i.e. total number of operations in the loop body * loop trip count) for
/// the entire loop nest.
/// Returns true on success, failure otherwise (e.g. non-constant trip counts).
// TODO: Improve this cost model.
bool getFusionComputeCost(AffineForOp srcForOp, LoopNestStats &srcStats,
AffineForOp dstForOp, LoopNestStats &dstStats,
const ComputationSliceState &slice,
int64_t *computeCost);
/// Returns in 'producerConsumerMemrefs' the memrefs involved in a
/// producer-consumer dependence between write ops in 'srcOps' and read ops in
/// 'dstOps'.
void gatherProducerConsumerMemrefs(ArrayRef<Operation *> srcOps,
ArrayRef<Operation *> dstOps,
DenseSet<Value> &producerConsumerMemrefs);
} // end namespace mlir
#endif // MLIR_TRANSFORMS_LOOP_FUSION_UTILS_H