//===- DoConcurrentConversion.cpp -- map `DO CONCURRENT` to OpenMP loops --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "flang/Optimizer/Builder/DirectivesCommon.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/HLFIRTools.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/OpenMP/Passes.h"
#include "flang/Optimizer/OpenMP/Utils.h"
#include "flang/Support/OpenMP-utils.h"
#include "flang/Utils/OpenMP.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/ADT/SmallPtrSet.h"
namespace flangomp {
#define GEN_PASS_DEF_DOCONCURRENTCONVERSIONPASS
#include "flang/Optimizer/OpenMP/Passes.h.inc"
} // namespace flangomp
#define DEBUG_TYPE "do-concurrent-conversion"
#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE << "]: ")
namespace {
namespace looputils {
/// Stores info needed about the induction/iteration variable for each `do
/// concurrent` in a loop nest.
struct InductionVariableInfo {
  /// Builds the info for \p inductionVar by scanning the body of \p loop (see
  /// `populateInfo`).
  InductionVariableInfo(fir::DoConcurrentLoopOp loop,
                        mlir::Value inductionVar) {
    populateInfo(loop, inductionVar);
  }

  /// The operation allocating memory for iteration variable.
  mlir::Operation *iterVarMemDef = nullptr;

  /// The operation(s) updating the iteration variable with the current
  /// iteration number.
  llvm::SmallVector<mlir::Operation *, 2> indVarUpdateOps;

private:
  /// For the \p loop parameter, find the following:
  ///
  /// 1. The operation that declares its iteration variable or allocates memory
  /// for it. For example, given the following loop:
  /// ```
  ///   ...
  ///   %i:2 = hlfir.declare %0 {uniq_name = "_QFEi"} : ...
  ///   ...
  ///   fir.do_concurrent.loop (%ind_var) = (%lb) to (%ub) step (%s) {
  ///     %ind_var_conv = fir.convert %ind_var : (index) -> i32
  ///     fir.store %ind_var_conv to %i#1 : !fir.ref<i32>
  ///     ...
  ///   }
  /// ```
  ///
  /// This function sets the `iterVarMemDef` member to the `hlfir.declare` op
  /// for `%i`.
  ///
  /// 2. The operation(s) that update the loop's iteration variable from its
  /// induction variable. For the above example, the `indVarUpdateOps` is
  /// populated with the first 2 ops in the loop's body.
  ///
  /// Note: The current implementation is dependent on how flang emits loop
  /// bodies; which is sufficient for the current simple test/use cases. If this
  /// proves to be insufficient, this should be made more generic.
  void populateInfo(fir::DoConcurrentLoopOp loop, mlir::Value inductionVar) {
    mlir::Value result = nullptr;

    // Checks if a StoreOp is updating the memref of the loop's iteration
    // variable. On a match, the op(s) performing the update are recorded in
    // `indVarUpdateOps`.
    auto isStoringIV = [&](fir::StoreOp storeOp) {
      // Direct store into the IV memref.
      if (storeOp.getValue() == inductionVar) {
        indVarUpdateOps.push_back(storeOp);
        return true;
      }

      // Indirect store into the IV memref. The stored value may be a block
      // argument (for instance a different induction variable of the same
      // loop), in which case it has no defining op; `dyn_cast_if_present`
      // tolerates the resulting null pointer whereas `dyn_cast` does not.
      if (auto convertOp = mlir::dyn_cast_if_present<fir::ConvertOp>(
              storeOp.getValue().getDefiningOp())) {
        if (convertOp.getOperand() == inductionVar) {
          indVarUpdateOps.push_back(convertOp);
          indVarUpdateOps.push_back(storeOp);
          return true;
        }
      }

      return false;
    };

    // The first store of the induction variable identifies the memory the
    // iteration variable lives in.
    for (mlir::Operation &op : loop) {
      if (auto storeOp = mlir::dyn_cast<fir::StoreOp>(op))
        if (isStoringIV(storeOp)) {
          result = storeOp.getMemref();
          break;
        }
    }

    assert(result != nullptr && result.getDefiningOp() != nullptr);
    iterVarMemDef = result.getDefiningOp();
  }
};
using InductionVariableInfos = llvm::SmallVector<InductionVariableInfo>;
/// Gathers into \p liveIns the values that \p loop consumes but that are
/// defined outside of it. The list is filled in this order: the
/// (lower bound, upper bound, step) triple of every loop dimension, the values
/// used inside the loop region but defined above it, the loop's `local`
/// operands and finally its `reduce` operands.
void collectLoopLiveIns(fir::DoConcurrentLoopOp loop,
                        llvm::SmallVectorImpl<mlir::Value> &liveIns) {
  llvm::SmallDenseSet<mlir::Value> visitedValues;
  llvm::SmallPtrSet<mlir::Operation *, 8> visitedDefs;

  for (auto [lower, upper, stride] : llvm::zip_equal(
           loop.getLowerBound(), loop.getUpperBound(), loop.getStep())) {
    liveIns.push_back(lower);
    liveIns.push_back(upper);
    liveIns.push_back(stride);
  }

  // Records the value behind `use` unless the value itself, or another result
  // of the operation defining it, was recorded by an earlier invocation.
  auto recordUse = [&](mlir::OpOperand *use) {
    mlir::Value usedValue = use->get();
    const bool isNewValue = visitedValues.insert(usedValue).second;
    if (!isNewValue)
      return;

    // We want to collect ops corresponding to live-ins only once.
    if (mlir::Operation *def = usedValue.getDefiningOp()) {
      const bool isNewDef = visitedDefs.insert(def).second;
      if (!isNewDef)
        return;
    }

    liveIns.push_back(usedValue);
  };
  mlir::visitUsedValuesDefinedAbove(loop.getRegion(), recordUse);

  auto localVars = loop.getLocalVars();
  liveIns.append(localVars.begin(), localVars.end());

  auto reduceVars = loop.getReduceVars();
  liveIns.append(reduceVars.begin(), reduceVars.end());
}
/// Collects values that are local to a loop: "loop-local values". A loop-local
/// value is one that is used exclusively inside the loop but allocated outside
/// of it. This usually corresponds to temporary values that are used inside the
/// loop body, for example for initializing other variables.
///
/// See `flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90` for an
/// example of why we need this.
///
/// \param [in] loop - the loop within which the function searches for values
/// used exclusively inside.
///
/// \param [out] locals - the list of loop-local values detected for \p loop.
void collectLoopLocalValues(fir::DoConcurrentLoopOp loop,
                            llvm::SetVector<mlir::Value> &locals) {
  // True when each user of `candidate` is nested within `loop`.
  auto isUsedOnlyInsideLoop = [&](mlir::Value candidate) {
    return llvm::all_of(candidate.getUsers(), [&](mlir::Operation *user) {
      return loop->isAncestor(user);
    });
  };

  loop.walk([&](mlir::Operation *nestedOp) {
    for (mlir::Value candidate : nestedOp->getOperands()) {
      if (locals.contains(candidate))
        continue;

      // Only results of `fir.alloca` ops are considered.
      mlir::Operation *allocOp = candidate.getDefiningOp();
      if (!mlir::isa_and_present<fir::AllocaOp>(allocOp))
        continue;

      // Values defined inside the loop are not interesting since they do not
      // need to be localized.
      if (loop->isAncestor(allocOp))
        continue;

      if (isUsedOnlyInsideLoop(candidate))
        locals.insert(candidate);
    }
  });
}
/// Localizes the "loop-local" value \p local (see collectLoopLocalValues)
/// within the parallel region its loop maps to, by moving the operation
/// defining \p local to the very beginning of \p allocRegion.
///
/// \param local - the value used exclusively within a loop's scope.
///
/// \param allocRegion - the parallel region that receives \p local's
/// allocation.
///
/// \param rewriter - builder used for updating \p allocRegion.
static void localizeLoopLocalValue(mlir::Value local, mlir::Region &allocRegion,
                                   mlir::ConversionPatternRewriter &rewriter) {
  mlir::Operation *allocOp = local.getDefiningOp();
  mlir::Operation &firstOpInRegion = allocRegion.front().front();
  rewriter.moveOpBefore(allocOp, &firstOpInRegion);
}
} // namespace looputils
class DoConcurrentConversion
: public mlir::OpConversionPattern<fir::DoConcurrentOp> {
private:
/// Shape information of a live-in value, captured on the host so that an
/// equivalent shape can be re-created for the value's declaration inside the
/// target region. Both vectors stay empty when the live-in is not defined by
/// an `hlfir.declare` op or when that op carries no shape.
struct TargetDeclareShapeCreationInfo {
  // Note: We use `std::vector` (rather than `llvm::SmallVector` as usual) to
  // interface more easily `ShapeShiftOp::getOrigins()` which returns
  // `std::vector`.
  /// Origins of the `fir.shape_shift` op (empty for a plain `fir.shape`).
  std::vector<mlir::Value> startIndices;
  /// Extents of the `fir.shape`/`fir.shape_shift` op.
  std::vector<mlir::Value> extents;

  /// Extracts the shape info of \p liveIn. Emits a TODO diagnostic when the
  /// shape is defined by anything other than `fir.shape` or `fir.shape_shift`.
  TargetDeclareShapeCreationInfo(mlir::Value liveIn) {
    mlir::Value shape = nullptr;
    mlir::Operation *liveInDefiningOp = liveIn.getDefiningOp();
    auto declareOp =
        mlir::dyn_cast_if_present<hlfir::DeclareOp>(liveInDefiningOp);

    if (declareOp != nullptr)
      shape = declareOp.getShape();

    if (!shape)
      return;

    auto shapeOp =
        mlir::dyn_cast_if_present<fir::ShapeOp>(shape.getDefiningOp());
    auto shapeShiftOp =
        mlir::dyn_cast_if_present<fir::ShapeShiftOp>(shape.getDefiningOp());

    // The two literals are concatenated by the compiler, so the first one must
    // end with a space to keep the words of the message separated.
    if (!shapeOp && !shapeShiftOp)
      TODO(liveIn.getLoc(),
           "Shapes not defined by `fir.shape` or `fir.shape_shift` op's are "
           "not supported yet.");

    if (shapeShiftOp != nullptr)
      startIndices = shapeShiftOp.getOrigins();

    extents = shapeOp != nullptr
                  ? std::vector<mlir::Value>(shapeOp.getExtents().begin(),
                                             shapeOp.getExtents().end())
                  : shapeShiftOp.getExtents();
  }

  /// True when extents were found, i.e. the live-in has a shape.
  bool isShapedValue() const { return !extents.empty(); }
  /// True when start indices were found, i.e. the shape is a `fir.shape_shift`.
  bool isShapeShiftedValue() const { return !startIndices.empty(); }
};
using LiveInShapeInfoMap =
llvm::DenseMap<mlir::Value, TargetDeclareShapeCreationInfo>;
public:
using mlir::OpConversionPattern<fir::DoConcurrentOp>::OpConversionPattern;
/// Creates the conversion pattern.
///
/// \param context - MLIR context forwarded to `OpConversionPattern`.
/// \param mapToDevice - stored in the `mapToDevice` member; selects the
/// device-mapping code paths of the pattern.
/// \param concurrentLoopsToSkip - set (held by reference) into which the
/// pattern inserts the `fir.do_concurrent` ops found inside a generated loop
/// nest.
/// \param moduleSymbolTable - symbol table (held by reference) used to look up
/// localizers/reducers and to register the OpenMP ops generated from them.
DoConcurrentConversion(
mlir::MLIRContext *context, bool mapToDevice,
llvm::DenseSet<fir::DoConcurrentOp> &concurrentLoopsToSkip,
mlir::SymbolTable &moduleSymbolTable)
: OpConversionPattern(context), mapToDevice(mapToDevice),
concurrentLoopsToSkip(concurrentLoopsToSkip),
moduleSymbolTable(moduleSymbolTable) {}
/// Rewrites \p doLoop into OpenMP constructs and erases it.
///
/// The constructs are generated outermost-first; each `gen*` helper leaves the
/// rewriter's insertion point inside the op it created, so the order of the
/// calls below determines the nesting. When `mapToDevice` is set, target,
/// teams and distribute ops are generated around the parallel/workshare-loop
/// ops that are generated in both modes.
///
/// \param doLoop - the `fir.do_concurrent` wrapper op being converted.
/// \param adaptor - unused.
/// \param rewriter - rewriter used for all IR modifications.
///
/// \return always `mlir::success()`.
mlir::LogicalResult
matchAndRewrite(fir::DoConcurrentOp doLoop, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const override {
looputils::InductionVariableInfos ivInfos;
// The actual loop op is the terminator of the wrapper's last block.
auto loop = mlir::cast<fir::DoConcurrentLoopOp>(
doLoop.getRegion().back().getTerminator());
auto indVars = loop.getLoopInductionVars();
assert(indVars.has_value());
// Collect, for every induction variable, the op defining the iteration
// variable's memory and the ops updating it.
for (mlir::Value indVar : *indVars)
ivInfos.emplace_back(loop, indVar);
llvm::SmallVector<mlir::Value> loopNestLiveIns;
looputils::collectLoopLiveIns(loop, loopNestLiveIns);
assert(!loopNestLiveIns.empty());
llvm::SetVector<mlir::Value> locals;
looputils::collectLoopLocalValues(loop, locals);
// We do not want to map "loop-local" values to the device through
// `omp.map.info` ops. Therefore, we remove them from the list of live-ins.
loopNestLiveIns.erase(llvm::remove_if(loopNestLiveIns,
[&](mlir::Value liveIn) {
return locals.contains(liveIn);
}),
loopNestLiveIns.end());
mlir::omp::TargetOp targetOp;
mlir::omp::LoopNestOperands loopNestClauseOps;
mlir::IRMapping mapper;
if (mapToDevice) {
mlir::ModuleOp module = doLoop->getParentOfType<mlir::ModuleOp>();
bool isTargetDevice =
llvm::cast<mlir::omp::OffloadModuleInterface>(*module)
.getIsTargetDevice();
mlir::omp::TargetOperands targetClauseOps;
// The loop bounds are only recorded as host-evaluated variables when the
// module is not being compiled for the target device.
genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, loopNestClauseOps,
isTargetDevice ? nullptr : &targetClauseOps);
LiveInShapeInfoMap liveInShapeInfoMap;
fir::FirOpBuilder builder(
rewriter,
fir::getKindMapping(doLoop->getParentOfType<mlir::ModuleOp>()));
// Create one map-info op per live-in and remember the live-in's shape so
// that it can be re-created inside the target region.
for (mlir::Value liveIn : loopNestLiveIns) {
targetClauseOps.mapVars.push_back(
genMapInfoOpForLiveIn(builder, liveIn));
liveInShapeInfoMap.insert(
{liveIn, TargetDeclareShapeCreationInfo(liveIn)});
}
targetOp =
genTargetOp(doLoop.getLoc(), rewriter, mapper, loopNestLiveIns,
targetClauseOps, loopNestClauseOps, liveInShapeInfoMap);
genTeamsOp(rewriter, loop, mapper);
}
mlir::omp::ParallelOp parallelOp =
genParallelOp(rewriter, loop, ivInfos, mapper);
// Only set as composite when part of `distribute parallel do`.
parallelOp.setComposite(mapToDevice);
// In device mode the loop-nest clause operands were already generated above.
if (!mapToDevice)
genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, loopNestClauseOps);
// Move the allocations of loop-local values into the parallel region.
for (mlir::Value local : locals)
looputils::localizeLoopLocalValue(local, parallelOp.getRegion(),
rewriter);
if (mapToDevice)
genDistributeOp(doLoop.getLoc(), rewriter).setComposite(/*val=*/true);
auto [loopNestOp, wsLoopOp] =
genWsLoopOp(rewriter, loop, mapper, loopNestClauseOps,
/*isComposite=*/mapToDevice);
// `local` region arguments are transferred/cloned from the `do concurrent`
// loop to the loopnest op when the region is cloned above. Instead, these
// region arguments should be on the workshare loop's region.
if (mapToDevice) {
// Device mode: `local` arguments are replaced by the parallel op's region
// arguments and `reduce` arguments by the workshare loop's.
for (auto [parallelArg, loopNestArg] : llvm::zip_equal(
parallelOp.getRegion().getArguments(),
loopNestOp.getRegion().getArguments().slice(
loop.getLocalOperandsStart(), loop.getNumLocalOperands())))
rewriter.replaceAllUsesWith(loopNestArg, parallelArg);
for (auto [wsloopArg, loopNestArg] : llvm::zip_equal(
wsLoopOp.getRegion().getArguments(),
loopNestOp.getRegion().getArguments().slice(
loop.getReduceOperandsStart(), loop.getNumReduceOperands())))
rewriter.replaceAllUsesWith(loopNestArg, wsloopArg);
} else {
// Host mode: every loop-nest argument following the induction variables is
// replaced by the corresponding workshare loop region argument.
for (auto [wsloopArg, loopNestArg] :
llvm::zip_equal(wsLoopOp.getRegion().getArguments(),
loopNestOp.getRegion().getArguments().drop_front(
loopNestClauseOps.loopLowerBounds.size())))
rewriter.replaceAllUsesWith(loopNestArg, wsloopArg);
}
// Drop the now unused `local`/`reduce` arguments from the loop nest. The same
// index (the first one after the induction variables) is erased on every
// iteration because erasing shifts the remaining arguments down.
for (unsigned i = 0;
i < loop.getLocalVars().size() + loop.getReduceVars().size(); ++i)
loopNestOp.getRegion().eraseArgument(
loopNestClauseOps.loopLowerBounds.size());
rewriter.setInsertionPoint(doLoop);
fir::FirOpBuilder builder(
rewriter,
fir::getKindMapping(doLoop->getParentOfType<mlir::ModuleOp>()));
// Collect iteration variable(s) allocations so that we can move them
// outside the `fir.do_concurrent` wrapper (before erasing it).
llvm::SmallVector<mlir::Operation *> opsToMove;
for (mlir::Operation &op : llvm::drop_end(doLoop))
opsToMove.push_back(&op);
mlir::Block *allocBlock = builder.getAllocaBlock();
// Each op is inserted at the beginning of the alloca block; iterating in
// reverse therefore preserves the original relative order of the ops.
for (mlir::Operation *op : llvm::reverse(opsToMove)) {
rewriter.moveOpBefore(op, allocBlock, allocBlock->begin());
}
// Mark `unordered` loops that are not perfectly nested to be skipped from
// the legality check of the `ConversionTarget` since we are not interested
// in mapping them to OpenMP.
loopNestOp->walk([&](fir::DoConcurrentOp doLoop) {
concurrentLoopsToSkip.insert(doLoop);
});
rewriter.eraseOp(doLoop);
return mlir::success();
}
private:
/// Creates the `omp.parallel` op for \p loop together with its entry block and
/// terminator, and clones the induction variable allocations described by
/// \p ivInfos into it. When mapping to the device, the loop's `local`
/// specifiers are attached to the parallel op as privatizers.
///
/// On return, the rewriter's insertion point is located before the parallel
/// op's terminator.
mlir::omp::ParallelOp
genParallelOp(mlir::ConversionPatternRewriter &rewriter,
              fir::DoConcurrentLoopOp loop,
              looputils::InductionVariableInfos &ivInfos,
              mlir::IRMapping &mapper) const {
  const mlir::Location loopLoc = loop.getLoc();

  mlir::omp::ParallelOperands clauses;
  if (mapToDevice)
    genPrivatizers(rewriter, mapper, loop, clauses);

  auto parallelOp = mlir::omp::ParallelOp::create(rewriter, loopLoc, clauses);

  // The entry block gets one argument per privatized variable.
  Fortran::common::openmp::EntryBlockArgs entryArgs;
  entryArgs.priv.vars = clauses.privateVars;
  Fortran::common::openmp::genEntryBlock(rewriter, entryArgs,
                                         parallelOp.getRegion());

  // Everything generated from here on is emitted ahead of the terminator.
  auto terminator = mlir::omp::TerminatorOp::create(rewriter, loopLoc);
  rewriter.setInsertionPoint(terminator);

  genLoopNestIndVarAllocs(rewriter, ivInfos, mapper);
  return parallelOp;
}
/// Clones, at the rewriter's current insertion point, the memory definition of
/// the iteration variable of every entry in \p ivInfos.
void genLoopNestIndVarAllocs(mlir::ConversionPatternRewriter &rewriter,
                             looputils::InductionVariableInfos &ivInfos,
                             mlir::IRMapping &mapper) const {
  for (looputils::InductionVariableInfo &ivInfo : ivInfos) {
    mlir::Operation *memDef = ivInfo.iterVarMemDef;
    genInductionVariableAlloc(rewriter, memDef, mapper);
  }
}
/// Clones \p indVarMemDef, preceded by the operations defining its operands,
/// at the rewriter's current insertion point. The clones are recorded in
/// \p mapper.
///
/// Operands that have no defining operation (i.e. block arguments) are
/// skipped since there is nothing to clone for them.
///
/// \return the clone of \p indVarMemDef.
mlir::Operation *
genInductionVariableAlloc(mlir::ConversionPatternRewriter &rewriter,
                          mlir::Operation *indVarMemDef,
                          mlir::IRMapping &mapper) const {
  assert(
      indVarMemDef != nullptr &&
      "Induction variable memdef is expected to have a defining operation.");

  // A set vector keeps the ops unique while preserving insertion order:
  // operand definitions first, the memdef itself last.
  llvm::SmallSetVector<mlir::Operation *, 2> indVarDeclareAndAlloc;
  for (mlir::Value operand : indVarMemDef->getOperands())
    if (mlir::Operation *definingOp = operand.getDefiningOp())
      indVarDeclareAndAlloc.insert(definingOp);
  indVarDeclareAndAlloc.insert(indVarMemDef);

  // `indVarMemDef` is always the last element, hence `result` ends up holding
  // its clone.
  mlir::Operation *result = nullptr;
  for (mlir::Operation *opToClone : indVarDeclareAndAlloc)
    result = rewriter.clone(*opToClone, mapper);

  return result;
}
/// Populates the bounds, steps and inclusiveness of \p loopNestClauseOps from
/// \p loop.
///
/// \param loc - unused.
/// \param rewriter - used to create the `loopInclusive` unit attribute.
/// \param loop - the loop whose lower bounds, upper bounds and steps are read.
/// \param [out] loopNestClauseOps - receives one lower bound, upper bound and
/// step per loop dimension; must not contain any lower bounds yet.
/// \param [out] targetClauseOps - when non-null, every bound/step is also
/// appended to its `hostEvalVars`.
void genLoopNestClauseOps(
    mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
    fir::DoConcurrentLoopOp loop,
    mlir::omp::LoopNestOperands &loopNestClauseOps,
    mlir::omp::TargetOperands *targetClauseOps = nullptr) const {
  assert(loopNestClauseOps.loopLowerBounds.empty() &&
         "Loop nest bounds were already emitted!");

  // The bound value itself is recorded rather than the first result of its
  // defining op: the latter does not exist for block arguments and is a
  // different value whenever the bound is not result #0 of its defining op.
  auto hostEvalCapture = [&](mlir::Value var,
                             llvm::SmallVectorImpl<mlir::Value> &bounds) {
    bounds.push_back(var);

    // Ensure that loop-nest bounds are evaluated in the host and forwarded to
    // the nested omp constructs when we map to the device.
    if (targetClauseOps)
      targetClauseOps->hostEvalVars.push_back(var);
  };

  for (auto [lb, ub, st] : llvm::zip_equal(
           loop.getLowerBound(), loop.getUpperBound(), loop.getStep())) {
    hostEvalCapture(lb, loopNestClauseOps.loopLowerBounds);
    hostEvalCapture(ub, loopNestClauseOps.loopUpperBounds);
    hostEvalCapture(st, loopNestClauseOps.loopSteps);
  }

  loopNestClauseOps.loopInclusive = rewriter.getUnitAttr();
}
/// Creates the `omp.wsloop` op for \p loop and, nested inside it, the
/// `omp.loop_nest` op holding a clone of the loop's body.
///
/// Reductions are always attached to the workshare loop; privatizers only
/// when not mapping to the device.
///
/// \param clauseOps - bounds/steps used to create the loop nest op.
/// \param isComposite - value of the workshare loop's composite flag.
///
/// \return the created (loop nest, workshare loop) pair. The rewriter's
/// insertion point is left after the yield op appended to the loop nest body.
std::pair<mlir::omp::LoopNestOp, mlir::omp::WsloopOp>
genWsLoopOp(mlir::ConversionPatternRewriter &rewriter,
            fir::DoConcurrentLoopOp loop, mlir::IRMapping &mapper,
            const mlir::omp::LoopNestOperands &clauseOps,
            bool isComposite) const {
  mlir::omp::WsloopOperands wsClauses;
  if (!mapToDevice)
    genPrivatizers(rewriter, mapper, loop, wsClauses);
  genReductions(rewriter, mapper, loop, wsClauses);

  const mlir::Location loopLoc = loop.getLoc();
  auto wsloopOp = mlir::omp::WsloopOp::create(rewriter, loopLoc, wsClauses);
  wsloopOp.setComposite(isComposite);

  // Entry block arguments: privatized variables and reduction variables.
  Fortran::common::openmp::EntryBlockArgs entryArgs;
  entryArgs.priv.vars = wsClauses.privateVars;
  entryArgs.reduction.vars = wsClauses.reductionVars;
  Fortran::common::openmp::genEntryBlock(rewriter, entryArgs,
                                         wsloopOp.getRegion());

  auto loopNestOp = mlir::omp::LoopNestOp::create(rewriter, loopLoc, clauseOps);
  mlir::Region &nestBody = loopNestOp.getRegion();

  // Clone the loop's body inside the loop nest construct using the
  // mapped values.
  rewriter.cloneRegionBefore(loop.getRegion(), nestBody, nestBody.begin(),
                             mapper);

  // Terminate the cloned body with an OpenMP yield.
  rewriter.setInsertionPointToEnd(&nestBody.back());
  mlir::omp::YieldOp::create(rewriter, loopLoc);

  return {loopNestOp, wsloopOp};
}
/// Generates the implicit map bounds ops for \p liveIn and stores them in
/// \p boundsOps (overwriting its previous content).
///
/// \param builder - builder used to emit the ops.
/// \param liveIn - the value whose extended value provides the bounds.
/// \param rawAddr - address used as the data operand base; its location is
/// used for all generated ops.
/// \param [out] boundsOps - receives the generated bounds.
void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value liveIn,
                  mlir::Value rawAddr,
                  llvm::SmallVectorImpl<mlir::Value> &boundsOps) const {
  const mlir::Location addrLoc = rawAddr.getLoc();

  auto translated = hlfir::translateToExtendedValue(
      addrLoc, builder, hlfir::Entity{liveIn}, /*contiguousHint=*/true);
  fir::ExtendedValue extVal = translated.first;

  fir::factory::AddrAndBoundsInfo baseInfo =
      fir::factory::getDataOperandBaseAddr(builder, rawAddr,
                                           /*isOptional=*/false, addrLoc);

  boundsOps = fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
                                                 mlir::omp::MapBoundsType>(
      builder, baseInfo, extVal,
      /*dataExvIsAssumedSize=*/false, addrLoc);
}
/// Creates the map-info op that maps \p liveIn.
///
/// If \p liveIn is defined by an `hlfir.declare` op, the declare's original
/// base and unique name are used for the mapping. Values whose type is not
/// pointer-like are first stored into a temporary, which is mapped instead.
/// Trivial and character element types are captured by copy; other types are
/// captured by reference and, unless they are the builtin C pointer type,
/// mapped `to` and `from`.
mlir::omp::MapInfoOp genMapInfoOpForLiveIn(fir::FirOpBuilder &builder,
                                           mlir::Value liveIn) const {
  mlir::Value rawAddr = liveIn;
  llvm::StringRef name;

  mlir::Operation *liveInDefiningOp = liveIn.getDefiningOp();
  auto declareOp =
      mlir::dyn_cast_if_present<hlfir::DeclareOp>(liveInDefiningOp);

  if (declareOp != nullptr) {
    // Use the raw address to avoid unboxing `fir.box` values whenever
    // possible. Put differently, if we have access to the direct value memory
    // reference/address, we use it.
    rawAddr = declareOp.getOriginalBase();
    name = declareOp.getUniqName();
  }

  if (!llvm::isa<mlir::omp::PointerLikeType>(rawAddr.getType())) {
    mlir::OpBuilder::InsertionGuard guard(builder);
    // `liveIn` may be a block argument, in which case there is no defining op
    // to insert after. `setInsertionPointAfterValue` covers both cases: it
    // inserts after the defining op, or at the start of the argument's block.
    builder.setInsertionPointAfterValue(liveIn);
    auto copyVal = builder.createTemporary(liveIn.getLoc(), liveIn.getType());
    builder.createStoreWithConvert(copyVal.getLoc(), liveIn, copyVal);
    rawAddr = copyVal;
  }

  mlir::Type liveInType = liveIn.getType();
  mlir::Type eleType = liveInType;
  if (auto refType = mlir::dyn_cast<fir::ReferenceType>(liveInType))
    eleType = refType.getElementType();

  mlir::omp::ClauseMapFlags mapFlag = mlir::omp::ClauseMapFlags::implicit;
  mlir::omp::VariableCaptureKind captureKind =
      mlir::omp::VariableCaptureKind::ByRef;

  if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) {
    captureKind = mlir::omp::VariableCaptureKind::ByCopy;
  } else if (!fir::isa_builtin_cptr_type(eleType)) {
    mapFlag |= mlir::omp::ClauseMapFlags::to;
    mapFlag |= mlir::omp::ClauseMapFlags::from;
  }

  llvm::SmallVector<mlir::Value> boundsOps;
  genBoundsOps(builder, liveIn, rawAddr, boundsOps);

  return Fortran::utils::openmp::createMapInfoOp(
      builder, liveIn.getLoc(), rawAddr,
      /*varPtrPtr=*/{}, name.str(), boundsOps,
      /*members=*/{},
      /*membersIndex=*/mlir::ArrayAttr{}, mapFlag, captureKind,
      rawAddr.getType());
}
/// Creates the `omp.target` op together with its entry block, re-declares the
/// mapped live-ins inside of it and records the host-to-device value mapping
/// in \p mapper.
///
/// \param loc - location of the target op.
/// \param rewriter - rewriter used to create the ops.
/// \param [in,out] mapper - receives the mapping from host values (live-ins
/// and host-evaluated values) to their counterparts in the target region.
/// \param mappedVars - the live-ins; must be parallel to `clauseOps.mapVars`.
/// \param clauseOps - clause operands of the target op.
/// \param [in,out] loopNestClauseOps - its bounds and steps are replaced by
/// the values they are mapped to.
/// \param liveInShapeInfoMap - shape info for every entry of \p mappedVars.
///
/// \return the created op. The rewriter's insertion point is left before the
/// target op's terminator.
mlir::omp::TargetOp
genTargetOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
            mlir::IRMapping &mapper, llvm::ArrayRef<mlir::Value> mappedVars,
            mlir::omp::TargetOperands &clauseOps,
            mlir::omp::LoopNestOperands &loopNestClauseOps,
            const LiveInShapeInfoMap &liveInShapeInfoMap) const {
  auto targetOp = mlir::omp::TargetOp::create(rewriter, loc, clauseOps);
  auto argIface = llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*targetOp);

  mlir::Region &region = targetOp.getRegion();

  // The entry block takes one argument per host-evaluated value followed by
  // one argument per mapped value.
  llvm::SmallVector<mlir::Type> regionArgTypes;
  llvm::SmallVector<mlir::Location> regionArgLocs;

  for (auto var : llvm::concat<const mlir::Value>(clauseOps.hostEvalVars,
                                                  clauseOps.mapVars)) {
    regionArgTypes.push_back(var.getType());
    regionArgLocs.push_back(var.getLoc());
  }

  rewriter.createBlock(&region, {}, regionArgTypes, regionArgLocs);
  fir::FirOpBuilder builder(
      rewriter,
      fir::getKindMapping(targetOp->getParentOfType<mlir::ModuleOp>()));

  // Within the loop, it is possible that we discover other values that need
  // to be mapped to the target region (the shape info values for arrays, for
  // example). Therefore, the map block args might be extended and resized.
  // Hence, we invoke `argIface.getMapBlockArgs()` every iteration to make
  // sure we access the proper vector of data.
  int idx = 0;
  for (auto [mapInfoOp, mappedVar] :
       llvm::zip_equal(clauseOps.mapVars, mappedVars)) {
    auto miOp = mlir::cast<mlir::omp::MapInfoOp>(mapInfoOp.getDefiningOp());
    hlfir::DeclareOp liveInDeclare =
        genLiveInDeclare(builder, targetOp, argIface.getMapBlockArgs()[idx],
                         miOp, liveInShapeInfoMap.at(mappedVar));
    ++idx;

    // If `mappedVar.getDefiningOp()` is a `fir::BoxAddrOp`, we probably
    // need to "unpack" the box by getting the defining op of its value.
    // However, we did not hit this case in reality yet so leaving it as a
    // todo for now.
    //
    // Note: `mappedVar` may be a block argument without a defining op, so the
    // null-tolerant `isa_and_present` has to be used here.
    if (mlir::isa_and_present<fir::BoxAddrOp>(mappedVar.getDefiningOp()))
      TODO(mappedVar.getLoc(),
           "Mapped variabled defined by `BoxAddrOp` are not supported yet");

    // Host values that are not pointer-like were mapped through a temporary
    // (see `genMapInfoOpForLiveIn`), so their device counterpart has to be
    // loaded first.
    auto mapHostValueToDevice = [&](mlir::Value hostValue,
                                    mlir::Value deviceValue) {
      if (!llvm::isa<mlir::omp::PointerLikeType>(hostValue.getType()))
        mapper.map(hostValue,
                   builder.loadIfRef(hostValue.getLoc(), deviceValue));
      else
        mapper.map(hostValue, deviceValue);
    };

    mapHostValueToDevice(mappedVar, liveInDeclare.getOriginalBase());

    if (auto origDeclareOp = mlir::dyn_cast_if_present<hlfir::DeclareOp>(
            mappedVar.getDefiningOp()))
      mapHostValueToDevice(origDeclareOp.getBase(), liveInDeclare.getBase());
  }

  for (auto [arg, hostEval] : llvm::zip_equal(argIface.getHostEvalBlockArgs(),
                                              clauseOps.hostEvalVars))
    mapper.map(hostEval, arg);

  // Make the loop nest use the values visible inside the target region.
  for (unsigned i = 0; i < loopNestClauseOps.loopLowerBounds.size(); ++i) {
    loopNestClauseOps.loopLowerBounds[i] =
        mapper.lookup(loopNestClauseOps.loopLowerBounds[i]);
    loopNestClauseOps.loopUpperBounds[i] =
        mapper.lookup(loopNestClauseOps.loopUpperBounds[i]);
    loopNestClauseOps.loopSteps[i] =
        mapper.lookup(loopNestClauseOps.loopSteps[i]);
  }

  // Check if cloning the bounds introduced any dependency on the outer
  // region. If so, then either clone them as well if they are
  // MemoryEffectFree, or else copy them to a new temporary and add them to
  // the map and block_argument lists and replace their uses with the new
  // temporary.
  Fortran::utils::openmp::cloneOrMapRegionOutsiders(builder, targetOp);
  rewriter.setInsertionPoint(
      mlir::omp::TerminatorOp::create(rewriter, targetOp.getLoc()));

  return targetOp;
}
/// Creates, inside \p targetOp, the `hlfir.declare` op for the live-in that is
/// passed into the target region as block argument \p liveInArg.
///
/// The declaration is named after \p liveInMapInfoOp (empty name if the map
/// info has none). For shaped live-ins, every extent (and start index, for
/// shape-shifted values) recorded in \p targetShapeCreationInfo is mapped to
/// the target region as a temporary and a matching `fir.shape` /
/// `fir.shape_shift` op is created for the declaration.
hlfir::DeclareOp genLiveInDeclare(
    fir::FirOpBuilder &builder, mlir::omp::TargetOp targetOp,
    mlir::Value liveInArg, mlir::omp::MapInfoOp liveInMapInfoOp,
    const TargetDeclareShapeCreationInfo &targetShapeCreationInfo) const {
  const mlir::Location argLoc = liveInArg.getLoc();

  mlir::Type liveInType = liveInArg.getType();
  if (fir::isa_ref_type(liveInType))
    liveInType = fir::unwrapRefType(liveInType);

  std::string liveInName;
  if (auto mapName = liveInMapInfoOp.getName())
    liveInName = mapName->str();

  // Maps one shape component (`hostValue`) of dimension `dim` to the target
  // region; `kind` is the infix naming the component in the temporary's name.
  auto mapShapeComponent = [&](mlir::Value hostValue, const char *kind,
                               size_t dim) -> mlir::Value {
    return Fortran::utils::openmp::mapTemporaryValue(
        builder, targetOp, hostValue,
        liveInName + kind + std::to_string(dim));
  };

  auto buildShape = [&]() -> mlir::Value {
    if (!targetShapeCreationInfo.isShapedValue())
      return {};

    // Plain shape: extents only.
    if (!targetShapeCreationInfo.isShapeShiftedValue()) {
      llvm::SmallVector<mlir::Value> extentOperands;
      size_t dim = 0;
      for (auto hostExtent : targetShapeCreationInfo.extents) {
        extentOperands.push_back(
            mapShapeComponent(hostExtent, ".extent.dim", dim));
        ++dim;
      }
      return fir::ShapeOp::create(builder, argLoc, extentOperands);
    }

    // Shifted shape: (start index, extent) pairs, one pair per dimension.
    llvm::SmallVector<mlir::Value> pairOperands;
    size_t dim = 0;
    for (auto [hostStart, hostExtent] :
         llvm::zip_equal(targetShapeCreationInfo.startIndices,
                         targetShapeCreationInfo.extents)) {
      pairOperands.push_back(
          mapShapeComponent(hostStart, ".start_idx.dim", dim));
      pairOperands.push_back(
          mapShapeComponent(hostExtent, ".extent.dim", dim));
      ++dim;
    }
    auto shapeShiftType = fir::ShapeShiftType::get(builder.getContext(),
                                                   pairOperands.size() / 2);
    return fir::ShapeShiftOp::create(builder, argLoc, shapeShiftType,
                                     pairOperands);
  };

  mlir::Value shape = buildShape();
  return hlfir::DeclareOp::create(builder, argLoc, liveInArg, liveInName,
                                  shape);
}
/// Creates the `omp.teams` op for \p loop, carrying the loop's reductions, and
/// maps every `reduce` operand of the loop to the matching argument of the
/// teams region in \p mapper.
///
/// On return, the rewriter's insertion point is located before the teams op's
/// terminator.
mlir::omp::TeamsOp genTeamsOp(mlir::ConversionPatternRewriter &rewriter,
                              fir::DoConcurrentLoopOp loop,
                              mlir::IRMapping &mapper) const {
  const mlir::Location loopLoc = loop.getLoc();

  mlir::omp::TeamsOperands clauses;
  genReductions(rewriter, mapper, loop, clauses);
  auto teamsOp = mlir::omp::TeamsOp::create(rewriter, loopLoc, clauses);

  // The entry block gets one argument per reduction variable.
  Fortran::common::openmp::EntryBlockArgs entryArgs;
  entryArgs.reduction.vars = clauses.reductionVars;
  Fortran::common::openmp::genEntryBlock(rewriter, entryArgs,
                                         teamsOp.getRegion());

  auto terminator = mlir::omp::TerminatorOp::create(rewriter, loopLoc);
  rewriter.setInsertionPoint(terminator);

  for (auto [reduceVar, regionArg] : llvm::zip_equal(
           loop.getReduceVars(), teamsOp.getRegion().getArguments()))
    mapper.map(reduceVar, regionArg);

  return teamsOp;
}
/// Creates a clause-less `omp.distribute` op at \p loc and gives its region an
/// empty block, in which the rewriter's insertion point is left.
mlir::omp::DistributeOp
genDistributeOp(mlir::Location loc,
                mlir::ConversionPatternRewriter &rewriter) const {
  mlir::omp::DistributeOperands noClauses;
  auto distributeOp =
      mlir::omp::DistributeOp::create(rewriter, loc, noClauses);

  mlir::Region &body = distributeOp.getRegion();
  rewriter.createBlock(&body);
  return distributeOp;
}
/// Clones the content of \p firRegion into \p ompRegion and replaces the
/// cloned `fir.yield` terminator with an `omp.yield` that has the same
/// operands. Does nothing when \p firRegion is empty.
void cloneFIRRegionToOMP(mlir::ConversionPatternRewriter &rewriter,
                         mlir::Region &firRegion,
                         mlir::Region &ompRegion) const {
  if (firRegion.empty())
    return;

  rewriter.cloneRegionBefore(firRegion, ompRegion, ompRegion.begin());

  mlir::Operation *clonedTerminator = ompRegion.back().getTerminator();
  auto firYield = mlir::cast<fir::YieldOp>(clonedTerminator);

  rewriter.setInsertionPoint(firYield);
  mlir::omp::YieldOp::create(rewriter, firYield.getLoc(),
                             firYield.getOperands());
  rewriter.eraseOp(firYield);
}
/// Generates OpenMP privatizers by cloning the bodies of FIR localizers.
///
/// For each `local` operand of \p loop, an `omp.private` op named
/// `<localizer symbol>.omp` is created right after the FIR localizer and
/// registered in the module symbol table. The variable and the privatizer's
/// symbol are appended to \p privateClauseOps, i.e. to the clauses of
/// whichever construct the caller is building.
///
/// \param [in] rewriter - used to drive IR generation for privatizers.
/// \param [in] mapper - value mapping from FIR to OpenMP constructs.
/// \param [in] loop - FIR loop to convert its localizers.
///
/// \param [out] privateClauseOps - OpenMP private clause operands to extend.
void genPrivatizers(mlir::ConversionPatternRewriter &rewriter,
                    mlir::IRMapping &mapper, fir::DoConcurrentLoopOp loop,
                    mlir::omp::PrivateClauseOps &privateClauseOps) const {
  // Nothing to do for loops without `local` (or `local_init`) operands.
  if (loop.getLocalVars().empty())
    return;

  for (auto [var, sym, arg] : llvm::zip_equal(
           loop.getLocalVars(),
           loop.getLocalSymsAttr().getAsRange<mlir::SymbolRefAttr>(),
           loop.getRegionLocalArgs())) {
    auto firLocalizer = moduleSymbolTable.lookup<fir::LocalitySpecifierOp>(
        sym.getLeafReference());

    const bool isLocalInit = firLocalizer.getLocalitySpecifierType() ==
                             fir::LocalitySpecifierType::LocalInit;
    if (isLocalInit)
      TODO(firLocalizer.getLoc(),
           "local_init conversion is not supported yet");

    // The privatizer is emitted next to the localizer it is derived from; the
    // guard restores the insertion point at the end of the iteration.
    mlir::OpBuilder::InsertionGuard guard(rewriter);
    rewriter.setInsertionPointAfter(firLocalizer);

    auto ompPrivatizer = mlir::omp::PrivateClauseOp::create(
        rewriter, firLocalizer.getLoc(), sym.getLeafReference().str() + ".omp",
        firLocalizer.getTypeAttr().getValue(),
        mlir::omp::DataSharingClauseType::Private);

    cloneFIRRegionToOMP(rewriter, firLocalizer.getInitRegion(),
                        ompPrivatizer.getInitRegion());
    cloneFIRRegionToOMP(rewriter, firLocalizer.getDeallocRegion(),
                        ompPrivatizer.getDeallocRegion());

    moduleSymbolTable.insert(ompPrivatizer);

    // On the device, the privatized variable is the mapped counterpart of the
    // host value.
    mlir::Value privateVar = mapToDevice ? mapper.lookup(var) : var;
    privateClauseOps.privateVars.push_back(privateVar);
    privateClauseOps.privateSyms.push_back(
        mlir::SymbolRefAttr::get(ompPrivatizer));
  }
}
/// Generates OpenMP reduction declarations from the FIR ones referenced by
/// \p loop and appends the matching clause operands to \p reductionClauseOps.
///
/// For every `reduce` operand, the `omp.declare_reduction` op named
/// `<FIR reducer symbol>.omp` is looked up in the module symbol table; if it
/// does not exist yet, it is created right after the FIR reducer by cloning
/// the reducer's regions, and registered in the symbol table.
///
/// \param [in] rewriter - used to drive IR generation for the reducers.
/// \param [in] mapper - value mapping from FIR to OpenMP constructs.
/// \param [in] loop - FIR loop to convert its reductions.
///
/// \param [out] reductionClauseOps - OpenMP reduction clause operands to
/// extend.
void genReductions(mlir::ConversionPatternRewriter &rewriter,
                   mlir::IRMapping &mapper, fir::DoConcurrentLoopOp loop,
                   mlir::omp::ReductionClauseOps &reductionClauseOps) const {
  // Nothing to do for loops without `reduce` operands.
  if (loop.getReduceVars().empty())
    return;

  for (auto [var, byRef, sym, arg] : llvm::zip_equal(
           loop.getReduceVars(), loop.getReduceByrefAttr().asArrayRef(),
           loop.getReduceSymsAttr().getAsRange<mlir::SymbolRefAttr>(),
           loop.getRegionReduceArgs())) {
    auto firReducer = moduleSymbolTable.lookup<fir::DeclareReductionOp>(
        sym.getLeafReference());

    // A new reducer is emitted next to the FIR reducer it is derived from;
    // the guard restores the insertion point at the end of the iteration.
    mlir::OpBuilder::InsertionGuard guard(rewriter);
    rewriter.setInsertionPointAfter(firReducer);

    std::string ompReducerName = sym.getLeafReference().str() + ".omp";
    auto ompReducer = moduleSymbolTable.lookup<mlir::omp::DeclareReductionOp>(
        rewriter.getStringAttr(ompReducerName));

    // Reuse the OpenMP reducer if one was already generated under this name.
    const bool alreadyGenerated = static_cast<bool>(ompReducer);
    if (!alreadyGenerated) {
      ompReducer = mlir::omp::DeclareReductionOp::create(
          rewriter, firReducer.getLoc(), ompReducerName,
          firReducer.getTypeAttr().getValue(),
          firReducer.getByrefElementTypeAttr());

      cloneFIRRegionToOMP(rewriter, firReducer.getAllocRegion(),
                          ompReducer.getAllocRegion());
      cloneFIRRegionToOMP(rewriter, firReducer.getInitializerRegion(),
                          ompReducer.getInitializerRegion());
      cloneFIRRegionToOMP(rewriter, firReducer.getReductionRegion(),
                          ompReducer.getReductionRegion());
      cloneFIRRegionToOMP(rewriter, firReducer.getAtomicReductionRegion(),
                          ompReducer.getAtomicReductionRegion());
      cloneFIRRegionToOMP(rewriter, firReducer.getCleanupRegion(),
                          ompReducer.getCleanupRegion());

      moduleSymbolTable.insert(ompReducer);
    }

    // On the device, the reduction variable is the mapped counterpart of the
    // host value.
    mlir::Value reductionVar = mapToDevice ? mapper.lookup(var) : var;
    reductionClauseOps.reductionVars.push_back(reductionVar);
    reductionClauseOps.reductionByref.push_back(byRef);
    reductionClauseOps.reductionSyms.push_back(
        mlir::SymbolRefAttr::get(ompReducer));
  }
}
/// When true, the device-mapping code paths of the pattern are taken (target,
/// teams and distribute ops are generated in addition to the parallel and
/// workshare loop ops).
bool mapToDevice;
/// Set, owned by the creator of the pattern, into which `matchAndRewrite`
/// inserts the `fir.do_concurrent` ops it finds inside a generated loop nest.
llvm::DenseSet<fir::DoConcurrentOp> &concurrentLoopsToSkip;
/// Symbol table used to look up FIR localizers/reducers and to register the
/// OpenMP privatizers/reducers generated from them.
mlir::SymbolTable &moduleSymbolTable;
};
/// A listener that forwards notifyOperationErased to the given callback.
struct CallbackListener : public mlir::RewriterBase::Listener {
  /// Stores \p onOperationErased. The parameter is taken by value and moved
  /// into the member so that the callable is not copied a second time.
  CallbackListener(std::function<void(mlir::Operation *op)> onOperationErased)
      : onOperationErased(std::move(onOperationErased)) {}

  /// Invokes the stored callback with the erased operation \p op.
  void notifyOperationErased(mlir::Operation *op) override {
    onOperationErased(op);
  }

  /// The callback invoked by `notifyOperationErased`.
  std::function<void(mlir::Operation *op)> onOperationErased;
};
/// Pass that applies `DoConcurrentConversion` to every `fir.do_concurrent` op
/// of a module, mapping the loops either to the host or to the device
/// depending on the `map-to` option.
class DoConcurrentConversionPass
    : public flangomp::impl::DoConcurrentConversionPassBase<
          DoConcurrentConversionPass> {
public:
  DoConcurrentConversionPass() = default;

  DoConcurrentConversionPass(
      const flangomp::DoConcurrentConversionPassOptions &options)
      : DoConcurrentConversionPassBase(options) {}

  /// Runs a full dialect conversion over the module. Emits a warning and
  /// leaves the module untouched when `map-to` is neither `host` nor
  /// `device`; signals pass failure when the conversion fails.
  void runOnOperation() override {
    mlir::ModuleOp module = getOperation();
    mlir::MLIRContext *context = &getContext();
    mlir::SymbolTable moduleSymbolTable(module);

    const bool mapsToHost =
        mapTo == flangomp::DoConcurrentMappingKind::DCMK_Host;
    const bool mapsToDevice =
        mapTo == flangomp::DoConcurrentMappingKind::DCMK_Device;

    if (!mapsToHost && !mapsToDevice) {
      mlir::emitWarning(mlir::UnknownLoc::get(context),
                        "DoConcurrentConversionPass: invalid `map-to` value. "
                        "Valid values are: `host` or `device`");
      return;
    }

    // Loops in this set are treated as legal, i.e. they are not converted.
    llvm::DenseSet<fir::DoConcurrentOp> concurrentLoopsToSkip;

    // Drop erased loops from the set so that it never holds erased ops.
    CallbackListener eraseListener([&](mlir::Operation *erasedOp) {
      if (auto erasedLoop = mlir::dyn_cast<fir::DoConcurrentOp>(erasedOp))
        concurrentLoopsToSkip.erase(erasedLoop);
    });

    mlir::RewritePatternSet patterns(context);
    patterns.insert<DoConcurrentConversion>(
        context, mapsToDevice, concurrentLoopsToSkip, moduleSymbolTable);

    // `fir.do_concurrent` is only legal when explicitly marked to be skipped;
    // every other op is legal.
    mlir::ConversionTarget target(*context);
    target.addDynamicallyLegalOp<fir::DoConcurrentOp>(
        [&](fir::DoConcurrentOp candidate) {
          return concurrentLoopsToSkip.contains(candidate);
        });
    target.markUnknownOpDynamicallyLegal(
        [](mlir::Operation *) { return true; });

    mlir::ConversionConfig config;
    config.allowPatternRollback = false;
    config.listener = &eraseListener;

    mlir::LogicalResult status = mlir::applyFullConversion(
        module, target, std::move(patterns), config);
    if (mlir::failed(status))
      signalPassFailure();
  }
};
} // namespace
/// Creates the `do concurrent` conversion pass, mapping loops to the device
/// when \p mapToDevice is true and to the host otherwise.
std::unique_ptr<mlir::Pass>
flangomp::createDoConcurrentConversionPass(bool mapToDevice) {
  DoConcurrentConversionPassOptions options;
  if (mapToDevice)
    options.mapTo = flangomp::DoConcurrentMappingKind::DCMK_Device;
  else
    options.mapTo = flangomp::DoConcurrentMappingKind::DCMK_Host;

  return std::make_unique<DoConcurrentConversionPass>(options);
}