//===-- OpenMP.cpp -- OpenMP directive lowering ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
//
//===----------------------------------------------------------------------===//
#include "flang/Lower/OpenMP.h"
#include "ClauseProcessor.h"
#include "Clauses.h"
#include "DataSharingProcessor.h"
#include "Decomposer.h"
#include "ReductionProcessor.h"
#include "Utils.h"
#include "flang/Common/idioms.h"
#include "flang/Lower/Bridge.h"
#include "flang/Lower/ConvertExpr.h"
#include "flang/Lower/ConvertVariable.h"
#include "flang/Lower/DirectivesCommon.h"
#include "flang/Lower/StatementContext.h"
#include "flang/Lower/SymbolMap.h"
#include "flang/Optimizer/Builder/BoxValue.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Parser/characters.h"
#include "flang/Parser/parse-tree.h"
#include "flang/Semantics/openmp-directive-sets.h"
#include "flang/Semantics/tools.h"
#include "flang/Support/OpenMP-utils.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
using namespace Fortran::lower::omp;
using namespace Fortran::common::openmp;
//===----------------------------------------------------------------------===//
// Code generation helper functions
//===----------------------------------------------------------------------===//
static void genOMPDispatch(lower::AbstractConverter &converter,
lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue,
ConstructQueue::const_iterator item);
static void processHostEvalClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
lower::StatementContext &stmtCtx,
lower::pft::Evaluation &eval,
mlir::Location loc);
namespace {
/// Structure holding host-evaluated information that must be passed to later
/// lowering stages.
class HostEvalInfo {
public:
// Allow this function access to private members in order to initialize them.
friend void ::processHostEvalClauses(lower::AbstractConverter &,
semantics::SemanticsContext &,
lower::StatementContext &,
lower::pft::Evaluation &,
mlir::Location);
/// Fill \c vars with values stored in \c ops.
///
/// The order in which values are stored matches the one expected by \see
/// bindOperands().
void collectValues(llvm::SmallVectorImpl<mlir::Value> &vars) const {
vars.append(ops.loopLowerBounds);
vars.append(ops.loopUpperBounds);
vars.append(ops.loopSteps);
if (ops.numTeamsLower)
vars.push_back(ops.numTeamsLower);
if (ops.numTeamsUpper)
vars.push_back(ops.numTeamsUpper);
if (ops.numThreads)
vars.push_back(ops.numThreads);
if (ops.threadLimit)
vars.push_back(ops.threadLimit);
}
/// Update \c ops, replacing all values with the corresponding block argument
/// in \c args.
///
/// The order in which values are stored in \c args is the same as the one
/// used by \see collectValues().
void bindOperands(llvm::ArrayRef<mlir::BlockArgument> args) {
assert(args.size() ==
ops.loopLowerBounds.size() + ops.loopUpperBounds.size() +
ops.loopSteps.size() + (ops.numTeamsLower ? 1 : 0) +
(ops.numTeamsUpper ? 1 : 0) + (ops.numThreads ? 1 : 0) +
(ops.threadLimit ? 1 : 0) &&
"invalid block argument list");
int argIndex = 0;
for (size_t i = 0; i < ops.loopLowerBounds.size(); ++i)
ops.loopLowerBounds[i] = args[argIndex++];
for (size_t i = 0; i < ops.loopUpperBounds.size(); ++i)
ops.loopUpperBounds[i] = args[argIndex++];
for (size_t i = 0; i < ops.loopSteps.size(); ++i)
ops.loopSteps[i] = args[argIndex++];
if (ops.numTeamsLower)
ops.numTeamsLower = args[argIndex++];
if (ops.numTeamsUpper)
ops.numTeamsUpper = args[argIndex++];
if (ops.numThreads)
ops.numThreads = args[argIndex++];
if (ops.threadLimit)
ops.threadLimit = args[argIndex++];
}
/// Update \p clauseOps and \p ivOut with the corresponding host-evaluated
/// values and Fortran symbols, respectively, if they have already been
/// initialized but not yet applied.
///
/// \returns whether an update was performed. If not, these clauses were not
/// evaluated in the host device.
bool apply(mlir::omp::LoopNestOperands &clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> &ivOut) {
if (iv.empty() || loopNestApplied) {
loopNestApplied = true;
return false;
}
loopNestApplied = true;
clauseOps.loopLowerBounds = ops.loopLowerBounds;
clauseOps.loopUpperBounds = ops.loopUpperBounds;
clauseOps.loopSteps = ops.loopSteps;
ivOut.append(iv);
return true;
}
/// Update \p clauseOps with the corresponding host-evaluated values if they
/// have already been initialized but not yet applied.
///
/// \returns whether an update was performed. If not, these clauses were not
/// evaluated in the host device.
bool apply(mlir::omp::ParallelOperands &clauseOps) {
if (!ops.numThreads || parallelApplied) {
parallelApplied = true;
return false;
}
parallelApplied = true;
clauseOps.numThreads = ops.numThreads;
return true;
}
/// Update \p clauseOps with the corresponding host-evaluated values if they
/// have already been initialized.
///
/// \returns whether an update was performed. If not, these clauses were not
/// evaluated in the host device.
bool apply(mlir::omp::TeamsOperands &clauseOps) {
if (!ops.numTeamsLower && !ops.numTeamsUpper && !ops.threadLimit)
return false;
clauseOps.numTeamsLower = ops.numTeamsLower;
clauseOps.numTeamsUpper = ops.numTeamsUpper;
clauseOps.threadLimit = ops.threadLimit;
return true;
}
private:
mlir::omp::HostEvaluatedOperands ops;
llvm::SmallVector<const semantics::Symbol *> iv;
bool loopNestApplied = false, parallelApplied = false;
};
} // namespace
/// Stack of \see HostEvalInfo to represent the current nest of \c omp.target
/// operations being created.
///
/// The current implementation prevents nested 'target' regions from breaking
/// the handling of the outer region by keeping a stack of information
/// structures, but it will probably still require some further work to support
/// reverse offloading.
static llvm::SmallVector<HostEvalInfo, 0> hostEvalInfo;
/// Bind symbols to their corresponding entry block arguments.
///
/// The binding will be performed inside the current block, which does not
/// necessarily have to be part of the operation for which the binding is done.
/// However, block arguments must be accessible. This enables controlling the
/// insertion point of any new MLIR operations related to the binding of
/// arguments of a loop wrapper operation.
///
/// \param [in] converter - PFT to MLIR conversion interface.
/// \param [in] op - owner operation of the block arguments to bind.
/// \param [in] args - entry block arguments information for the given
/// operation.
static void bindEntryBlockArgs(lower::AbstractConverter &converter,
mlir::omp::BlockArgOpenMPOpInterface op,
const EntryBlockArgs &args) {
assert(op != nullptr && "invalid block argument-defining operation");
assert(args.isValid() && "invalid args");
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
auto bindSingleMapLike = [&converter,
&firOpBuilder](const semantics::Symbol &sym,
const mlir::BlockArgument &arg) {
// Clones the `bounds`, placing them inside the entry block, and returns
// them.
auto cloneBound = [&](mlir::Value bound) {
if (mlir::isMemoryEffectFree(bound.getDefiningOp())) {
mlir::Operation *clonedOp = firOpBuilder.clone(*bound.getDefiningOp());
return clonedOp->getResult(0);
}
TODO(converter.getCurrentLocation(),
"target map-like clause operand unsupported bound type");
};
auto cloneBounds = [cloneBound](llvm::ArrayRef<mlir::Value> bounds) {
llvm::SmallVector<mlir::Value> clonedBounds;
llvm::transform(bounds, std::back_inserter(clonedBounds),
[&](mlir::Value bound) { return cloneBound(bound); });
return clonedBounds;
};
fir::ExtendedValue extVal = converter.getSymbolExtendedValue(sym);
auto refType = mlir::dyn_cast<fir::ReferenceType>(arg.getType());
if (refType && fir::isa_builtin_cptr_type(refType.getElementType())) {
converter.bindSymbol(sym, arg);
} else {
extVal.match(
[&](const fir::BoxValue &v) {
converter.bindSymbol(sym,
fir::BoxValue(arg, cloneBounds(v.getLBounds()),
v.getExplicitParameters(),
v.getExplicitExtents()));
},
[&](const fir::MutableBoxValue &v) {
converter.bindSymbol(
sym, fir::MutableBoxValue(arg, cloneBounds(v.getLBounds()),
v.getMutableProperties()));
},
[&](const fir::ArrayBoxValue &v) {
converter.bindSymbol(
sym, fir::ArrayBoxValue(arg, cloneBounds(v.getExtents()),
cloneBounds(v.getLBounds()),
v.getSourceBox()));
},
[&](const fir::CharArrayBoxValue &v) {
converter.bindSymbol(
sym, fir::CharArrayBoxValue(arg, cloneBound(v.getLen()),
cloneBounds(v.getExtents()),
cloneBounds(v.getLBounds())));
},
[&](const fir::CharBoxValue &v) {
converter.bindSymbol(
sym, fir::CharBoxValue(arg, cloneBound(v.getLen())));
},
[&](const fir::UnboxedValue &v) { converter.bindSymbol(sym, arg); },
[&](const auto &) {
TODO(converter.getCurrentLocation(),
"target map clause operand unsupported type");
});
}
};
auto bindMapLike =
[&bindSingleMapLike](llvm::ArrayRef<const semantics::Symbol *> syms,
llvm::ArrayRef<mlir::BlockArgument> args) {
// Structure component symbols don't have bindings, and can only be
// explicitly mapped individually. If a member is captured implicitly,
// we map the entirety of the derived type when we find its symbol.
llvm::SmallVector<const semantics::Symbol *> processedSyms;
llvm::copy_if(syms, std::back_inserter(processedSyms),
[](auto *sym) { return !sym->owner().IsDerivedType(); });
for (auto [sym, arg] : llvm::zip_equal(processedSyms, args))
bindSingleMapLike(*sym, arg);
};
auto bindPrivateLike = [&converter, &firOpBuilder](
llvm::ArrayRef<const semantics::Symbol *> syms,
llvm::ArrayRef<mlir::Value> vars,
llvm::ArrayRef<mlir::BlockArgument> args) {
llvm::SmallVector<const semantics::Symbol *> processedSyms;
for (auto *sym : syms) {
if (const auto *commonDet =
sym->detailsIf<semantics::CommonBlockDetails>()) {
llvm::transform(commonDet->objects(), std::back_inserter(processedSyms),
[&](const auto &mem) { return &*mem; });
} else {
processedSyms.push_back(sym);
}
}
for (auto [sym, var, arg] : llvm::zip_equal(processedSyms, vars, args))
converter.bindSymbol(
*sym,
hlfir::translateToExtendedValue(
var.getLoc(), firOpBuilder, hlfir::Entity{arg},
/*contiguousHint=*/
evaluate::IsSimplyContiguous(*sym, converter.getFoldingContext()))
.first);
};
// Process in alphabetical order of clause name to match the block argument
// order. Do not bind host_eval variables because they cannot be used inside
// the corresponding region, except for very specific cases handled
// separately.
bindMapLike(args.hasDeviceAddr.syms, op.getHasDeviceAddrBlockArgs());
bindPrivateLike(args.inReduction.syms, args.inReduction.vars,
op.getInReductionBlockArgs());
bindMapLike(args.map.syms, op.getMapBlockArgs());
bindPrivateLike(args.priv.syms, args.priv.vars, op.getPrivateBlockArgs());
bindPrivateLike(args.reduction.syms, args.reduction.vars,
op.getReductionBlockArgs());
bindPrivateLike(args.taskReduction.syms, args.taskReduction.vars,
op.getTaskReductionBlockArgs());
bindMapLike(args.useDeviceAddr.syms, op.getUseDeviceAddrBlockArgs());
bindMapLike(args.useDevicePtr.syms, op.getUseDevicePtrBlockArgs());
}
/// Get the list of base values that the specified map-like variables point to.
///
/// This function must be kept in sync with changes to the `createMapInfoOp`
/// utility function, since it must take into account the potential introduction
/// of levels of indirection (i.e. intermediate ops).
///
/// \param [in] vars - list of values passed to map-like clauses, returned
/// by an `omp.map.info` operation.
/// \param [out] baseOps - populated with the `var_ptr` values of the
/// corresponding defining operations.
static void
extractMappedBaseValues(llvm::ArrayRef<mlir::Value> vars,
llvm::SmallVectorImpl<mlir::Value> &baseOps) {
llvm::transform(vars, std::back_inserter(baseOps), [](mlir::Value map) {
auto mapInfo = map.getDefiningOp<mlir::omp::MapInfoOp>();
assert(mapInfo && "expected all map vars to be defined by omp.map.info");
mlir::Value varPtr = mapInfo.getVarPtr();
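// If the variable's address was extracted out of a box by a fir.box_addr
// operation, return the original box value rather than the extracted
// address.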
if (auto boxAddr = varPtr.getDefiningOp<fir::BoxAddrOp>())
return boxAddr.getVal();
return varPtr;
});
}
/// Get the directive enumeration value corresponding to the given OpenMP
/// construct PFT node.
llvm::omp::Directive
extractOmpDirective(const parser::OpenMPConstruct &ompConstruct) {
return common::visit(
common::visitors{
[](const parser::OpenMPAllocatorsConstruct &c) {
return llvm::omp::OMPD_allocators;
},
[](const parser::OpenMPAssumeConstruct &c) {
return llvm::omp::OMPD_assume;
},
[](const parser::OpenMPAtomicConstruct &c) {
return llvm::omp::OMPD_atomic;
},
[](const parser::OpenMPBlockConstruct &c) {
return std::get<parser::OmpBlockDirective>(
std::get<parser::OmpBeginBlockDirective>(c.t).t)
.v;
},
[](const parser::OpenMPCriticalConstruct &c) {
return llvm::omp::OMPD_critical;
},
[](const parser::OpenMPDeclarativeAllocate &c) {
return llvm::omp::OMPD_allocate;
},
[](const parser::OpenMPDispatchConstruct &c) {
return llvm::omp::OMPD_dispatch;
},
[](const parser::OpenMPExecutableAllocate &c) {
return llvm::omp::OMPD_allocate;
},
[](const parser::OpenMPLoopConstruct &c) {
return std::get<parser::OmpLoopDirective>(
std::get<parser::OmpBeginLoopDirective>(c.t).t)
.v;
},
[](const parser::OpenMPSectionConstruct &c) {
return llvm::omp::OMPD_section;
},
[](const parser::OpenMPSectionsConstruct &c) {
return std::get<parser::OmpSectionsDirective>(
std::get<parser::OmpBeginSectionsDirective>(c.t).t)
.v;
},
[](const parser::OpenMPStandaloneConstruct &c) {
return common::visit(
common::visitors{
[](const parser::OpenMPSimpleStandaloneConstruct &c) {
return c.v.DirId();
},
[](const parser::OpenMPFlushConstruct &c) {
return llvm::omp::OMPD_flush;
},
[](const parser::OpenMPCancelConstruct &c) {
return llvm::omp::OMPD_cancel;
},
[](const parser::OpenMPCancellationPointConstruct &c) {
return llvm::omp::OMPD_cancellation_point;
},
[](const parser::OmpMetadirectiveDirective &c) {
return llvm::omp::OMPD_metadirective;
},
[](const parser::OpenMPDepobjConstruct &c) {
return llvm::omp::OMPD_depobj;
},
[](const parser::OpenMPInteropConstruct &c) {
return llvm::omp::OMPD_interop;
}},
c.u);
},
[](const parser::OpenMPUtilityConstruct &c) {
return common::visit(
common::visitors{[](const parser::OmpErrorDirective &c) {
return llvm::omp::OMPD_error;
},
[](const parser::OmpNothingDirective &c) {
return llvm::omp::OMPD_nothing;
}},
c.u);
}},
ompConstruct.u);
}
/// Populate the global \see hostEvalInfo after processing clauses for the given
/// \p eval OpenMP target construct, or nested constructs, if these must be
/// evaluated outside of the target region per the spec.
///
/// In particular, this will ensure that in 'target teams' and equivalent nested
/// constructs, the \c thread_limit and \c num_teams clauses will be evaluated
/// in the host. Additionally, loop bounds, steps and the \c num_threads clause
/// will also be evaluated in the host if a target SPMD construct is detected
/// (i.e. 'target teams distribute parallel do [simd]' or equivalent nesting).
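///
/// For example, when lowering the following target SPMD construct, the
/// bounds and step of the 'do' loop and the value of the 'num_threads'
/// clause must be evaluated in the host and passed to the 'omp.target'
/// operation via 'host_eval' operands:
///
///   !$omp target teams distribute parallel do num_threads(16)
///   do i = 1, n
///     ...
///   end do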
///
/// The result, stored as a global, is intended to be used to populate the \c
/// host_eval operands of the associated \c omp.target operation, and also to be
/// checked and used by later lowering steps to populate the corresponding
/// operands of the \c omp.teams, \c omp.parallel or \c omp.loop_nest
/// operations.
static void processHostEvalClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
lower::StatementContext &stmtCtx,
lower::pft::Evaluation &eval,
mlir::Location loc) {
// Obtain the list of clauses of the given OpenMP block or loop construct
// evaluation. Other evaluations passed to this lambda keep `clauses`
// unchanged.
auto extractClauses = [&semaCtx](lower::pft::Evaluation &eval,
List<Clause> &clauses) {
const auto *ompEval = eval.getIf<parser::OpenMPConstruct>();
if (!ompEval)
return;
const parser::OmpClauseList *beginClauseList = nullptr;
const parser::OmpClauseList *endClauseList = nullptr;
common::visit(
common::visitors{
[&](const parser::OpenMPBlockConstruct &ompConstruct) {
const auto &beginDirective =
std::get<parser::OmpBeginBlockDirective>(ompConstruct.t);
beginClauseList =
&std::get<parser::OmpClauseList>(beginDirective.t);
endClauseList = &std::get<parser::OmpClauseList>(
std::get<parser::OmpEndBlockDirective>(ompConstruct.t).t);
},
[&](const parser::OpenMPLoopConstruct &ompConstruct) {
const auto &beginDirective =
std::get<parser::OmpBeginLoopDirective>(ompConstruct.t);
beginClauseList =
&std::get<parser::OmpClauseList>(beginDirective.t);
if (auto &endDirective =
std::get<std::optional<parser::OmpEndLoopDirective>>(
ompConstruct.t))
endClauseList =
&std::get<parser::OmpClauseList>(endDirective->t);
},
[&](const auto &) {}},
ompEval->u);
assert(beginClauseList && "expected begin directive");
clauses.append(makeClauses(*beginClauseList, semaCtx));
if (endClauseList)
clauses.append(makeClauses(*endClauseList, semaCtx));
};
// Return the directive that is immediately nested inside the given
// `parent` evaluation, if it is its only non-end-statement nested evaluation
// and it represents an OpenMP construct.
auto extractOnlyOmpNestedDir = [](lower::pft::Evaluation &parent)
-> std::optional<llvm::omp::Directive> {
if (!parent.hasNestedEvaluations())
return std::nullopt;
llvm::omp::Directive dir;
auto &nested = parent.getFirstNestedEvaluation();
if (const auto *ompEval = nested.getIf<parser::OpenMPConstruct>())
dir = extractOmpDirective(*ompEval);
else
return std::nullopt;
for (auto &sibling : parent.getNestedEvaluations())
if (&sibling != &nested && !sibling.isEndStmt())
return std::nullopt;
return dir;
};
// Process the given evaluation assuming it's part of a 'target' construct or
// captured by one, and store results in the global `hostEvalInfo`.
std::function<void(lower::pft::Evaluation &, const List<Clause> &)>
processEval;
processEval = [&](lower::pft::Evaluation &eval, const List<Clause> &clauses) {
using namespace llvm::omp;
ClauseProcessor cp(converter, semaCtx, clauses);
// Call `processEval` recursively with the immediately nested evaluation and
// its corresponding clauses if there is a single nested evaluation
// representing an OpenMP directive that passes the given test.
auto processSingleNestedIf = [&](llvm::function_ref<bool(Directive)> test) {
std::optional<Directive> nestedDir = extractOnlyOmpNestedDir(eval);
if (!nestedDir || !test(*nestedDir))
return;
lower::pft::Evaluation &nestedEval = eval.getFirstNestedEvaluation();
List<lower::omp::Clause> nestedClauses;
extractClauses(nestedEval, nestedClauses);
processEval(nestedEval, nestedClauses);
};
const auto *ompEval = eval.getIf<parser::OpenMPConstruct>();
if (!ompEval)
return;
HostEvalInfo &hostInfo = hostEvalInfo.back();
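// Each case below processes the clauses that must be evaluated in the host
// for the given leaf or combined construct, then falls through to the
// handling of the constructs it is a superset of. For instance, the
// 'target teams distribute parallel do' case also picks up the processing
// of 'num_threads' and of the collapsed loop bounds.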
switch (extractOmpDirective(*ompEval)) {
case OMPD_teams_distribute_parallel_do:
case OMPD_teams_distribute_parallel_do_simd:
cp.processThreadLimit(stmtCtx, hostInfo.ops);
[[fallthrough]];
case OMPD_target_teams_distribute_parallel_do:
case OMPD_target_teams_distribute_parallel_do_simd:
cp.processNumTeams(stmtCtx, hostInfo.ops);
[[fallthrough]];
case OMPD_distribute_parallel_do:
case OMPD_distribute_parallel_do_simd:
cp.processNumThreads(stmtCtx, hostInfo.ops);
[[fallthrough]];
case OMPD_distribute:
case OMPD_distribute_simd:
cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv);
break;
case OMPD_teams:
cp.processThreadLimit(stmtCtx, hostInfo.ops);
[[fallthrough]];
case OMPD_target_teams:
cp.processNumTeams(stmtCtx, hostInfo.ops);
processSingleNestedIf([](Directive nestedDir) {
return topDistributeSet.test(nestedDir) || topLoopSet.test(nestedDir);
});
break;
case OMPD_teams_distribute:
case OMPD_teams_distribute_simd:
cp.processThreadLimit(stmtCtx, hostInfo.ops);
[[fallthrough]];
case OMPD_target_teams_distribute:
case OMPD_target_teams_distribute_simd:
cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv);
cp.processNumTeams(stmtCtx, hostInfo.ops);
break;
case OMPD_teams_loop:
cp.processThreadLimit(stmtCtx, hostInfo.ops);
[[fallthrough]];
case OMPD_target_teams_loop:
cp.processNumTeams(stmtCtx, hostInfo.ops);
[[fallthrough]];
case OMPD_loop:
cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv);
break;
// Standalone 'target' case.
case OMPD_target: {
processSingleNestedIf(
[](Directive nestedDir) { return topTeamsSet.test(nestedDir); });
break;
}
default:
break;
}
};
assert(!hostEvalInfo.empty() && "expected HOST_EVAL info structure");
const auto *ompEval = eval.getIf<parser::OpenMPConstruct>();
assert(ompEval &&
llvm::omp::allTargetSet.test(extractOmpDirective(*ompEval)) &&
"expected TARGET construct evaluation");
(void)ompEval;
// Use the whole list of clauses passed to the construct here, rather than
// only the ones applied to omp.target.
List<lower::omp::Clause> clauses;
extractClauses(eval, clauses);
processEval(eval, clauses);
}
static lower::pft::Evaluation *
getCollapsedLoopEval(lower::pft::Evaluation &eval, int collapseValue) {
// Return the Evaluation of the innermost collapsed loop, or the current one
// if there was no COLLAPSE.
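// For example, with COLLAPSE(2) this skips past the outermost DoConstruct
// and returns the evaluation of the DO loop nested immediately inside it.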
if (collapseValue == 0)
return &eval;
lower::pft::Evaluation *curEval = &eval.getFirstNestedEvaluation();
for (int i = 1; i < collapseValue; i++) {
// The nested evaluations should be DoConstructs (i.e. they should form
// a loop nest). Each DoConstruct is a tuple <NonLabelDoStmt, Block,
// EndDoStmt>.
assert(curEval->isA<parser::DoConstruct>());
curEval = &*std::next(curEval->getNestedEvaluations().begin());
}
return curEval;
}
static void genNestedEvaluations(lower::AbstractConverter &converter,
lower::pft::Evaluation &eval,
int collapseValue = 0) {
lower::pft::Evaluation *curEval = getCollapsedLoopEval(eval, collapseValue);
for (lower::pft::Evaluation &e : curEval->getNestedEvaluations())
converter.genEval(e);
}
static fir::GlobalOp globalInitialization(lower::AbstractConverter &converter,
fir::FirOpBuilder &firOpBuilder,
const semantics::Symbol &sym,
const lower::pft::Variable &var,
mlir::Location currentLocation) {
std::string globalName = converter.mangleName(sym);
mlir::StringAttr linkage = firOpBuilder.createInternalLinkage();
return Fortran::lower::defineGlobal(converter, var, globalName, linkage);
}
// Get the extended value for \p val by extracting additional variable
// information from \p base.
static fir::ExtendedValue getExtendedValue(fir::ExtendedValue base,
mlir::Value val) {
return base.match(
[&](const fir::MutableBoxValue &box) -> fir::ExtendedValue {
return fir::MutableBoxValue(val, box.nonDeferredLenParams(), {});
},
[&](const auto &) -> fir::ExtendedValue {
return fir::substBase(base, val);
});
}
#ifndef NDEBUG
static bool isThreadPrivate(lower::SymbolRef sym) {
if (const auto *details = sym->detailsIf<semantics::CommonBlockDetails>()) {
for (const auto &obj : details->objects())
if (!obj->test(semantics::Symbol::Flag::OmpThreadprivate))
return false;
return true;
}
return sym->test(semantics::Symbol::Flag::OmpThreadprivate);
}
#endif
static void threadPrivatizeVars(lower::AbstractConverter &converter,
lower::pft::Evaluation &eval) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
mlir::Location currentLocation = converter.getCurrentLocation();
mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
// If the symbol corresponds to the original ThreadprivateOp, use the symbol
// value from that operation to create one ThreadprivateOp copy operation
// inside the parallel region.
// In some cases, however, the symbol will correspond to the original,
// non-threadprivate variable. This can happen, for instance, with a common
// block, declared in a separate module, used by a parent procedure and
// privatized in its child procedure.
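// For example, given "!$omp threadprivate(x)", a reference to 'x' inside a
// parallel region is rebound to the result of a new ThreadprivateOp created
// within that region, so that each thread accesses its own copy of 'x'.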
auto genThreadprivateOp = [&](lower::SymbolRef sym) -> mlir::Value {
assert(isThreadPrivate(sym));
mlir::Value symValue = converter.getSymbolAddress(sym);
mlir::Operation *op = symValue.getDefiningOp();
if (auto declOp = mlir::dyn_cast<hlfir::DeclareOp>(op))
op = declOp.getMemref().getDefiningOp();
if (mlir::isa<mlir::omp::ThreadprivateOp>(op))
symValue = mlir::dyn_cast<mlir::omp::ThreadprivateOp>(op).getSymAddr();
return firOpBuilder.create<mlir::omp::ThreadprivateOp>(
currentLocation, symValue.getType(), symValue);
};
llvm::SetVector<const semantics::Symbol *> threadprivateSyms;
converter.collectSymbolSet(eval, threadprivateSyms,
semantics::Symbol::Flag::OmpThreadprivate,
/*collectSymbols=*/true,
/*collectHostAssociatedSymbols=*/true);
std::set<semantics::SourceName> threadprivateSymNames;
// For a COMMON block, the ThreadprivateOp is generated for the block itself
// rather than for its members, so bind the value of the new copied
// ThreadprivateOp inside the parallel region to the common block symbol only
// once, even if multiple of its members are referenced.
llvm::SetVector<const semantics::Symbol *> commonSyms;
for (std::size_t i = 0; i < threadprivateSyms.size(); i++) {
const semantics::Symbol *sym = threadprivateSyms[i];
mlir::Value symThreadprivateValue;
// The variable may be used more than once, and each reference has its own
// symbol with the same name. Process each variable only once.
if (threadprivateSymNames.find(sym->name()) != threadprivateSymNames.end())
continue;
threadprivateSymNames.insert(sym->name());
if (const semantics::Symbol *common =
semantics::FindCommonBlockContaining(sym->GetUltimate())) {
mlir::Value commonThreadprivateValue;
if (commonSyms.contains(common)) {
commonThreadprivateValue = converter.getSymbolAddress(*common);
} else {
commonThreadprivateValue = genThreadprivateOp(*common);
converter.bindSymbol(*common, commonThreadprivateValue);
commonSyms.insert(common);
}
symThreadprivateValue = lower::genCommonBlockMember(
converter, currentLocation, sym->GetUltimate(),
commonThreadprivateValue);
} else {
symThreadprivateValue = genThreadprivateOp(*sym);
}
fir::ExtendedValue sexv = converter.getSymbolExtendedValue(*sym);
fir::ExtendedValue symThreadprivateExv =
getExtendedValue(sexv, symThreadprivateValue);
converter.bindSymbol(*sym, symThreadprivateExv);
}
}
static mlir::Operation *
createAndSetPrivatizedLoopVar(lower::AbstractConverter &converter,
mlir::Location loc, mlir::Value indexVal,
const semantics::Symbol *sym) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint();
firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
mlir::Type tempTy = converter.genType(*sym);
assert(converter.isPresentShallowLookup(*sym) &&
"Expected symbol to be in symbol table.");
firOpBuilder.restoreInsertionPoint(insPt);
mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, indexVal);
hlfir::Entity lhs{converter.getSymbolAddress(*sym)};
lhs = hlfir::derefPointersAndAllocatables(loc, firOpBuilder, lhs);
mlir::Operation *storeOp =
firOpBuilder.create<hlfir::AssignOp>(loc, cvtVal, lhs);
return storeOp;
}
// This helper function implements the functionality of "promoting" non-C_PTR
// arguments of use_device_ptr to use_device_addr arguments (automatic
// conversion of use_device_ptr -> use_device_addr in these cases). It
// currently does so by shuffling the operands, together with their types,
// locations and symbols, from devicePtrOperands to deviceAddrOperands.
//
// This effectively implements some deprecated OpenMP functionality that some
// legacy applications unfortunately depend on (deprecated in specification
// version 5.2):
//
// "If a list item in a use_device_ptr clause is not of type C_PTR, the behavior
// is as if the list item appeared in a use_device_addr clause. Support for
// such list items in a use_device_ptr clause is deprecated."
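//
// For example, in "!$omp target data map(a) use_device_ptr(a)" where 'a' is
// not of type C_PTR, 'a' is handled as if it had been listed in a
// use_device_addr clause instead.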
static void promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr(
llvm::SmallVectorImpl<mlir::Value> &useDeviceAddrVars,
llvm::SmallVectorImpl<const semantics::Symbol *> &useDeviceAddrSyms,
llvm::SmallVectorImpl<mlir::Value> &useDevicePtrVars,
llvm::SmallVectorImpl<const semantics::Symbol *> &useDevicePtrSyms) {
// Iterate over our use_device_ptr list and shift all non-cptr arguments into
// use_device_addr.
auto *varIt = useDevicePtrVars.begin();
auto *symIt = useDevicePtrSyms.begin();
while (varIt != useDevicePtrVars.end()) {
if (fir::isa_builtin_cptr_type(fir::unwrapRefType(varIt->getType()))) {
++varIt;
++symIt;
continue;
}
useDeviceAddrVars.push_back(*varIt);
useDeviceAddrSyms.push_back(*symIt);
varIt = useDevicePtrVars.erase(varIt);
symIt = useDevicePtrSyms.erase(symIt);
}
}
/// Extract the list of function and variable symbols affected by the given
/// 'declare target' directive and return the intended device type for them.
static void getDeclareTargetInfo(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct,
mlir::omp::DeclareTargetOperands &clauseOps,
llvm::SmallVectorImpl<DeclareTargetCapturePair> &symbolAndClause) {
const auto &spec =
std::get<parser::OmpDeclareTargetSpecifier>(declareTargetConstruct.t);
if (const auto *objectList{parser::Unwrap<parser::OmpObjectList>(spec.u)}) {
ObjectList objects{makeObjects(*objectList, semaCtx)};
// Case: declare target(func, var1, var2)
gatherFuncAndVarSyms(objects, mlir::omp::DeclareTargetCaptureClause::to,
symbolAndClause);
} else if (const auto *clauseList{
parser::Unwrap<parser::OmpClauseList>(spec.u)}) {
List<Clause> clauses = makeClauses(*clauseList, semaCtx);
if (clauses.empty()) {
Fortran::lower::pft::FunctionLikeUnit *owningProc =
eval.getOwningProcedure();
if (owningProc && (!owningProc->isMainProgram() ||
owningProc->getMainProgramSymbol())) {
// Case: declare target, implicit capture of function
symbolAndClause.emplace_back(mlir::omp::DeclareTargetCaptureClause::to,
owningProc->getSubprogramSymbol());
}
}
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processDeviceType(clauseOps);
cp.processEnter(symbolAndClause);
cp.processLink(symbolAndClause);
cp.processTo(symbolAndClause);
cp.processTODO<clause::Indirect>(converter.getCurrentLocation(),
llvm::omp::Directive::OMPD_declare_target);
}
}
static void collectDeferredDeclareTargets(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct,
llvm::SmallVectorImpl<lower::OMPDeferredDeclareTargetInfo>
&deferredDeclareTarget) {
mlir::omp::DeclareTargetOperands clauseOps;
llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause;
getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct,
clauseOps, symbolAndClause);
// Collect the symbols for which no operation exists in the module yet, so
// that marking them as declare target can be deferred until they are
// available.
mlir::ModuleOp mod = converter.getFirOpBuilder().getModule();
for (const DeclareTargetCapturePair &symClause : symbolAndClause) {
mlir::Operation *op = mod.lookupSymbol(
converter.mangleName(std::get<const semantics::Symbol &>(symClause)));
if (!op) {
deferredDeclareTarget.push_back({std::get<0>(symClause),
clauseOps.deviceType,
std::get<1>(symClause)});
}
}
}
static std::optional<mlir::omp::DeclareTargetDeviceType>
getDeclareTargetFunctionDevice(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct) {
mlir::omp::DeclareTargetOperands clauseOps;
llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause;
getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct,
clauseOps, symbolAndClause);
// Return the device type only if at least one of the targets for the
// directive is a function or subroutine
mlir::ModuleOp mod = converter.getFirOpBuilder().getModule();
for (const DeclareTargetCapturePair &symClause : symbolAndClause) {
mlir::Operation *op = mod.lookupSymbol(
converter.mangleName(std::get<const semantics::Symbol &>(symClause)));
if (mlir::isa_and_nonnull<mlir::func::FuncOp>(op))
return clauseOps.deviceType;
}
return std::nullopt;
}
/// Set up the entry block of the given `omp.loop_nest` operation, adding a
/// block argument for each loop induction variable and allocating and
/// initializing a private value to hold each of them.
///
/// This function can also bind the symbols of any variables that should match
/// block arguments on parent loop wrapper operations attached to the same
/// loop. This allows the introduction of any necessary `hlfir.declare`
/// operations inside the entry block of the `omp.loop_nest` operation and
/// not directly under any of the wrappers, which would invalidate them.
///
/// \param [in] op - the loop nest operation.
/// \param [in] converter - PFT to MLIR conversion interface.
/// \param [in] loc - location.
/// \param [in] args - symbols of induction variables.
/// \param [in] wrapperArgs - list of parent loop wrappers and their associated
/// entry block arguments.
static void genLoopVars(
mlir::Operation *op, lower::AbstractConverter &converter,
mlir::Location &loc, llvm::ArrayRef<const semantics::Symbol *> args,
llvm::ArrayRef<
std::pair<mlir::omp::BlockArgOpenMPOpInterface, const EntryBlockArgs &>>
wrapperArgs = {}) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
auto &region = op->getRegion(0);
std::size_t loopVarTypeSize = 0;
for (const semantics::Symbol *arg : args)
loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size());
mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize);
llvm::SmallVector<mlir::Type> tiv(args.size(), loopVarType);
llvm::SmallVector<mlir::Location> locs(args.size(), loc);
firOpBuilder.createBlock(&region, {}, tiv, locs);
// Update nested wrapper operands if parent wrappers have mapped these values
// to block arguments.
//
// Binding these values earlier would take care of this, but we cannot rely on
// that approach because binding in between the creation of a wrapper and the
// next one would result in 'hlfir.declare' operations being introduced
// inside a wrapper, which is illegal.
mlir::IRMapping mapper;
for (auto [argGeneratingOp, blockArgs] : wrapperArgs) {
for (mlir::OpOperand &operand : argGeneratingOp->getOpOperands())
operand.set(mapper.lookupOrDefault(operand.get()));
for (const auto [arg, var] : llvm::zip_equal(
argGeneratingOp->getRegion(0).getArguments(), blockArgs.getVars()))
mapper.map(var, arg);
}
// Bind the entry block arguments of parent wrappers to the corresponding
// symbols.
for (auto [argGeneratingOp, blockArgs] : wrapperArgs)
bindEntryBlockArgs(converter, argGeneratingOp, blockArgs);
// The argument is not currently in memory, so make a temporary for it, store
// the argument value there, and then bind that location to the argument.
mlir::Operation *storeOp = nullptr;
for (auto [argIndex, argSymbol] : llvm::enumerate(args)) {
mlir::Value indexVal = fir::getBase(region.front().getArgument(argIndex));
storeOp =
createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol);
}
firOpBuilder.setInsertionPointAfter(storeOp);
}
static void
markDeclareTarget(mlir::Operation *op, lower::AbstractConverter &converter,
mlir::omp::DeclareTargetCaptureClause captureClause,
mlir::omp::DeclareTargetDeviceType deviceType) {
// TODO: Add support for program local variables with declare target applied
auto declareTargetOp = llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(op);
if (!declareTargetOp)
fir::emitFatalError(
converter.getCurrentLocation(),
"Attempt to apply declare target on unsupported operation");
// The function or global already has a declare target applied to it, very
// likely through implicit capture (usage in another declare target
// function/subroutine). It should be marked as 'any' if it has been assigned
// both host and nohost; otherwise we skip it, as there is no change.
if (declareTargetOp.isDeclareTarget()) {
if (declareTargetOp.getDeclareTargetDeviceType() != deviceType)
declareTargetOp.setDeclareTarget(mlir::omp::DeclareTargetDeviceType::any,
captureClause);
return;
}
declareTargetOp.setDeclareTarget(deviceType, captureClause);
}
//===----------------------------------------------------------------------===//
// Op body generation helper structures and functions
//===----------------------------------------------------------------------===//
struct OpWithBodyGenInfo {
/// A type for a code-gen callback function. This takes as argument the op for
/// which the code is being generated and returns the arguments of the op's
/// region.
using GenOMPRegionEntryCBFn =
std::function<llvm::SmallVector<const semantics::Symbol *>(
mlir::Operation *)>;
OpWithBodyGenInfo(lower::AbstractConverter &converter,
lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, mlir::Location loc,
lower::pft::Evaluation &eval, llvm::omp::Directive dir)
: converter(converter), symTable(symTable), semaCtx(semaCtx), loc(loc),
eval(eval), dir(dir) {}
OpWithBodyGenInfo &setClauses(const List<Clause> *value) {
clauses = value;
return *this;
}
OpWithBodyGenInfo &setDataSharingProcessor(DataSharingProcessor *value) {
dsp = value;
return *this;
}
OpWithBodyGenInfo &setEntryBlockArgs(const EntryBlockArgs *value) {
blockArgs = value;
return *this;
}
OpWithBodyGenInfo &setGenRegionEntryCb(GenOMPRegionEntryCBFn value) {
genRegionEntryCB = value;
return *this;
}
OpWithBodyGenInfo &setGenSkeletonOnly(bool value) {
genSkeletonOnly = value;
return *this;
}
/// [inout] converter to use for the clauses.
lower::AbstractConverter &converter;
/// [in] Symbol table
lower::SymMap &symTable;
/// [in] Semantics context
semantics::SemanticsContext &semaCtx;
/// [in] location in source code.
mlir::Location loc;
/// [in] current PFT node/evaluation.
lower::pft::Evaluation &eval;
/// [in] leaf directive for which to generate the op body.
llvm::omp::Directive dir;
/// [in] list of clauses to process.
const List<Clause> *clauses = nullptr;
/// [in] if provided, processes the construct's data-sharing attributes.
DataSharingProcessor *dsp = nullptr;
/// [in] if provided, it is used to create the op's region entry block. It is
/// overridden when a \see genRegionEntryCB is provided. This is only valid for
/// operations implementing the \see mlir::omp::BlockArgOpenMPOpInterface.
const EntryBlockArgs *blockArgs = nullptr;
/// [in] if provided, it overrides the default op's region entry block
/// creation.
GenOMPRegionEntryCBFn genRegionEntryCB = nullptr;
/// [in] if set to `true`, skip generating nested evaluations and dispatching
/// any further leaf constructs.
bool genSkeletonOnly = false;
};
/// Create the body (block) for an OpenMP Operation.
///
/// \param [in] op - the operation the body belongs to.
/// \param [in] info - options controlling code-gen for the construction.
/// \param [in] queue - work queue with nested constructs.
/// \param [in] item - item in the queue to generate body for.
static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info,
const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder();
auto insertMarker = [](fir::FirOpBuilder &builder) {
mlir::Value undef = builder.create<fir::UndefOp>(builder.getUnknownLoc(),
builder.getIndexType());
return undef.getDefiningOp();
};
// Create the entry block for the region and collect its arguments for use
// within the region. The entry block will be created as follows:
// - By default, it will be empty and have no arguments.
// - Operations implementing the omp::BlockArgOpenMPOpInterface can set the
// `info.blockArgs` pointer so that block arguments will be those
// corresponding to entry block argument-generating clauses. Binding of
// Fortran symbols to the new MLIR values is done automatically.
// - If the `info.genRegionEntryCB` callback is set, it takes precedence and
// allows callers to manually create the entry block with its intended
// list of arguments and to bind these arguments to their corresponding
// Fortran symbols. This is used for e.g. loop induction variables.
auto regionArgs = [&]() -> llvm::SmallVector<const semantics::Symbol *> {
if (info.genRegionEntryCB)
return info.genRegionEntryCB(&op);
if (info.blockArgs) {
genEntryBlock(firOpBuilder, *info.blockArgs, op.getRegion(0));
bindEntryBlockArgs(info.converter,
llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(op),
*info.blockArgs);
return llvm::to_vector(info.blockArgs->getSyms());
}
firOpBuilder.createBlock(&op.getRegion(0));
return {};
}();
// Mark the earliest insertion point.
mlir::Operation *marker = insertMarker(firOpBuilder);
// If it is an unstructured region, create empty blocks for all evaluations.
if (lower::omp::isLastItemInQueue(item, queue) &&
info.eval.lowerAsUnstructured()) {
lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp, mlir::omp::YieldOp>(
firOpBuilder, info.eval.getNestedEvaluations());
}
// Start with privatization, so that the lowering of the nested
// code will use the right symbols.
bool isLoop = llvm::omp::getDirectiveAssociation(info.dir) ==
llvm::omp::Association::Loop;
bool privatize = info.clauses;
firOpBuilder.setInsertionPoint(marker);
std::optional<DataSharingProcessor> tempDsp;
if (privatize && !info.dsp) {
tempDsp.emplace(info.converter, info.semaCtx, *info.clauses, info.eval,
Fortran::lower::omp::isLastItemInQueue(item, queue),
/*useDelayedPrivatization=*/false, info.symTable);
tempDsp->processStep1();
}
if (info.dir == llvm::omp::Directive::OMPD_parallel) {
threadPrivatizeVars(info.converter, info.eval);
if (info.clauses) {
firOpBuilder.setInsertionPoint(marker);
ClauseProcessor(info.converter, info.semaCtx, *info.clauses)
.processCopyin();
}
}
if (!info.genSkeletonOnly) {
if (ConstructQueue::const_iterator next = std::next(item);
next != queue.end()) {
genOMPDispatch(info.converter, info.symTable, info.semaCtx, info.eval,
info.loc, queue, next);
} else {
// genFIR(Evaluation&) tries to patch up unterminated blocks, causing
// a lot of complications for our approach if the terminator generation
// is delayed past this point. Insert a temporary terminator here, then
// delete it.
firOpBuilder.setInsertionPointToEnd(&op.getRegion(0).back());
auto *temp = lower::genOpenMPTerminator(firOpBuilder, &op, info.loc);
firOpBuilder.setInsertionPointAfter(marker);
genNestedEvaluations(info.converter, info.eval);
temp->erase();
}
}
// Get or create a unique exiting block from the given region, or
// return nullptr if there is no exiting block.
auto getUniqueExit = [&](mlir::Region &region) -> mlir::Block * {
// Find the blocks where the OMP terminator should go. In simple cases
// it is the single block in the operation's region. When the region
// is more complicated, especially with unstructured control flow, there
// may be multiple blocks, and some of them may have non-OMP terminators
// resulting from lowering of the code contained within the operation.
// All the remaining blocks are potential exit points from the op's region.
//
// Explicit control flow cannot exit any OpenMP region (other than via
// STOP), and that is enforced by semantic checks prior to lowering. STOP
// statements are lowered to a function call.
// Collect unterminated blocks.
llvm::SmallVector<mlir::Block *> exits;
for (mlir::Block &b : region) {
if (b.empty() || !b.back().hasTrait<mlir::OpTrait::IsTerminator>())
exits.push_back(&b);
}
if (exits.empty())
return nullptr;
// If there already is a unique exiting block, do not create another one.
// Additionally, some ops (e.g. omp.sections) require only 1 block in
// their region.
if (exits.size() == 1)
return exits[0];
mlir::Block *exit = firOpBuilder.createBlock(&region);
for (mlir::Block *b : exits) {
firOpBuilder.setInsertionPointToEnd(b);
firOpBuilder.create<mlir::cf::BranchOp>(info.loc, exit);
}
return exit;
};
if (auto *exitBlock = getUniqueExit(op.getRegion(0))) {
firOpBuilder.setInsertionPointToEnd(exitBlock);
auto *term = lower::genOpenMPTerminator(firOpBuilder, &op, info.loc);
// Only insert lastprivate code when there actually is an exit block.
// Such a block may not exist if the nested code produced an infinite
// loop (this may not make sense in production code, but a user could
// write that and we should handle it).
firOpBuilder.setInsertionPoint(term);
if (privatize) {
// DataSharingProcessor::processStep2() may create operations before/after
// the one passed as argument. We need to treat loop wrappers and their
// nested loop as a unit, so we need to pass the bottom level wrapper (if
// present). Otherwise, these operations will be inserted within a
// wrapper region.
mlir::Operation *privatizationBottomLevelOp = &op;
if (auto loopNest = llvm::dyn_cast<mlir::omp::LoopNestOp>(op)) {
llvm::SmallVector<mlir::omp::LoopWrapperInterface> wrappers;
loopNest.gatherWrappers(wrappers);
if (!wrappers.empty())
privatizationBottomLevelOp = &*wrappers.front();
}
if (!info.dsp) {
assert(tempDsp.has_value());
tempDsp->processStep2(privatizationBottomLevelOp, isLoop);
} else {
if (isLoop && regionArgs.size() > 0) {
for (const auto &regionArg : regionArgs) {
info.dsp->pushLoopIV(info.converter.getSymbolAddress(*regionArg));
}
}
info.dsp->processStep2(privatizationBottomLevelOp, isLoop);
}
}
}
firOpBuilder.setInsertionPointAfter(marker);
marker->erase();
}
static void genBodyOfTargetDataOp(
lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::omp::TargetDataOp &dataOp, const EntryBlockArgs &args,
const mlir::Location &currentLocation, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
genEntryBlock(firOpBuilder, args, dataOp.getRegion());
bindEntryBlockArgs(converter, dataOp, args);
// Insert a dummy instruction to remember the insertion position. The
// marker will be deleted by cleanup passes since there are no uses.
// Remembering the position for further insertion is important since
// there are hlfir.declares inserted above while setting block arguments
// and new code from the body should be inserted after that.
mlir::Value undefMarker = firOpBuilder.create<fir::UndefOp>(
dataOp.getLoc(), firOpBuilder.getIndexType());
// Create blocks for unstructured regions. This has to be done since
// blocks are initially allocated with the function as the parent region.
if (eval.lowerAsUnstructured()) {
lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp, mlir::omp::YieldOp>(
firOpBuilder, eval.getNestedEvaluations());
}
firOpBuilder.create<mlir::omp::TerminatorOp>(currentLocation);
// Set the insertion point after the marker.
firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp());
if (ConstructQueue::const_iterator next = std::next(item);
next != queue.end()) {
genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
next);
} else {
genNestedEvaluations(converter, eval);
}
}
// This generates intermediate common block member accesses within a region
// and then rebinds the member symbols to the intermediate accessors we have
// generated, so that subsequent code generation will utilise these instead.
//
// When the scope changes, the bindings to the intermediate accessors should
// be dropped in place of the original symbol bindings.
//
// This is intended for use with TargetOp.
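//
// For example, when /blk/ containing 'x' is mapped onto a target region, a
// new access to 'x' is generated from the mapped base address of /blk/
// inside the region, and 'x' is rebound to that access for the scope of the
// region.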
static void genIntermediateCommonBlockAccessors(
Fortran::lower::AbstractConverter &converter,
const mlir::Location &currentLocation,
llvm::ArrayRef<mlir::BlockArgument> mapBlockArgs,
llvm::ArrayRef<const Fortran::semantics::Symbol *> mapSyms) {
// Iterate over the symbol list, which will be shorter than the list of
// arguments if new entry block arguments were introduced to implicitly map
// outside values used by the bounds cloned into the target region. In that
// case, the additional block arguments do not need processing here.
for (auto [mapSym, mapArg] : llvm::zip_first(mapSyms, mapBlockArgs)) {
auto *details = mapSym->detailsIf<Fortran::semantics::CommonBlockDetails>();
if (!details)
continue;
for (auto obj : details->objects()) {
auto targetCBMemberBind = Fortran::lower::genCommonBlockMember(
converter, currentLocation, *obj, mapArg);
fir::ExtendedValue sexv = converter.getSymbolExtendedValue(*obj);
fir::ExtendedValue targetCBExv =
getExtendedValue(sexv, targetCBMemberBind);
converter.bindSymbol(*obj, targetCBExv);
}
}
}
// This function creates a block for the body of the targetOp's region. It adds
// all the symbols present in `args` as block arguments to this block.
static void genBodyOfTargetOp(
lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::omp::TargetOp &targetOp, const EntryBlockArgs &args,
const mlir::Location &currentLocation, const ConstructQueue &queue,
ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
auto argIface = llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*targetOp);
mlir::Region &region = targetOp.getRegion();
mlir::Block *entryBlock = genEntryBlock(firOpBuilder, args, region);
bindEntryBlockArgs(converter, targetOp, args);
if (!hostEvalInfo.empty())
hostEvalInfo.back().bindOperands(argIface.getHostEvalBlockArgs());
// Check if cloning the bounds introduced any dependency on the outer region.
// If so, then either clone them as well if they are MemoryEffectFree, or
// otherwise copy them to a new temporary, add them to the map and
// block_argument lists, and replace their uses with the new temporary.
llvm::SetVector<mlir::Value> valuesDefinedAbove;
mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove);
while (!valuesDefinedAbove.empty()) {
for (mlir::Value val : valuesDefinedAbove) {
mlir::Operation *valOp = val.getDefiningOp();
assert(valOp != nullptr);
// NOTE: We skip BoxDimsOps as the lesser of two evils, since mapping the
// indices separately is cheaper than the alternative of eventually mapping
// the whole Box, which comes with a fairly large overhead comparatively. We
// could be more robust about this and check using a BackwardsSlice to see
// if we run the risk of mapping a box.
if (mlir::isMemoryEffectFree(valOp) &&
!mlir::isa<fir::BoxDimsOp>(valOp)) {
mlir::Operation *clonedOp = valOp->clone();
entryBlock->push_front(clonedOp);
auto replace = [entryBlock](mlir::OpOperand &use) {
return use.getOwner()->getBlock() == entryBlock;
};
valOp->getResults().replaceUsesWithIf(clonedOp->getResults(), replace);
valOp->replaceUsesWithIf(clonedOp, replace);
} else {
auto savedIP = firOpBuilder.getInsertionPoint();
firOpBuilder.setInsertionPointAfter(valOp);
auto copyVal =
firOpBuilder.createTemporary(val.getLoc(), val.getType());
firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal);
fir::factory::AddrAndBoundsInfo info =
fir::factory::getDataOperandBaseAddr(
firOpBuilder, val, /*isOptional=*/false, val.getLoc());
llvm::SmallVector<mlir::Value> bounds =
fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
mlir::omp::MapBoundsType>(
firOpBuilder, info,
hlfir::translateToExtendedValue(val.getLoc(), firOpBuilder,
hlfir::Entity{val})
.first,
/*dataExvIsAssumedSize=*/false, val.getLoc());
std::stringstream name;
firOpBuilder.setInsertionPoint(targetOp);
llvm::omp::OpenMPOffloadMappingFlags mapFlag =
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
mlir::omp::VariableCaptureKind captureKind =
mlir::omp::VariableCaptureKind::ByRef;
mlir::Type eleType = copyVal.getType();
if (auto refType =
mlir::dyn_cast<fir::ReferenceType>(copyVal.getType()))
eleType = refType.getElementType();
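// Capture trivial (e.g. scalar) and character values by copying them into
// the kernel; anything else, except C_PTR values, is mapped with the TO
// flag so that its current value is visible on the device.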
if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) {
captureKind = mlir::omp::VariableCaptureKind::ByCopy;
} else if (!fir::isa_builtin_cptr_type(eleType)) {
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
}
mlir::Value mapOp = createMapInfoOp(
firOpBuilder, copyVal.getLoc(), copyVal,
/*varPtrPtr=*/mlir::Value{}, name.str(), bounds,
/*members=*/llvm::SmallVector<mlir::Value>{},
/*membersIndex=*/mlir::ArrayAttr{},
static_cast<
std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
mapFlag),
captureKind, copyVal.getType());
// Get the index of the first non-map argument before modifying mapVars,
// then append an element to mapVars and an associated entry block
// argument at that index.
unsigned insertIndex =
argIface.getMapBlockArgsStart() + argIface.numMapBlockArgs();
targetOp.getMapVarsMutable().append(mapOp);
mlir::Value clonedValArg = region.insertArgument(
insertIndex, copyVal.getType(), copyVal.getLoc());
firOpBuilder.setInsertionPointToStart(entryBlock);
auto loadOp = firOpBuilder.create<fir::LoadOp>(clonedValArg.getLoc(),
clonedValArg);
val.replaceUsesWithIf(loadOp->getResult(0),
[entryBlock](mlir::OpOperand &use) {
return use.getOwner()->getBlock() == entryBlock;
});
firOpBuilder.setInsertionPoint(entryBlock, savedIP);
}
}
valuesDefinedAbove.clear();
mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove);
}
// Insert a dummy instruction to remember the insertion position. The
// marker will be deleted since there are no uses.
// In the HLFIR flow there are hlfir.declares inserted above while
// setting block arguments.
mlir::Value undefMarker = firOpBuilder.create<fir::UndefOp>(
targetOp.getLoc(), firOpBuilder.getIndexType());
// Create blocks for unstructured regions. This has to be done since
// blocks are initially allocated with the function as the parent region.
if (lower::omp::isLastItemInQueue(item, queue) &&
eval.lowerAsUnstructured()) {
lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp, mlir::omp::YieldOp>(
firOpBuilder, eval.getNestedEvaluations());
}
firOpBuilder.create<mlir::omp::TerminatorOp>(currentLocation);
// Set the insertion point after the marker.
firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp());
// If we map a common block using its symbol, e.g. map(tofrom: /common_block/),
// and access its members within the target region, there is a large chance we
// will end up with uses external to the region accessing the common block. To
// resolve these, we generate new common block member accesses within the
// region, binding them to the member symbols for the scope of the region, so
// that subsequent code generation within the region will utilise the new
// member accesses we have created.
genIntermediateCommonBlockAccessors(
converter, currentLocation, argIface.getMapBlockArgs(), args.map.syms);
if (ConstructQueue::const_iterator next = std::next(item);
next != queue.end()) {
genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
next);
} else {
genNestedEvaluations(converter, eval);
}
dsp.processStep2(targetOp, /*isLoop=*/false);
}
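/// Create an operation of type \c OpTy at the current insertion point,
/// forwarding \p args to its builder, and then generate its region body from
/// the given construct queue item.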
template <typename OpTy, typename... Args>
static OpTy genOpWithBody(const OpWithBodyGenInfo &info,
const ConstructQueue &queue,
ConstructQueue::const_iterator item, Args &&...args) {
auto op = info.converter.getFirOpBuilder().create<OpTy>(
info.loc, std::forward<Args>(args)...);
createBodyOfOp(*op, info, queue, item);
return op;
}
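/// Create a loop wrapper operation, checked at compile time to implement
/// \c mlir::omp::LoopWrapperInterface, and create its entry block with
/// arguments based on \p args.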
template <typename OpTy, typename ClauseOpsTy>
static OpTy genWrapperOp(lower::AbstractConverter &converter,
mlir::Location loc, const ClauseOpsTy &clauseOps,
const EntryBlockArgs &args) {
static_assert(
OpTy::template hasTrait<mlir::omp::LoopWrapperInterface::Trait>(),
"expected a loop wrapper");
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
// Create wrapper.
auto op = firOpBuilder.create<OpTy>(loc, clauseOps);
// Create entry block with arguments.
genEntryBlock(firOpBuilder, args, op.getRegion());
return op;
}
//===----------------------------------------------------------------------===//
// Code generation functions for clauses
//===----------------------------------------------------------------------===//
static void genCancelClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::CancelOperands &clauseOps) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processCancelDirectiveName(clauseOps);
cp.processIf(llvm::omp::Directive::OMPD_cancel, clauseOps);
}
static void
genCancellationPointClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::CancellationPointOperands &clauseOps) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processCancelDirectiveName(clauseOps);
}
static void genCriticalDeclareClauses(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::CriticalDeclareOperands &clauseOps, llvm::StringRef name) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processHint(clauseOps);
clauseOps.symName =
mlir::StringAttr::get(converter.getFirOpBuilder().getContext(), name);
}
static void genDistributeClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
lower::StatementContext &stmtCtx,
const List<Clause> &clauses,
mlir::Location loc,
mlir::omp::DistributeOperands &clauseOps) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processAllocate(clauseOps);
cp.processDistSchedule(stmtCtx, clauseOps);
cp.processOrder(clauseOps);
}
static void genFlushClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
const ObjectList &objects,
const List<Clause> &clauses, mlir::Location loc,
llvm::SmallVectorImpl<mlir::Value> &operandRange) {
if (!objects.empty())
genObjectList(objects, converter, operandRange);
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processTODO<clause::AcqRel, clause::Acquire, clause::Release,
clause::SeqCst>(loc, llvm::omp::OMPD_flush);
}
static void
genLoopNestClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, const List<Clause> &clauses,
mlir::Location loc, mlir::omp::LoopNestOperands &clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> &iv) {
ClauseProcessor cp(converter, semaCtx, clauses);
if (hostEvalInfo.empty() || !hostEvalInfo.back().apply(clauseOps, iv))
cp.processCollapse(loc, eval, clauseOps, iv);
clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr();
}
static void genLoopClauses(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::LoopOperands &clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processBind(clauseOps);
cp.processOrder(clauseOps);
cp.processReduction(loc, clauseOps, reductionSyms);
cp.processTODO<clause::Lastprivate>(loc, llvm::omp::Directive::OMPD_loop);
}
static void genMaskedClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
lower::StatementContext &stmtCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::MaskedOperands &clauseOps) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processFilter(stmtCtx, clauseOps);
}
static void
genOrderedRegionClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::OrderedRegionOperands &clauseOps) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processTODO<clause::Simd>(loc, llvm::omp::Directive::OMPD_ordered);
}
static void genParallelClauses(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::StatementContext &stmtCtx, const List<Clause> &clauses,
mlir::Location loc, mlir::omp::ParallelOperands &clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processAllocate(clauseOps);
cp.processIf(llvm::omp::Directive::OMPD_parallel, clauseOps);
if (hostEvalInfo.empty() || !hostEvalInfo.back().apply(clauseOps))
cp.processNumThreads(stmtCtx, clauseOps);
cp.processProcBind(clauseOps);
cp.processReduction(loc, clauseOps, reductionSyms);
}
static void genScanClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::ScanOperands &clauseOps) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processInclusive(loc, clauseOps);
cp.processExclusive(loc, clauseOps);
}
static void genSectionsClauses(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::SectionsOperands &clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processAllocate(clauseOps);
cp.processNowait(clauseOps);
cp.processReduction(loc, clauseOps, reductionSyms);
// TODO Support delayed privatization.
}
static void genSimdClauses(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::SimdOperands &clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processAligned(clauseOps);
cp.processIf(llvm::omp::Directive::OMPD_simd, clauseOps);
cp.processNontemporal(clauseOps);
cp.processOrder(clauseOps);
cp.processReduction(loc, clauseOps, reductionSyms);
cp.processSafelen(clauseOps);
cp.processSimdlen(clauseOps);
cp.processTODO<clause::Linear>(loc, llvm::omp::Directive::OMPD_simd);
}
static void genSingleClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::SingleOperands &clauseOps) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processAllocate(clauseOps);
cp.processCopyprivate(loc, clauseOps);
cp.processNowait(clauseOps);
// TODO Support delayed privatization.
}
static void genTargetClauses(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::SymMap &symTable, lower::StatementContext &stmtCtx,
lower::pft::Evaluation &eval, const List<Clause> &clauses,
mlir::Location loc, mlir::omp::TargetOperands &clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> &hasDeviceAddrSyms,
llvm::SmallVectorImpl<const semantics::Symbol *> &isDevicePtrSyms,
llvm::SmallVectorImpl<const semantics::Symbol *> &mapSyms) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processBare(clauseOps);
cp.processDepend(symTable, stmtCtx, clauseOps);
cp.processDevice(stmtCtx, clauseOps);
cp.processHasDeviceAddr(stmtCtx, clauseOps, hasDeviceAddrSyms);
if (!hostEvalInfo.empty()) {
// Only process host_eval if compiling for the host device.
processHostEvalClauses(converter, semaCtx, stmtCtx, eval, loc);
hostEvalInfo.back().collectValues(clauseOps.hostEvalVars);
}
cp.processIf(llvm::omp::Directive::OMPD_target, clauseOps);
cp.processIsDevicePtr(clauseOps, isDevicePtrSyms);
cp.processMap(loc, stmtCtx, clauseOps, &mapSyms);
cp.processNowait(clauseOps);
cp.processThreadLimit(stmtCtx, clauseOps);
cp.processTODO<clause::Allocate, clause::Defaultmap, clause::InReduction,
clause::UsesAllocators>(loc,
llvm::omp::Directive::OMPD_target);
// `target private(..)` is only supported in delayed privatization mode.
if (!enableDelayedPrivatizationStaging)
cp.processTODO<clause::Firstprivate, clause::Private>(
loc, llvm::omp::Directive::OMPD_target);
}
static void genTargetDataClauses(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::StatementContext &stmtCtx, const List<Clause> &clauses,
mlir::Location loc, mlir::omp::TargetDataOperands &clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> &useDeviceAddrSyms,
llvm::SmallVectorImpl<const semantics::Symbol *> &useDevicePtrSyms) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processDevice(stmtCtx, clauseOps);
cp.processIf(llvm::omp::Directive::OMPD_target_data, clauseOps);
cp.processMap(loc, stmtCtx, clauseOps);
cp.processUseDeviceAddr(stmtCtx, clauseOps, useDeviceAddrSyms);
cp.processUseDevicePtr(stmtCtx, clauseOps, useDevicePtrSyms);
// This implements the deprecated functionality of use_device_ptr that allows
// users to provide non-C_PTR arguments to it, with the caveat that the
// compiler will treat them as use_device_addr. A lot of legacy code may still
// depend on this functionality, so we should support it in some manner. We
// currently do so by simply shifting non-C_PTR operands from the
// use_device_ptr lists into the use_device_addr lists.
// TODO: Perhaps create a user-providable compiler option that would
// re-introduce a hard error rather than a warning in these cases.
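// For instance (illustrative; `a` is not of type C_PTR):
//
//   !$omp target data map(tofrom: a) use_device_ptr(a)
//
// is handled as if `a` had been listed in use_device_addr instead.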
promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr(
clauseOps.useDeviceAddrVars, useDeviceAddrSyms,
clauseOps.useDevicePtrVars, useDevicePtrSyms);
}
static void genTargetEnterExitUpdateDataClauses(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::SymMap &symTable, lower::StatementContext &stmtCtx,
const List<Clause> &clauses, mlir::Location loc,
llvm::omp::Directive directive,
mlir::omp::TargetEnterExitUpdateDataOperands &clauseOps) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processDepend(symTable, stmtCtx, clauseOps);
cp.processDevice(stmtCtx, clauseOps);
cp.processIf(directive, clauseOps);
if (directive == llvm::omp::Directive::OMPD_target_update)
cp.processMotionClauses(stmtCtx, clauseOps);
else
cp.processMap(loc, stmtCtx, clauseOps);
cp.processNowait(clauseOps);
}
static void genTaskClauses(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::SymMap &symTable, lower::StatementContext &stmtCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::TaskOperands &clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> &inReductionSyms) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processAllocate(clauseOps);
cp.processDepend(symTable, stmtCtx, clauseOps);
cp.processFinal(stmtCtx, clauseOps);
cp.processIf(llvm::omp::Directive::OMPD_task, clauseOps);
cp.processInReduction(loc, clauseOps, inReductionSyms);
cp.processMergeable(clauseOps);
cp.processPriority(stmtCtx, clauseOps);
cp.processUntied(clauseOps);
cp.processDetach(clauseOps);
cp.processTODO<clause::Affinity>(loc, llvm::omp::Directive::OMPD_task);
}
static void genTaskgroupClauses(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::TaskgroupOperands &clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> &taskReductionSyms) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processAllocate(clauseOps);
cp.processTaskReduction(loc, clauseOps, taskReductionSyms);
}
static void genTaskloopClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::TaskloopOperands &clauseOps) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processTODO<clause::Allocate, clause::Collapse, clause::Default,
clause::Final, clause::Grainsize, clause::If,
clause::InReduction, clause::Lastprivate, clause::Mergeable,
clause::Nogroup, clause::NumTasks, clause::Priority,
clause::Reduction, clause::Shared, clause::Untied>(
loc, llvm::omp::Directive::OMPD_taskloop);
}
static void genTaskwaitClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::TaskwaitOperands &clauseOps) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processTODO<clause::Depend, clause::Nowait>(
loc, llvm::omp::Directive::OMPD_taskwait);
}
static void genWorkshareClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
lower::StatementContext &stmtCtx,
const List<Clause> &clauses, mlir::Location loc,
mlir::omp::WorkshareOperands &clauseOps) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processNowait(clauseOps);
}
static void genTeamsClauses(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::StatementContext &stmtCtx, const List<Clause> &clauses,
mlir::Location loc, mlir::omp::TeamsOperands &clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processAllocate(clauseOps);
cp.processIf(llvm::omp::Directive::OMPD_teams, clauseOps);
if (hostEvalInfo.empty() || !hostEvalInfo.back().apply(clauseOps)) {
cp.processNumTeams(stmtCtx, clauseOps);
cp.processThreadLimit(stmtCtx, clauseOps);
}
cp.processReduction(loc, clauseOps, reductionSyms);
// TODO Support delayed privatization.
}
static void genWsloopClauses(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::StatementContext &stmtCtx, const List<Clause> &clauses,
mlir::Location loc, mlir::omp::WsloopOperands &clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processNowait(clauseOps);
cp.processOrder(clauseOps);
cp.processOrdered(clauseOps);
cp.processReduction(loc, clauseOps, reductionSyms);
cp.processSchedule(stmtCtx, clauseOps);
cp.processTODO<clause::Allocate, clause::Linear>(
loc, llvm::omp::Directive::OMPD_do);
}
//===----------------------------------------------------------------------===//
// Code generation functions for leaf constructs
//===----------------------------------------------------------------------===//
static mlir::omp::BarrierOp
genBarrierOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
return converter.getFirOpBuilder().create<mlir::omp::BarrierOp>(loc);
}
static mlir::omp::CancelOp genCancelOp(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
mlir::Location loc,
const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
mlir::omp::CancelOperands clauseOps;
genCancelClauses(converter, semaCtx, item->clauses, loc, clauseOps);
return converter.getFirOpBuilder().create<mlir::omp::CancelOp>(loc,
clauseOps);
}
static mlir::omp::CancellationPointOp genCancellationPointOp(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
mlir::omp::CancellationPointOperands clauseOps;
genCancellationPointClauses(converter, semaCtx, item->clauses, loc,
clauseOps);
return converter.getFirOpBuilder().create<mlir::omp::CancellationPointOp>(
loc, clauseOps);
}
static mlir::omp::CriticalOp
genCriticalOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item,
const std::optional<parser::Name> &name) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
mlir::FlatSymbolRefAttr nameAttr;
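// A named CRITICAL construct is lowered to a module-level
// omp.critical.declare, looked up or created below, which the omp.critical
// op then references by symbol. Roughly:
//
//   omp.critical.declare @name hint(...)
//   ...
//   omp.critical(@name) { ... }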
if (name) {
std::string nameStr = name->ToString();
mlir::ModuleOp mod = firOpBuilder.getModule();
auto global = mod.lookupSymbol<mlir::omp::CriticalDeclareOp>(nameStr);
if (!global) {
mlir::omp::CriticalDeclareOperands clauseOps;
genCriticalDeclareClauses(converter, semaCtx, item->clauses, loc,
clauseOps, nameStr);
mlir::OpBuilder modBuilder(mod.getBodyRegion());
global = modBuilder.create<mlir::omp::CriticalDeclareOp>(loc, clauseOps);
}
nameAttr = mlir::FlatSymbolRefAttr::get(firOpBuilder.getContext(),
global.getSymName());
}
return genOpWithBody<mlir::omp::CriticalOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
llvm::omp::Directive::OMPD_critical),
queue, item, nameAttr);
}
static mlir::omp::FlushOp
genFlushOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ObjectList &objects,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
llvm::SmallVector<mlir::Value> operandRange;
genFlushClauses(converter, semaCtx, objects, item->clauses, loc,
operandRange);
return converter.getFirOpBuilder().create<mlir::omp::FlushOp>(
converter.getCurrentLocation(), operandRange);
}
static mlir::omp::LoopNestOp genLoopNestOp(
lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item, mlir::omp::LoopNestOperands &clauseOps,
llvm::ArrayRef<const semantics::Symbol *> iv,
llvm::ArrayRef<
std::pair<mlir::omp::BlockArgOpenMPOpInterface, const EntryBlockArgs &>>
wrapperArgs,
llvm::omp::Directive directive, DataSharingProcessor &dsp) {
auto ivCallback = [&](mlir::Operation *op) {
genLoopVars(op, converter, loc, iv, wrapperArgs);
return llvm::SmallVector<const semantics::Symbol *>(iv);
};
auto *nestedEval =
getCollapsedLoopEval(eval, getCollapseValue(item->clauses));
return genOpWithBody<mlir::omp::LoopNestOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval,
directive)
.setClauses(&item->clauses)
.setDataSharingProcessor(&dsp)
.setGenRegionEntryCb(ivCallback),
queue, item, clauseOps);
}
static mlir::omp::LoopOp
genLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
mlir::omp::LoopOperands loopClauseOps;
llvm::SmallVector<const semantics::Symbol *> loopReductionSyms;
genLoopClauses(converter, semaCtx, item->clauses, loc, loopClauseOps,
loopReductionSyms);
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/true,
/*useDelayedPrivatization=*/true, symTable);
dsp.processStep1(&loopClauseOps);
mlir::omp::LoopNestOperands loopNestClauseOps;
llvm::SmallVector<const semantics::Symbol *> iv;
genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
loopNestClauseOps, iv);
EntryBlockArgs loopArgs;
loopArgs.priv.syms = dsp.getDelayedPrivSymbols();
loopArgs.priv.vars = loopClauseOps.privateVars;
loopArgs.reduction.syms = loopReductionSyms;
loopArgs.reduction.vars = loopClauseOps.reductionVars;
auto loopOp =
genWrapperOp<mlir::omp::LoopOp>(converter, loc, loopClauseOps, loopArgs);
genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
loopNestClauseOps, iv, {{loopOp, loopArgs}},
llvm::omp::Directive::OMPD_loop, dsp);
return loopOp;
}
static mlir::omp::MaskedOp
genMaskedOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
mlir::omp::MaskedOperands clauseOps;
genMaskedClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps);
return genOpWithBody<mlir::omp::MaskedOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
llvm::omp::Directive::OMPD_masked),
queue, item, clauseOps);
}
static mlir::omp::MasterOp
genMasterOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
return genOpWithBody<mlir::omp::MasterOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
llvm::omp::Directive::OMPD_master),
queue, item);
}
static mlir::omp::OrderedOp
genOrderedOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
TODO(loc, "OMPD_ordered");
return nullptr;
}
static mlir::omp::OrderedRegionOp
genOrderedRegionOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
mlir::omp::OrderedRegionOperands clauseOps;
genOrderedRegionClauses(converter, semaCtx, item->clauses, loc, clauseOps);
return genOpWithBody<mlir::omp::OrderedRegionOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
llvm::omp::Directive::OMPD_ordered),
queue, item, clauseOps);
}
static mlir::omp::ParallelOp
genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item,
mlir::omp::ParallelOperands &clauseOps,
const EntryBlockArgs &args, DataSharingProcessor *dsp,
bool isComposite = false) {
assert((!enableDelayedPrivatization || dsp) &&
"expected valid DataSharingProcessor");
OpWithBodyGenInfo genInfo =
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
llvm::omp::Directive::OMPD_parallel)
.setClauses(&item->clauses)
.setEntryBlockArgs(&args)
.setGenSkeletonOnly(isComposite)
.setDataSharingProcessor(dsp);
auto parallelOp =
genOpWithBody<mlir::omp::ParallelOp>(genInfo, queue, item, clauseOps);
parallelOp.setComposite(isComposite);
return parallelOp;
}
static mlir::omp::ScanOp
genScanOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
mlir::omp::ScanOperands clauseOps;
genScanClauses(converter, semaCtx, item->clauses, loc, clauseOps);
return converter.getFirOpBuilder().create<mlir::omp::ScanOp>(
converter.getCurrentLocation(), clauseOps);
}
/// This breaks the normal prototype of the gen*Op functions: adding the
/// sectionBlocks argument so that the enclosed section constructs can be
/// lowered here with correct reduction symbol remapping.
static mlir::omp::SectionsOp
genSectionsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item,
const parser::OmpSectionBlocks &sectionBlocks) {
mlir::omp::SectionsOperands clauseOps;
llvm::SmallVector<const semantics::Symbol *> reductionSyms;
genSectionsClauses(converter, semaCtx, item->clauses, loc, clauseOps,
reductionSyms);
auto &builder = converter.getFirOpBuilder();
// Insert privatizations before SECTIONS
lower::SymMapScope scope(symTable);
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
lower::omp::isLastItemInQueue(item, queue),
/*useDelayedPrivatization=*/false, symTable);
dsp.processStep1();
List<Clause> nonDsaClauses;
List<const clause::Lastprivate *> lastprivates;
for (const Clause &clause : item->clauses) {
if (clause.id == llvm::omp::Clause::OMPC_lastprivate) {
auto &lastp = std::get<clause::Lastprivate>(clause.u);
lastprivateModifierNotSupported(lastp, converter.getCurrentLocation());
lastprivates.push_back(&lastp);
} else {
switch (clause.id) {
case llvm::omp::Clause::OMPC_firstprivate:
case llvm::omp::Clause::OMPC_private:
case llvm::omp::Clause::OMPC_shared:
break;
default:
nonDsaClauses.push_back(clause);
}
}
}
// SECTIONS construct.
auto sectionsOp = builder.create<mlir::omp::SectionsOp>(loc, clauseOps);
// Create entry block with reduction variables as arguments.
EntryBlockArgs args;
// TODO: Add private syms and vars.
args.reduction.syms = reductionSyms;
args.reduction.vars = clauseOps.reductionVars;
genEntryBlock(builder, args, sectionsOp.getRegion());
mlir::Operation *terminator =
lower::genOpenMPTerminator(builder, sectionsOp, loc);
// Generate nested SECTION constructs.
// This is done here rather than in genOMP([...], OpenMPSectionConstruct)
// because we need to run genReductionVars on each omp.section so that the
// reduction variable gets mapped to the private version.
for (auto [construct, nestedEval] :
llvm::zip(sectionBlocks.v, eval.getNestedEvaluations())) {
const auto *sectionConstruct =
std::get_if<parser::OpenMPSectionConstruct>(&construct.u);
if (!sectionConstruct) {
assert(false &&
"unexpected construct nested inside of SECTIONS construct");
continue;
}
ConstructQueue sectionQueue{buildConstructQueue(
converter.getFirOpBuilder().getModule(), semaCtx, nestedEval,
sectionConstruct->source, llvm::omp::Directive::OMPD_section, {})};
builder.setInsertionPoint(terminator);
genOpWithBody<mlir::omp::SectionOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, nestedEval,
llvm::omp::Directive::OMPD_section)
.setClauses(&sectionQueue.begin()->clauses)
.setDataSharingProcessor(&dsp)
.setEntryBlockArgs(&args),
sectionQueue, sectionQueue.begin());
}
if (!lastprivates.empty()) {
mlir::Region &sectionsBody = sectionsOp.getRegion();
assert(sectionsBody.hasOneBlock());
mlir::Block &body = sectionsBody.front();
auto lastSectionOp = llvm::find_if(
llvm::reverse(body.getOperations()), [](const mlir::Operation &op) {
return llvm::isa<mlir::omp::SectionOp>(op);
});
assert(lastSectionOp != body.rend());
for (const clause::Lastprivate *lastp : lastprivates) {
builder.setInsertionPoint(
lastSectionOp->getRegion(0).back().getTerminator());
mlir::OpBuilder::InsertPoint insp = builder.saveInsertionPoint();
const auto &objList = std::get<ObjectList>(lastp->t);
for (const Object &object : objList) {
semantics::Symbol *sym = object.sym();
if (const auto *common =
sym->detailsIf<semantics::CommonBlockDetails>()) {
for (const auto &obj : common->objects())
converter.copyHostAssociateVar(*obj, &insp, /*hostIsSource=*/false);
} else {
converter.copyHostAssociateVar(*sym, &insp, /*hostIsSource=*/false);
}
}
}
}
// Perform DataSharingProcessor's step2 outside of the SECTIONS construct.
builder.setInsertionPointAfter(sectionsOp.getOperation());
dsp.processStep2(sectionsOp, /*isLoop=*/false);
// Emit an implicit barrier to synchronize threads and avoid data races on
// post-update of lastprivate variables when the `nowait` clause is present.
if (clauseOps.nowait && !lastprivates.empty())
builder.create<mlir::omp::BarrierOp>(loc);
return sectionsOp;
}
static mlir::Operation *
genScopeOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
TODO(loc, "Scope construct");
return nullptr;
}
static mlir::omp::SingleOp
genSingleOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
mlir::omp::SingleOperands clauseOps;
genSingleClauses(converter, semaCtx, item->clauses, loc, clauseOps);
return genOpWithBody<mlir::omp::SingleOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
llvm::omp::Directive::OMPD_single)
.setClauses(&item->clauses),
queue, item, clauseOps);
}
static mlir::omp::TargetOp
genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
bool isTargetDevice =
llvm::cast<mlir::omp::OffloadModuleInterface>(*converter.getModuleOp())
.getIsTargetDevice();
// Introduce a new host_eval information structure for this target region.
if (!isTargetDevice)
hostEvalInfo.emplace_back();
mlir::omp::TargetOperands clauseOps;
llvm::SmallVector<const semantics::Symbol *> mapSyms, isDevicePtrSyms,
hasDeviceAddrSyms;
genTargetClauses(converter, semaCtx, symTable, stmtCtx, eval, item->clauses,
loc, clauseOps, hasDeviceAddrSyms, isDevicePtrSyms, mapSyms);
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/
lower::omp::isLastItemInQueue(item, queue),
/*useDelayedPrivatization=*/true, symTable);
dsp.processStep1(&clauseOps);
// Check if a value of type `type` can be passed to the kernel by value.
// All kernel parameters are of pointer type, so if the value can be
// represented within the size and alignment of a pointer, then it can be
// passed by value.
auto isLiteralType = [&](mlir::Type type) {
const mlir::DataLayout &dl = firOpBuilder.getDataLayout();
mlir::Type ptrTy =
mlir::LLVM::LLVMPointerType::get(&converter.getMLIRContext());
uint64_t ptrSize = dl.getTypeSize(ptrTy);
uint64_t ptrAlign = dl.getTypePreferredAlignment(ptrTy);
auto [size, align] = fir::getTypeSizeAndAlignmentOrCrash(
loc, type, dl, converter.getKindMap());
return size <= ptrSize && align <= ptrAlign;
};
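// For example, on a typical 64-bit target an i32 scalar satisfies both
// checks and may be passed by value, while a value wider than a pointer
// (e.g. a 128-bit complex) would be passed by reference.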
// 5.8.1 Implicit Data-Mapping Attribute Rules
// The following code follows the implicit data-mapping rules to map all
// symbols used inside the region that do not have explicit data-environment
// attribute clauses (neither data-sharing clauses, e.g. `private`, nor `map`
// clauses).
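// For example (illustrative):
//
//   !$omp target
//     y = x
//   !$omp end target
//
// Neither `x` nor `y` appears on an explicit clause here, so both are
// mapped implicitly by the code below.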
auto captureImplicitMap = [&](const semantics::Symbol &sym) {
if (dsp.getAllSymbolsToPrivatize().contains(&sym))
return;
// These symbols are mapped individually in processHasDeviceAddr.
if (llvm::is_contained(hasDeviceAddrSyms, &sym))
return;
// Structure component symbols don't have bindings, and can only be
// explicitly mapped individually. If a member is captured implicitly,
// we map the entirety of the derived type when we find its symbol.
if (sym.owner().IsDerivedType())
return;
// If the symbol is part of an already mapped common block, do not make a
// map for it.
if (const Fortran::semantics::Symbol *common =
Fortran::semantics::FindCommonBlockContaining(sym.GetUltimate()))
if (llvm::is_contained(mapSyms, common))
return;
// If we come across a symbol without a symbol address, we return, as we
// cannot process it. This is intended as a catch-all early exit for
// symbols that do not have a corresponding extended value, such as
// subroutines, interfaces, and named blocks.
if (!converter.getSymbolAddress(sym))
return;
if (!llvm::is_contained(mapSyms, &sym)) {
if (const auto *details =
sym.template detailsIf<semantics::HostAssocDetails>())
converter.copySymbolBinding(details->symbol(), sym);
std::stringstream name;
fir::ExtendedValue dataExv = converter.getSymbolExtendedValue(sym);
name << sym.name().ToString();
mlir::FlatSymbolRefAttr mapperId;
if (sym.GetType()->category() == semantics::DeclTypeSpec::TypeDerived) {
auto &typeSpec = sym.GetType()->derivedTypeSpec();
std::string mapperIdName = typeSpec.name().ToString() + ".default";
mapperIdName = converter.mangleName(mapperIdName, *typeSpec.GetScope());
if (converter.getModuleOp().lookupSymbol(mapperIdName))
mapperId = mlir::FlatSymbolRefAttr::get(&converter.getMLIRContext(),
mapperIdName);
}
fir::factory::AddrAndBoundsInfo info =
Fortran::lower::getDataOperandBaseAddr(
converter, firOpBuilder, sym, converter.getCurrentLocation());
llvm::SmallVector<mlir::Value> bounds =
fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
mlir::omp::MapBoundsType>(
firOpBuilder, info, dataExv,
semantics::IsAssumedSizeArray(sym.GetUltimate()),
converter.getCurrentLocation());
llvm::omp::OpenMPOffloadMappingFlags mapFlag =
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
mlir::omp::VariableCaptureKind captureKind =
mlir::omp::VariableCaptureKind::ByRef;
mlir::Value baseOp = info.rawInput;
mlir::Type eleType = baseOp.getType();
if (auto refType = mlir::dyn_cast<fir::ReferenceType>(baseOp.getType()))
eleType = refType.getElementType();
// If a variable is specified in declare target link and its device
// type is not specified as `nohost`, it needs to be mapped tofrom.
mlir::ModuleOp mod = firOpBuilder.getModule();
mlir::Operation *op = mod.lookupSymbol(converter.mangleName(sym));
auto declareTargetOp =
llvm::dyn_cast_if_present<mlir::omp::DeclareTargetInterface>(op);
if (declareTargetOp && declareTargetOp.isDeclareTarget()) {
if (declareTargetOp.getDeclareTargetCaptureClause() ==
mlir::omp::DeclareTargetCaptureClause::link &&
declareTargetOp.getDeclareTargetDeviceType() !=
mlir::omp::DeclareTargetDeviceType::nohost) {
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
}
} else if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) {
// Scalars behave as if they were "firstprivate".
// TODO: Handle objects that are shared/lastprivate or were listed
// in an in_reduction clause.
if (isLiteralType(eleType)) {
captureKind = mlir::omp::VariableCaptureKind::ByCopy;
} else {
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
}
} else if (!fir::isa_builtin_cptr_type(eleType)) {
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
}
auto location =
mlir::NameLoc::get(mlir::StringAttr::get(firOpBuilder.getContext(),
sym.name().ToString()),
baseOp.getLoc());
mlir::Value mapOp = createMapInfoOp(
firOpBuilder, location, baseOp, /*varPtrPtr=*/mlir::Value{},
name.str(), bounds, /*members=*/{},
/*membersIndex=*/mlir::ArrayAttr{},
static_cast<
std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
mapFlag),
captureKind, baseOp.getType(), /*partialMap=*/false, mapperId);
clauseOps.mapVars.push_back(mapOp);
mapSyms.push_back(&sym);
}
};
lower::pft::visitAllSymbols(eval, captureImplicitMap);
auto targetOp = firOpBuilder.create<mlir::omp::TargetOp>(loc, clauseOps);
llvm::SmallVector<mlir::Value> hasDeviceAddrBaseValues, mapBaseValues;
extractMappedBaseValues(clauseOps.hasDeviceAddrVars, hasDeviceAddrBaseValues);
extractMappedBaseValues(clauseOps.mapVars, mapBaseValues);
EntryBlockArgs args;
args.hasDeviceAddr.syms = hasDeviceAddrSyms;
args.hasDeviceAddr.vars = hasDeviceAddrBaseValues;
args.hostEvalVars = clauseOps.hostEvalVars;
// TODO: Add in_reduction syms and vars.
args.map.syms = mapSyms;
args.map.vars = mapBaseValues;
args.priv.syms = dsp.getDelayedPrivSymbols();
args.priv.vars = clauseOps.privateVars;
genBodyOfTargetOp(converter, symTable, semaCtx, eval, targetOp, args, loc,
queue, item, dsp);
// Remove the host_eval information structure created for this target region.
if (!isTargetDevice)
hostEvalInfo.pop_back();
return targetOp;
}
static mlir::omp::TargetDataOp genTargetDataOp(
lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
mlir::omp::TargetDataOperands clauseOps;
llvm::SmallVector<const semantics::Symbol *> useDeviceAddrSyms,
useDevicePtrSyms;
genTargetDataClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
clauseOps, useDeviceAddrSyms, useDevicePtrSyms);
auto targetDataOp =
converter.getFirOpBuilder().create<mlir::omp::TargetDataOp>(loc,
clauseOps);
llvm::SmallVector<mlir::Value> useDeviceAddrBaseValues,
useDevicePtrBaseValues;
extractMappedBaseValues(clauseOps.useDeviceAddrVars, useDeviceAddrBaseValues);
extractMappedBaseValues(clauseOps.useDevicePtrVars, useDevicePtrBaseValues);
EntryBlockArgs args;
args.useDeviceAddr.syms = useDeviceAddrSyms;
args.useDeviceAddr.vars = useDeviceAddrBaseValues;
args.useDevicePtr.syms = useDevicePtrSyms;
args.useDevicePtr.vars = useDevicePtrBaseValues;
genBodyOfTargetDataOp(converter, symTable, semaCtx, eval, targetDataOp, args,
loc, queue, item);
return targetDataOp;
}
template <typename OpTy>
static OpTy genTargetEnterExitUpdateDataOp(
lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
// GCC 9.3.0 emits a (probably) bogus warning about an unused variable.
[[maybe_unused]] llvm::omp::Directive directive;
if constexpr (std::is_same_v<OpTy, mlir::omp::TargetEnterDataOp>) {
directive = llvm::omp::Directive::OMPD_target_enter_data;
} else if constexpr (std::is_same_v<OpTy, mlir::omp::TargetExitDataOp>) {
directive = llvm::omp::Directive::OMPD_target_exit_data;
} else if constexpr (std::is_same_v<OpTy, mlir::omp::TargetUpdateOp>) {
directive = llvm::omp::Directive::OMPD_target_update;
} else {
llvm_unreachable("Unexpected TARGET DATA construct");
}
mlir::omp::TargetEnterExitUpdateDataOperands clauseOps;
genTargetEnterExitUpdateDataClauses(converter, semaCtx, symTable, stmtCtx,
item->clauses, loc, directive, clauseOps);
return firOpBuilder.create<OpTy>(loc, clauseOps);
}
static mlir::omp::TaskOp
genTaskOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
mlir::omp::TaskOperands clauseOps;
llvm::SmallVector<const semantics::Symbol *> inReductionSyms;
genTaskClauses(converter, semaCtx, symTable, stmtCtx, item->clauses, loc,
clauseOps, inReductionSyms);
if (!enableDelayedPrivatization)
return genOpWithBody<mlir::omp::TaskOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
llvm::omp::Directive::OMPD_task)
.setClauses(&item->clauses),
queue, item, clauseOps);
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
lower::omp::isLastItemInQueue(item, queue),
/*useDelayedPrivatization=*/true, symTable);
dsp.processStep1(&clauseOps);
EntryBlockArgs taskArgs;
taskArgs.priv.syms = dsp.getDelayedPrivSymbols();
taskArgs.priv.vars = clauseOps.privateVars;
taskArgs.inReduction.syms = inReductionSyms;
taskArgs.inReduction.vars = clauseOps.inReductionVars;
return genOpWithBody<mlir::omp::TaskOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
llvm::omp::Directive::OMPD_task)
.setClauses(&item->clauses)
.setDataSharingProcessor(&dsp)
.setEntryBlockArgs(&taskArgs),
queue, item, clauseOps);
}
static mlir::omp::TaskgroupOp
genTaskgroupOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
mlir::omp::TaskgroupOperands clauseOps;
llvm::SmallVector<const semantics::Symbol *> taskReductionSyms;
genTaskgroupClauses(converter, semaCtx, item->clauses, loc, clauseOps,
taskReductionSyms);
EntryBlockArgs taskgroupArgs;
taskgroupArgs.taskReduction.syms = taskReductionSyms;
taskgroupArgs.taskReduction.vars = clauseOps.taskReductionVars;
return genOpWithBody<mlir::omp::TaskgroupOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
llvm::omp::Directive::OMPD_taskgroup)
.setClauses(&item->clauses)
.setEntryBlockArgs(&taskgroupArgs),
queue, item, clauseOps);
}
static mlir::omp::TaskwaitOp
genTaskwaitOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
mlir::omp::TaskwaitOperands clauseOps;
genTaskwaitClauses(converter, semaCtx, item->clauses, loc, clauseOps);
return converter.getFirOpBuilder().create<mlir::omp::TaskwaitOp>(loc,
clauseOps);
}
static mlir::omp::TaskyieldOp
genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
return converter.getFirOpBuilder().create<mlir::omp::TaskyieldOp>(loc);
}
static mlir::omp::WorkshareOp genWorkshareOp(
lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
mlir::omp::WorkshareOperands clauseOps;
genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
clauseOps);
return genOpWithBody<mlir::omp::WorkshareOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
llvm::omp::Directive::OMPD_workshare)
.setClauses(&item->clauses),
queue, item, clauseOps);
}
static mlir::omp::TeamsOp
genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
mlir::omp::TeamsOperands clauseOps;
llvm::SmallVector<const semantics::Symbol *> reductionSyms;
genTeamsClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps,
reductionSyms);
EntryBlockArgs args;
// TODO: Add private syms and vars.
args.reduction.syms = reductionSyms;
args.reduction.vars = clauseOps.reductionVars;
return genOpWithBody<mlir::omp::TeamsOp>(
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
llvm::omp::Directive::OMPD_teams)
.setClauses(&item->clauses)
.setEntryBlockArgs(&args),
queue, item, clauseOps);
}
//===----------------------------------------------------------------------===//
// Code generation for atomic operations
//===----------------------------------------------------------------------===//
/// Populates \p hint and \p memoryOrder with appropriate clause information
/// if present on the atomic construct.
static void genOmpAtomicHintAndMemoryOrderClauses(
lower::AbstractConverter &converter,
const parser::OmpAtomicClauseList &clauseList, mlir::IntegerAttr &hint,
mlir::omp::ClauseMemoryOrderKindAttr &memoryOrder) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
for (const parser::OmpAtomicClause &clause : clauseList.v) {
common::visit(
common::visitors{
[&](const parser::OmpMemoryOrderClause &s) {
auto kind = common::visit(
common::visitors{
[&](const parser::OmpClause::AcqRel &) {
return mlir::omp::ClauseMemoryOrderKind::Acq_rel;
},
[&](const parser::OmpClause::Acquire &) {
return mlir::omp::ClauseMemoryOrderKind::Acquire;
},
[&](const parser::OmpClause::Relaxed &) {
return mlir::omp::ClauseMemoryOrderKind::Relaxed;
},
[&](const parser::OmpClause::Release &) {
return mlir::omp::ClauseMemoryOrderKind::Release;
},
[&](const parser::OmpClause::SeqCst &) {
return mlir::omp::ClauseMemoryOrderKind::Seq_cst;
},
[&](auto &&) -> mlir::omp::ClauseMemoryOrderKind {
llvm_unreachable("Unexpected clause");
},
},
s.v.u);
memoryOrder = mlir::omp::ClauseMemoryOrderKindAttr::get(
firOpBuilder.getContext(), kind);
},
[&](const parser::OmpHintClause &s) {
const auto *expr = semantics::GetExpr(s.v);
uint64_t hintExprValue = *evaluate::ToInt64(*expr);
hint = firOpBuilder.getI64IntegerAttr(hintExprValue);
},
[&](const parser::OmpFailClause &) {},
},
clause.u);
}
}
static void processOmpAtomicTODO(mlir::Type elementType, mlir::Location loc) {
if (!elementType)
return;
assert(fir::isa_trivial(fir::unwrapRefType(elementType)) &&
"is supported type for omp atomic");
}
/// Generates an atomic.read operation at the insertion point currently set
/// on the builder.
static void genAtomicCaptureStatement(
lower::AbstractConverter &converter, mlir::Value fromAddress,
mlir::Value toAddress,
const parser::OmpAtomicClauseList *leftHandClauseList,
const parser::OmpAtomicClauseList *rightHandClauseList,
mlir::Type elementType, mlir::Location loc) {
// Generate an `atomic.read` operation for atomic assignment statements.
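// The resulting operation has roughly the form:
//
//   omp.atomic.read %v = %x : !fir.ref<type>, !fir.ref<type>, type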
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
processOmpAtomicTODO(elementType, loc);
// If no hint clause is specified, the effect is as if
// hint(omp_sync_hint_none) had been specified.
mlir::IntegerAttr hint = nullptr;
mlir::omp::ClauseMemoryOrderKindAttr memoryOrder = nullptr;
if (leftHandClauseList)
genOmpAtomicHintAndMemoryOrderClauses(converter, *leftHandClauseList, hint,
memoryOrder);
if (rightHandClauseList)
genOmpAtomicHintAndMemoryOrderClauses(converter, *rightHandClauseList, hint,
memoryOrder);
firOpBuilder.create<mlir::omp::AtomicReadOp>(loc, fromAddress, toAddress,
mlir::TypeAttr::get(elementType),
hint, memoryOrder);
}
/// Generates an atomic.write operation at the insertion point currently set
/// on the builder.
static void genAtomicWriteStatement(
lower::AbstractConverter &converter, mlir::Value lhsAddr,
mlir::Value rhsExpr, const parser::OmpAtomicClauseList *leftHandClauseList,
const parser::OmpAtomicClauseList *rightHandClauseList, mlir::Location loc,
mlir::Value *evaluatedExprValue = nullptr) {
// Generate an `atomic.write` operation for atomic assignment statements.
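// The resulting operation has roughly the form:
//
//   omp.atomic.write %x = %expr : !fir.ref<type>, type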
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
mlir::Type varType = fir::unwrapRefType(lhsAddr.getType());
// Create a conversion outside the capture block.
auto insertionPoint = firOpBuilder.saveInsertionPoint();
firOpBuilder.setInsertionPointAfter(rhsExpr.getDefiningOp());
rhsExpr = firOpBuilder.createConvert(loc, varType, rhsExpr);
firOpBuilder.restoreInsertionPoint(insertionPoint);
processOmpAtomicTODO(varType, loc);
// If no hint clause is specified, the effect is as if
// hint(omp_sync_hint_none) had been specified.
mlir::IntegerAttr hint = nullptr;
mlir::omp::ClauseMemoryOrderKindAttr memoryOrder = nullptr;
if (leftHandClauseList)
genOmpAtomicHintAndMemoryOrderClauses(converter, *leftHandClauseList, hint,
memoryOrder);
if (rightHandClauseList)
genOmpAtomicHintAndMemoryOrderClauses(converter, *rightHandClauseList, hint,
memoryOrder);
firOpBuilder.create<mlir::omp::AtomicWriteOp>(loc, lhsAddr, rhsExpr, hint,
memoryOrder);
}
/// Generates an atomic.update operation at the insertion point currently set
/// on the builder.
static void genAtomicUpdateStatement(
lower::AbstractConverter &converter, mlir::Value lhsAddr,
mlir::Type varType, const parser::Variable &assignmentStmtVariable,
const parser::Expr &assignmentStmtExpr,
const parser::OmpAtomicClauseList *leftHandClauseList,
const parser::OmpAtomicClauseList *rightHandClauseList, mlir::Location loc,
mlir::Operation *atomicCaptureOp = nullptr) {
// Generate an `atomic.update` operation for atomic assignment statements.
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
mlir::Location currentLocation = converter.getCurrentLocation();
// Create the omp.atomic.update operation, e.g.:
//
// func.func @_QPsb() {
// %0 = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFsbEa"}
// %1 = fir.alloca i32 {bindc_name = "b", uniq_name = "_QFsbEb"}
// %2 = fir.load %1 : !fir.ref<i32>
// omp.atomic.update %0 : !fir.ref<i32> {
// ^bb0(%arg0: i32):
// %3 = arith.addi %arg0, %2 : i32
// omp.yield(%3 : i32)
// }
// return
// }
auto getArgExpression =
[](std::list<parser::ActualArgSpec>::const_iterator it) {
const auto &arg{std::get<parser::ActualArg>((*it).t)};
const auto *parserExpr{
std::get_if<common::Indirection<parser::Expr>>(&arg.u)};
return parserExpr;
};
// Lower any non-atomic sub-expression before the atomic operation, and
// map its lowered value to the semantic representation.
lower::ExprToValueMap exprValueOverrides;
// Max and min intrinsics can have a list of arguments. Hence we need a
// list of nonAtomicSubExprs to hoist. Currently, only the load is hoisted.
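// E.g. for `x = max(x, a, b)`, the values of `a` and `b` are evaluated
// before the atomic operation; only the access to `x` remains inside it.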
llvm::SmallVector<const lower::SomeExpr *> nonAtomicSubExprs;
common::visit(
common::visitors{
[&](const common::Indirection<parser::FunctionReference> &funcRef)
-> void {
const auto &args{std::get<std::list<parser::ActualArgSpec>>(
funcRef.value().v.t)};
std::list<parser::ActualArgSpec>::const_iterator beginIt =
args.begin();
std::list<parser::ActualArgSpec>::const_iterator endIt = args.end();
const auto *exprFirst{getArgExpression(beginIt)};
if (exprFirst && exprFirst->value().source ==
assignmentStmtVariable.GetSource()) {
// Add everything except the first
beginIt++;
} else {
// Add everything except the last
endIt--;
}
std::list<parser::ActualArgSpec>::const_iterator it;
for (it = beginIt; it != endIt; it++) {
const common::Indirection<parser::Expr> *expr =
getArgExpression(it);
if (expr)
nonAtomicSubExprs.push_back(semantics::GetExpr(*expr));
}
},
[&](const auto &op) -> void {
using T = std::decay_t<decltype(op)>;
if constexpr (std::is_base_of<parser::Expr::IntrinsicBinary,
T>::value) {
const auto &exprLeft{std::get<0>(op.t)};
const auto &exprRight{std::get<1>(op.t)};
if (exprLeft.value().source == assignmentStmtVariable.GetSource())
nonAtomicSubExprs.push_back(semantics::GetExpr(exprRight));
else
nonAtomicSubExprs.push_back(semantics::GetExpr(exprLeft));
}
},
},
assignmentStmtExpr.u);
lower::StatementContext nonAtomicStmtCtx;
if (!nonAtomicSubExprs.empty()) {
// Generate the non-atomic part before all the atomic operations.
auto insertionPoint = firOpBuilder.saveInsertionPoint();
if (atomicCaptureOp)
firOpBuilder.setInsertionPoint(atomicCaptureOp);
mlir::Value nonAtomicVal;
for (auto *nonAtomicSubExpr : nonAtomicSubExprs) {
nonAtomicVal = fir::getBase(converter.genExprValue(
currentLocation, *nonAtomicSubExpr, nonAtomicStmtCtx));
exprValueOverrides.try_emplace(nonAtomicSubExpr, nonAtomicVal);
}
if (atomicCaptureOp)
firOpBuilder.restoreInsertionPoint(insertionPoint);
}
mlir::Operation *atomicUpdateOp = nullptr;
// If no hint clause is specified, the effect is as if
// hint(omp_sync_hint_none) had been specified.
mlir::IntegerAttr hint = nullptr;
mlir::omp::ClauseMemoryOrderKindAttr memoryOrder = nullptr;
if (leftHandClauseList)
genOmpAtomicHintAndMemoryOrderClauses(converter, *leftHandClauseList, hint,
memoryOrder);
if (rightHandClauseList)
genOmpAtomicHintAndMemoryOrderClauses(converter, *rightHandClauseList, hint,
memoryOrder);
atomicUpdateOp = firOpBuilder.create<mlir::omp::AtomicUpdateOp>(
currentLocation, lhsAddr, hint, memoryOrder);
processOmpAtomicTODO(varType, loc);
llvm::SmallVector<mlir::Type> varTys = {varType};
llvm::SmallVector<mlir::Location> locs = {currentLocation};
firOpBuilder.createBlock(&atomicUpdateOp->getRegion(0), {}, varTys, locs);
mlir::Value val =
fir::getBase(atomicUpdateOp->getRegion(0).front().getArgument(0));
exprValueOverrides.try_emplace(semantics::GetExpr(assignmentStmtVariable),
val);
{
// Statement context inside the atomic block.
converter.overrideExprValues(&exprValueOverrides);
lower::StatementContext atomicStmtCtx;
mlir::Value rhsExpr = fir::getBase(converter.genExprValue(
*semantics::GetExpr(assignmentStmtExpr), atomicStmtCtx));
mlir::Type exprType = fir::unwrapRefType(rhsExpr.getType());
if (fir::isa_complex(exprType) && !fir::isa_complex(varType)) {
// Emit an additional `ExtractValueOp` if the expression is of complex
// type but the variable is not.
auto extract = firOpBuilder.create<fir::ExtractValueOp>(
currentLocation,
mlir::cast<mlir::ComplexType>(exprType).getElementType(), rhsExpr,
firOpBuilder.getArrayAttr(
firOpBuilder.getIntegerAttr(firOpBuilder.getIndexType(), 0)));
mlir::Value convertResult = firOpBuilder.create<fir::ConvertOp>(
currentLocation, varType, extract);
firOpBuilder.create<mlir::omp::YieldOp>(currentLocation, convertResult);
} else {
mlir::Value convertResult =
firOpBuilder.createConvert(currentLocation, varType, rhsExpr);
firOpBuilder.create<mlir::omp::YieldOp>(currentLocation, convertResult);
}
converter.resetExprOverrides();
}
firOpBuilder.setInsertionPointAfter(atomicUpdateOp);
}
/// Processes an atomic construct with the write clause.
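///
/// A sketch of the handled source form:
///
///   !$omp atomic write
///     x = expr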
static void genAtomicWrite(lower::AbstractConverter &converter,
const parser::OmpAtomicWrite &atomicWrite,
mlir::Location loc) {
const parser::OmpAtomicClauseList *rightHandClauseList = nullptr;
const parser::OmpAtomicClauseList *leftHandClauseList = nullptr;
// Get the clause lists on each side of the atomic directive.
rightHandClauseList = &std::get<2>(atomicWrite.t);
leftHandClauseList = &std::get<0>(atomicWrite.t);
const parser::AssignmentStmt &stmt =
std::get<parser::Statement<parser::AssignmentStmt>>(atomicWrite.t)
.statement;
const evaluate::Assignment &assign = *stmt.typedAssignment->v;
lower::StatementContext stmtCtx;
// Get the value and address of atomic write operands.
mlir::Value rhsExpr =
fir::getBase(converter.genExprValue(assign.rhs, stmtCtx));
mlir::Value lhsAddr =
fir::getBase(converter.genExprAddr(assign.lhs, stmtCtx));
genAtomicWriteStatement(converter, lhsAddr, rhsExpr, leftHandClauseList,
rightHandClauseList, loc);
}
/// Processes an atomic construct with the read clause.
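///
/// A sketch of the handled source form:
///
///   !$omp atomic read
///     v = x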
static void genAtomicRead(lower::AbstractConverter &converter,
const parser::OmpAtomicRead &atomicRead,
mlir::Location loc) {
const parser::OmpAtomicClauseList *rightHandClauseList = nullptr;
const parser::OmpAtomicClauseList *leftHandClauseList = nullptr;
// Get the clause lists on each side of the atomic directive.
rightHandClauseList = &std::get<2>(atomicRead.t);
leftHandClauseList = &std::get<0>(atomicRead.t);
const auto &assignmentStmtExpr = std::get<parser::Expr>(
std::get<parser::Statement<parser::AssignmentStmt>>(atomicRead.t)
.statement.t);
const auto &assignmentStmtVariable = std::get<parser::Variable>(
std::get<parser::Statement<parser::AssignmentStmt>>(atomicRead.t)
.statement.t);
lower::StatementContext stmtCtx;
const semantics::SomeExpr &fromExpr = *semantics::GetExpr(assignmentStmtExpr);
mlir::Type elementType = converter.genType(fromExpr);
mlir::Value fromAddress =
fir::getBase(converter.genExprAddr(fromExpr, stmtCtx));
mlir::Value toAddress = fir::getBase(converter.genExprAddr(
*semantics::GetExpr(assignmentStmtVariable), stmtCtx));
if (fromAddress.getType() != toAddress.getType()) {
// Emit an implicit cast. Different yet compatible types on
// omp.atomic.read constitute valid Fortran. The OMPIRBuilder will
// emit atomic instructions (on primitive types) and `__atomic_load`
// libcall (on complex type) without explicitly converting
// between such compatible types. The OMPIRBuilder relies on the
// frontend to resolve such inconsistencies between `omp.atomic.read`
// operand types. Similar inconsistencies between operand types in
// `omp.atomic.write` are resolved through implicit casting by use of typed
// assignment (i.e. `evaluate::Assignment`). However, use of typed
// assignment in `omp.atomic.read` (of form `v = x`) leads to an unsafe,
// non-atomic load of `x` into a temporary `alloca`, followed by an atomic
// read of form `v = alloca`. Hence, it is needed to perform a custom
// implicit cast.
// An atomic read of form `v = x` would (without implicit casting)
// lower to `omp.atomic.read %v = %x : !fir.ref<type1>, !fir.ref<type2>,
// type2`. With the implicit cast, we instead generate the following FIR:
//
// %alloca = fir.alloca type2
// omp.atomic.read %alloca = %x : !fir.ref<type2>, !fir.ref<type2>, type2
// %load = fir.load %alloca : !fir.ref<type2>
// %cvt = fir.convert %load : (type2) -> type1
// fir.store %cvt to %v : !fir.ref<type1>
// This sequence of operations is thread-safe since each thread allocates
// the `alloca` in its stack, and performs `%alloca = %x` atomically. Once
// safely read, each thread performs the implicit cast on the local
// `alloca`, and writes the final result to `%v`.
mlir::Type toType = fir::unwrapRefType(toAddress.getType());
mlir::Type fromType = fir::unwrapRefType(fromAddress.getType());
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
auto oldIP = builder.saveInsertionPoint();
builder.setInsertionPointToStart(builder.getAllocaBlock());
mlir::Value alloca = builder.create<fir::AllocaOp>(
loc, fromType); // Thread scope `alloca` to atomically read `%x`.
builder.restoreInsertionPoint(oldIP);
genAtomicCaptureStatement(converter, fromAddress, alloca,
leftHandClauseList, rightHandClauseList,
elementType, loc);
auto load = builder.create<fir::LoadOp>(loc, alloca);
if (fir::isa_complex(fromType) && !fir::isa_complex(toType)) {
// Emit an additional `ExtractValueOp` if `fromAddress` is of complex
// type, but `toAddress` is not.
auto extract = builder.create<fir::ExtractValueOp>(
loc, mlir::cast<mlir::ComplexType>(fromType).getElementType(), load,
builder.getArrayAttr(
builder.getIntegerAttr(builder.getIndexType(), 0)));
auto cvt = builder.create<fir::ConvertOp>(loc, toType, extract);
builder.create<fir::StoreOp>(loc, cvt, toAddress);
} else if (!fir::isa_complex(fromType) && fir::isa_complex(toType)) {
// Emit an additional `InsertValueOp` if `toAddress` is of complex
// type, but `fromAddress` is not.
mlir::Value undef = builder.create<fir::UndefOp>(loc, toType);
mlir::Type complexEleTy =
mlir::cast<mlir::ComplexType>(toType).getElementType();
mlir::Value cvt = builder.create<fir::ConvertOp>(loc, complexEleTy, load);
mlir::Value zero = builder.createRealZeroConstant(loc, complexEleTy);
mlir::Value idx0 = builder.create<fir::InsertValueOp>(
loc, toType, undef, cvt,
builder.getArrayAttr(
builder.getIntegerAttr(builder.getIndexType(), 0)));
mlir::Value idx1 = builder.create<fir::InsertValueOp>(
loc, toType, idx0, zero,
builder.getArrayAttr(
builder.getIntegerAttr(builder.getIndexType(), 1)));
builder.create<fir::StoreOp>(loc, idx1, toAddress);
} else {
auto cvt = builder.create<fir::ConvertOp>(loc, toType, load);
builder.create<fir::StoreOp>(loc, cvt, toAddress);
}
} else
genAtomicCaptureStatement(converter, fromAddress, toAddress,
leftHandClauseList, rightHandClauseList,
elementType, loc);
}
/// Processes an atomic construct with the update clause.
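///
/// A sketch of the handled source form:
///
///   !$omp atomic update
///     x = x + 1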
static void genAtomicUpdate(lower::AbstractConverter &converter,
const parser::OmpAtomicUpdate &atomicUpdate,
mlir::Location loc) {
const parser::OmpAtomicClauseList *rightHandClauseList = nullptr;
const parser::OmpAtomicClauseList *leftHandClauseList = nullptr;
// Get the clause lists on each side of the atomic directive.
rightHandClauseList = &std::get<2>(atomicUpdate.t);
leftHandClauseList = &std::get<0>(atomicUpdate.t);
const auto &assignmentStmtExpr = std::get<parser::Expr>(
std::get<parser::Statement<parser::AssignmentStmt>>(atomicUpdate.t)
.statement.t);
const auto &assignmentStmtVariable = std::get<parser::Variable>(
std::get<parser::Statement<parser::AssignmentStmt>>(atomicUpdate.t)
.statement.t);
lower::StatementContext stmtCtx;
mlir::Value lhsAddr = fir::getBase(converter.genExprAddr(
*semantics::GetExpr(assignmentStmtVariable), stmtCtx));
mlir::Type varType = fir::unwrapRefType(lhsAddr.getType());
genAtomicUpdateStatement(converter, lhsAddr, varType, assignmentStmtVariable,
assignmentStmtExpr, leftHandClauseList,
rightHandClauseList, loc);
}
/// Processes an atomic construct with no clause, which implies the update
/// clause.
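///
/// For example (hypothetical Fortran input, treated as an update):
///   !$omp atomic
///   x = x + 1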
static void genOmpAtomic(lower::AbstractConverter &converter,
const parser::OmpAtomic &atomicConstruct,
mlir::Location loc) {
const parser::OmpAtomicClauseList &atomicClauseList =
std::get<parser::OmpAtomicClauseList>(atomicConstruct.t);
const auto &assignmentStmtExpr = std::get<parser::Expr>(
std::get<parser::Statement<parser::AssignmentStmt>>(atomicConstruct.t)
.statement.t);
const auto &assignmentStmtVariable = std::get<parser::Variable>(
std::get<parser::Statement<parser::AssignmentStmt>>(atomicConstruct.t)
.statement.t);
lower::StatementContext stmtCtx;
mlir::Value lhsAddr = fir::getBase(converter.genExprAddr(
*semantics::GetExpr(assignmentStmtVariable), stmtCtx));
mlir::Type varType = fir::unwrapRefType(lhsAddr.getType());
// If atomic-clause is not present on the construct, the behaviour is as if
// the update clause is specified (for both OpenMP and OpenACC).
genAtomicUpdateStatement(converter, lhsAddr, varType, assignmentStmtVariable,
assignmentStmtExpr, &atomicClauseList, nullptr, loc);
}
/// Processes an atomic construct with the capture clause.
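///
/// For example (hypothetical Fortran input, capture-stmt followed by
/// update-stmt):
///   !$omp atomic capture
///   v = x
///   x = x + 1
///   !$omp end atomic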
static void genAtomicCapture(lower::AbstractConverter &converter,
const parser::OmpAtomicCapture &atomicCapture,
mlir::Location loc) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
const parser::AssignmentStmt &stmt1 =
std::get<parser::OmpAtomicCapture::Stmt1>(atomicCapture.t).v.statement;
const evaluate::Assignment &assign1 = *stmt1.typedAssignment->v;
const auto &stmt1Var{std::get<parser::Variable>(stmt1.t)};
const auto &stmt1Expr{std::get<parser::Expr>(stmt1.t)};
const parser::AssignmentStmt &stmt2 =
std::get<parser::OmpAtomicCapture::Stmt2>(atomicCapture.t).v.statement;
const evaluate::Assignment &assign2 = *stmt2.typedAssignment->v;
const auto &stmt2Var{std::get<parser::Variable>(stmt2.t)};
const auto &stmt2Expr{std::get<parser::Expr>(stmt2.t)};
// Pre-evaluate expressions to be used in the various operations inside
// `atomic.capture` since it is not desirable to have anything other than
// an `atomic.read`, `atomic.write`, or `atomic.update` operation
// inside `atomic.capture`.
lower::StatementContext stmtCtx;
// LHS evaluations are common to all combinations of `atomic.capture`.
mlir::Value stmt1LHSArg =
fir::getBase(converter.genExprAddr(assign1.lhs, stmtCtx));
mlir::Value stmt2LHSArg =
fir::getBase(converter.genExprAddr(assign2.lhs, stmtCtx));
// Type information used in the generation of the `atomic.update` operation.
mlir::Type stmt1VarType =
fir::getBase(converter.genExprValue(assign1.lhs, stmtCtx)).getType();
mlir::Type stmt2VarType =
fir::getBase(converter.genExprValue(assign2.lhs, stmtCtx)).getType();
// Check if an implicit type cast is needed.
if (stmt1VarType != stmt2VarType)
TODO(loc, "atomic capture requiring implicit type casts");
mlir::Operation *atomicCaptureOp = nullptr;
mlir::IntegerAttr hint = nullptr;
mlir::omp::ClauseMemoryOrderKindAttr memoryOrder = nullptr;
const parser::OmpAtomicClauseList &rightHandClauseList =
std::get<2>(atomicCapture.t);
const parser::OmpAtomicClauseList &leftHandClauseList =
std::get<0>(atomicCapture.t);
genOmpAtomicHintAndMemoryOrderClauses(converter, leftHandClauseList, hint,
memoryOrder);
genOmpAtomicHintAndMemoryOrderClauses(converter, rightHandClauseList, hint,
memoryOrder);
atomicCaptureOp =
firOpBuilder.create<mlir::omp::AtomicCaptureOp>(loc, hint, memoryOrder);
firOpBuilder.createBlock(&(atomicCaptureOp->getRegion(0)));
mlir::Block &block = atomicCaptureOp->getRegion(0).back();
firOpBuilder.setInsertionPointToStart(&block);
if (semantics::checkForSingleVariableOnRHS(stmt1)) {
if (semantics::checkForSymbolMatch(stmt2)) {
// Atomic capture construct is of the form [capture-stmt, update-stmt]
const semantics::SomeExpr &fromExpr = *semantics::GetExpr(stmt1Expr);
mlir::Type elementType = converter.genType(fromExpr);
genAtomicCaptureStatement(converter, stmt2LHSArg, stmt1LHSArg,
/*leftHandClauseList=*/nullptr,
/*rightHandClauseList=*/nullptr, elementType,
loc);
genAtomicUpdateStatement(
converter, stmt2LHSArg, stmt2VarType, stmt2Var, stmt2Expr,
/*leftHandClauseList=*/nullptr,
/*rightHandClauseList=*/nullptr, loc, atomicCaptureOp);
} else {
// Atomic capture construct is of the form [capture-stmt, write-stmt]
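// (e.g. `v = x` followed by `x = expr`, where `expr` does not reference `x`).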
firOpBuilder.setInsertionPoint(atomicCaptureOp);
mlir::Value stmt2RHSArg =
fir::getBase(converter.genExprValue(assign2.rhs, stmtCtx));
firOpBuilder.setInsertionPointToStart(&block);
const semantics::SomeExpr &fromExpr = *semantics::GetExpr(stmt1Expr);
mlir::Type elementType = converter.genType(fromExpr);
genAtomicCaptureStatement(converter, stmt2LHSArg, stmt1LHSArg,
/*leftHandClauseList=*/nullptr,
/*rightHandClauseList=*/nullptr, elementType,
loc);
genAtomicWriteStatement(converter, stmt2LHSArg, stmt2RHSArg,
/*leftHandClauseList=*/nullptr,
/*rightHandClauseList=*/nullptr, loc);
}
} else {
// Atomic capture construct is of the form [update-stmt, capture-stmt]
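// (e.g. `x = x + 1` followed by `v = x`).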
const semantics::SomeExpr &fromExpr = *semantics::GetExpr(stmt2Expr);
mlir::Type elementType = converter.genType(fromExpr);
genAtomicUpdateStatement(
converter, stmt1LHSArg, stmt1VarType, stmt1Var, stmt1Expr,
/*leftHandClauseList=*/nullptr,
/*rightHandClauseList=*/nullptr, loc, atomicCaptureOp);
genAtomicCaptureStatement(converter, stmt1LHSArg, stmt2LHSArg,
/*leftHandClauseList=*/nullptr,
/*rightHandClauseList=*/nullptr, elementType,
loc);
}
firOpBuilder.setInsertionPointToEnd(&block);
firOpBuilder.create<mlir::omp::TerminatorOp>(loc);
firOpBuilder.setInsertionPointToStart(&block);
}
//===----------------------------------------------------------------------===//
// Code generation functions for the standalone version of constructs that can
// also be a leaf of a composite construct
//===----------------------------------------------------------------------===//
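// Each of these functions lowers its leaf construct to a loop wrapper
// operation holding a single omp.loop_nest. Schematically (types and most
// clauses omitted):
//
//   omp.distribute private(...) {
//     omp.loop_nest (%iv) : i32 = (%lb) to (%ub) inclusive step (%step) {
//       ...
//       omp.yield
//     }
//   }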
static mlir::omp::DistributeOp genStandaloneDistribute(
lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
mlir::omp::DistributeOperands distributeClauseOps;
genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
distributeClauseOps);
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/true,
enableDelayedPrivatization, symTable);
dsp.processStep1(&distributeClauseOps);
mlir::omp::LoopNestOperands loopNestClauseOps;
llvm::SmallVector<const semantics::Symbol *> iv;
genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
loopNestClauseOps, iv);
EntryBlockArgs distributeArgs;
distributeArgs.priv.syms = dsp.getDelayedPrivSymbols();
distributeArgs.priv.vars = distributeClauseOps.privateVars;
auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>(
converter, loc, distributeClauseOps, distributeArgs);
genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
loopNestClauseOps, iv, {{distributeOp, distributeArgs}},
llvm::omp::Directive::OMPD_distribute, dsp);
return distributeOp;
}
static mlir::omp::WsloopOp genStandaloneDo(
lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
mlir::omp::WsloopOperands wsloopClauseOps;
llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms;
genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
wsloopClauseOps, wsloopReductionSyms);
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/true,
enableDelayedPrivatization, symTable);
dsp.processStep1(&wsloopClauseOps);
mlir::omp::LoopNestOperands loopNestClauseOps;
llvm::SmallVector<const semantics::Symbol *> iv;
genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
loopNestClauseOps, iv);
EntryBlockArgs wsloopArgs;
wsloopArgs.priv.syms = dsp.getDelayedPrivSymbols();
wsloopArgs.priv.vars = wsloopClauseOps.privateVars;
wsloopArgs.reduction.syms = wsloopReductionSyms;
wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars;
auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>(
converter, loc, wsloopClauseOps, wsloopArgs);
genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
loopNestClauseOps, iv, {{wsloopOp, wsloopArgs}},
llvm::omp::Directive::OMPD_do, dsp);
return wsloopOp;
}
static mlir::omp::ParallelOp genStandaloneParallel(
lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
mlir::omp::ParallelOperands parallelClauseOps;
llvm::SmallVector<const semantics::Symbol *> parallelReductionSyms;
genParallelClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
parallelClauseOps, parallelReductionSyms);
std::optional<DataSharingProcessor> dsp;
if (enableDelayedPrivatization) {
dsp.emplace(converter, semaCtx, item->clauses, eval,
lower::omp::isLastItemInQueue(item, queue),
/*useDelayedPrivatization=*/true, symTable);
dsp->processStep1(&parallelClauseOps);
}
EntryBlockArgs parallelArgs;
if (dsp)
parallelArgs.priv.syms = dsp->getDelayedPrivSymbols();
parallelArgs.priv.vars = parallelClauseOps.privateVars;
parallelArgs.reduction.syms = parallelReductionSyms;
parallelArgs.reduction.vars = parallelClauseOps.reductionVars;
return genParallelOp(converter, symTable, semaCtx, eval, loc, queue, item,
parallelClauseOps, parallelArgs,
enableDelayedPrivatization ? &dsp.value() : nullptr);
}
static mlir::omp::SimdOp
genStandaloneSimd(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
mlir::omp::SimdOperands simdClauseOps;
llvm::SmallVector<const semantics::Symbol *> simdReductionSyms;
genSimdClauses(converter, semaCtx, item->clauses, loc, simdClauseOps,
simdReductionSyms);
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/true,
enableDelayedPrivatization, symTable);
dsp.processStep1(&simdClauseOps);
mlir::omp::LoopNestOperands loopNestClauseOps;
llvm::SmallVector<const semantics::Symbol *> iv;
genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
loopNestClauseOps, iv);
EntryBlockArgs simdArgs;
simdArgs.priv.syms = dsp.getDelayedPrivSymbols();
simdArgs.priv.vars = simdClauseOps.privateVars;
simdArgs.reduction.syms = simdReductionSyms;
simdArgs.reduction.vars = simdClauseOps.reductionVars;
auto simdOp =
genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs);
genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
loopNestClauseOps, iv, {{simdOp, simdArgs}},
llvm::omp::Directive::OMPD_simd, dsp);
return simdOp;
}
static mlir::omp::TaskloopOp genStandaloneTaskloop(
lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
mlir::omp::TaskloopOperands taskloopClauseOps;
genTaskloopClauses(converter, semaCtx, item->clauses, loc, taskloopClauseOps);
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/true,
enableDelayedPrivatization, symTable);
dsp.processStep1(&taskloopClauseOps);
mlir::omp::LoopNestOperands loopNestClauseOps;
llvm::SmallVector<const semantics::Symbol *> iv;
genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
loopNestClauseOps, iv);
EntryBlockArgs taskloopArgs;
taskloopArgs.priv.syms = dsp.getDelayedPrivSymbols();
taskloopArgs.priv.vars = taskloopClauseOps.privateVars;
auto taskLoopOp = genWrapperOp<mlir::omp::TaskloopOp>(
converter, loc, taskloopClauseOps, taskloopArgs);
genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
loopNestClauseOps, iv, {{taskLoopOp, taskloopArgs}},
llvm::omp::Directive::OMPD_taskloop, dsp);
return taskLoopOp;
}
//===----------------------------------------------------------------------===//
// Code generation functions for composite constructs
//===----------------------------------------------------------------------===//
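// Composite constructs are lowered as a nest of wrapper operations around a
// single omp.loop_nest, with each loop wrapper marked as composite via
// setComposite. Schematically, for DISTRIBUTE PARALLEL DO (clauses omitted):
//
//   omp.parallel {
//     omp.distribute {
//       omp.wsloop {
//         omp.loop_nest (%iv) ... { omp.yield }
//       }
//     }
//   }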
static mlir::omp::DistributeOp genCompositeDistributeParallelDo(
lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
assert(std::distance(item, queue.end()) == 3 && "Invalid leaf constructs");
ConstructQueue::const_iterator distributeItem = item;
ConstructQueue::const_iterator parallelItem = std::next(distributeItem);
ConstructQueue::const_iterator doItem = std::next(parallelItem);
// Create parent omp.parallel first.
mlir::omp::ParallelOperands parallelClauseOps;
llvm::SmallVector<const semantics::Symbol *> parallelReductionSyms;
genParallelClauses(converter, semaCtx, stmtCtx, parallelItem->clauses, loc,
parallelClauseOps, parallelReductionSyms);
DataSharingProcessor dsp(converter, semaCtx, doItem->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/true,
/*useDelayedPrivatization=*/true, symTable);
dsp.processStep1(&parallelClauseOps);
EntryBlockArgs parallelArgs;
parallelArgs.priv.syms = dsp.getDelayedPrivSymbols();
parallelArgs.priv.vars = parallelClauseOps.privateVars;
parallelArgs.reduction.syms = parallelReductionSyms;
parallelArgs.reduction.vars = parallelClauseOps.reductionVars;
genParallelOp(converter, symTable, semaCtx, eval, loc, queue, parallelItem,
parallelClauseOps, parallelArgs, &dsp, /*isComposite=*/true);
// Clause processing.
mlir::omp::DistributeOperands distributeClauseOps;
genDistributeClauses(converter, semaCtx, stmtCtx, distributeItem->clauses,
loc, distributeClauseOps);
mlir::omp::WsloopOperands wsloopClauseOps;
llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms;
genWsloopClauses(converter, semaCtx, stmtCtx, doItem->clauses, loc,
wsloopClauseOps, wsloopReductionSyms);
mlir::omp::LoopNestOperands loopNestClauseOps;
llvm::SmallVector<const semantics::Symbol *> iv;
genLoopNestClauses(converter, semaCtx, eval, doItem->clauses, loc,
loopNestClauseOps, iv);
// Operation creation.
EntryBlockArgs distributeArgs;
// TODO: Add private syms and vars.
auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>(
converter, loc, distributeClauseOps, distributeArgs);
distributeOp.setComposite(/*val=*/true);
EntryBlockArgs wsloopArgs;
// TODO: Add private syms and vars.
wsloopArgs.reduction.syms = wsloopReductionSyms;
wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars;
auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>(
converter, loc, wsloopClauseOps, wsloopArgs);
wsloopOp.setComposite(/*val=*/true);
genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, doItem,
loopNestClauseOps, iv,
{{distributeOp, distributeArgs}, {wsloopOp, wsloopArgs}},
llvm::omp::Directive::OMPD_distribute_parallel_do, dsp);
return distributeOp;
}
static mlir::omp::DistributeOp genCompositeDistributeParallelDoSimd(
lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
assert(std::distance(item, queue.end()) == 4 && "Invalid leaf constructs");
ConstructQueue::const_iterator distributeItem = item;
ConstructQueue::const_iterator parallelItem = std::next(distributeItem);
ConstructQueue::const_iterator doItem = std::next(parallelItem);
ConstructQueue::const_iterator simdItem = std::next(doItem);
// Create parent omp.parallel first.
mlir::omp::ParallelOperands parallelClauseOps;
llvm::SmallVector<const semantics::Symbol *> parallelReductionSyms;
genParallelClauses(converter, semaCtx, stmtCtx, parallelItem->clauses, loc,
parallelClauseOps, parallelReductionSyms);
DataSharingProcessor parallelItemDSP(
converter, semaCtx, parallelItem->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/false,
/*useDelayedPrivatization=*/true, symTable);
parallelItemDSP.processStep1(&parallelClauseOps);
EntryBlockArgs parallelArgs;
parallelArgs.priv.syms = parallelItemDSP.getDelayedPrivSymbols();
parallelArgs.priv.vars = parallelClauseOps.privateVars;
parallelArgs.reduction.syms = parallelReductionSyms;
parallelArgs.reduction.vars = parallelClauseOps.reductionVars;
genParallelOp(converter, symTable, semaCtx, eval, loc, queue, parallelItem,
parallelClauseOps, parallelArgs, &parallelItemDSP,
/*isComposite=*/true);
// Clause processing.
mlir::omp::DistributeOperands distributeClauseOps;
genDistributeClauses(converter, semaCtx, stmtCtx, distributeItem->clauses,
loc, distributeClauseOps);
mlir::omp::WsloopOperands wsloopClauseOps;
llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms;
genWsloopClauses(converter, semaCtx, stmtCtx, doItem->clauses, loc,
wsloopClauseOps, wsloopReductionSyms);
mlir::omp::SimdOperands simdClauseOps;
llvm::SmallVector<const semantics::Symbol *> simdReductionSyms;
genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps,
simdReductionSyms);
DataSharingProcessor simdItemDSP(converter, semaCtx, simdItem->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/true,
/*useDelayedPrivatization=*/true, symTable);
simdItemDSP.processStep1(&simdClauseOps);
mlir::omp::LoopNestOperands loopNestClauseOps;
llvm::SmallVector<const semantics::Symbol *> iv;
genLoopNestClauses(converter, semaCtx, eval, simdItem->clauses, loc,
loopNestClauseOps, iv);
// Operation creation.
EntryBlockArgs distributeArgs;
// TODO: Add private syms and vars.
auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>(
converter, loc, distributeClauseOps, distributeArgs);
distributeOp.setComposite(/*val=*/true);
EntryBlockArgs wsloopArgs;
// TODO: Add private syms and vars.
wsloopArgs.reduction.syms = wsloopReductionSyms;
wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars;
auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>(
converter, loc, wsloopClauseOps, wsloopArgs);
wsloopOp.setComposite(/*val=*/true);
EntryBlockArgs simdArgs;
simdArgs.priv.syms = simdItemDSP.getDelayedPrivSymbols();
simdArgs.priv.vars = simdClauseOps.privateVars;
simdArgs.reduction.syms = simdReductionSyms;
simdArgs.reduction.vars = simdClauseOps.reductionVars;
auto simdOp =
genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs);
simdOp.setComposite(/*val=*/true);
genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem,
loopNestClauseOps, iv,
{{distributeOp, distributeArgs},
{wsloopOp, wsloopArgs},
{simdOp, simdArgs}},
llvm::omp::Directive::OMPD_distribute_parallel_do_simd,
simdItemDSP);
return distributeOp;
}
static mlir::omp::DistributeOp genCompositeDistributeSimd(
lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
ConstructQueue::const_iterator distributeItem = item;
ConstructQueue::const_iterator simdItem = std::next(distributeItem);
// Clause processing.
mlir::omp::DistributeOperands distributeClauseOps;
genDistributeClauses(converter, semaCtx, stmtCtx, distributeItem->clauses,
loc, distributeClauseOps);
mlir::omp::SimdOperands simdClauseOps;
llvm::SmallVector<const semantics::Symbol *> simdReductionSyms;
genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps,
simdReductionSyms);
// TODO: Support delayed privatization.
DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/true,
/*useDelayedPrivatization=*/false, symTable);
dsp.processStep1();
// Pass the innermost leaf construct's clauses because that's where COLLAPSE
// is placed by construct decomposition.
mlir::omp::LoopNestOperands loopNestClauseOps;
llvm::SmallVector<const semantics::Symbol *> iv;
genLoopNestClauses(converter, semaCtx, eval, simdItem->clauses, loc,
loopNestClauseOps, iv);
// Operation creation.
EntryBlockArgs distributeArgs;
// TODO: Add private syms and vars.
auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>(
converter, loc, distributeClauseOps, distributeArgs);
distributeOp.setComposite(/*val=*/true);
EntryBlockArgs simdArgs;
// TODO: Add private syms and vars.
simdArgs.reduction.syms = simdReductionSyms;
simdArgs.reduction.vars = simdClauseOps.reductionVars;
auto simdOp =
genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs);
simdOp.setComposite(/*val=*/true);
genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem,
loopNestClauseOps, iv,
{{distributeOp, distributeArgs}, {simdOp, simdArgs}},
llvm::omp::Directive::OMPD_distribute_simd, dsp);
return distributeOp;
}
static mlir::omp::WsloopOp genCompositeDoSimd(
lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
ConstructQueue::const_iterator doItem = item;
ConstructQueue::const_iterator simdItem = std::next(doItem);
// Clause processing.
mlir::omp::WsloopOperands wsloopClauseOps;
llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms;
genWsloopClauses(converter, semaCtx, stmtCtx, doItem->clauses, loc,
wsloopClauseOps, wsloopReductionSyms);
mlir::omp::SimdOperands simdClauseOps;
llvm::SmallVector<const semantics::Symbol *> simdReductionSyms;
genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps,
simdReductionSyms);
// TODO: Support delayed privatization.
DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/true,
/*useDelayedPrivatization=*/false, symTable);
dsp.processStep1();
// Pass the innermost leaf construct's clauses because that's where COLLAPSE
// is placed by construct decomposition.
mlir::omp::LoopNestOperands loopNestClauseOps;
llvm::SmallVector<const semantics::Symbol *> iv;
genLoopNestClauses(converter, semaCtx, eval, simdItem->clauses, loc,
loopNestClauseOps, iv);
// Operation creation.
EntryBlockArgs wsloopArgs;
// TODO: Add private syms and vars.
wsloopArgs.reduction.syms = wsloopReductionSyms;
wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars;
auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>(
converter, loc, wsloopClauseOps, wsloopArgs);
wsloopOp.setComposite(/*val=*/true);
EntryBlockArgs simdArgs;
// TODO: Add private syms and vars.
simdArgs.reduction.syms = simdReductionSyms;
simdArgs.reduction.vars = simdClauseOps.reductionVars;
auto simdOp =
genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs);
simdOp.setComposite(/*val=*/true);
genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem,
loopNestClauseOps, iv,
{{wsloopOp, wsloopArgs}, {simdOp, simdArgs}},
llvm::omp::Directive::OMPD_do_simd, dsp);
return wsloopOp;
}
static mlir::omp::TaskloopOp genCompositeTaskloopSimd(
lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
TODO(loc, "Composite TASKLOOP SIMD");
return nullptr;
}
//===----------------------------------------------------------------------===//
// Dispatch
//===----------------------------------------------------------------------===//
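// For loop-associated leaf constructs, dispatch first tries to match the head
// of the construct queue against a known composite leaf sequence; if no
// sequence matches, the single leaf construct is handled by the switch in
// genOMPDispatch below.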
static bool genOMPCompositeDispatch(
lower::AbstractConverter &converter, lower::SymMap &symTable,
lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item,
mlir::Operation *&newOp) {
using llvm::omp::Directive;
using lower::omp::matchLeafSequence;
// TODO: Privatization for composite constructs is currently only done based
// on the clauses for their last leaf construct, which may not always be
// correct. Consider per-leaf privatization of composite constructs once
// delayed privatization is supported by all participating ops.
if (matchLeafSequence(item, queue, Directive::OMPD_distribute_parallel_do))
newOp = genCompositeDistributeParallelDo(converter, symTable, stmtCtx,
semaCtx, eval, loc, queue, item);
else if (matchLeafSequence(item, queue,
Directive::OMPD_distribute_parallel_do_simd))
newOp = genCompositeDistributeParallelDoSimd(
converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
else if (matchLeafSequence(item, queue, Directive::OMPD_distribute_simd))
newOp = genCompositeDistributeSimd(converter, symTable, stmtCtx, semaCtx,
eval, loc, queue, item);
else if (matchLeafSequence(item, queue, Directive::OMPD_do_simd))
newOp = genCompositeDoSimd(converter, symTable, stmtCtx, semaCtx, eval, loc,
queue, item);
else if (matchLeafSequence(item, queue, Directive::OMPD_taskloop_simd))
newOp = genCompositeTaskloopSimd(converter, symTable, stmtCtx, semaCtx,
eval, loc, queue, item);
else
return false;
return true;
}
static void genOMPDispatch(lower::AbstractConverter &converter,
lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval, mlir::Location loc,
const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
assert(item != queue.end());
lower::StatementContext stmtCtx;
mlir::Operation *newOp = nullptr;
// Generate cleanup code for the stmtCtx after newOp.
auto finalizeStmtCtx = [&]() {
if (newOp) {
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
fir::FirOpBuilder::InsertionGuard guard(builder);
builder.setInsertionPointAfter(newOp);
stmtCtx.finalizeAndPop();
}
};
bool loopLeaf = llvm::omp::getDirectiveAssociation(item->id) ==
llvm::omp::Association::Loop;
if (loopLeaf) {
symTable.pushScope();
if (genOMPCompositeDispatch(converter, symTable, stmtCtx, semaCtx, eval,
loc, queue, item, newOp)) {
symTable.popScope();
finalizeStmtCtx();
return;
}
}
switch (llvm::omp::Directive dir = item->id) {
case llvm::omp::Directive::OMPD_barrier:
newOp = genBarrierOp(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_distribute:
newOp = genStandaloneDistribute(converter, symTable, stmtCtx, semaCtx, eval,
loc, queue, item);
break;
case llvm::omp::Directive::OMPD_do:
newOp = genStandaloneDo(converter, symTable, stmtCtx, semaCtx, eval, loc,
queue, item);
break;
case llvm::omp::Directive::OMPD_loop:
newOp = genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_masked:
newOp = genMaskedOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue,
item);
break;
case llvm::omp::Directive::OMPD_master:
newOp = genMasterOp(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_ordered:
// Block-associated "ordered" construct.
newOp = genOrderedRegionOp(converter, symTable, semaCtx, eval, loc, queue,
item);
break;
case llvm::omp::Directive::OMPD_parallel:
newOp = genStandaloneParallel(converter, symTable, stmtCtx, semaCtx, eval,
loc, queue, item);
break;
case llvm::omp::Directive::OMPD_scan:
newOp = genScanOp(converter, symTable, semaCtx, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_section:
// Lowered in the enclosing genSectionsOp.
llvm_unreachable("genOMPDispatch: OMPD_section");
break;
case llvm::omp::Directive::OMPD_sections:
// Called directly from genOMP([...], OpenMPSectionsConstruct) because it
// has a different prototype.
// This code path is still taken when iterating through the construct queue
// in genBodyOfOp.
break;
case llvm::omp::Directive::OMPD_simd:
newOp =
genStandaloneSimd(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_scope:
newOp = genScopeOp(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_single:
newOp = genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_target:
newOp = genTargetOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue,
item);
break;
case llvm::omp::Directive::OMPD_target_data:
newOp = genTargetDataOp(converter, symTable, stmtCtx, semaCtx, eval, loc,
queue, item);
break;
case llvm::omp::Directive::OMPD_target_enter_data:
newOp = genTargetEnterExitUpdateDataOp<mlir::omp::TargetEnterDataOp>(
converter, symTable, stmtCtx, semaCtx, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_target_exit_data:
newOp = genTargetEnterExitUpdateDataOp<mlir::omp::TargetExitDataOp>(
converter, symTable, stmtCtx, semaCtx, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_target_update:
newOp = genTargetEnterExitUpdateDataOp<mlir::omp::TargetUpdateOp>(
converter, symTable, stmtCtx, semaCtx, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_task:
newOp = genTaskOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue,
item);
break;
case llvm::omp::Directive::OMPD_taskgroup:
newOp =
genTaskgroupOp(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_taskloop:
newOp = genStandaloneTaskloop(converter, symTable, semaCtx, eval, loc,
queue, item);
break;
case llvm::omp::Directive::OMPD_taskwait:
newOp = genTaskwaitOp(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_taskyield:
newOp =
genTaskyieldOp(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_teams:
newOp = genTeamsOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue,
item);
break;
case llvm::omp::Directive::OMPD_tile:
case llvm::omp::Directive::OMPD_unroll:
TODO(loc, "Unhandled loop directive (" +
llvm::omp::getOpenMPDirectiveName(dir) + ")");
// case llvm::omp::Directive::OMPD_workdistribute:
case llvm::omp::Directive::OMPD_workshare:
newOp = genWorkshareOp(converter, symTable, stmtCtx, semaCtx, eval, loc,
queue, item);
break;
default:
// Combined and composite constructs should have been split into a sequence
// of leaf constructs when building the construct queue.
assert(!llvm::omp::isLeafConstruct(dir) &&
"Unexpected compound construct.");
break;
}
finalizeStmtCtx();
if (loopLeaf)
symTable.popScope();
}
//===----------------------------------------------------------------------===//
// OpenMPDeclarativeConstruct visitors
//===----------------------------------------------------------------------===//
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPUtilityConstruct &);
static void
genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
const parser::OpenMPDeclarativeAllocate &declarativeAllocate) {
TODO(converter.getCurrentLocation(), "OpenMPDeclarativeAllocate");
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPDeclarativeAssumes &assumesConstruct) {
TODO(converter.getCurrentLocation(), "OpenMP ASSUMES declaration");
}
static void
genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
const parser::OmpDeclareVariantDirective &declareVariantDirective) {
TODO(converter.getCurrentLocation(), "OmpDeclareVariantDirective");
}
static void genOMP(
lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
const parser::OpenMPDeclareReductionConstruct &declareReductionConstruct) {
TODO(converter.getCurrentLocation(), "OpenMPDeclareReductionConstruct");
}
static void
genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
const parser::OpenMPDeclareSimdConstruct &declareSimdConstruct) {
TODO(converter.getCurrentLocation(), "OpenMPDeclareSimdConstruct");
}
static void
genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
const parser::OpenMPDeclareMapperConstruct &declareMapperConstruct) {
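// Example source form (hypothetical mapper name `mymapper` and derived type
// `mytype`):
//   !$omp declare mapper(mymapper : mytype :: v) map(tofrom: v%field)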
mlir::Location loc = converter.genLocation(declareMapperConstruct.source);
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
lower::StatementContext stmtCtx;
const auto &spec =
std::get<parser::OmpMapperSpecifier>(declareMapperConstruct.t);
const auto &mapperName{std::get<std::optional<parser::Name>>(spec.t)};
const auto &varType{std::get<parser::TypeSpec>(spec.t)};
const auto &varName{std::get<parser::Name>(spec.t)};
assert(varType.declTypeSpec->category() ==
semantics::DeclTypeSpec::Category::TypeDerived &&
"Expected derived type");
std::string mapperNameStr;
if (mapperName.has_value()) {
mapperNameStr = mapperName->ToString();
mapperNameStr =
converter.mangleName(mapperNameStr, mapperName->symbol->owner());
} else {
mapperNameStr =
varType.declTypeSpec->derivedTypeSpec().name().ToString() + ".default";
mapperNameStr = converter.mangleName(
mapperNameStr, *varType.declTypeSpec->derivedTypeSpec().GetScope());
}
// Save current insertion point before moving to the module scope to create
// the DeclareMapperOp
mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
firOpBuilder.setInsertionPointToStart(converter.getModuleOp().getBody());
auto mlirType = converter.genType(varType.declTypeSpec->derivedTypeSpec());
auto declMapperOp = firOpBuilder.create<mlir::omp::DeclareMapperOp>(
loc, mapperNameStr, mlirType);
auto &region = declMapperOp.getRegion();
firOpBuilder.createBlock(&region);
auto varVal = region.addArgument(firOpBuilder.getRefType(mlirType), loc);
converter.bindSymbol(*varName.symbol, varVal);
// Populate the declareMapper region with the map information.
mlir::omp::DeclareMapperInfoOperands clauseOps;
const auto *clauseList{
parser::Unwrap<parser::OmpClauseList>(declareMapperConstruct.t)};
List<Clause> clauses = makeClauses(*clauseList, semaCtx);
ClauseProcessor cp(converter, semaCtx, clauses);
cp.processMap(loc, stmtCtx, clauseOps);
firOpBuilder.create<mlir::omp::DeclareMapperInfoOp>(loc, clauseOps.mapVars);
}
static void
genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct) {
mlir::omp::DeclareTargetOperands clauseOps;
llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause;
mlir::ModuleOp mod = converter.getFirOpBuilder().getModule();
getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct,
clauseOps, symbolAndClause);
for (const DeclareTargetCapturePair &symClause : symbolAndClause) {
mlir::Operation *op = mod.lookupSymbol(
converter.mangleName(std::get<const semantics::Symbol &>(symClause)));
// Some symbols are deferred until later in the module; these are handled
// upon finalization of the module for OpenMP inside Bridge, so we simply
// skip them for now.
if (!op)
continue;
markDeclareTarget(
op, converter,
std::get<mlir::omp::DeclareTargetCaptureClause>(symClause),
clauseOps.deviceType);
}
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPRequiresConstruct &requiresConstruct) {
// Requires directives are gathered and processed in semantics and
// then combined in the lowering bridge before triggering codegen
// just once. Hence, there is no need to lower each individual
// occurrence here.
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPThreadprivate &threadprivate) {
// The directive is lowered when instantiating the variable to
// support the case of threadprivate variable declared in module.
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OmpMetadirectiveDirective &meta) {
TODO(converter.getCurrentLocation(), "METADIRECTIVE");
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPDeclarativeConstruct &ompDeclConstruct) {
Fortran::common::visit(
[&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); },
ompDeclConstruct.u);
}
//===----------------------------------------------------------------------===//
// OpenMPStandaloneConstruct visitors
//===----------------------------------------------------------------------===//
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPSimpleStandaloneConstruct &construct) {
const auto &directive = std::get<parser::OmpDirectiveName>(construct.v.t);
List<Clause> clauses = makeClauses(construct.v.Clauses(), semaCtx);
mlir::Location currentLocation = converter.genLocation(directive.source);
ConstructQueue queue{
buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
eval, directive.source, directive.v, clauses)};
if (directive.v == llvm::omp::Directive::OMPD_ordered) {
// Standalone "ordered" directive.
genOrderedOp(converter, symTable, semaCtx, eval, currentLocation, queue,
queue.begin());
} else {
// Dispatch handles the "block-associated" variant of "ordered".
genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
queue.begin());
}
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPFlushConstruct &construct) {
const auto &argumentList = construct.v.Arguments();
const auto &clauseList = construct.v.Clauses();
ObjectList objects = makeObjects(argumentList, semaCtx);
List<Clause> clauses =
makeList(clauseList.v, [&](auto &&s) { return makeClause(s, semaCtx); });
mlir::Location currentLocation = converter.genLocation(construct.source);
ConstructQueue queue{buildConstructQueue(
converter.getFirOpBuilder().getModule(), semaCtx, eval, construct.source,
llvm::omp::Directive::OMPD_flush, clauses)};
genFlushOp(converter, symTable, semaCtx, eval, currentLocation, objects,
queue, queue.begin());
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPCancelConstruct &cancelConstruct) {
List<Clause> clauses = makeList(cancelConstruct.v.Clauses().v, [&](auto &&s) {
return makeClause(s, semaCtx);
});
mlir::Location loc = converter.genLocation(cancelConstruct.source);
ConstructQueue queue{buildConstructQueue(
converter.getFirOpBuilder().getModule(), semaCtx, eval,
cancelConstruct.source, llvm::omp::Directive::OMPD_cancel, clauses)};
genCancelOp(converter, semaCtx, eval, loc, queue, queue.begin());
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPCancellationPointConstruct
&cancellationPointConstruct) {
List<Clause> clauses =
makeList(cancellationPointConstruct.v.Clauses().v,
[&](auto &&s) { return makeClause(s, semaCtx); });
mlir::Location loc = converter.genLocation(cancellationPointConstruct.source);
ConstructQueue queue{
buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
eval, cancellationPointConstruct.source,
llvm::omp::Directive::OMPD_cancellation_point, clauses)};
genCancellationPointOp(converter, semaCtx, eval, loc, queue, queue.begin());
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPDepobjConstruct &construct) {
// These values will be ignored until the construct itself is implemented,
// but compute them anyway for the sake of testing (via a Todo test).
ObjectList objects = makeObjects(construct.v.Arguments(), semaCtx);
assert(objects.size() == 1);
List<Clause> clauses = makeClauses(construct.v.Clauses(), semaCtx);
assert(clauses.size() == 1);
(void)objects;
(void)clauses;
TODO(converter.getCurrentLocation(), "OpenMPDepobjConstruct");
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPInteropConstruct &interopConstruct) {
TODO(converter.getCurrentLocation(), "OpenMPInteropConstruct");
}
static void
genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
const parser::OpenMPStandaloneConstruct &standaloneConstruct) {
Fortran::common::visit(
[&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); },
standaloneConstruct.u);
}
//===----------------------------------------------------------------------===//
// OpenMPConstruct visitors
//===----------------------------------------------------------------------===//
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPAllocatorsConstruct &allocsConstruct) {
TODO(converter.getCurrentLocation(), "OpenMPAllocatorsConstruct");
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPAtomicConstruct &atomicConstruct) {
Fortran::common::visit(
common::visitors{
[&](const parser::OmpAtomicRead &atomicRead) {
mlir::Location loc = converter.genLocation(atomicRead.source);
genAtomicRead(converter, atomicRead, loc);
},
[&](const parser::OmpAtomicWrite &atomicWrite) {
mlir::Location loc = converter.genLocation(atomicWrite.source);
genAtomicWrite(converter, atomicWrite, loc);
},
[&](const parser::OmpAtomic &atomicConstruct) {
mlir::Location loc = converter.genLocation(atomicConstruct.source);
genOmpAtomic(converter, atomicConstruct, loc);
},
[&](const parser::OmpAtomicUpdate &atomicUpdate) {
mlir::Location loc = converter.genLocation(atomicUpdate.source);
genAtomicUpdate(converter, atomicUpdate, loc);
},
[&](const parser::OmpAtomicCapture &atomicCapture) {
mlir::Location loc = converter.genLocation(atomicCapture.source);
genAtomicCapture(converter, atomicCapture, loc);
},
[&](const parser::OmpAtomicCompare &atomicCompare) {
mlir::Location loc = converter.genLocation(atomicCompare.source);
TODO(loc, "OpenMP atomic compare");
},
},
atomicConstruct.u);
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPBlockConstruct &blockConstruct) {
const auto &beginBlockDirective =
std::get<parser::OmpBeginBlockDirective>(blockConstruct.t);
const auto &endBlockDirective =
std::get<parser::OmpEndBlockDirective>(blockConstruct.t);
mlir::Location currentLocation =
converter.genLocation(beginBlockDirective.source);
const auto origDirective =
std::get<parser::OmpBlockDirective>(beginBlockDirective.t).v;
List<Clause> clauses = makeClauses(
std::get<parser::OmpClauseList>(beginBlockDirective.t), semaCtx);
clauses.append(makeClauses(
std::get<parser::OmpClauseList>(endBlockDirective.t), semaCtx));
assert(llvm::omp::blockConstructSet.test(origDirective) &&
"Expected block construct");
(void)origDirective;
for (const Clause &clause : clauses) {
mlir::Location clauseLocation = converter.genLocation(clause.source);
if (!std::holds_alternative<clause::Affinity>(clause.u) &&
!std::holds_alternative<clause::Allocate>(clause.u) &&
!std::holds_alternative<clause::Copyin>(clause.u) &&
!std::holds_alternative<clause::Copyprivate>(clause.u) &&
!std::holds_alternative<clause::Default>(clause.u) &&
!std::holds_alternative<clause::Depend>(clause.u) &&
!std::holds_alternative<clause::Filter>(clause.u) &&
!std::holds_alternative<clause::Final>(clause.u) &&
!std::holds_alternative<clause::Firstprivate>(clause.u) &&
!std::holds_alternative<clause::HasDeviceAddr>(clause.u) &&
!std::holds_alternative<clause::If>(clause.u) &&
!std::holds_alternative<clause::IsDevicePtr>(clause.u) &&
!std::holds_alternative<clause::Map>(clause.u) &&
!std::holds_alternative<clause::Nowait>(clause.u) &&
!std::holds_alternative<clause::NumTeams>(clause.u) &&
!std::holds_alternative<clause::NumThreads>(clause.u) &&
!std::holds_alternative<clause::OmpxBare>(clause.u) &&
!std::holds_alternative<clause::Priority>(clause.u) &&
!std::holds_alternative<clause::Private>(clause.u) &&
!std::holds_alternative<clause::ProcBind>(clause.u) &&
!std::holds_alternative<clause::Reduction>(clause.u) &&
!std::holds_alternative<clause::Shared>(clause.u) &&
!std::holds_alternative<clause::Simd>(clause.u) &&
!std::holds_alternative<clause::ThreadLimit>(clause.u) &&
!std::holds_alternative<clause::Threads>(clause.u) &&
!std::holds_alternative<clause::UseDeviceAddr>(clause.u) &&
!std::holds_alternative<clause::UseDevicePtr>(clause.u) &&
!std::holds_alternative<clause::InReduction>(clause.u) &&
!std::holds_alternative<clause::Mergeable>(clause.u) &&
!std::holds_alternative<clause::Untied>(clause.u) &&
!std::holds_alternative<clause::TaskReduction>(clause.u) &&
!std::holds_alternative<clause::Detach>(clause.u)) {
std::string name =
parser::ToUpperCaseLetters(llvm::omp::getOpenMPClauseName(clause.id));
TODO(clauseLocation, name + " clause is not implemented yet");
}
}
llvm::omp::Directive directive =
std::get<parser::OmpBlockDirective>(beginBlockDirective.t).v;
const parser::CharBlock &source =
std::get<parser::OmpBlockDirective>(beginBlockDirective.t).source;
ConstructQueue queue{
buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
eval, source, directive, clauses)};
genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
queue.begin());
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPAssumeConstruct &assumeConstruct) {
mlir::Location clauseLocation = converter.genLocation(assumeConstruct.source);
TODO(clauseLocation, "OpenMP ASSUME construct");
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPCriticalConstruct &criticalConstruct) {
const auto &cd = std::get<parser::OmpCriticalDirective>(criticalConstruct.t);
List<Clause> clauses =
makeClauses(std::get<parser::OmpClauseList>(cd.t), semaCtx);
ConstructQueue queue{buildConstructQueue(
converter.getFirOpBuilder().getModule(), semaCtx, eval, cd.source,
llvm::omp::Directive::OMPD_critical, clauses)};
const auto &name = std::get<std::optional<parser::Name>>(cd.t);
mlir::Location currentLocation = converter.getCurrentLocation();
genCriticalOp(converter, symTable, semaCtx, eval, currentLocation, queue,
queue.begin(), name);
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPUtilityConstruct &) {
TODO(converter.getCurrentLocation(), "OpenMPUtilityConstruct");
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPDispatchConstruct &) {
TODO(converter.getCurrentLocation(), "OpenMPDispatchConstruct");
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPExecutableAllocate &execAllocConstruct) {
TODO(converter.getCurrentLocation(), "OpenMPExecutableAllocate");
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPLoopConstruct &loopConstruct) {
const auto &beginLoopDirective =
std::get<parser::OmpBeginLoopDirective>(loopConstruct.t);
List<Clause> clauses = makeClauses(
std::get<parser::OmpClauseList>(beginLoopDirective.t), semaCtx);
if (auto &endLoopDirective =
std::get<std::optional<parser::OmpEndLoopDirective>>(
loopConstruct.t)) {
clauses.append(makeClauses(
std::get<parser::OmpClauseList>(endLoopDirective->t), semaCtx));
}
mlir::Location currentLocation =
converter.genLocation(beginLoopDirective.source);
llvm::omp::Directive directive =
std::get<parser::OmpLoopDirective>(beginLoopDirective.t).v;
const parser::CharBlock &source =
std::get<parser::OmpLoopDirective>(beginLoopDirective.t).source;
ConstructQueue queue{
buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
eval, source, directive, clauses)};
genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
queue.begin());
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPSectionConstruct &sectionConstruct) {
// Do nothing here. SECTION is lowered as part of the enclosing SECTIONS
// construct.
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPSectionsConstruct &sectionsConstruct) {
const auto &beginSectionsDirective =
std::get<parser::OmpBeginSectionsDirective>(sectionsConstruct.t);
List<Clause> clauses = makeClauses(
std::get<parser::OmpClauseList>(beginSectionsDirective.t), semaCtx);
const auto &endSectionsDirective =
std::get<parser::OmpEndSectionsDirective>(sectionsConstruct.t);
const auto &sectionBlocks =
std::get<parser::OmpSectionBlocks>(sectionsConstruct.t);
clauses.append(makeClauses(
std::get<parser::OmpClauseList>(endSectionsDirective.t), semaCtx));
mlir::Location currentLocation = converter.getCurrentLocation();
llvm::omp::Directive directive =
std::get<parser::OmpSectionsDirective>(beginSectionsDirective.t).v;
const parser::CharBlock &source =
std::get<parser::OmpSectionsDirective>(beginSectionsDirective.t).source;
ConstructQueue queue{
buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
eval, source, directive, clauses)};
ConstructQueue::iterator next = queue.begin();
// Generate constructs that come first, e.g. PARALLEL.
while (next != queue.end() &&
next->id != llvm::omp::Directive::OMPD_sections) {
genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
next);
next = std::next(next);
}
// Call genSectionsOp directly (not via genOMPDispatch) so that we can pass
// the sectionBlocks argument.
assert(next != queue.end());
assert(next->id == llvm::omp::Directive::OMPD_sections);
genSectionsOp(converter, symTable, semaCtx, eval, currentLocation, queue,
next, sectionBlocks);
assert(std::next(next) == queue.end());
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPConstruct &ompConstruct) {
Fortran::common::visit(
[&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); },
ompConstruct.u);
}
//===----------------------------------------------------------------------===//
// Public functions
//===----------------------------------------------------------------------===//
mlir::Operation *Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder,
mlir::Operation *op,
mlir::Location loc) {
if (mlir::isa<mlir::omp::AtomicUpdateOp, mlir::omp::DeclareReductionOp,
mlir::omp::LoopNestOp>(op))
return builder.create<mlir::omp::YieldOp>(loc);
return builder.create<mlir::omp::TerminatorOp>(loc);
}
void Fortran::lower::genOpenMPConstruct(lower::AbstractConverter &converter,
lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPConstruct &omp) {
lower::SymMapScope scope(symTable);
genOMP(converter, symTable, semaCtx, eval, omp);
}
void Fortran::lower::genOpenMPDeclarativeConstruct(
lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
const parser::OpenMPDeclarativeConstruct &omp) {
genOMP(converter, symTable, semaCtx, eval, omp);
genNestedEvaluations(converter, eval);
}
void Fortran::lower::genOpenMPSymbolProperties(
lower::AbstractConverter &converter, const lower::pft::Variable &var) {
assert(var.hasSymbol() && "Expecting Symbol");
const semantics::Symbol &sym = var.getSymbol();
if (sym.test(semantics::Symbol::Flag::OmpThreadprivate))
lower::genThreadprivateOp(converter, var);
if (sym.test(semantics::Symbol::Flag::OmpDeclareTarget))
lower::genDeclareTargetIntGlobal(converter, var);
}
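/// Returns the value of the COLLAPSE clause in \p clauseList if present,
/// and 1 otherwise (e.g. `collapse(2)` yields 2).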
int64_t
Fortran::lower::getCollapseValue(const parser::OmpClauseList &clauseList) {
for (const parser::OmpClause &clause : clauseList.v) {
if (const auto *collapseClause =
std::get_if<parser::OmpClause::Collapse>(&clause.u)) {
const auto *expr = semantics::GetExpr(collapseClause->v);
return evaluate::ToInt64(*expr).value();
}
}
return 1;
}
void Fortran::lower::genThreadprivateOp(lower::AbstractConverter &converter,
const lower::pft::Variable &var) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
mlir::Location currentLocation = converter.getCurrentLocation();
const semantics::Symbol &sym = var.getSymbol();
mlir::Value symThreadprivateValue;
if (const semantics::Symbol *common =
semantics::FindCommonBlockContaining(sym.GetUltimate())) {
mlir::Value commonValue = converter.getSymbolAddress(*common);
if (mlir::isa<mlir::omp::ThreadprivateOp>(commonValue.getDefiningOp())) {
// Generate the ThreadprivateOp for a common block instead of its members,
// and only do it once per common block.
return;
}
// Generate ThreadprivateOp and rebind the common block.
mlir::Value commonThreadprivateValue =
firOpBuilder.create<mlir::omp::ThreadprivateOp>(
currentLocation, commonValue.getType(), commonValue);
converter.bindSymbol(*common, commonThreadprivateValue);
// Generate the threadprivate value for the common block member.
symThreadprivateValue = genCommonBlockMember(converter, currentLocation,
sym, commonThreadprivateValue);
} else if (!var.isGlobal()) {
// A non-global variable that may appear in a THREADPRIVATE directive must
// be a variable in the main program, which carries the implicit SAVE
// attribute. Treat it as if SAVE were explicit and create a GlobalOp for it
// to simplify the translation to LLVM IR. Look the global up first so that
// multiple global initializations are avoided.
auto module = converter.getModuleOp();
std::string globalName = converter.mangleName(sym);
fir::GlobalOp global = module.lookupSymbol<fir::GlobalOp>(globalName);
if (!global)
global = globalInitialization(converter, firOpBuilder, sym, var,
currentLocation);
mlir::Value symValue = firOpBuilder.create<fir::AddrOfOp>(
currentLocation, global.resultType(), global.getSymbol());
symThreadprivateValue = firOpBuilder.create<mlir::omp::ThreadprivateOp>(
currentLocation, symValue.getType(), symValue);
} else {
mlir::Value symValue = converter.getSymbolAddress(sym);
// The symbol may be use-associated multiple times, and nothing needs to be
// done after the original symbol is mapped to the threadprivatized value
// for the first time. Use the threadprivatized value directly.
mlir::Operation *op;
if (auto declOp = symValue.getDefiningOp<hlfir::DeclareOp>())
op = declOp.getMemref().getDefiningOp();
else
op = symValue.getDefiningOp();
if (mlir::isa<mlir::omp::ThreadprivateOp>(op))
return;
symThreadprivateValue = firOpBuilder.create<mlir::omp::ThreadprivateOp>(
currentLocation, symValue.getType(), symValue);
}
fir::ExtendedValue sexv = converter.getSymbolExtendedValue(sym);
fir::ExtendedValue symThreadprivateExv =
getExtendedValue(sexv, symThreadprivateValue);
converter.bindSymbol(sym, symThreadprivateExv);
}
// This function replicates threadprivate's behaviour of generating an
// internal fir.GlobalOp for non-global variables in the main program that
// have the implicit SAVE attribute, to simplify LLVM IR and MLIR
// generation.
void Fortran::lower::genDeclareTargetIntGlobal(
lower::AbstractConverter &converter, const lower::pft::Variable &var) {
if (!var.isGlobal()) {
// A non-global variable that can appear in a declare target directive must
// be a variable in the main program, which has the implicit SAVE
// attribute. We create a GlobalOp for it to simplify the translation to
// LLVM IR.
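// A hypothetical case reaching this point (illustrative only):
//
//   program main
//     integer :: x
//     !$omp declare target(x)
//   end program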
globalInitialization(converter, converter.getFirOpBuilder(),
var.getSymbol(), var, converter.getCurrentLocation());
}
}
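// isOpenMPTargetConstruct (below) reports whether a construct's directive
// belongs to the TARGET family. Hypothetical examples (not part of this
// file):
//
//   !$omp target                    -> true
//   !$omp target teams distribute   -> true
//   !$omp parallel do               -> false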
bool Fortran::lower::isOpenMPTargetConstruct(
const parser::OpenMPConstruct &omp) {
llvm::omp::Directive dir = llvm::omp::Directive::OMPD_unknown;
if (const auto *block = std::get_if<parser::OpenMPBlockConstruct>(&omp.u)) {
const auto &begin = std::get<parser::OmpBeginBlockDirective>(block->t);
dir = std::get<parser::OmpBlockDirective>(begin.t).v;
} else if (const auto *loop =
std::get_if<parser::OpenMPLoopConstruct>(&omp.u)) {
const auto &begin = std::get<parser::OmpBeginLoopDirective>(loop->t);
dir = std::get<parser::OmpLoopDirective>(begin.t).v;
}
return llvm::omp::allTargetSet.test(dir);
}
void Fortran::lower::gatherOpenMPDeferredDeclareTargets(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPDeclarativeConstruct &ompDecl,
llvm::SmallVectorImpl<OMPDeferredDeclareTargetInfo>
&deferredDeclareTarget) {
Fortran::common::visit(
common::visitors{
[&](const parser::OpenMPDeclareTargetConstruct &ompReq) {
collectDeferredDeclareTargets(converter, semaCtx, eval, ompReq,
deferredDeclareTarget);
},
[&](const auto &) {},
},
ompDecl.u);
}
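// For example (hypothetical input), a declare target construct restricted to
// device code would make isOpenMPDeviceDeclareTarget (below) return true:
//
//   !$omp declare target device_type(nohost)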
bool Fortran::lower::isOpenMPDeviceDeclareTarget(
lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OpenMPDeclarativeConstruct &ompDecl) {
return Fortran::common::visit(
common::visitors{
[&](const parser::OpenMPDeclareTargetConstruct &ompReq) {
mlir::omp::DeclareTargetDeviceType targetType =
getDeclareTargetFunctionDevice(converter, semaCtx, eval, ompReq)
.value_or(mlir::omp::DeclareTargetDeviceType::host);
return targetType != mlir::omp::DeclareTargetDeviceType::host;
},
[&](const auto &) { return false; },
},
ompDecl.u);
}
// In certain cases, such as subroutine or function interfaces that declare
// but do not define or directly call the subroutine or function within the
// same module, lowering is delayed until after the declare target construct
// itself has been processed, so the symbol is not yet in the table.
//
// This function also returns true if any device declare target case is
// encountered, to support checking whether the requires attributes must be
// applied to the module.
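// A hypothetical interface-only case whose function operation may not exist
// yet when the declare target construct is lowered:
//
//   interface
//     subroutine f()
//       !$omp declare target
//     end subroutine
//   end interface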
bool Fortran::lower::markOpenMPDeferredDeclareTargetFunctions(
mlir::Operation *mod,
llvm::SmallVectorImpl<OMPDeferredDeclareTargetInfo> &deferredDeclareTargets,
AbstractConverter &converter) {
bool deviceCodeFound = false;
auto modOp = llvm::cast<mlir::ModuleOp>(mod);
for (const auto &declTar : deferredDeclareTargets) {
mlir::Operation *op = modOp.lookupSymbol(converter.mangleName(declTar.sym));
// Because interfaces are only emitted when they are used within a module,
// not finding an operation at this point cannot be a hard error; we
// simply ignore it for now.
// TODO: Add semantic checks to detect cases where an erroneous
// (undefined) symbol has been supplied to a declare target clause.
if (!op)
continue;
auto devType = declTar.declareTargetDeviceType;
if (!deviceCodeFound && devType != mlir::omp::DeclareTargetDeviceType::host)
deviceCodeFound = true;
markDeclareTarget(op, converter, declTar.declareTargetCaptureClause,
devType);
}
return deviceCodeFound;
}
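// For illustration, given a hypothetical input such as
//
//   !$omp requires unified_shared_memory
//
// genOpenMPRequires (below) records the flag on the offload module,
// approximately as:
//
//   module attributes {omp.requires =
//       #omp<clause_requires unified_shared_memory>} {...}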
void Fortran::lower::genOpenMPRequires(mlir::Operation *mod,
const semantics::Symbol *symbol) {
using MlirRequires = mlir::omp::ClauseRequires;
using SemaRequires = semantics::WithOmpDeclarative::RequiresFlag;
if (auto offloadMod =
llvm::dyn_cast<mlir::omp::OffloadModuleInterface>(mod)) {
semantics::WithOmpDeclarative::RequiresFlags semaFlags;
if (symbol) {
common::visit(
[&](const auto &details) {
if constexpr (std::is_base_of_v<semantics::WithOmpDeclarative,
std::decay_t<decltype(details)>>) {
if (details.has_ompRequires())
semaFlags = *details.ompRequires();
}
},
symbol->details());
}
// Start from the pre-populated omp.requires module attribute, if it was
// set, so that the "-fopenmp-force-usm" compiler option is honored.
MlirRequires mlirFlags = offloadMod.getRequires();
if (semaFlags.test(SemaRequires::ReverseOffload))
mlirFlags = mlirFlags | MlirRequires::reverse_offload;
if (semaFlags.test(SemaRequires::UnifiedAddress))
mlirFlags = mlirFlags | MlirRequires::unified_address;
if (semaFlags.test(SemaRequires::UnifiedSharedMemory))
mlirFlags = mlirFlags | MlirRequires::unified_shared_memory;
if (semaFlags.test(SemaRequires::DynamicAllocators))
mlirFlags = mlirFlags | MlirRequires::dynamic_allocators;
offloadMod.setRequires(mlirFlags);
}
}