blob: 75709b3c7e782171d8debc08fcebe8cb1e394152 [file] [log] [blame]
//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//
#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;
namespace {
/// Common base of the capture-info objects used while emitting code inside
/// OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with an outlined function for a standalone 'parallel' directive.
    ParallelOutlinedRegion,
    /// Region with an outlined function for a standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that need no function outlining, such as the
    /// 'for', 'sections' and 'atomic' directives.
    InlinedRegion,
    /// Region with an outlined function for a standalone 'target' directive.
    TargetRegion,
  };

  /// Builds region info attached to the captured statement \p CS.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Builds region info that is not attached to a captured statement.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Returns the variable or parameter that stores the global thread id
  /// inside the OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emits the body of the captured statement.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Returns an LValue for the current thread id variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Hook for emitting an untied-task switching point; the base version does
  /// nothing.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Region kind this object was constructed with.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Directive kind this object was constructed with.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Value of the HasCancel flag passed at construction.
  bool hasCancel() const { return HasCancel; }

  /// isa/cast/dyn_cast support: matches any captured-statement info whose kind
  /// is CR_OpenMP.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
/// Captured-statement info for regions of kind ParallelOutlinedRegion. Carries
/// the thread id variable and the name of the capture helper.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Variable holding the global thread id; must be
  /// non-null.
  /// \param HelperName Name reported by getHelperName().
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Returns the variable or parameter that stores the global thread id
  /// inside the OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Returns the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// isa/cast/dyn_cast support: matches OpenMP region info whose region kind
  /// is ParallelOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    if (!CGOpenMPRegionInfo::classof(Info))
      return false;
    return cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
           ParallelOutlinedRegion;
  }

private:
  /// Variable or parameter storing the global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the capture helper.
  StringRef HelperName;
};
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
class UntiedTaskActionTy final : public PrePostActionTy {
bool Untied;
const VarDecl *PartIDVar;
const RegionCodeGenTy UntiedCodeGen;
llvm::SwitchInst *UntiedSwitch = nullptr;
public:
UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
const RegionCodeGenTy &UntiedCodeGen)
: Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
void Enter(CodeGenFunction &CGF) override {
if (Untied) {
// Emit task switching point.
LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
CGF.GetAddrOfLocalVar(PartIDVar),
PartIDVar->getType()->castAs<PointerType>());
llvm::Value *Res =
CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
CGF.EmitBlock(DoneBB);
CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
UntiedSwitch->addCase(CGF.Builder.getInt32(0),
CGF.Builder.GetInsertBlock());
emitUntiedSwitch(CGF);
}
}
void emitUntiedSwitch(CodeGenFunction &CGF) const {
if (Untied) {
LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
CGF.GetAddrOfLocalVar(PartIDVar),
PartIDVar->getType()->castAs<PointerType>());
CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
PartIdLVal);
UntiedCodeGen(CGF);
CodeGenFunction::JumpDest CurPoint =
CGF.getJumpDestInCurrentScope(".untied.next.");
CGF.EmitBranch(CGF.ReturnBlock.getBlock());
CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
CGF.Builder.GetInsertBlock());
CGF.EmitBranchThroughCleanup(CurPoint);
CGF.EmitBlock(CurPoint.getBlock());
}
}
unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
};
CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
const VarDecl *ThreadIDVar,
const RegionCodeGenTy &CodeGen,
OpenMPDirectiveKind Kind, bool HasCancel,
const UntiedTaskActionTy &Action)
: CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
ThreadIDVar(ThreadIDVar), Action(Action) {
assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
}
/// Get a variable or parameter for storing global thread id
/// inside OpenMP construct.
const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
/// Get an LValue for the current ThreadID variable.
LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
/// Get the name of the capture helper.
StringRef getHelperName() const override { return ".omp_outlined."; }
void emitUntiedSwitch(CodeGenFunction &CGF) override {
Action.emitUntiedSwitch(CGF);
}
static bool classof(const CGCapturedStmtInfo *Info) {
return CGOpenMPRegionInfo::classof(Info) &&
cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
TaskOutlinedRegion;
}
private:
/// A variable or parameter storing global thread id for OpenMP
/// constructs.
const VarDecl *ThreadIDVar;
/// Action for emitting code for untied tasks.
const UntiedTaskActionTy &Action;
};
/// Captured-statement info for inlined OpenMP regions. Most queries are
/// forwarded to the enclosing OpenMP region info, when there is one.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI Captured-statement info that was active before this region;
  /// may be null or may not be an OpenMP region info.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  /// Returns the context parameter value of the enclosing OpenMP region.
  llvm::Value *getContextValue() const override {
    if (!OuterRegionInfo)
      llvm_unreachable("No context value for inlined OpenMP region");
    return OuterRegionInfo->getContextValue();
  }

  /// Sets the context parameter value on the enclosing OpenMP region.
  void setContextValue(llvm::Value *V) override {
    if (!OuterRegionInfo)
      llvm_unreachable("No context value for inlined OpenMP region");
    OuterRegionInfo->setContextValue(V);
  }

  /// Looks up the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    // Without an enclosing OpenMP region there is no list of captured
    // variables to search, so the original variable is used.
    return OuterRegionInfo ? OuterRegionInfo->lookup(VD) : nullptr;
  }

  /// Returns the enclosing region's 'this' capture field, or null when there
  /// is no enclosing OpenMP region.
  FieldDecl *getThisFieldDecl() const override {
    return OuterRegionInfo ? OuterRegionInfo->getThisFieldDecl() : nullptr;
  }

  /// Returns the variable or parameter that stores the global thread id
  /// inside the OpenMP construct, or null when there is no enclosing OpenMP
  /// region.
  const VarDecl *getThreadIDVariable() const override {
    return OuterRegionInfo ? OuterRegionInfo->getThreadIDVariable() : nullptr;
  }

  /// Returns an LValue for the current thread id variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (!OuterRegionInfo)
      llvm_unreachable("No LValue for inlined OpenMP construct");
    return OuterRegionInfo->getThreadIDVariableLValue(CGF);
  }

  /// Returns the name of the capture helper, taken from the previously active
  /// captured-statement info (which need not be an OpenMP region).
  StringRef getHelperName() const override {
    if (!OldCSI)
      llvm_unreachable("No helper name for inlined OpenMP construct");
    return OldCSI->getHelperName();
  }

  /// Forwards to the enclosing OpenMP region, if any.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Captured-statement info that was active before this region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  /// isa/cast/dyn_cast support: matches OpenMP region info whose region kind
  /// is InlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    if (!CGOpenMPRegionInfo::classof(Info))
      return false;
    return cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about the outer region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI viewed as an OpenMP region info; null if OldCSI is null or is not
  /// an OpenMP region info.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
/// Captured-statement info for OpenMP target constructs. For these captures,
/// implicit parameters are used instead of the captured fields. The name of
/// the target region has to be unique in a given application, so it is
/// provided by the client, because only the client has the information to
/// generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Creates target region info with directive kind OMPD_target and no
  /// cancellation.
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// Always null: the thread id variable is unused for target regions because
  /// each starts executing with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Returns the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// isa/cast/dyn_cast support: matches OpenMP region info whose region kind
  /// is TargetRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    if (!CGOpenMPRegionInfo::classof(Info))
      return false;
    return cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Client-provided name of the capture helper.
  StringRef HelperName;
};
/// Placeholder region code generator handed to expression-only capture info;
/// reaching it is a bug.
static void EmptyCodeGen(CodeGenFunction & /*CGF*/,
                         PrePostActionTy & /*Action*/) {
  llvm_unreachable("No codegen for expressions");
}
/// Capture info used while emitting expressions captured in an innermost
/// OpenMP region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  /// Wraps the captured-statement info currently active in \p CGF and
  /// privatizes every non-local variable captured by \p CS.
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      const bool CapturesVar =
          C.capturesVariable() || C.capturesVariableByCopy();
      if (!CapturesVar)
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Local variables and parameters need no privatization here.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Looks up the captured field decl for a variable by delegating to the
  /// inlined-region implementation.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    return CGOpenMPInlinedRegionInfo::lookup(VD);
  }

  /// Expressions have no captured statement body to emit.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Expressions have no thread id variable.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Expressions have no capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  /// isa/cast/dyn_cast support: never matches.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
/// RAII for emitting code of OpenMP constructs.
///
/// On construction a new CGOpenMPInlinedRegionInfo is installed as
/// CGF.CapturedStmtInfo; on destruction that object is deleted and the
/// previous captured-statement info is put back. The guard owns the installed
/// object, so it is non-copyable: a copy would delete it and restore the
/// saved state a second time.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// State stashed from CGF while the region is active; only used when
  /// NoInheritance is set.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  /// \param Kind Directive kind recorded on the installed region info.
  /// \param HasCancel HasCancel flag recorded on the installed region info.
  /// \param NoInheritance When true, CGF's lambda capture fields, lambda
  /// 'this' capture field and block info are cleared for the lifetime of this
  /// object and restored by the destructor.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Swapping with the (empty) member map leaves CGF with no lambda
      // capture fields while keeping the old ones for the destructor.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  // Copying would duplicate ownership of CGF.CapturedStmtInfo.
  InlinedOpenMPRegionRAII(const InlinedOpenMPRegionRAII &) = delete;
  InlinedOpenMPRegionRAII &operator=(const InlinedOpenMPRegionRAII &) = delete;

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerators are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
/// Use trampoline for internal microtask.
OMP_IDENT_IMD = 0x01,
/// Use c-style ident structure.
OMP_IDENT_KMPC = 0x02,
/// Atomic reduction option for kmpc_reduce.
OMP_ATOMIC_REDUCE = 0x10,
/// Explicit 'barrier' directive.
OMP_IDENT_BARRIER_EXPL = 0x20,
/// Implicit barrier in code.
OMP_IDENT_BARRIER_IMPL = 0x40,
/// Implicit barrier in 'for' directive. Has the same value as
/// OMP_IDENT_BARRIER_IMPL.
OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
/// Implicit barrier in 'sections' directive.
OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
/// Implicit barrier in 'single' directive.
OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
/// Call of __kmp_for_static_init for static loop.
OMP_IDENT_WORK_LOOP = 0x200,
/// Call of __kmp_for_static_init for sections.
OMP_IDENT_WORK_SECTIONS = 0x400,
/// Call of __kmp_for_static_init for distribute.
OMP_IDENT_WORK_DISTRIBUTE = 0x800,
// Must name the largest enumerator above; update it when adding a flag with
// a higher value.
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
/// Each clause flag is a distinct single bit.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
/// flag undefined.
OMP_REQ_UNDEFINED = 0x000,
/// no requires clause present.
OMP_REQ_NONE = 0x001,
/// reverse_offload clause.
OMP_REQ_REVERSE_OFFLOAD = 0x002,
/// unified_address clause.
OMP_REQ_UNIFIED_ADDRESS = 0x004,
/// unified_shared_memory clause.
OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
/// dynamic_allocators clause.
OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
// Must name the largest enumerator above; update it when adding a flag with
// a higher value.
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};
/// Reserved device ID values.
enum OpenMPOffloadingReservedDeviceIDs {
/// Device ID if the device was not defined, runtime should get it
/// from environment variables in the spec.
OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
/// kmp_int32 reserved_1; /**< might be used in Fortran;
/// see above */
/// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;
/// KMP_IDENT_KMPC identifies this union
/// member */
/// kmp_int32 reserved_2; /**< not really used in Fortran any more;
/// see above */
///#if USE_ITT_BUILD
/// /* but currently used for storing
/// region-specific ITT */
/// /* contextual information. */
///#endif /* USE_ITT_BUILD */
/// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
/// C++ */
/// char const *psource; /**< String describing the source location.
/// The string is composed of semi-colon separated
/// fields which describe the source file,
/// the function and a pair of line numbers that
/// delimit the construct.
/// */
/// } ident_t;
///
/// The enumerators below are listed in the same order as the fields of the
/// structure quoted above.
enum IdentFieldIndex {
/// might be used in Fortran
IdentField_Reserved_1,
/// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
IdentField_Flags,
/// Not really used in Fortran any more
IdentField_Reserved_2,
/// Source[4] in Fortran, do not use for C++
IdentField_Reserved_3,
/// String describing the source location. The string is composed of
/// semi-colon separated fields which describe the source file, the function
/// and a pair of line numbers that delimit the construct.
IdentField_PSource
};
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
/// Each OMP_ord_* enumerator is the matching OMP_sch_* enumerator plus 32.
enum OpenMPSchedType {
/// Lower bound for default (unordered) versions.
OMP_sch_lower = 32,
OMP_sch_static_chunked = 33,
OMP_sch_static = 34,
OMP_sch_dynamic_chunked = 35,
OMP_sch_guided_chunked = 36,
OMP_sch_runtime = 37,
OMP_sch_auto = 38,
/// static with chunk adjustment (e.g., simd)
OMP_sch_static_balanced_chunked = 45,
/// Lower bound for 'ordered' versions.
OMP_ord_lower = 64,
OMP_ord_static_chunked = 65,
OMP_ord_static = 66,
OMP_ord_dynamic_chunked = 67,
OMP_ord_guided_chunked = 68,
OMP_ord_runtime = 69,
OMP_ord_auto = 70,
/// Default schedule; an alias for OMP_sch_static.
OMP_sch_default = OMP_sch_static,
/// dist_schedule types
OMP_dist_sch_static_chunked = 91,
OMP_dist_sch_static = 92,
/// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
/// Set if the monotonic schedule modifier was present.
OMP_sch_modifier_monotonic = (1 << 29),
/// Set if the nonmonotonic schedule modifier was present.
OMP_sch_modifier_nonmonotonic = (1 << 30),
};
/// Cleanup that runs the Exit step of a pre|post-action for the advanced
/// codegen sequence of an OpenMP region.
class CleanupTy final : public EHScopeStack::Cleanup {
  /// Action whose Exit is invoked; not owned.
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}

  /// Calls Action->Exit, but only while \p CGF still has an insertion point.
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (CGF.HaveInsertPoint())
      Action->Exit(CGF);
  }
};
} // anonymous namespace
/// Runs the region's codegen callback inside its own cleanups scope. If a
/// pre|post-action is attached, a cleanup invoking its Exit step is pushed
/// first; otherwise the callback receives a default-constructed action.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (!PrePostAction) {
    PrePostActionTy DefaultAction;
    Callback(CodeGen, CGF, DefaultAction);
    return;
  }
  CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
  Callback(CodeGen, CGF, *PrePostAction);
}
/// Checks whether \p ReductionOp is a call to a user-defined reduction (UDR)
/// combiner and, if so, returns the UDR declaration used for the reduction.
///
/// The expected shape is a CallExpr whose callee is an OpaqueValueExpr whose
/// source expression, ignoring implicit casts, is a DeclRefExpr naming an
/// OMPDeclareReductionDecl.
/// \return The referenced declaration, or nullptr if any step does not match.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  const auto *Call = dyn_cast<CallExpr>(ReductionOp);
  if (!Call)
    return nullptr;

  const auto *CalleeOVE = dyn_cast<OpaqueValueExpr>(Call->getCallee());
  if (!CalleeOVE)
    return nullptr;

  const auto *CalleeRef =
      dyn_cast<DeclRefExpr>(CalleeOVE->getSourceExpr()->IgnoreImpCasts());
  if (!CalleeRef)
    return nullptr;

  return dyn_cast<OMPDeclareReductionDecl>(CalleeRef->getDecl());
}
/// Initializes the private copy at \p Private for a reduction that uses the
/// declare-reduction declaration \p DRD.
///
/// If \p DRD has an initializer, \p InitOp (a call expression) is emitted
/// with its two argument variables mapped to \p Private and \p Original and
/// its callee mapped to the second function returned by
/// getUserDefinedReduction. Otherwise \p Private is initialized from a
/// private constant global holding the null constant of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *InitCall = cast<CallExpr>(InitOp);
    const auto *CalleeOVE = cast<OpaqueValueExpr>(InitCall->getCallee());
    const Expr *PrivArg = InitCall->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *OrigArg = InitCall->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *PrivRef =
        cast<DeclRefExpr>(cast<UnaryOperator>(PrivArg)->getSubExpr());
    const auto *OrigRef =
        cast<DeclRefExpr>(cast<UnaryOperator>(OrigArg)->getSubExpr());

    // Redirect the variables named by the call's arguments to the supplied
    // private and original addresses.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(PrivRef->getDecl()),
                            [Private]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(OrigRef->getDecl()),
                            [Original]() { return Original; });
    (void)PrivateScope.Privatize();

    RValue InitFn = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, CalleeOVE, InitFn);
    CGF.EmitIgnoredExpr(InitOp);
    return;
  }

  // No initializer clause: copy from a constant global of zero/null value.
  llvm::Constant *NullInit = CGF.CGM.EmitNullConstant(Ty);
  std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
  auto *GV = new llvm::GlobalVariable(
      CGF.CGM.getModule(), NullInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::PrivateLinkage, NullInit, Name);
  LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);

  const TypeEvaluationKind EvalKind = CGF.getEvaluationKind(Ty);
  if (EvalKind == TEK_Aggregate) {
    // Aggregates are copied through an lvalue opaque expression.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
    return;
  }

  // Scalars and complex values are loaded first and stored as prvalues.
  RValue InitRVal;
  if (EvalKind == TEK_Scalar)
    InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
  else if (EvalKind == TEK_Complex)
    InitRVal =
        RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));

  OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
  CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                       /*IsInitializer=*/false);
}
/// Emit initialization of arrays of complex types.
/// Emits a loop that initializes the destination array one base element at a
/// time; the loop is skipped when the array has no elements.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized with
/// emitInitWithReductionInitializer; otherwise \p Init is emitted into each
/// element with EmitAnyExprToMem.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration, may be null. When non-null, a
/// source element pointer based on \p SrcAddr is advanced in lockstep with the
/// destination pointer.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
QualType Type, bool EmitDeclareReductionInit,
const Expr *Init,
const OMPDeclareReductionDecl *DRD,
Address SrcAddr = Address::invalid()) {
// Perform element-by-element initialization.
QualType ElementTy;
// Drill down to the base element type on both arrays.
const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
DestAddr =
CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
// The source address is only touched when a declare-reduction is present.
if (DRD)
SrcAddr =
CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
llvm::Value *SrcBegin = nullptr;
if (DRD)
SrcBegin = SrcAddr.getPointer();
llvm::Value *DestBegin = DestAddr.getPointer();
// Cast from pointer to array type to pointer to single element.
llvm::Value *DestEnd =
CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
// The basic structure here is a while-do loop.
llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
// Jump straight to the exit block when begin == end (empty array).
llvm::Value *IsEmpty =
CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
// Enter the loop body, making that address the current address.
// EntryBB is the predecessor block used for the PHI nodes' initial values.
llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
CGF.EmitBlock(BodyBB);
CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
llvm::PHINode *SrcElementPHI = nullptr;
Address SrcElementCurrent = Address::invalid();
if (DRD) {
SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
"omp.arraycpy.srcElementPast");
SrcElementPHI->addIncoming(SrcBegin, EntryBB);
SrcElementCurrent =
Address(SrcElementPHI,
SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
}
llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
DestElementPHI->addIncoming(DestBegin, EntryBB);
Address DestElementCurrent =
Address(DestElementPHI,
DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
// Emit copy.
{
// Cleanups created while initializing one element are run per iteration.
CodeGenFunction::RunCleanupsScope InitScope(CGF);
if (EmitDeclareReductionInit) {
emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
SrcElementCurrent, ElementTy);
} else
CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
/*IsInitializer=*/false);
}
if (DRD) {
// Shift the address forward by one element.
// NOTE(review): this source-side GEP reuses the IR name
// "omp.arraycpy.dest.element"; it looks copy-pasted from the destination
// GEP below. Renaming it would change the emitted IR value names.
llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
"omp.arraycpy.dest.element");
SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
}
// Shift the address forward by one element.
llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
"omp.arraycpy.dest.element");
// Check whether we've reached the end.
llvm::Value *Done =
CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
// The back edge comes from whatever block the body ended in.
DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
// Done.
CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
/// Emits the lvalue of the shared expression \p E by delegating to
/// CodeGenFunction::EmitOMPSharedLValue.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  LValue SharedLVal = CGF.EmitOMPSharedLValue(E);
  return SharedLVal;
}
/// Emits the upper-bound lvalue of \p E when it is an OpenMP array section.
/// \return The array section lvalue emitted with IsLowerBound=false, or a
/// default-constructed LValue when \p E is not an array section.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
  if (!OASE)
    return LValue();
  return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
}
/// Emits element-wise initialization of the private array copy of reduction
/// item \p N.
///
/// The declare-reduction path is taken when \p DRD is non-null and either it
/// has an initializer or the private variable has none; the per-element
/// initializer is then the item's reduction operation. Otherwise the private
/// variable's own initializer is used.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateRef = cast<DeclRefExpr>(ClausesData[N].Private);
  const auto *PrivateVD = cast<VarDecl>(PrivateRef->getDecl());

  const bool UseUDRInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  const Expr *ElementInit =
      UseUDRInit ? ClausesData[N].ReductionOp : PrivateVD->getInit();

  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), UseUDRInit,
                       ElementInit, DRD, SharedLVal.getAddress(CGF));
}
/// Collects the per-item reduction data.
///
/// Element I of \p Shareds is combined with element I of \p Origs,
/// \p Privates and \p ReductionOps, so each of those arrays must provide at
/// least as many elements as \p Shareds.
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  const size_t NumItems = Shareds.size();
  // The arrays are walked in lockstep; a shorter companion array would be
  // read past its end.
  assert(Origs.size() >= NumItems && Privates.size() >= NumItems &&
         ReductionOps.size() >= NumItems &&
         "Each shared reduction item needs an original expression, a private "
         "copy and a reduction operation.");
  ClausesData.reserve(NumItems);
  SharedAddresses.reserve(NumItems);
  Sizes.reserve(NumItems);
  BaseDecls.reserve(NumItems);
  for (size_t I = 0; I != NumItems; ++I)
    ClausesData.emplace_back(Shareds[I], Origs[I], Privates[I],
                             ReductionOps[I]);
}
/// Emits the lvalue pair (lvalue, upper-bound lvalue) for the shared
/// expression of item \p N and records it in SharedAddresses, then records
/// the pair for the item's original reference expression in OrigAddresses.
/// When both expressions are the same object the already emitted pair is
/// reused. Items must be processed in order: exactly \p N pairs must have
/// been recorded so far.
void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  const Expr *SharedExpr = ClausesData[N].Shared;
  const Expr *RefExpr = ClausesData[N].Ref;

  LValue SharedLB = emitSharedLValue(CGF, SharedExpr);
  LValue SharedUB = emitSharedLValueUB(CGF, SharedExpr);
  SharedAddresses.emplace_back(SharedLB, SharedUB);

  if (SharedExpr == RefExpr) {
    OrigAddresses.emplace_back(SharedLB, SharedUB);
    return;
  }

  LValue OrigLB = emitSharedLValue(CGF, RefExpr);
  LValue OrigUB = emitSharedLValueUB(CGF, RefExpr);
  OrigAddresses.emplace_back(OrigLB, OrigUB);
}
/// Computes the size of reduction item \p N and appends it to Sizes as a
/// (size in chars, number of elements) pair.
///
/// For a private type that is not variably modified only the size in chars is
/// recorded and the element count is null. Otherwise the element count is
/// bound to the size expression of the private variable array type and the
/// variably modified type is emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateRef = cast<DeclRefExpr>(ClausesData[N].Private);
  const auto *PrivateVD = cast<VarDecl>(PrivateRef->getDecl());
  QualType PrivateType = PrivateVD->getType();

  if (!PrivateType->isVariablyModifiedType()) {
    llvm::Value *FixedSize = CGF.getTypeSize(
        OrigAddresses[N].first.getType().getNonReferenceType());
    Sizes.emplace_back(FixedSize, nullptr);
    return;
  }

  llvm::Value *OrigBegin = OrigAddresses[N].first.getPointer(CGF);
  auto *ElemType =
      cast<llvm::PointerType>(OrigBegin->getType())->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);

  llvm::Value *NumElems;
  llvm::Value *NumChars;
  if (isa<OMPArraySectionExpr>(ClausesData[N].Ref)) {
    // Array section: the element count is (upper - lower) + 1.
    NumElems = CGF.Builder.CreatePtrDiff(
        OrigAddresses[N].second.getPointer(CGF), OrigBegin);
    NumElems = CGF.Builder.CreateNUWAdd(
        NumElems, llvm::ConstantInt::get(NumElems->getType(), /*V=*/1));
    NumChars = CGF.Builder.CreateNUWMul(NumElems, ElemSizeOf);
  } else {
    // Whole object: derive the element count from the total size.
    NumChars = CGF.getTypeSize(
        OrigAddresses[N].first.getType().getNonReferenceType());
    NumElems = CGF.Builder.CreateExactUDiv(NumChars, ElemSizeOf);
  }
  Sizes.emplace_back(NumChars, NumElems);

  const auto *SizeOVE = cast<OpaqueValueExpr>(
      CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr());
  CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, SizeOVE,
                                                RValue::get(NumElems));
  CGF.EmitVariablyModifiedType(PrivateType);
}
/// Emits the variably modified private type of reduction item \p N using an
/// already known element count \p Size.
///
/// Does nothing for private types that are not variably modified; in that
/// case \p Size and the recorded element count are expected to be null.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateRef = cast<DeclRefExpr>(ClausesData[N].Private);
  const auto *PrivateVD = cast<VarDecl>(PrivateRef->getDecl());
  QualType PrivateType = PrivateVD->getType();

  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }

  // Bind the size expression of the array type to the supplied value.
  const auto *SizeOVE = cast<OpaqueValueExpr>(
      CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr());
  CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, SizeOVE,
                                                RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
/// Emits the initialization of the private copy of reduction item \p N.
///
/// \param PrivateAddr Address of the private copy; it is cast to the memory
/// type of the private variable.
/// \param SharedLVal LValue of the shared item; its address is cast to the
/// memory type of the recorded shared lvalue.
/// \param DefaultInit Callback emitting default initialization. For the
/// non-array, non-UDR case its return value decides whether the private
/// variable's own initializer is still emitted (it is when the callback
/// returns false).
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateRef = cast<DeclRefExpr>(ClausesData[N].Private);
  const auto *PrivateVD = cast<VarDecl>(PrivateRef->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);

  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));

  QualType SharedType = SharedAddresses[N].first.getType();
  Address SharedAddr = CGF.Builder.CreateElementBitCast(
      SharedLVal.getAddress(CGF), CGF.ConvertTypeForMem(SharedType));
  SharedLVal = CGF.MakeAddrLValue(
      SharedAddr, SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));

  // Arrays: element-wise initialization.
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
    return;
  }

  // Non-array item initialized through the declare-reduction declaration.
  if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
    return;
  }

  // Otherwise fall back to the private variable's own initializer unless the
  // default initialization already handled it.
  if (DefaultInit(CGF))
    return;
  if (PrivateVD->hasInit() &&
      !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
bool ReductionCodeGen::needCleanups(unsigned N) {
const auto *PrivateVD =
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
QualType PrivateType = PrivateVD->getType();
QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
return DTorKind != QualType::DK_none;
}
/// Push a destroy cleanup for the private copy of reduction item \p N located
/// at \p PrivateAddr. Does nothing when needCleanups(N) is false.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  if (!needCleanups(N))
    return;
  const auto *PrivateRef = cast<DeclRefExpr>(ClausesData[N].Private);
  const QualType PrivateTy = cast<VarDecl>(PrivateRef->getDecl())->getType();
  // The cleanup operates on an address typed as the private variable.
  Address TypedAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateTy));
  CGF.pushDestroy(PrivateTy.isDestructedType(), TypedAddr, PrivateTy);
}
/// Starting from \p BaseLV of type \p BaseTy, load through pointer/reference
/// levels until the type matches \p ElTy (or is no longer a pointer or
/// reference), then return an lvalue whose address is cast to the memory type
/// of \p ElTy.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  ASTContext &Ctx = CGF.getContext();
  for (BaseTy = BaseTy.getNonReferenceType();
       (BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
       !Ctx.hasSameType(BaseTy, ElTy);
       BaseTy = BaseTy->getPointeeType()) {
    Address CurAddr = BaseLV.getAddress(CGF);
    if (const auto *PtrTy = BaseTy->getAs<PointerType>())
      BaseLV = CGF.EmitLoadOfPointerLValue(CurAddr, PtrTy);
    else
      BaseLV =
          CGF.EmitLoadOfReferenceLValue(CGF.MakeAddrLValue(CurAddr, BaseTy));
  }

  Address Begin = CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                                   CGF.ConvertTypeForMem(ElTy));
  const QualType ResultTy = BaseLV.getType();
  return CGF.MakeAddrLValue(
      Begin, ResultTy, BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, ResultTy));
}
/// Build an address of the original base type that leads to \p Addr.
///
/// For every pointer/reference level between \p BaseTy and \p ElTy a memory
/// temporary is created; each temporary stores the pointer to the next one and
/// the innermost one stores \p Addr (cast to its element type). The outermost
/// temporary is returned. When no level has to be rebuilt, \p Addr is cast to
/// \p BaseLVType and returned with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Outermost = Address::invalid();
  Address Innermost = Address::invalid();
  for (BaseTy = BaseTy.getNonReferenceType();
       (BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
       !CGF.getContext().hasSameType(BaseTy, ElTy);
       BaseTy = BaseTy->getPointeeType()) {
    Address Level = CGF.CreateMemTemp(BaseTy);
    // Chain the new temporary behind the previous one, if any.
    if (Innermost.isValid())
      CGF.Builder.CreateStore(Level.getPointer(), Innermost);
    else
      Outermost = Level;
    Innermost = Level;
  }

  if (!Innermost.isValid())
    return Address(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, BaseLVType),
        BaseLVAlignment);

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, Innermost.getElementType());
  CGF.Builder.CreateStore(Addr, Innermost);
  return Outermost;
}
/// If \p Ref is an array section or an array subscript, strip all nested
/// sections/subscripts, store the underlying DeclRefExpr in \p DE and return
/// the variable it refers to. For any other expression return nullptr and
/// leave \p DE untouched.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const Expr *Base = nullptr;
  if (const auto *Section = dyn_cast<OMPArraySectionExpr>(Ref)) {
    Base = Section->getBase()->IgnoreParenImpCasts();
    // Peel nested array sections first.
    while (const auto *Inner = dyn_cast<OMPArraySectionExpr>(Base))
      Base = Inner->getBase()->IgnoreParenImpCasts();
  } else if (const auto *Subscript = dyn_cast<ArraySubscriptExpr>(Ref)) {
    Base = Subscript->getBase()->IgnoreParenImpCasts();
  } else {
    return nullptr;
  }

  // Then peel any remaining array subscripts.
  while (const auto *Inner = dyn_cast<ArraySubscriptExpr>(Base))
    Base = Inner->getBase()->IgnoreParenImpCasts();

  DE = cast<DeclRefExpr>(Base);
  return cast<VarDecl>(DE->getDecl());
}
/// Record the base declaration of reduction item \p N and return the address
/// to use for its private copy.
///
/// When the item is a plain variable reference, \p PrivateAddr is returned
/// unchanged. When it is an array section/subscript, the private pointer is
/// offset by the pointer difference between the beginning of the original base
/// and the shared item, and the result is wrapped back into the base's type
/// via castToBase.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *BaseRef = nullptr;
  const VarDecl *BaseVD = ::getBaseDecl(ClausesData[N].Ref, BaseRef);
  if (!BaseVD) {
    BaseDecls.emplace_back(
        cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
    return PrivateAddr;
  }

  BaseDecls.emplace_back(BaseVD);
  LValue OrigBaseLV = CGF.EmitLValue(BaseRef);
  LValue BeginLV = loadToBegin(CGF, BaseVD->getType(),
                               SharedAddresses[N].first.getType(), OrigBaseLV);
  Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);

  // Offset of the base's beginning relative to the shared item.
  llvm::Value *Offset = CGF.Builder.CreatePtrDiff(BeginLV.getPointer(CGF),
                                                  SharedAddr.getPointer());
  llvm::Value *TypedPrivate = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      PrivateAddr.getPointer(), SharedAddr.getType());
  llvm::Value *Adjusted = CGF.Builder.CreateGEP(SharedAddr.getElementType(),
                                                TypedPrivate, Offset);

  return castToBase(CGF, BaseVD->getType(), SharedAddresses[N].first.getType(),
                    OrigBaseLV.getAddress(CGF).getType(),
                    OrigBaseLV.getAlignment(), Adjusted);
}
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
const OMPDeclareReductionDecl *DRD =
getReductionInit(ClausesData[N].ReductionOp);
return DRD && DRD->getInitializer();
}
/// Returns the lvalue obtained by loading through the thread id variable,
/// which is treated here as a pointer.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  const VarDecl *TidVar = getThreadIDVariable();
  const auto *TidPtrTy = TidVar->getType()->castAs<PointerType>();
  return CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TidVar), TidPtrTy);
}
/// Emit the body of the region through the stored CodeGen callback, bracketed
/// by a terminate scope on the EH stack. Does nothing when there is no insert
/// point.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (CGF.HaveInsertPoint()) {
    // OpenMP "1.2.2 OpenMP Language Terminology": a structured block is an
    // executable statement with a single entry at the top and a single exit at
    // the bottom. The point of exit cannot be a branch out of the structured
    // block, and longjmp() and throw() must not violate the entry/exit
    // criteria.
    CGF.EHStack.pushTerminate();
    if (S != nullptr)
      CGF.incrementProfileCounter(S);
    CodeGen(CGF);
    CGF.EHStack.popTerminate();
  }
}
/// Returns an lvalue for the thread id variable itself (no pointer load), with
/// the alignment taken from the declaration.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  const VarDecl *TidVar = getThreadIDVariable();
  Address TidAddr = CGF.GetAddrOfLocalVar(TidVar);
  return CGF.MakeAddrLValue(TidAddr, TidVar->getType(), AlignmentSource::Decl);
}
/// Create an unnamed, public, non-mutable field of type \p FieldTy without
/// bit-width or in-class initializer, add it to \p DC and return it.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  const SourceLocation NoLoc;
  TypeSourceInfo *TInfo = C.getTrivialTypeSourceInfo(FieldTy, NoLoc);
  FieldDecl *NewField = FieldDecl::Create(
      C, DC, /*StartLoc=*/NoLoc, /*IdLoc=*/NoLoc, /*Id=*/nullptr, FieldTy,
      TInfo, /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  NewField->setAccess(AS_public);
  DC->addDecl(NewField);
  return NewField;
}
/// Construct the OpenMP runtime code generator for module \p CGM.
///
/// \param FirstSeparator separator stored for use before the first part of a
///        name built by getName().
/// \param Separator separator stored for use before every subsequent part.
///
/// Besides storing its arguments, the constructor creates the OpenMPIRBuilder
/// for the current module, builds the array type used for critical names,
/// initializes the builder and finally calls loadOffloadInfoMetadata().
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
StringRef Separator)
: CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
// The critical name type is an array of eight 32-bit integers.
KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
// Initialize the types used in OpenMPIRBuilder from OMPKinds.def.
OMPBuilder.initialize();
loadOffloadInfoMetadata();
}
/// Drop the internal variables map and erase every still-alive global
/// variable recorded in EmittedNonTargetVariables that is only a declaration
/// and has no uses.
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Entry : EmittedNonTargetVariables) {
    const auto &Handle = Entry.getValue();
    if (!Handle.pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Handle);
    if (GV && GV->isDeclaration() && GV->getNumUses() == 0)
      GV->eraseFromParent();
  }
}
/// Concatenate \p Parts into a single name: the first part is prefixed with
/// FirstSeparator and every following part with Separator.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Storage;
  llvm::raw_svector_ostream Out(Storage);
  bool IsFirst = true;
  for (StringRef Part : Parts) {
    if (IsFirst)
      Out << FirstSeparator;
    else
      Out << Separator;
    Out << Part;
    IsFirst = false;
  }
  return Out.str().str();
}
/// Emit the internal helper function for a user-defined reduction: either the
/// combiner (\p IsCombiner is true) or the initializer.
///
/// \param Ty type of the reduction; both parameters are restrict-qualified
///        pointers to it.
/// \param CombinerInitializer expression emitted (and its value ignored) in
///        the function body; may be null, in which case nothing is emitted
///        for it.
/// \param In variable that is mapped to the pointee of the second parameter.
/// \param Out variable that is mapped to the pointee of the first parameter.
/// \param IsCombiner selects the function name and, when false, additionally
///        emits the non-trivial initializer of \p Out.
/// \returns the newly created function with internal linkage.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
const Expr *CombinerInitializer, const VarDecl *In,
const VarDecl *Out, bool IsCombiner) {
// void .omp_combiner.(Ty *out, Ty *in);
// (the out parameter is pushed first, the in parameter second).
ASTContext &C = CGM.getContext();
QualType PtrTy = C.getPointerType(Ty).withRestrict();
FunctionArgList Args;
ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
/*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
/*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
Args.push_back(&OmpOutParm);
Args.push_back(&OmpInParm);
const CGFunctionInfo &FnInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
std::string Name = CGM.getOpenMPRuntime().getName(
{IsCombiner ? "omp_combiner" : "omp_initializer", ""});
auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
Name, &CGM.getModule());
CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
// When optimizing, replace noinline/optnone with alwaysinline on the helper.
if (CGM.getLangOpts().Optimize) {
Fn->removeFnAttr(llvm::Attribute::NoInline);
Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
Fn->addFnAttr(llvm::Attribute::AlwaysInline);
}
CodeGenFunction CGF(CGM);
// Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
// Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
Out->getLocation());
CodeGenFunction::OMPPrivateScope Scope(CGF);
Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
.getAddress(CGF);
});
Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
.getAddress(CGF);
});
(void)Scope.Privatize();
// For an initializer, first emit the non-trivial initializer of the Out
// variable into its (privatized) storage.
if (!IsCombiner && Out->hasInit() &&
!CGF.isTrivialInitializer(Out->getInit())) {
CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
Out->getType().getQualifiers(),
/*IsInitializer=*/true);
}
// Then emit the combiner/initializer expression itself, if there is one.
if (CombinerInitializer)
CGF.EmitIgnoredExpr(CombinerInitializer);
Scope.ForceCleanup();
CGF.FinishFunction();
return Fn;
}
/// Emit the combiner and, if the declaration has an initializer, the
/// initializer function for the user-defined reduction \p D, and cache both in
/// UDRMap. When \p CGF is non-null, \p D is also recorded for the function
/// currently being emitted. Does nothing if \p D has already been emitted.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D))
    return;

  // Extracts the variable referenced by one of the declaration's DeclRefExprs.
  auto VarOf = [](const Expr *Ref) {
    return cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  };

  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(), VarOf(D->getCombinerIn()),
      VarOf(D->getCombinerOut()), /*IsCombiner=*/true);

  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only a call-style initializer is emitted as an expression.
    const Expr *CallInit =
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr;
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(), CallInit, VarOf(D->getInitOrig()),
        VarOf(D->getInitPriv()), /*IsCombiner=*/false);
  }

  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF)
    FunctionUDRMap[CGF->CurFn].push_back(D);
}
/// Returns the (combiner, initializer) pair cached for \p D, emitting the
/// functions first (without an associated CodeGenFunction) if they are not
/// cached yet.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto Cached = UDRMap.find(D);
  if (Cached == UDRMap.end()) {
    emitUserDefinedReduction(/*CGF=*/nullptr, D);
    return UDRMap.lookup(D);
  }
  return Cached->second;
}
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
//
// The constructor pushes a finalization callback onto the given
// OpenMPIRBuilder and the destructor pops it again; both are no-ops when the
// builder pointer is null.
struct PushAndPopStackRAII {
PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
bool HasCancel, llvm::omp::Directive Kind)
: OMPBuilder(OMPBuilder) {
if (!OMPBuilder)
return;
// The following callback is the crucial part of clang's cleanup process.
//
// NOTE:
// Once the OpenMPIRBuilder is used to create parallel regions (and
// similar), the cancellation destination (Dest below) is determined via
// IP. That means if we have variables to finalize we split the block at IP,
// use the new block (=BB) as destination to build a JumpDest (via
// getJumpDestInCurrentScope(BB)) which then is fed to
// EmitBranchThroughCleanup. Furthermore, there will not be the need
// to push & pop a FinalizationInfo object.
// The FiniCB will still be needed but at the point where the
// OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
assert(IP.getBlock()->end() == IP.getPoint() &&
"Clang CG should cause non-terminated block!");
// Emit at IP, restoring the previous insert point when the guard dies.
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
CGF.Builder.restoreIP(IP);
CodeGenFunction::JumpDest Dest =
CGF.getOMPCancelDestination(OMPD_parallel);
CGF.EmitBranchThroughCleanup(Dest);
};
// TODO: Remove this once we emit parallel regions through the
// OpenMPIRBuilder as it can do this setup internally.
llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
OMPBuilder->pushFinalizationCB(std::move(FI));
}
~PushAndPopStackRAII() {
if (OMPBuilder)
OMPBuilder->popFinalizationCB();
}
// Builder the callback was pushed onto; null means nothing was pushed.
llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
/// Generate the outlined function for the captured statement \p CS of the
/// parallel or teams directive \p D.
///
/// The region's "has cancel" flag is taken from \p D when it is one of the
/// parallel-based directive classes checked below, and is false otherwise.
/// While the function is generated, a finalization callback is pushed onto the
/// module's OpenMPIRBuilder.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);

  auto DirectiveHasCancel = [&D]() -> bool {
    if (const auto *Dir = dyn_cast<OMPParallelDirective>(&D))
      return Dir->hasCancel();
    if (const auto *Dir = dyn_cast<OMPTargetParallelDirective>(&D))
      return Dir->hasCancel();
    if (const auto *Dir = dyn_cast<OMPParallelSectionsDirective>(&D))
      return Dir->hasCancel();
    if (const auto *Dir = dyn_cast<OMPParallelForDirective>(&D))
      return Dir->hasCancel();
    if (const auto *Dir = dyn_cast<OMPTargetParallelForDirective>(&D))
      return Dir->hasCancel();
    if (const auto *Dir = dyn_cast<OMPDistributeParallelForDirective>(&D))
      return Dir->hasCancel();
    if (const auto *Dir = dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
      return Dir->hasCancel();
    if (const auto *Dir =
            dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
      return Dir->hasCancel();
    return false;
  };
  const bool HasCancel = DirectiveHasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
/// Generate the outlined function for the OMPD_parallel captured statement of
/// directive \p D.
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, D.getCapturedStmt(OMPD_parallel), ThreadIDVar, InnermostKind,
      getOutlinedHelperName(), CodeGen);
}
/// Generate the outlined function for the OMPD_teams captured statement of
/// directive \p D.
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, D.getCapturedStmt(OMPD_teams), ThreadIDVar, InnermostKind,
      getOutlinedHelperName(), CodeGen);
}
/// Generate the outlined function for the task or taskloop region of
/// directive \p D.
///
/// \param Tied passed to the untied-task action; when false,
///        \p NumberOfParts is set from the action after code generation.
/// \param NumberOfParts [out] only written when \p Tied is false.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Callback handed to the untied-task action: emits a call to
  // __kmpc_omp_task(loc, tid, task) using the task pointer loaded from
  // TaskTVar.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *Tid = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *IdentLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    const auto *TaskPtrTy = TaskTVar->getType()->castAs<PointerType>();
    llvm::Value *TaskPtr =
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), TaskPtrTy)
            .getPointer(CGF);
    llvm::Value *TaskArgs[] = {IdentLoc, Tid, TaskPtr};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");

  // Taskloop-based directives capture their body under OMPD_taskloop, all
  // others under OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);

  auto DirectiveHasCancel = [&D]() -> bool {
    if (const auto *Dir = dyn_cast<OMPTaskDirective>(&D))
      return Dir->hasCancel();
    if (const auto *Dir = dyn_cast<OMPTaskLoopDirective>(&D))
      return Dir->hasCancel();
    if (const auto *Dir = dyn_cast<OMPMasterTaskLoopDirective>(&D))
      return Dir->hasCancel();
    if (const auto *Dir = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
      return Dir->hasCancel();
    return false;
  };
  const bool HasCancel = DirectiveHasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Outlined = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Outlined;
}
/// Append to \p Fields one constant per field of \p RD, taken in order from
/// \p Data.
///
/// LLVM struct elements that lie between two consecutive record fields (as
/// reported by \p RL) are filled with null values of the element's type.
/// \p Data must provide at least one constant per field of \p RD.
static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Null-fill the LLVM elements skipped since the previous field.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    assert(DI != Data.end() && "Fewer initializers than record fields.");
    Fields.add(*DI);
    ++DI;
  }
}
/// Create a global variable named \p Name holding a constant struct of record
/// type \p Ty whose fields are initialized from \p Data. The alignment is the
/// global-variable alignment of \p Ty; \p IsConstant and \p Args are forwarded
/// to the constant builder's finishAndCreateGlobal.
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *Record = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &Layout = CGM.getTypes().getCGRecordLayout(Record);
  ConstantInitBuilder Builder(CGM);
  ConstantStructBuilder Init = Builder.beginStruct(Layout.getLLVMType());
  buildStructValue(Init, CGM, Record, Layout, Data);
  const CharUnits Align = CGM.getContext().getAlignOfGlobalVarInChars(Ty);
  return Init.finishAndCreateGlobal(Name, Align, IsConstant,
                                    std::forward<As>(Args)...);
}
/// Build a constant struct of record type \p Ty from \p Data as a nested
/// struct of the builder \p Parent and add it to \p Parent.
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *Record = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &Layout = CGM.getTypes().getCGRecordLayout(Record);
  ConstantStructBuilder Nested = Parent.beginStruct(Layout.getLLVMType());
  buildStructValue(Nested, CGM, Record, Layout, Data);
  Nested.finishAndAddTo(Parent);
}
/// Create the "svcpt" marker instruction for the current function and record
/// it as the service insert point. The marker is appended to the current
/// insert block when \p AtCurrentPoint is true, and placed right after the
/// alloca insert point otherwise. The insert point must not be set already.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Info = OpenMPLocThreadIDMap[CGF.CurFn];
  assert(!Info.ServiceInsertPt && "Insert point is set already.");
  llvm::Value *Placeholder = llvm::UndefValue::get(CGF.Int32Ty);
  if (!AtCurrentPoint) {
    Info.ServiceInsertPt =
        new llvm::BitCastInst(Placeholder, CGF.Int32Ty, "svcpt");
    Info.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
    return;
  }
  Info.ServiceInsertPt = new llvm::BitCastInst(
      Placeholder, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
}
/// Erase the service insert point marker of the current function, if one is
/// set, and reset the recorded handle.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Info = OpenMPLocThreadIDMap[CGF.CurFn];
  llvm::Instruction *Marker = Info.ServiceInsertPt;
  if (!Marker)
    return;
  // Reset the recorded handle before the instruction is erased.
  Info.ServiceInsertPt = nullptr;
  Marker->eraseFromParent();
}
/// Write the ident string ";file;function;line;column;;" for \p Loc into
/// \p Buffer and return a reference to it. The function part is the qualified
/// name of the current function declaration and is empty when there is none.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  const SourceManager &SM = CGF.getContext().getSourceManager();
  const PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  llvm::raw_svector_ostream Out(Buffer);
  Out << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    Out << FD->getQualifiedNameAsString();
  Out << ";" << PLoc.getLine();
  Out << ";" << PLoc.getColumn() << ";;";
  return Out.str();
}
/// Return the ident value describing \p Loc with the given \p Flags.
///
/// The default source location string is used when debug info is disabled or
/// \p Loc is invalid; otherwise the string is built from the current function
/// name and the presumed file, line and column of \p Loc.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  const bool HasUsableLoc =
      Loc.isValid() &&
      CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;

  llvm::Constant *SrcLocStr = nullptr;
  if (HasUsableLoc) {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        FunctionName, FileName, PLoc.getLine(), PLoc.getColumn());
  } else {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
  }

  return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
                                     getDefaultLocationReserved2Flags());
}
/// Return the value of the OpenMP thread id for use in the current function.
///
/// In order of preference the value comes from:
///  1. the OpenMPIRBuilder, when the OpenMPIRBuilder language option is set;
///  2. the per-function cache in OpenMPLocThreadIDMap;
///  3. a load of the region's thread id variable, when the current captured
///     statement info is an OpenMP region that has one and the checks below
///     allow the load at the current insert point;
///  4. a call to __kmpc_global_thread_num emitted at the function's service
///     insert point, which is then cached.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
SourceLocation Loc) {
assert(CGF.CurFn && "No function in current CodeGenFunction.");
// If the OpenMPIRBuilder is used we need to use it for all thread id calls as
// the clang invariants used below might be broken.
if (CGM.getLangOpts().OpenMPIRBuilder) {
SmallString<128> Buffer;
OMPBuilder.updateToLocation(CGF.Builder.saveIP());
auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
getIdentStringFromSourceLocation(CGF, Loc, Buffer));
return OMPBuilder.getOrCreateThreadID(
OMPBuilder.getOrCreateIdent(SrcLocStr));
}
llvm::Value *ThreadID = nullptr;
// Check whether we've already cached a load of the thread id in this
// function.
auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
if (I != OpenMPLocThreadIDMap.end()) {
ThreadID = I->second.ThreadID;
if (ThreadID != nullptr)
return ThreadID;
}
// If exceptions are enabled, do not use parameter to avoid possible crash.
if (auto *OMPRegionInfo =
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
if (OMPRegionInfo->getThreadIDVariable()) {
// Check if this is an outlined function with thread id passed as argument.
LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
// Load from the thread id variable only if no landing pad is required, or
// C++ exceptions are disabled, or we are emitting into the entry block, or
// the lvalue pointer is not an instruction, or it is an instruction that
// lives in the entry block or in the current insert block.
if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
!CGF.getLangOpts().CXXExceptions ||
CGF.Builder.GetInsertBlock() == TopBlock ||
!isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
TopBlock ||
cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
CGF.Builder.GetInsertBlock()) {
ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
// If value loaded in entry block, cache it and use it everywhere in
// function.
if (CGF.Builder.GetInsertBlock() == TopBlock) {
auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
Elem.second.ThreadID = ThreadID;
}
return ThreadID;
}
}
}
// This is not an outlined function region - need to call
// kmp_int32 __kmpc_global_thread_num(ident_t *loc).
// Generate thread id value and cache this value for use across the
// function.
auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
if (!Elem.second.ServiceInsertPt)
setLocThreadIdInsertPt(CGF);
// Emit the call at the service insert point; the guard restores the
// builder's previous insert point on scope exit.
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
llvm::CallInst *Call = CGF.Builder.CreateCall(
OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
OMPRTL___kmpc_global_thread_num),
emitUpdateLocation(CGF, Loc));
Call->setCallingConv(CGF.getRuntimeCC());
Elem.second.ThreadID = Call;
return Call;
}
/// Release all per-function state recorded for the function that \p CGF has
/// just finished emitting: the cached thread id and its service insert point,
/// the user-defined reductions and mappers registered for the function, and
/// the function's entries in LastprivateConditionalToTypes and
/// FunctionToUntiedTaskStackMap.
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  // Forget the user-defined reductions emitted while generating this function.
  // A single find() replaces the former count() + operator[] + erase(key)
  // sequence and matches the mapper handling below.
  auto UDRIt = FunctionUDRMap.find(CGF.CurFn);
  if (UDRIt != FunctionUDRMap.end()) {
    for (const auto *D : UDRIt->second)
      UDRMap.erase(D);
    FunctionUDRMap.erase(UDRIt);
  }
  // Same for the user-defined mappers.
  auto UDMIt = FunctionUDMMap.find(CGF.CurFn);
  if (UDMIt != FunctionUDMMap.end()) {
    for (const auto *D : UDMIt->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(UDMIt);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}
/// Returns the ident pointer type held by the OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  llvm::Type *IdentPtrTy = OMPBuilder.IdentPtr;
  return IdentPtrTy;
}
/// Returns a pointer to the kmpc_micro function type, creating and caching the
/// function type on first use.
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (Kmpc_MicroTy)
    return llvm::PointerType::getUnqual(Kmpc_MicroTy);

  // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
  llvm::Type *Int32PtrTy = llvm::PointerType::getUnqual(CGM.Int32Ty);
  llvm::Type *MicroParams[] = {Int32PtrTy, Int32PtrTy};
  Kmpc_MicroTy =
      llvm::FunctionType::get(CGM.VoidTy, MicroParams, /*isVarArg=*/true);
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
/// Returns the runtime function used for static loop initialization with an
/// induction variable of \p IVSize bits (32 or 64) and signedness \p IVSigned.
/// The __kmpc_distribute_static_init_* family is selected when
/// \p IsGPUDistribute is true, the __kmpc_for_static_init_* family otherwise.
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
                                             bool IsGPUDistribute) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  const bool Is32Bit = IVSize == 32;

  StringRef Name;
  if (IsGPUDistribute) {
    if (Is32Bit)
      Name = IVSigned ? "__kmpc_distribute_static_init_4"
                      : "__kmpc_distribute_static_init_4u";
    else
      Name = IVSigned ? "__kmpc_distribute_static_init_8"
                      : "__kmpc_distribute_static_init_8u";
  } else {
    if (Is32Bit)
      Name = IVSigned ? "__kmpc_for_static_init_4"
                      : "__kmpc_for_static_init_4u";
    else
      Name = IVSigned ? "__kmpc_for_static_init_8"
                      : "__kmpc_for_static_init_8u";
  }

  llvm::Type *IVTy = Is32Bit ? CGM.Int32Ty : CGM.Int64Ty;
  auto *IVPtrTy = llvm::PointerType::getUnqual(IVTy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      IVPtrTy,                                   // p_lower
      IVPtrTy,                                   // p_upper
      IVPtrTy,                                   // p_stride
      IVTy,                                      // incr
      IVTy                                       // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}
/// Returns the __kmpc_dispatch_init_* runtime function for an induction
/// variable of \p IVSize bits (32 or 64) and signedness \p IVSigned.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  const bool Is32Bit = IVSize == 32;

  StringRef Name;
  if (Is32Bit)
    Name = IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u";
  else
    Name = IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u";

  llvm::Type *IVTy = Is32Bit ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
      CGM.Int32Ty,           // schedtype
      IVTy,                  // lower
      IVTy,                  // upper
      IVTy,                  // stride
      IVTy                   // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}
/// Returns the __kmpc_dispatch_fini_* runtime function for an induction
/// variable of \p IVSize bits (32 or 64) and signedness \p IVSigned.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");

  StringRef Name;
  if (IVSize == 32)
    Name = IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u";
  else
    Name = IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u";

  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}
/// Returns the __kmpc_dispatch_next_* runtime function (returning a 32-bit
/// integer) for an induction variable of \p IVSize bits (32 or 64) and
/// signedness \p IVSigned.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  const bool Is32Bit = IVSize == 32;

  StringRef Name;
  if (Is32Bit)
    Name = IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u";
  else
    Name = IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u";

  llvm::Type *IVTy = Is32Bit ? CGM.Int32Ty : CGM.Int64Ty;
  auto *IVPtrTy = llvm::PointerType::getUnqual(IVTy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      IVPtrTy,                                   // p_lower
      IVPtrTy,                                   // p_upper
      IVPtrTy                                    // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}
/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
///
/// The unique file ID is first looked up for the presumed file name of
/// \p Loc; if that fails, the lookup is retried with line directives ignored.
/// If the second lookup fails as well, err_cannot_open_file is reported and
/// \p DeviceID / \p FileID are set to zero.
///
/// \param DeviceID [out] device part of the file's unique ID.
/// \param FileID [out] file part of the file's unique ID.
/// \param LineNum [out] presumed line of \p Loc (from whichever presumed
///        location was computed last).
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();
  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)
  assert(Loc.isValid() && "Source location is expected to be always valid.");
  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");
  // Start from a zeroed ID so that the out-parameters are well defined even
  // when both lookups below fail.
  llvm::sys::fs::UniqueID ID(/*Device=*/0, /*File=*/0);
  if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
    // Retry with the name that ignores #line directives.
    PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
    assert(PLoc.isValid() && "Source location is expected to be always valid.");
    if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
      SM.getDiagnostics().Report(diag::err_cannot_open_file)
          << PLoc.getFilename() << EC.message();
  }
  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}
/// Returns the address of the "_decl_tgt_ref_ptr" reference pointer for the
/// declare target variable \p VD, creating and registering the pointer on
/// first use.
///
/// An invalid address is returned in OpenMP simd-only mode and for variables
/// that are neither declare target 'link' nor declare target 'to' under
/// unified shared memory.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapKind =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  const bool UsesRefPtr =
      MapKind && (*MapKind == OMPDeclareTargetDeclAttr::MT_Link ||
                  (*MapKind == OMPDeclareTargetDeclAttr::MT_To &&
                   HasRequiresUnifiedSharedMemory));
  if (!UsesRefPtr)
    return Address::invalid();

  // Name: <mangled>[_<file id in hex>]_decl_tgt_ref_ptr; the file id is only
  // added for variables that are not externally visible.
  SmallString<64> RefPtrName;
  {
    llvm::raw_svector_ostream OS(RefPtrName);
    OS << CGM.getMangledName(GlobalDecl(VD));
    if (!VD->isExternallyVisible()) {
      unsigned DeviceID, FileID, Line;
      getTargetEntryUniqueInfo(CGM.getContext(),
                               VD->getCanonicalDecl()->getBeginLoc(), DeviceID,
                               FileID, Line);
      OS << llvm::format("_%x", FileID);
    }
    OS << "_decl_tgt_ref_ptr";
  }

  llvm::Value *RefPtr = CGM.getModule().getNamedValue(RefPtrName);
  if (!RefPtr) {
    QualType RefPtrTy = CGM.getContext().getPointerType(VD->getType());
    RefPtr = getOrCreateInternalVariable(
        CGM.getTypes().ConvertTypeForMem(RefPtrTy), RefPtrName);

    auto *GV = cast<llvm::GlobalVariable>(RefPtr);
    GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    // On the host the pointer is initialized with the variable's address.
    if (!CGM.getLangOpts().OpenMPIsDevice)
      GV->setInitializer(CGM.GetAddrOfGlobal(VD));
    registerTargetGlobalVariable(VD, cast<llvm::Constant>(RefPtr));
  }
  return Address(RefPtr, CGM.getContext().getDeclAlign(VD));
}
/// Returns the internal variable used as the threadprivate cache for \p VD,
/// creating it if necessary. Its name is the mangled name of \p VD followed by
/// the "cache" suffix built by getName(). Must not be called when TLS is both
/// requested and supported by the target.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  const std::string CacheSuffix = getName({"cache", ""});
  const StringRef MangledName = CGM.getMangledName(VD);
  return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
                                     Twine(MangledName).concat(CacheSuffix));
}
/// Returns the address of the current thread's copy of the threadprivate
/// variable \p VD.
///
/// When TLS is requested and supported, \p VDAddr is returned as is.
/// Otherwise a call to __kmpc_threadprivate_cached(loc, tid, var, size, cache)
/// is emitted and its result is returned with the alignment of \p VDAddr.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  // Arguments are produced in the same left-to-right order as they are passed.
  llvm::Value *IdentLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *Tid = getThreadID(CGF, Loc);
  llvm::Value *VarPtr =
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
  llvm::Value *VarSize = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
  llvm::Value *Cache = getOrCreateThreadPrivateCache(VD);
  llvm::Value *Args[] = {IdentLoc, Tid, VarPtr, VarSize, Cache};

  llvm::Value *ThreadCopy = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
      Args);
  return Address(ThreadCopy, VDAddr.getAlignment());
}
/// Emit the runtime calls that register the constructor \p Ctor, copy
/// constructor \p CopyCtor and destructor \p Dtor for the threadprivate
/// variable at \p VDAddr.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  llvm::Value *IdentLoc = emitUpdateLocation(CGF, Loc);

  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      IdentLoc);

  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *VarPtr =
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy);
  llvm::Value *RegisterArgs[] = {IdentLoc, VarPtr, Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      RegisterArgs);
}
/// Emit the helper functions and runtime registration needed for the
/// definition of a threadprivate variable.
///
/// Nothing is emitted (and nullptr is returned) when the OpenMPUseTLS language
/// option is set and the target supports TLS. Otherwise, the first time a
/// given definition is seen (tracked by mangled name in
/// ThreadPrivateWithDefinition) this generates:
///  - a constructor helper `void *(void *)` when compiling C++ and
///    \p PerformInit is set;
///  - a destructor helper `void (void *)` when the variable's type needs
///    destruction;
/// and registers them via emitThreadPrivateVarInit.
///
/// \param VD          Variable declaration; its definition is looked up.
/// \param VDAddr      Address of the original variable.
/// \param Loc         Location used for the generated functions and calls.
/// \param PerformInit Whether a constructor helper should be generated.
/// \param CGF         If non-null, the registration calls are emitted into
///                    this function; if null, a standalone init function is
///                    created instead.
/// \returns The standalone init function when \p CGF is null and a
///          constructor or destructor helper was generated; nullptr in every
///          other case.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
const VarDecl *VD, Address VDAddr, SourceLocation Loc,
bool PerformInit, CodeGenFunction *CGF) {
// With TLS-based threadprivates no runtime registration is required.
if (CGM.getLangOpts().OpenMPUseTLS &&
CGM.getContext().getTargetInfo().isTLSSupported())
return nullptr;
VD = VD->getDefinition(CGM.getContext());
// Handle each definition only once; insert() reports whether the mangled
// name was newly added.
if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
QualType ASTTy = VD->getType();
llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
// NOTE(review): Init is dereferenced below without a null check; presumably
// PerformInit is only set when an initializer exists -- confirm with callers.
const Expr *Init = VD->getAnyInitializer();
if (CGM.getLangOpts().CPlusPlus && PerformInit) {
// Generate function that re-emits the declaration's initializer into the
// threadprivate copy of the variable VD
CodeGenFunction CtorCGF(CGM);
FunctionArgList Args;
ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
/*Id=*/nullptr, CGM.getContext().VoidPtrTy,
ImplicitParamDecl::Other);
Args.push_back(&Dst);
const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
CGM.getContext().VoidPtrTy, Args);
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
std::string Name = getName({"__kmpc_global_ctor_", ""});
llvm::Function *Fn =
CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
Args, Loc, Loc);
// Load the void* argument and view it as a pointer to the variable's type.
llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
CGM.getContext().VoidPtrTy, Dst.getLocation());
Address Arg = Address(ArgVal, VDAddr.getAlignment());
Arg = CtorCGF.Builder.CreateElementBitCast(
Arg, CtorCGF.ConvertTypeForMem(ASTTy));
CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
/*IsInitializer=*/true);
// The helper returns the pointer it was given: reload the argument and
// store it into the return slot.
ArgVal = CtorCGF.EmitLoadOfScalar(
CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
CGM.getContext().VoidPtrTy, Dst.getLocation());
CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
CtorCGF.FinishFunction();
Ctor = Fn;
}
if (VD->getType().isDestructedType() != QualType::DK_none) {
// Generate function that emits destructor call for the threadprivate copy
// of the variable VD
CodeGenFunction DtorCGF(CGM);
FunctionArgList Args;
ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
/*Id=*/nullptr, CGM.getContext().VoidPtrTy,
ImplicitParamDecl::Other);
Args.push_back(&Dst);
const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
CGM.getContext().VoidTy, Args);
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
std::string Name = getName({"__kmpc_global_dtor_", ""});
llvm::Function *Fn =
CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
Loc, Loc);
// Create a scope with an artificial location for the body of this function.
auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
DtorCGF.GetAddrOfLocalVar(&Dst),
/*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
DtorCGF.getDestroyer(ASTTy.isDestructedType()),
DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
DtorCGF.FinishFunction();
Dtor = Fn;
}
// Do not emit init function if it is not required.
if (!Ctor && !Dtor)
return nullptr;
llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
/*isVarArg=*/false)
->getPointerTo();
// Copying constructor for the threadprivate variable.
// Must be NULL - reserved by runtime, but currently it requires that this
// parameter is always NULL. Otherwise it fires assertion.
CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
// Whichever helper was not generated is passed as a typed null function
// pointer.
if (Ctor == nullptr) {
auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
/*isVarArg=*/false)
->getPointerTo();
Ctor = llvm::Constant::getNullValue(CtorTy);
}
if (Dtor == nullptr) {
auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
/*isVarArg=*/false)
->getPointerTo();
Dtor = llvm::Constant::getNullValue(DtorTy);
}
// Without a caller-provided function, wrap the registration in a dedicated
// nullary init function and hand it back to the caller.
if (!CGF) {
auto *InitFunctionTy =
llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
std::string Name = getName({"__omp_threadprivate_init_", ""});
llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
CodeGenFunction InitCGF(CGM);
FunctionArgList ArgList;
InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
CGM.getTypes().arrangeNullaryFunction(), ArgList,
Loc, Loc);
emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
InitCGF.FinishFunction();
return InitFunction;
}
// Otherwise emit the registration directly into the caller's function.
emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
}
return nullptr;
}
/// Emit (or register placeholders for) the offloading constructor and
/// destructor entries of a `declare target` variable definition.
///
/// \param VD          Variable declaration; its definition is looked up.
/// \param Addr        Global variable that the generated ctor/dtor operate on.
/// \param PerformInit Whether a constructor entry should be produced.
/// \returns false when there are no offload target triples and this is not a
///          device compilation; otherwise the value of the OpenMPIsDevice
///          language option.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
llvm::GlobalVariable *Addr,
bool PerformInit) {
// Nothing to do when offloading is not involved at all.
if (CGM.getLangOpts().OMPTargetTriples.empty() &&
!CGM.getLangOpts().OpenMPIsDevice)
return false;
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
// Skip variables that are not declare target, are 'link', or are 'to' while
// unified shared memory is required.
if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
(*Res == OMPDeclareTargetDeclAttr::MT_To &&
HasRequiresUnifiedSharedMemory))
return CGM.getLangOpts().OpenMPIsDevice;
VD = VD->getDefinition(CGM.getContext());
assert(VD && "Unknown VarDecl");
// Process each definition only once (keyed by mangled name).
if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
return CGM.getLangOpts().OpenMPIsDevice;
QualType ASTTy = VD->getType();
SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
// Produce the unique prefix to identify the new target regions. We use
// the source location of the variable declaration which we know to not
// conflict with any target region.
unsigned DeviceID;
unsigned FileID;
unsigned Line;
getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
// Buffer holds the common name prefix; Out is scratch storage used when the
// "_ctor"/"_dtor" entry names are rendered below.
SmallString<128> Buffer, Out;
{
llvm::raw_svector_ostream OS(Buffer);
OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
<< llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
}
// NOTE(review): Init is dereferenced below without a null check; presumably
// PerformInit is only set when an initializer exists -- confirm with callers.
const Expr *Init = VD->getAnyInitializer();
if (CGM.getLangOpts().CPlusPlus && PerformInit) {
llvm::Constant *Ctor;
llvm::Constant *ID;
if (CGM.getLangOpts().OpenMPIsDevice) {
// Generate function that re-emits the declaration's initializer into
// the declare target variable VD (at Addr).
CodeGenFunction CtorCGF(CGM);
const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
FTy, Twine(Buffer, "_ctor"), FI, Loc);
auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
FunctionArgList(), Loc, Loc);
auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
CtorCGF.EmitAnyExprToMem(Init,
Address(Addr, CGM.getContext().getDeclAlign(VD)),
Init->getType().getQualifiers(),
/*IsInitializer=*/true);
CtorCGF.FinishFunction();
Ctor = Fn;
ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
} else {
// Host side: no function is generated; a private constant i8 global
// stands in as the address and ID of the entry.
Ctor = new llvm::GlobalVariable(
CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
llvm::GlobalValue::PrivateLinkage,
llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
ID = Ctor;
}
// Register the information for the entry associated with the constructor.
Out.clear();
OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
}
if (VD->getType().isDestructedType() != QualType::DK_none) {
llvm::Constant *Dtor;
llvm::Constant *ID;
if (CGM.getLangOpts().OpenMPIsDevice) {
// Generate function that emits destructor call for the declare target
// variable VD (at Addr).
CodeGenFunction DtorCGF(CGM);
const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
FTy, Twine(Buffer, "_dtor"), FI, Loc);
auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
FunctionArgList(), Loc, Loc);
// Create a scope with an artificial location for the body of this
// function.
auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
DtorCGF.FinishFunction();
Dtor = Fn;
ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
} else {
// Host side: as for the constructor, a private constant i8 global serves
// as the address and ID of the entry.
Dtor = new llvm::GlobalVariable(
CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
llvm::GlobalValue::PrivateLinkage,
llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
ID = Dtor;
}
// Register the information for the entry associated with the destructor.
Out.clear();
OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
}
return CGM.getLangOpts().OpenMPIsDevice;
}
/// Return the address of a compiler-generated ("artificial") threadprivate
/// variable of type \p VarType identified by \p Name.
///
/// When OpenMP TLS is enabled and the target supports TLS, the backing
/// internal global is marked thread-local and its address is returned
/// directly. Otherwise the address is obtained from a call to
/// __kmpc_threadprivate_cached using a companion cache variable.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string ArtificialSuffix = getName({"artificial", ""});
  llvm::Type *MemTy = CGF.ConvertTypeForMem(VarType);
  llvm::Value *Global = getOrCreateInternalVariable(
      MemTy, Twine(Name).concat(ArtificialSuffix));

  const LangOptions &LangOpts = CGM.getLangOpts();
  const bool UseTLS = LangOpts.OpenMP && LangOpts.OpenMPUseTLS &&
                      CGM.getTarget().isTLSSupported();
  if (UseTLS) {
    cast<llvm::GlobalVariable>(Global)->setThreadLocal(/*Val=*/true);
    return Address(Global, CGM.getContext().getTypeAlignInChars(VarType));
  }

  // Runtime-managed path: build the argument list for
  // __kmpc_threadprivate_cached in declaration order.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Ident = emitUpdateLocation(CGF, SourceLocation());
  llvm::Value *ThreadID = getThreadID(CGF, SourceLocation());
  llvm::Value *GlobalAsVoidPtr =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Global, CGM.VoidPtrTy);
  llvm::Value *VarSize = CGF.Builder.CreateIntCast(
      CGF.getTypeSize(VarType), CGM.SizeTy, /*isSigned=*/false);
  llvm::Value *Cache = getOrCreateInternalVariable(
      CGM.VoidPtrPtrTy,
      Twine(Name).concat(ArtificialSuffix).concat(CacheSuffix));
  llvm::Value *CallArgs[] = {Ident, ThreadID, GlobalAsVoidPtr, VarSize, Cache};

  llvm::FunctionCallee CachedFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_threadprivate_cached);
  llvm::Value *RawPtr = CGF.EmitRuntimeCall(CachedFn, CallArgs);
  llvm::Value *TypedPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      RawPtr, MemTy->getPointerTo(/*AddrSpace=*/0));
  return Address(TypedPtr, CGM.getContext().getTypeAlignInChars(VarType));
}
/// Emit code for an OpenMP 'if' clause: run \p ThenGen when \p Cond holds and
/// \p ElseGen otherwise. If the condition constant-folds, only the live arm is
/// emitted and no branch is generated.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // Fast path: a condition that folds to a constant lets us skip both the
  // condition itself and the dead arm.
  bool FoldedValue;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, FoldedValue)) {
    const RegionCodeGenTy &LiveArm = FoldedValue ? ThenGen : ElseGen;
    LiveArm(CGF);
    return;
  }

  // General path: the condition did not fold (or could not be elided), so
  // emit a real conditional branch over three blocks.
  llvm::BasicBlock *ThenBB = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBB = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *EndBB = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBB, ElseBB, /*TrueCount=*/0);

  // 'then' arm.
  CGF.EmitBlock(ThenBB);
  ThenGen(CGF);
  CGF.EmitBranch(EndBB);

  // 'else' arm.
  // There is no need to emit line number for unconditional branch.
  // NOTE(review): the object returned by CreateEmpty is discarded right away
  // here and below -- confirm whether a named, scoped guard was intended.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBB);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(EndBB);

  // Continuation block for the code following the if.
  CGF.EmitBlock(EndBB, /*IsFinished=*/true);
}
/// Emit the code that launches a 'parallel' region.
///
/// Without an 'if' clause this is a single __kmpc_fork_call. With an 'if'
/// clause, the false arm runs the outlined function on the encountering
/// thread, bracketed by __kmpc_serialized_parallel /
/// __kmpc_end_serialized_parallel.
///
/// \param CGF          Function the calls are emitted into.
/// \param Loc          Source location of the directive.
/// \param OutlinedFn   Outlined function implementing the region body.
/// \param CapturedVars Values forwarded to the outlined function.
/// \param IfCond       Condition of the 'if' clause, or null if there is none.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();

  // Parallel path: __kmpc_fork_call(loc, n, microtask, var1, .., varn);
  auto &&ForkGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::SmallVector<llvm::Value *, 16> ForkArgs;
    ForkArgs.push_back(RTLoc);
    // Number of captured vars.
    ForkArgs.push_back(CGF.Builder.getInt32(CapturedVars.size()));
    ForkArgs.push_back(
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy()));
    ForkArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee ForkFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(ForkFn, ForkArgs);
  };

  // Serialized path, taken when the 'if' clause evaluates to false.
  auto &&SerialGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                      this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *GTid = RT.getThreadID(CGF, Loc);

    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *BeginArgs[] = {RTLoc, GTid};
    llvm::FunctionCallee BeginFn = OMPBuilder.getOrCreateRuntimeFunction(
        M, OMPRTL___kmpc_serialized_parallel);
    CGF.EmitRuntimeCall(BeginFn, BeginArgs);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address GTidAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroBound = CGF.CreateDefaultAlignTempAlloca(
        CGF.Int32Ty, /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroBound);
    llvm::SmallVector<llvm::Value *, 16> CallArgs;
    CallArgs.push_back(GTidAddr.getPointer());
    CallArgs.push_back(ZeroBound.getPointer());
    CallArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Keep the outlined function from being inlined. Functions handed to
    // __kmpc_fork_call are never inlined anyway, but a direct call from a
    // serialized region could be. This is not perfect, yet it gets closer to
    // the invariant that every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    // handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, CallArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), GTid};
    llvm::FunctionCallee EndFn = OMPBuilder.getOrCreateRuntimeFunction(
        M, OMPRTL___kmpc_end_serialized_parallel);
    CGF.EmitRuntimeCall(EndFn, EndArgs);
  };

  if (!IfCond) {
    // No 'if' clause: always fork.
    RegionCodeGenTy ForkRCG(ForkGen);
    ForkRCG(CGF);
    return;
  }
  emitIfClause(CGF, IfCond, ForkGen, SerialGen);
}
// Produce an address holding the current thread ID.
//
// Inside an (outlined) parallel region the region info already carries a
// thread-ID variable (the outlined function receives it as its first
// argument, "kmp_int32 *gtid"), so its address is returned directly. In
// regular serial code the thread ID is obtained through getThreadID (i.e.
// kmp_int32 kmpc_global_thread_num(ident_t *loc)), stashed in a temporary,
// and the address of that temporary is returned.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  auto *RegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (RegionInfo && RegionInfo->getThreadIDVariable())
    return RegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  llvm::Value *GTid = getThreadID(CGF, Loc);
  QualType KmpInt32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address GTidTemp = CGF.CreateMemTemp(KmpInt32Ty, /*Name*/ ".threadid_temp.");
  LValue GTidLV = CGF.MakeAddrLValue(GTidTemp, KmpInt32Ty);
  CGF.EmitStoreOfScalar(GTid, GTidLV);
  return GTidTemp;
}
/// Look up the internal OpenMP variable called \p Name, creating it on first
/// use as a zero-initialized, common-linkage global of type \p Ty in
/// \p AddressSpace. A repeated request must use the same type.
llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  SmallString<256> NameStorage;
  StringRef RuntimeName = Name.toStringRef(NameStorage);

  auto &Entry = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Entry.second) {
    // Already created earlier: verify the type and reuse it.
    assert(Entry.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Entry.second;
  }

  // First request: create the global, named after the key stored in the map.
  auto *NewVar = new llvm::GlobalVariable(
      CGM.getModule(), Ty, /*IsConstant*/ false,
      llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
      Entry.first(), /*InsertBefore=*/nullptr,
      llvm::GlobalValue::NotThreadLocal, AddressSpace);
  Entry.second = NewVar;
  return NewVar;
}
/// Return the internal lock variable for the critical region named
/// \p CriticalName. The variable's name is built from the
/// "gomp_critical_user_" prefix followed by the region name, and "var".
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string LockPrefix =
      (Twine("gomp_critical_user_") + CriticalName).str();
  std::string LockName = getName({LockPrefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, LockName);
}
namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Enter() emits a call to the "enter" runtime function; for a conditional
/// action it additionally branches on whether that call returned a non-null
/// value, opening an "omp_if.then" block for the guarded code. Exit() emits
/// the call to the "exit" runtime function. Done() closes the guarded region
/// of a conditional action and has to be invoked explicitly by the user of
/// the action.
///
/// The argument lists are stored as ArrayRefs, i.e. the action does not own
/// the argument storage; callers must keep it alive while the action is used.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  /// Whether the region is guarded by the result of the enter call.
  bool Conditional;
  /// Continuation block of a conditional action; set by Enter().
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}

  /// Emit the enter call and, if conditional, start the guarded block.
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }

  /// Finish the guarded region of a conditional action by branching to and
  /// emitting the continuation block. Only valid after Enter() has run on a
  /// conditional action; otherwise there is no continuation block to emit.
  void Done(CodeGenFunction &CGF) {
    assert(ContBlock &&
           "Done() requires a conditional action whose Enter() has run");
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }

  /// Emit the exit call.
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
/// Emit a 'critical' region:
///
///   __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
///   CriticalOpGen();
///   __kmpc_end_critical(ident_t *, gtid, Lock);
///
/// \param CGF           Function the region is emitted into.
/// \param CriticalName  Name of the critical region; selects the lock.
/// \param CriticalOpGen Generator for the region body.
/// \param Loc           Source location of the directive.
/// \param Hint          Optional hint expression; when present the
///                      "_with_hint" entry point is used.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  if (!CGF.HaveInsertPoint())
    return;

  // Arguments shared by the enter and exit calls: (loc, gtid, lock).
  llvm::Value *LockArgs[] = {emitUpdateLocation(CGF, Loc),
                             getThreadID(CGF, Loc),
                             getCriticalRegionLock(CriticalName)};

  // The enter call takes the same arguments, plus the hint when given.
  llvm::SmallVector<llvm::Value *, 4> EnterArgs;
  EnterArgs.append(std::begin(LockArgs), std::end(LockArgs));
  if (Hint) {
    llvm::Value *HintVal = CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false);
    EnterArgs.push_back(HintVal);
  }

  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      LockArgs);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
/// Emit a 'master' region:
///
///   if(__kmpc_master(ident_t *, gtid)) {
///     MasterOpGen();
///     __kmpc_end_master(ident_t *, gtid);
///   }
///
/// \param CGF         Function the region is emitted into.
/// \param MasterOpGen Generator for the region body.
/// \param Loc         Source location of the directive.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  // (loc, gtid) is passed to both the enter and the exit call.
  llvm::Value *RTArgs[] = {emitUpdateLocation(CGF, Loc),
                           getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        RTArgs,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        RTArgs,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Close the block guarded by the result of __kmpc_master.
  Action.Done(CGF);
}
/// Emit a 'masked' region:
///
///   if(__kmpc_masked(ident_t *, gtid, filter)) {
///     MaskedOpGen();
///     __kmpc_end_masked(ident_t *, gtid);
///   }
///
/// \param CGF         Function the region is emitted into.
/// \param MaskedOpGen Generator for the region body.
/// \param Loc         Source location of the directive.
/// \param Filter      Optional 'filter' clause expression; a constant 0 is
///                    passed to the runtime when it is null.
void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal;
  if (Filter) {
    // Emit the filter as an ordinary scalar whose result is used, then
    // convert it to the 32-bit integer the runtime entry point takes. The
    // second parameter of EmitScalarExpr is a bool (ignore-result flag), not
    // a destination type, so the conversion has to be done explicitly -- the
    // same way the 'hint' of a critical region is handled.
    FilterVal = CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Filter), CGM.Int32Ty,
        Filter->getType()->isSignedIntegerOrEnumerationType());
  } else {
    FilterVal = llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  }
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  // The exit call takes only (loc, gtid).
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  // Close the block guarded by the result of __kmpc_masked.
  Action.Done(CGF);
}
/// Emit code for a 'taskyield' directive at \p Loc, either through the
/// OpenMPIRBuilder (when enabled by the language options) or as a direct
/// call to __kmpc_omp_taskyield. Afterwards, if the current captured region
/// is an OpenMP region, its untied-task switch is emitted.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  const bool UseIRBuilder = CGF.CGM.getLangOpts().OpenMPIRBuilder;
  if (!UseIRBuilder) {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Ident = emitUpdateLocation(CGF, Loc);
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *EndPart =
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true);
    llvm::Value *YieldArgs[] = {Ident, ThreadID, EndPart};
    llvm::FunctionCallee YieldFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_omp_taskyield);
    CGF.EmitRuntimeCall(YieldFn, YieldArgs);
  } else {
    OMPBuilder.createTaskyield(CGF.Builder);
  }

  auto *RegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (RegionInfo)
    RegionInfo->emitUntiedSwitch(CGF);
}
/// Emit a 'taskgroup' region:
///
///   __kmpc_taskgroup(ident_t *, gtid);
///   TaskgroupOpGen();
///   __kmpc_end_taskgroup(ident_t *, gtid);
///
/// \param CGF            Function the region is emitted into.
/// \param TaskgroupOpGen Generator for the region body.
/// \param Loc            Source location of the directive.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  // (loc, gtid) is passed to both the enter and the exit call.
  llvm::Value *RTArgs[] = {emitUpdateLocation(CGF, Loc),
                           getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        RTArgs,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        RTArgs);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}
/// Given an array of pointers to variables, load the pointer stored at
/// position \p Index and return it as an address typed and aligned for
/// \p Var.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Fetch the pointer held in slot Index of the array.
  Address SlotAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *VarPtr = CGF.Builder.CreateLoad(SlotAddr);
  // Re-type it as a pointer to the variable's in-memory type.
  Address Untyped(VarPtr, CGF.getContext().getDeclAlign(Var));
  return CGF.Builder.CreateElementBitCast(
      Untyped, CGF.ConvertTypeForMem(Var->getType()));
}
/// Build the internal helper `void copy_func(void *LHSArg, void *RHSArg)`
/// used for 'copyprivate'. Both arguments are treated as arrays of pointers
/// (of type \p ArgsType); for every index I the helper performs the
/// assignment described by AssignmentOps[I] from the source variable to the
/// destination variable.
///
/// \returns The generated function.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &Ctx = CGM.getContext();

  // Signature: void copy_func(void *LHSArg, void *RHSArg);
  ImplicitParamDecl LHSArg(Ctx, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           Ctx.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(Ctx, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           Ctx.VoidPtrTy, ImplicitParamDecl::Other);
  FunctionArgList Params;
  Params.push_back(&LHSArg);
  Params.push_back(&RHSArg);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params);

  std::string FnName =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *CopyFn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(FnInfo),
      llvm::GlobalValue::InternalLinkage, FnName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), CopyFn, FnInfo);
  CopyFn->setDoesNotRecurse();

  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, CopyFn, FnInfo, Params, Loc,
                    Loc);

  // Dest = (void*[n])(LHSArg);
  llvm::Value *DestArrayPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType);
  Address DestArray(DestArrayPtr, CGF.getPointerAlign());
  // Src = (void*[n])(RHSArg);
  llvm::Value *SrcArrayPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType);
  Address SrcArray(SrcArrayPtr, CGF.getPointerAlign());

  // Element-wise copy:
  //   *(Type0*)Dst[0] = *(Type0*)Src[0];
  //   *(Type1*)Dst[1] = *(Type1*)Src[1];
  //   ...
  //   *(Typen*)Dst[n] = *(Typen*)Src[n];
  const unsigned NumVars = AssignmentOps.size();
  for (unsigned Idx = 0; Idx != NumVars; ++Idx) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[Idx])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, DestArray, Idx, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[Idx])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, SrcArray, Idx, SrcVar);

    // The copied type is that of the original copyprivate variable.
    const auto *OrigVar = cast<DeclRefExpr>(CopyprivateVars[Idx])->getDecl();
    QualType CopyTy = OrigVar->getType();
    CGF.EmitOMPCopy(CopyTy, DestAddr, SrcAddr, DestVar, SrcVar,
                    AssignmentOps[Idx]);
  }

  CGF.FinishFunction();
  return CopyFn;
}
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
const RegionCodeGenTy &SingleOpGen,
SourceLocation Loc,
ArrayRef<const Expr *> CopyprivateVars,
ArrayRef<const Expr *> SrcExprs,
ArrayRef<const Expr *> DstExprs,
ArrayRef<const Expr *> AssignmentOps) {
if (!CGF.HaveInsertPoint())
return;
assert(CopyprivateVars.size() == SrcExprs.size() &&
CopyprivateVars.size() == DstExprs.size() &&
CopyprivateVars.size() == AssignmentOps.size());
ASTContext &C = CGM.getContext();
// int32 did_it = 0;
// if(__kmpc_single(ident_t *, gtid)) {
// SingleOpGen();
// __kmpc_end_single(ident_t *, gtid);
// did_it = 1;
// }
// call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
// <copy_func>, did_it);
Address DidIt = Address::invalid();
if (!CopyprivateVars.empty()) {
// int32 did_it = 0;
QualType KmpInt32Ty =
C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
}
// Prepare arguments and build a call to __kmpc_single
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_single),
Args,
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_end_single),
Args,
/*Conditional=*/true);
SingleOpGen.setAction(Action);
emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
if (DidIt.isValid()) {
// did_it = 1;
CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
}
Action.Done(CGF);
// call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
// <copy_func>, did_it);
if (DidIt.isValid()) {
llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
QualType CopyprivateArrayTy = C.getConstantArrayType(
C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
/*IndexTypeQuals=*/0);
// Create a list of all private variables for copyprivate.
Address CopyprivateList =
CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
CGF.Builder.CreateStore(
CGF.