//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/AtomicOrdering.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;
static const VarDecl *getBaseDecl(const Expr *Ref);
namespace {
/// Lexical scope for OpenMP executable constructs that handles correct
/// codegen for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
for (const auto *C : S.clauses()) {
if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
if (const auto *PreInit =
cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
for (const auto *I : PreInit->decls()) {
if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
CGF.EmitVarDecl(cast<VarDecl>(*I));
} else {
CodeGenFunction::AutoVarEmission Emission =
CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
CGF.EmitAutoVarCleanups(Emission);
}
}
}
}
}
}
CodeGenFunction::OMPPrivateScope InlinedShareds;
static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
return CGF.LambdaCaptureFields.lookup(VD) ||
(CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
(CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
}
public:
OMPLexicalScope(
CodeGenFunction &CGF, const OMPExecutableDirective &S,
const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
const bool EmitPreInitStmt = true)
: CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
InlinedShareds(CGF) {
if (EmitPreInitStmt)
emitPreInitStmt(CGF, S);
if (!CapturedRegion.hasValue())
return;
assert(S.hasAssociatedStmt() &&
"Expected associated statement for inlined directive.");
const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
for (const auto &C : CS->captures()) {
if (C.capturesVariable() || C.capturesVariableByCopy()) {
auto *VD = C.getCapturedVar();
assert(VD == VD->getCanonicalDecl() &&
"Canonical decl must be captured.");
DeclRefExpr DRE(
CGF.getContext(), const_cast<VarDecl *>(VD),
isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
InlinedShareds.isGlobalVarCaptured(VD)),
VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
return CGF.EmitLValue(&DRE).getAddress(CGF);
});
}
}
(void)InlinedShareds.Privatize();
}
};
/// Lexical scope for the OpenMP parallel construct that handles correct
/// codegen for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
bool EmitPreInitStmt(const OMPExecutableDirective &S) {
OpenMPDirectiveKind Kind = S.getDirectiveKind();
return !(isOpenMPTargetExecutionDirective(Kind) ||
isOpenMPLoopBoundSharingDirective(Kind)) &&
isOpenMPParallelDirective(Kind);
}
public:
OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
: OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
EmitPreInitStmt(S)) {}
};
/// Lexical scope for the OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
bool EmitPreInitStmt(const OMPExecutableDirective &S) {
OpenMPDirectiveKind Kind = S.getDirectiveKind();
return !isOpenMPTargetExecutionDirective(Kind) &&
isOpenMPTeamsDirective(Kind);
}
public:
OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
: OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
EmitPreInitStmt(S)) {}
};
/// Private scope for OpenMP loop-based directives that supports capturing
/// of expressions used in the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
const DeclStmt *PreInits;
CodeGenFunction::OMPMapVars PreCondVars;
if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
for (const auto *E : LD->counters()) {
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
EmittedAsPrivate.insert(VD->getCanonicalDecl());
(void)PreCondVars.setVarAddr(
CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
}
// Mark private vars as undefs.
for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
for (const Expr *IRef : C->varlists()) {
const auto *OrigVD =
cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
(void)PreCondVars.setVarAddr(
CGF, OrigVD,
Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
CGF.getContext().getPointerType(
OrigVD->getType().getNonReferenceType()))),
CGF.getContext().getDeclAlign(OrigVD)));
}
}
}
(void)PreCondVars.apply(CGF);
// Emit the init, __range, and __end variables for C++ range-based for loops.
(void)OMPLoopBasedDirective::doForAllLoops(
LD->getInnermostCapturedStmt()->getCapturedStmt(),
/*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
[&CGF](unsigned Cnt, const Stmt *CurStmt) {
if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
if (const Stmt *Init = CXXFor->getInit())
CGF.EmitStmt(Init);
CGF.EmitStmt(CXXFor->getRangeStmt());
CGF.EmitStmt(CXXFor->getEndStmt());
}
return false;
});
PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
} else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
} else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits());
} else {
llvm_unreachable("Unknown loop-based directive kind.");
}
if (PreInits) {
for (const auto *I : PreInits->decls())
CGF.EmitVarDecl(cast<VarDecl>(*I));
}
PreCondVars.restore(CGF);
}
public:
OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
: CodeGenFunction::RunCleanupsScope(CGF) {
emitPreInitStmt(CGF, S);
}
};
class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
CodeGenFunction::OMPPrivateScope InlinedShareds;
static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
return CGF.LambdaCaptureFields.lookup(VD) ||
(CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
(CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
}
public:
OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
: CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
InlinedShareds(CGF) {
for (const auto *C : S.clauses()) {
if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
if (const auto *PreInit =
cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
for (const auto *I : PreInit->decls()) {
if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
CGF.EmitVarDecl(cast<VarDecl>(*I));
} else {
CodeGenFunction::AutoVarEmission Emission =
CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
CGF.EmitAutoVarCleanups(Emission);
}
}
}
} else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
for (const Expr *E : UDP->varlists()) {
const Decl *D = cast<DeclRefExpr>(E)->getDecl();
if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
CGF.EmitVarDecl(*OED);
}
} else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
for (const Expr *E : UDP->varlists()) {
const Decl *D = getBaseDecl(E);
if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
CGF.EmitVarDecl(*OED);
}
}
}
if (!isOpenMPSimdDirective(S.getDirectiveKind()))
CGF.EmitOMPPrivateClause(S, InlinedShareds);
if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
if (const Expr *E = TG->getReductionRef())
CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
}
// Temp copy arrays for inscan reductions should not be emitted, as they are
// not used in simd-only mode.
llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
if (C->getModifier() != OMPC_REDUCTION_inscan)
continue;
for (const Expr *E : C->copy_array_temps())
CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
}
const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
while (CS) {
for (auto &C : CS->captures()) {
if (C.capturesVariable() || C.capturesVariableByCopy()) {
auto *VD = C.getCapturedVar();
if (CopyArrayTemps.contains(VD))
continue;
assert(VD == VD->getCanonicalDecl() &&
"Canonical decl must be captured.");
DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
isCapturedVar(CGF, VD) ||
(CGF.CapturedStmtInfo &&
InlinedShareds.isGlobalVarCaptured(VD)),
VD->getType().getNonReferenceType(), VK_LValue,
C.getLocation());
InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
return CGF.EmitLValue(&DRE).getAddress(CGF);
});
}
}
CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
}
(void)InlinedShareds.Privatize();
}
};
} // namespace
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
const OMPExecutableDirective &S,
const RegionCodeGenTy &CodeGen);
LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
OrigVD = OrigVD->getCanonicalDecl();
bool IsCaptured =
LambdaCaptureFields.lookup(OrigVD) ||
(CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
(CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
return EmitLValue(&DRE);
}
}
return EmitLValue(E);
}
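// Example (a sketch, not emitted verbatim): for a VLA 'int a[n][m]', the
// size is not known statically, so getTypeSize below computes it at runtime,
// roughly as 'n * m * sizeof(int)', by chaining NUW multiplies over the
// recorded VLA dimensions.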
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
ASTContext &C = getContext();
llvm::Value *Size = nullptr;
auto SizeInChars = C.getTypeSizeInChars(Ty);
if (SizeInChars.isZero()) {
// getTypeSizeInChars() returns 0 for a VLA.
while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
VlaSizePair VlaSize = getVLASize(VAT);
Ty = VlaSize.Type;
Size =
Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) : VlaSize.NumElts;
}
SizeInChars = C.getTypeSizeInChars(Ty);
if (SizeInChars.isZero())
return llvm::ConstantInt::get(SizeTy, /*V=*/0);
return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
}
return CGM.getSize(SizeInChars);
}
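// Example (a rough sketch of the intent, not the exact IR): for 'int x;'
// captured by copy, the runtime can only forward pointer-sized values, so
// the scalar is funneled through a uintptr-sized temporary:
//
//   uintptr x.casted;
//   *(int *)&x.casted = x;  // store through the source type
//   <pass x.casted>;        // load through the uintptr type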
void CodeGenFunction::GenerateOpenMPCapturedVars(
const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
const RecordDecl *RD = S.getCapturedRecordDecl();
auto CurField = RD->field_begin();
auto CurCap = S.captures().begin();
for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
E = S.capture_init_end();
I != E; ++I, ++CurField, ++CurCap) {
if (CurField->hasCapturedVLAType()) {
const VariableArrayType *VAT = CurField->getCapturedVLAType();
llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
CapturedVars.push_back(Val);
} else if (CurCap->capturesThis()) {
CapturedVars.push_back(CXXThisValue);
} else if (CurCap->capturesVariableByCopy()) {
llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());
// If the field is not a pointer, we need to save the actual value
// and load it as a void pointer.
if (!CurField->getType()->isAnyPointerType()) {
ASTContext &Ctx = getContext();
Address DstAddr = CreateMemTemp(
Ctx.getUIntPtrType(),
Twine(CurCap->getCapturedVar()->getName(), ".casted"));
LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
llvm::Value *SrcAddrVal = EmitScalarConversion(
DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
LValue SrcLV =
MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
// Store the value using the source type pointer.
EmitStoreThroughLValue(RValue::get(CV), SrcLV);
// Load the value using the destination type pointer.
CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
}
CapturedVars.push_back(CV);
} else {
assert(CurCap->capturesVariable() && "Expected capture by reference.");
CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
}
}
}
static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
QualType DstType, StringRef Name,
LValue AddrLV) {
ASTContext &Ctx = CGF.getContext();
llvm::Value *CastedPtr = CGF.EmitScalarConversion(
AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
Ctx.getPointerType(DstType), Loc);
Address TmpAddr =
CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
.getAddress(CGF);
return TmpAddr;
}
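// Example: getCanonicalParamType reduces a VLA parameter type such as
// 'int [n]' to its element type 'int' (the dimensions travel separately as
// VLA sizes) and rebuilds references and pointers around the canonicalized
// pointee, so 'int (&)[n]' becomes 'int &'.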
static QualType getCanonicalParamType(ASTContext &C, QualType T) {
if (T->isLValueReferenceType())
return C.getLValueReferenceType(
getCanonicalParamType(C, T.getNonReferenceType()),
/*SpelledAsLValue=*/false);
if (T->isPointerType())
return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
if (const auto *VLA = dyn_cast<VariableArrayType>(A))
return getCanonicalParamType(C, VLA->getElementType());
if (!A->isVariablyModifiedType())
return C.getCanonicalType(T);
}
return C.getCanonicalParamType(T);
}
namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
/// Captured statement for which the function is generated.
const CapturedStmt *S = nullptr;
/// true if cast to/from UIntPtr is required for variables captured by
/// value.
const bool UIntPtrCastRequired = true;
/// true if only casted arguments must be registered as local args or VLA
/// sizes.
const bool RegisterCastedArgsOnly = false;
/// Name of the generated function.
const StringRef FunctionName;
/// Location of the non-debug version of the outlined function.
SourceLocation Loc;
explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
bool RegisterCastedArgsOnly, StringRef FunctionName,
SourceLocation Loc)
: S(S), UIntPtrCastRequired(UIntPtrCastRequired),
RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
FunctionName(FunctionName), Loc(Loc) {}
};
} // namespace
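// Example (a rough sketch; names and argument order are illustrative): for
// '#pragma omp parallel' capturing 'int n' by copy and a VLA 'double a[n]',
// the prologue below builds an outlined function roughly like
//
//   void .omp_outlined.(i32 *.global_tid., i32 *.bound_tid.,
//                       i64 n, i64 vla, double *a);
//
// where, with UIntPtrCastRequired, the non-pointer capture 'n' and the VLA
// size 'vla' are passed as uintptr values.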
static llvm::Function *emitOutlinedFunctionPrologue(
CodeGenFunction &CGF, FunctionArgList &Args,
llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
&LocalAddrs,
llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
&VLASizes,
llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
const CapturedDecl *CD = FO.S->getCapturedDecl();
const RecordDecl *RD = FO.S->getCapturedRecordDecl();
assert(CD->hasBody() && "missing CapturedDecl body");
CXXThisValue = nullptr;
// Build the argument list.
CodeGenModule &CGM = CGF.CGM;
ASTContext &Ctx = CGM.getContext();
FunctionArgList TargetArgs;
Args.append(CD->param_begin(),
std::next(CD->param_begin(), CD->getContextParamPosition()));
TargetArgs.append(
CD->param_begin(),
std::next(CD->param_begin(), CD->getContextParamPosition()));
auto I = FO.S->captures().begin();
FunctionDecl *DebugFunctionDecl = nullptr;
if (!FO.UIntPtrCastRequired) {
FunctionProtoType::ExtProtoInfo EPI;
QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
DebugFunctionDecl = FunctionDecl::Create(
Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
SourceLocation(), DeclarationName(), FunctionTy,
Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
/*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
/*hasWrittenPrototype=*/false);
}
for (const FieldDecl *FD : RD->fields()) {
QualType ArgType = FD->getType();
IdentifierInfo *II = nullptr;
VarDecl *CapVar = nullptr;
// If this is a capture by copy and the type is not a pointer, the outlined
// function argument type should be uintptr and the value properly casted to
// uintptr. This is necessary given that the runtime library is only able to
// deal with pointers. VLA type sizes are passed to the outlined function in
// the same way.
if (FO.UIntPtrCastRequired &&
((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
I->capturesVariableArrayType()))
ArgType = Ctx.getUIntPtrType();
if (I->capturesVariable() || I->capturesVariableByCopy()) {
CapVar = I->getCapturedVar();
II = CapVar->getIdentifier();
} else if (I->capturesThis()) {
II = &Ctx.Idents.get("this");
} else {
assert(I->capturesVariableArrayType());
II = &Ctx.Idents.get("vla");
}
if (ArgType->isVariablyModifiedType())
ArgType = getCanonicalParamType(Ctx, ArgType);
VarDecl *Arg;
if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
Arg = ParmVarDecl::Create(
Ctx, DebugFunctionDecl,
CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
/*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
} else {
Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
II, ArgType, ImplicitParamDecl::Other);
}
Args.emplace_back(Arg);
// Do not cast arguments if we emit the function with non-original types.
TargetArgs.emplace_back(
FO.UIntPtrCastRequired
? Arg
: CGM.getOpenMPRuntime().translateParameter(FD, Arg));
++I;
}
Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
CD->param_end());
TargetArgs.append(
std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
CD->param_end());
// Create the function declaration.
const CGFunctionInfo &FuncInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
auto *F =
llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
FO.FunctionName, &CGM.getModule());
CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
if (CD->isNothrow())
F->setDoesNotThrow();
F->setDoesNotRecurse();
// Always inline the outlined function if optimizations are enabled.
if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
F->removeFnAttr(llvm::Attribute::NoInline);
F->addFnAttr(llvm::Attribute::AlwaysInline);
}
// Generate the function.
CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
FO.UIntPtrCastRequired ? FO.Loc
: CD->getBody()->getBeginLoc());
unsigned Cnt = CD->getContextParamPosition();
I = FO.S->captures().begin();
for (const FieldDecl *FD : RD->fields()) {
// Do not map arguments if we emit the function with non-original types.
Address LocalAddr(Address::invalid());
if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
TargetArgs[Cnt]);
} else {
LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
}
// If we are capturing a pointer by copy we don't need to do anything; just
// use the value that we get from the arguments.
if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
const VarDecl *CurVD = I->getCapturedVar();
if (!FO.RegisterCastedArgsOnly)
LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
++Cnt;
++I;
continue;
}
LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
AlignmentSource::Decl);
if (FD->hasCapturedVLAType()) {
if (FO.UIntPtrCastRequired) {
ArgLVal = CGF.MakeAddrLValue(
castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
Args[Cnt]->getName(), ArgLVal),
FD->getType(), AlignmentSource::Decl);
}
llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
const VariableArrayType *VAT = FD->getCapturedVLAType();
VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
} else if (I->capturesVariable()) {
const VarDecl *Var = I->getCapturedVar();
QualType VarTy = Var->getType();
Address ArgAddr = ArgLVal.getAddress(CGF);
if (ArgLVal.getType()->isLValueReferenceType()) {
ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
} else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
assert(ArgLVal.getType()->isPointerType());
ArgAddr = CGF.EmitLoadOfPointer(
ArgAddr, ArgLVal.getType()->castAs<PointerType>());
}
if (!FO.RegisterCastedArgsOnly) {
LocalAddrs.insert(
{Args[Cnt],
{Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
}
} else if (I->capturesVariableByCopy()) {
assert(!FD->getType()->isAnyPointerType() &&
"Not expecting a captured pointer.");
const VarDecl *Var = I->getCapturedVar();
LocalAddrs.insert({Args[Cnt],
{Var, FO.UIntPtrCastRequired
? castValueFromUintptr(
CGF, I->getLocation(), FD->getType(),
Args[Cnt]->getName(), ArgLVal)
: ArgLVal.getAddress(CGF)}});
} else {
// If 'this' is captured, load it into CXXThisValue.
assert(I->capturesThis());
CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
}
++Cnt;
++I;
}
return F;
}
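// Generates the outlined function for a captured statement. When debug info
// is requested, the body is emitted into a '<helper>_debug__' function that
// keeps the original parameter types, and the helper itself becomes a thin
// wrapper with uintptr-cast parameters that forwards to it.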
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
SourceLocation Loc) {
assert(
CapturedStmtInfo &&
"CapturedStmtInfo should be set when generating the captured function");
const CapturedDecl *CD = S.getCapturedDecl();
// Build the argument list.
bool NeedWrapperFunction =
getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
FunctionArgList Args;
llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
SmallString<256> Buffer;
llvm::raw_svector_ostream Out(Buffer);
Out << CapturedStmtInfo->getHelperName();
if (NeedWrapperFunction)
Out << "_debug__";
FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
Out.str(), Loc);
llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
VLASizes, CXXThisValue, FO);
CodeGenFunction::OMPPrivateScope LocalScope(*this);
for (const auto &LocalAddrPair : LocalAddrs) {
if (LocalAddrPair.second.first) {
LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
return LocalAddrPair.second.second;
});
}
}
(void)LocalScope.Privatize();
for (const auto &VLASizePair : VLASizes)
VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
PGO.assignRegionCounters(GlobalDecl(CD), F);
CapturedStmtInfo->EmitBody(*this, CD->getBody());
(void)LocalScope.ForceCleanup();
FinishFunction(CD->getBodyRBrace());
if (!NeedWrapperFunction)
return F;
FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
/*RegisterCastedArgsOnly=*/true,
CapturedStmtInfo->getHelperName(), Loc);
CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
Args.clear();
LocalAddrs.clear();
VLASizes.clear();
llvm::Function *WrapperF =
emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
WrapperCGF.CXXThisValue, WrapperFO);
llvm::SmallVector<llvm::Value *, 4> CallArgs;
auto *PI = F->arg_begin();
for (const auto *Arg : Args) {
llvm::Value *CallArg;
auto I = LocalAddrs.find(Arg);
if (I != LocalAddrs.end()) {
LValue LV = WrapperCGF.MakeAddrLValue(
I->second.second,
I->second.first ? I->second.first->getType() : Arg->getType(),
AlignmentSource::Decl);
if (LV.getType()->isAnyComplexType())
LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
LV.getAddress(WrapperCGF),
PI->getType()->getPointerTo(
LV.getAddress(WrapperCGF).getAddressSpace())));
CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
} else {
auto EI = VLASizes.find(Arg);
if (EI != VLASizes.end()) {
CallArg = EI->second.second;
} else {
LValue LV =
WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
Arg->getType(), AlignmentSource::Decl);
CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
}
}
CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
++PI;
}
CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
WrapperCGF.FinishFunction();
return WrapperF;
}
//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
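// Emits an element-by-element copy loop for array assignments; roughly:
//
//   omp.arraycpy.body:
//     <CopyGen(dest.element, src.element)>
//     dest.element.next = dest.element + 1
//     src.element.next = src.element + 1
//     br (dest.element.next == dest.end), omp.arraycpy.done,
//                                         omp.arraycpy.body
//   omp.arraycpy.done: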
void CodeGenFunction::EmitOMPAggregateAssign(
Address DestAddr, Address SrcAddr, QualType OriginalType,
const llvm::function_ref<void(Address, Address)> CopyGen) {
// Perform element-by-element initialization.
QualType ElementTy;
// Drill down to the base element type on both arrays.
const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
llvm::Value *SrcBegin = SrcAddr.getPointer();
llvm::Value *DestBegin = DestAddr.getPointer();
// Cast from pointer to array type to pointer to single element.
llvm::Value *DestEnd =
Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
// The basic structure here is a while-do loop.
llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
llvm::Value *IsEmpty =
Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
// Enter the loop body, making that address the current address.
llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
EmitBlock(BodyBB);
CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
llvm::PHINode *SrcElementPHI =
Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
SrcElementPHI->addIncoming(SrcBegin, EntryBB);
Address SrcElementCurrent =
Address(SrcElementPHI,
SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
llvm::PHINode *DestElementPHI = Builder.CreatePHI(
DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
DestElementPHI->addIncoming(DestBegin, EntryBB);
Address DestElementCurrent =
Address(DestElementPHI,
DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
// Emit copy.
CopyGen(DestElementCurrent, SrcElementCurrent);
// Shift the address forward by one element.
llvm::Value *DestElementNext =
Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
/*Idx0=*/1, "omp.arraycpy.dest.element");
llvm::Value *SrcElementNext =
Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
/*Idx0=*/1, "omp.arraycpy.src.element");
// Check whether we've reached the end.
llvm::Value *Done =
Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
Builder.CreateCondBr(Done, DoneBB, BodyBB);
DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
// Done.
EmitBlock(DoneBB, /*IsFinished=*/true);
}
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
Address SrcAddr, const VarDecl *DestVD,
const VarDecl *SrcVD, const Expr *Copy) {
if (OriginalType->isArrayType()) {
const auto *BO = dyn_cast<BinaryOperator>(Copy);
if (BO && BO->getOpcode() == BO_Assign) {
// Perform simple memcpy for simple copying.
LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
EmitAggregateAssign(Dest, Src, OriginalType);
} else {
// For arrays with complex element types perform element by element
// copying.
EmitOMPAggregateAssign(
DestAddr, SrcAddr, OriginalType,
[this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
// Working with the single array element, so have to remap
// destination and source variables to corresponding array
// elements.
CodeGenFunction::OMPPrivateScope Remap(*this);
Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
(void)Remap.Privatize();
EmitIgnoredExpr(Copy);
});
}
} else {
// Remap pseudo source variable to private copy.
CodeGenFunction::OMPPrivateScope Remap(*this);
Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
(void)Remap.Privatize();
// Emit copying of the whole variable.
EmitIgnoredExpr(Copy);
}
}
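// Example: for '#pragma omp parallel firstprivate(a)' with 'int a[2]', the
// private copy is initialized from the original storage with a simple
// aggregate copy; if the elements require non-trivial copy initialization,
// an element-by-element initialization loop is emitted instead (see
// EmitOMPAggregateAssign).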
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
OMPPrivateScope &PrivateScope) {
if (!HaveInsertPoint())
return false;
bool DeviceConstTarget =
getLangOpts().OpenMPIsDevice &&
isOpenMPTargetExecutionDirective(D.getDirectiveKind());
bool FirstprivateIsLastprivate = false;
llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
for (const auto *D : C->varlists())
Lastprivates.try_emplace(
cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
C->getKind());
}
llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
// Force emission of the firstprivate copy if the directive does not emit an
// outlined function (e.g., omp for, omp simd, omp distribute).
bool MustEmitFirstprivateCopy =
CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
const auto *IRef = C->varlist_begin();
const auto *InitsRef = C->inits().begin();
for (const Expr *IInit : C->private_copies()) {
const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
bool ThisFirstprivateIsLastprivate =
Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
!FD->getType()->isReferenceType() &&
(!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
++IRef;
++InitsRef;
continue;
}
// Do not emit a copy for firstprivate constant variables captured by
// reference in target regions.
if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
FD && FD->getType()->isReferenceType() &&
(!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
++IRef;
++InitsRef;
continue;
}
FirstprivateIsLastprivate =
FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
const auto *VDInit =
cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
bool IsRegistered;
DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
/*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
(*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
LValue OriginalLVal;
if (!FD) {
// Check if the firstprivate variable is just a constant value.
ConstantEmission CE = tryEmitAsConstant(&DRE);
if (CE && !CE.isReference()) {
// Constant value, no need to create a copy.
++IRef;
++InitsRef;
continue;
}
if (CE && CE.isReference()) {
OriginalLVal = CE.getReferenceLValue(*this, &DRE);
} else {
assert(!CE && "Expected non-constant firstprivate.");
OriginalLVal = EmitLValue(&DRE);
}
} else {
OriginalLVal = EmitLValue(&DRE);
}
QualType Type = VD->getType();
if (Type->isArrayType()) {
// Emit VarDecl with copy init for arrays.
// Get the address of the original variable captured in the current
// captured region.
IsRegistered = PrivateScope.addPrivate(
OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
const Expr *Init = VD->getInit();
if (!isa<CXXConstructExpr>(Init) ||
isTrivialInitializer(Init)) {
// Perform simple memcpy.
LValue Dest =
MakeAddrLValue(Emission.getAllocatedAddress(), Type);
EmitAggregateAssign(Dest, OriginalLVal, Type);
} else {
EmitOMPAggregateAssign(
Emission.getAllocatedAddress(),
OriginalLVal.getAddress(*this), Type,
[this, VDInit, Init](Address DestElement,
Address SrcElement) {
// Clean up any temporaries needed by the
// initialization.
RunCleanupsScope InitScope(*this);
// Emit initialization for single element.
setAddrOfLocalVar(VDInit, SrcElement);
EmitAnyExprToMem(Init, DestElement,
Init->getType().getQualifiers(),
/*IsInitializer*/ false);
LocalDeclMap.erase(VDInit);
});
}
EmitAutoVarCleanups(Emission);
return Emission.getAllocatedAddress();
});
} else {
Address OriginalAddr = OriginalLVal.getAddress(*this);
IsRegistered =
PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD,
ThisFirstprivateIsLastprivate,
OrigVD, &Lastprivates, IRef]() {
// Emit private VarDecl with copy init.
// Remap temp VDInit variable to the address of the original
// variable (for proper handling of captured global variables).
setAddrOfLocalVar(VDInit, OriginalAddr);
EmitDecl(*VD);
LocalDeclMap.erase(VDInit);
if (ThisFirstprivateIsLastprivate &&
Lastprivates[OrigVD->getCanonicalDecl()] ==
OMPC_LASTPRIVATE_conditional) {
// Create/init special variable for lastprivate conditionals.
Address VDAddr =
CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
*this, OrigVD);
llvm::Value *V = EmitLoadOfScalar(
MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(),
AlignmentSource::Decl),
(*IRef)->getExprLoc());
EmitStoreOfScalar(V,
MakeAddrLValue(VDAddr, (*IRef)->getType(),
AlignmentSource::Decl));
LocalDeclMap.erase(VD);
setAddrOfLocalVar(VD, VDAddr);
return VDAddr;
}
return GetAddrOfLocalVar(VD);
});
}
assert(IsRegistered &&
"firstprivate var already registered as private");
// Silence the warning about unused variable.
(void)IsRegistered;
}
++IRef;
++InitsRef;
}
}
return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
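// Example: '#pragma omp parallel private(x)' emits a fresh local copy of 'x'
// via its private copy declaration and remaps uses of 'x' inside the region
// to that copy through the private scope.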
void CodeGenFunction::EmitOMPPrivateClause(
const OMPExecutableDirective &D,
CodeGenFunction::OMPPrivateScope &PrivateScope) {
if (!HaveInsertPoint())
return;
llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
auto IRef = C->varlist_begin();
for (const Expr *IInit : C->private_copies()) {
const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
// Emit private VarDecl with copy init.
EmitDecl(*VD);
return GetAddrOfLocalVar(VD);
});
assert(IsRegistered && "private var already registered as private");
// Silence the warning about unused variable.
(void)IsRegistered;
}
++IRef;
}
}
}
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
if (!HaveInsertPoint())
return false;
// threadprivate_var1 = master_threadprivate_var1;
// operator=(threadprivate_var2, master_threadprivate_var2);
// ...
// __kmpc_barrier(&loc, global_tid);
llvm::DenseSet<const VarDecl *> CopiedVars;
llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
auto IRef = C->varlist_begin();
auto ISrcRef = C->source_exprs().begin();
auto IDestRef = C->destination_exprs().begin();
for (const Expr *AssignOp : C->assignment_ops()) {
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
QualType Type = VD->getType();
if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
// Get the address of the master variable. If we are emitting code with
// TLS support, the address is passed from the master as a field in the
// captured declaration.
Address MasterAddr = Address::invalid();
if (getLangOpts().OpenMPUseTLS &&
getContext().getTargetInfo().isTLSSupported()) {
assert(CapturedStmtInfo->lookup(VD) &&
"Copyin threadprivates should have been captured!");
DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
(*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
MasterAddr = EmitLValue(&DRE).getAddress(*this);
LocalDeclMap.erase(VD);
} else {
MasterAddr =
Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
: CGM.GetAddrOfGlobal(VD),
getContext().getDeclAlign(VD));
}
// Get the address of the threadprivate variable.
Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
if (CopiedVars.size() == 1) {
// First, check whether the current thread is the master thread. If it is,
// there is no need to copy data.
CopyBegin = createBasicBlock("copyin.not.master");
CopyEnd = createBasicBlock("copyin.not.master.end");
// TODO: Avoid ptrtoint conversion.
auto *MasterAddrInt =
Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy);
auto *PrivateAddrInt =
Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy);
Builder.CreateCondBr(
Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
CopyEnd);
EmitBlock(CopyBegin);
}
const auto *SrcVD =
cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
const auto *DestVD =
cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
}
++IRef;
++ISrcRef;
++IDestRef;
}
}
if (CopyEnd) {
// Exit out of copying procedure for non-master thread.
EmitBlock(CopyEnd, /*IsFinished=*/true);
return true;
}
return false;
}
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
if (!HaveInsertPoint())
return false;
bool HasAtLeastOneLastprivate = false;
llvm::DenseSet<const VarDecl *> SIMDLCVs;
if (isOpenMPSimdDirective(D.getDirectiveKind())) {
const auto *LoopDirective = cast<OMPLoopDirective>(&D);
for (const Expr *C : LoopDirective->counters()) {
SIMDLCVs.insert(
cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
}
}
llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
HasAtLeastOneLastprivate = true;
if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
!getLangOpts().OpenMPSimd)
break;
const auto *IRef = C->varlist_begin();
const auto *IDestRef = C->destination_exprs().begin();
for (const Expr *IInit : C->private_copies()) {
// Keep the address of the original variable for future update at the end
// of the loop.
const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
// Taskloops do not require additional initialization; it is done in the
// runtime support library.
if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
const auto *DestVD =
cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
/*RefersToEnclosingVariableOrCapture=*/
CapturedStmtInfo->lookup(OrigVD) != nullptr,
(*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
return EmitLValue(&DRE).getAddress(*this);
});
// Check if the variable is also a firstprivate: in this case IInit is
// not generated. Initialization of this variable will happen in the
// codegen for the 'firstprivate' clause.
if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
bool IsRegistered =
PrivateScope.addPrivate(OrigVD, [this, VD, C, OrigVD]() {
if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
Address VDAddr =
CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
*this, OrigVD);
setAddrOfLocalVar(VD, VDAddr);
return VDAddr;
}
// Emit private VarDecl with copy init.
EmitDecl(*VD);
return GetAddrOfLocalVar(VD);
});
assert(IsRegistered &&
"lastprivate var already registered as private");
(void)IsRegistered;
}
}
++IRef;
++IDestRef;
}
}
return HasAtLeastOneLastprivate;
}
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
const OMPExecutableDirective &D, bool NoFinals,
llvm::Value *IsLastIterCond) {
if (!HaveInsertPoint())
return;
// Emit the following code:
// if (<IsLastIterCond>) {
// orig_var1 = private_orig_var1;
// ...
// orig_varn = private_orig_varn;
// }
llvm::BasicBlock *ThenBB = nullptr;
llvm::BasicBlock *DoneBB = nullptr;
if (IsLastIterCond) {
// Emit an implicit barrier if at least one lastprivate conditional is
// found and this is not simd mode.
if (!getLangOpts().OpenMPSimd &&
llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
[](const OMPLastprivateClause *C) {
return C->getKind() == OMPC_LASTPRIVATE_conditional;
})) {
CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
OMPD_unknown,
/*EmitChecks=*/false,
/*ForceSimpleCall=*/true);
}
ThenBB = createBasicBlock(".omp.lastprivate.then");
DoneBB = createBasicBlock(".omp.lastprivate.done");
Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
EmitBlock(ThenBB);
}
llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
auto IC = LoopDirective->counters().begin();
for (const Expr *F : LoopDirective->finals()) {
const auto *D =
cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
if (NoFinals)
AlreadyEmittedVars.insert(D);
else
LoopCountersAndUpdates[D] = F;
++IC;
}
}
for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
auto IRef = C->varlist_begin();
auto ISrcRef = C->source_exprs().begin();
auto IDestRef = C->destination_exprs().begin();
for (const Expr *AssignOp : C->assignment_ops()) {
const auto *PrivateVD =
cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
QualType Type = PrivateVD->getType();
const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
if (AlreadyEmittedVars.insert(CanonicalVD).second) {
// If the lastprivate variable is a loop control variable of a loop-based
// directive, update its value before copying it back to the original
// variable.
if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
EmitIgnoredExpr(FinalExpr);
const auto *SrcVD =
cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
const auto *DestVD =
cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
// Get the address of the private variable.
Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
PrivateAddr =
Address(Builder.CreateLoad(PrivateAddr),
CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
// Store the last value to the private copy in the last iteration.
if (C->getKind() == OMPC_LASTPRIVATE_conditional)
CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
*this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
(*IRef)->getExprLoc());
// Get the address of the original variable.
Address OriginalAddr = GetAddrOfLocalVar(DestVD);
EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
}
++IRef;
++ISrcRef;
++IDestRef;
}
if (const Expr *PostUpdate = C->getPostUpdateExpr())
EmitIgnoredExpr(PostUpdate);
}
if (IsLastIterCond)
EmitBlock(DoneBB, /*IsFinished=*/true);
}
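// Example: for 'reduction(+:s)', each participating thread gets a private
// copy of 's' initialized with the identity of '+' (zero); the combination
// of the private copies into the original 's' is emitted later by
// EmitOMPReductionClauseFinal.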
void CodeGenFunction::EmitOMPReductionClauseInit(
const OMPExecutableDirective &D,
CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
if (!HaveInsertPoint())
return;
SmallVector<const Expr *, 4> Shareds;
SmallVector<const Expr *, 4> Privates;
SmallVector<const Expr *, 4> ReductionOps;
SmallVector<const Expr *, 4> LHSs;
SmallVector<const Expr *, 4> RHSs;
OMPTaskDataTy Data;
SmallVector<const Expr *, 4> TaskLHSs;
SmallVector<const Expr *, 4> TaskRHSs;
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
continue;
Shareds.append(C->varlist_begin(), C->varlist_end());
Privates.append(C->privates().begin(), C->privates().end());
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
if (C->getModifier() == OMPC_REDUCTION_task) {
Data.ReductionVars.append(C->privates().begin(), C->privates().end());
Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
Data.ReductionOps.append(C->reduction_ops().begin(),
C->reduction_ops().end());
TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
}
}
ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
unsigned Count = 0;
auto *ILHS = LHSs.begin();
auto *IRHS = RHSs.begin();
auto *IPriv = Privates.begin();
for (const Expr *IRef : Shareds) {
const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
// Emit private VarDecl with reduction init.
RedCG.emitSharedOrigLValue(*this, Count);
RedCG.emitAggregateType(*this, Count);
AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
RedCG.getSharedLValue(Count),
[&Emission](CodeGenFunction &CGF) {
CGF.EmitAutoVarInit(Emission);
return true;
});
EmitAutoVarCleanups(Emission);
Address BaseAddr = RedCG.adjustPrivateAddress(
*this, Count, Emission.getAllocatedAddress());
bool IsRegistered = PrivateScope.addPrivate(
RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
assert(IsRegistered && "private var already registered as private");
// Silence the warning about unused variable.
(void)IsRegistered;
const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
QualType Type = PrivateVD->getType();
bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
// Store the address of the original variable associated with the LHS
// implicit variable.
PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
return RedCG.getSharedLValue(Count).getAddress(*this);
});
PrivateScope.addPrivate(
RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
} else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
isa<ArraySubscriptExpr>(IRef)) {
// Store the address of the original variable associated with the LHS
// implicit variable.
PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
return RedCG.getSharedLValue(Count).getAddress(*this);
});
PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
ConvertTypeForMem(RHSVD->getType()),
"rhs.begin");
});
} else {
QualType Type = PrivateVD->getType();
bool IsArray = getContext().getAsArrayType(Type) != nullptr;
Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
// Store the address of the original variable associated with the LHS
// implicit variable.
if (IsArray) {
OriginalAddr = Builder.CreateElementBitCast(
OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
}
PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
return IsArray ? Builder.CreateElementBitCast(
GetAddrOfLocalVar(PrivateVD),
ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
: GetAddrOfLocalVar(PrivateVD);
});
}
++ILHS;
++IRHS;
++IPriv;
++Count;
}
if (!Data.ReductionVars.empty()) {
Data.IsReductionWithTaskMod = true;
Data.IsWorksharingReduction =
isOpenMPWorksharingDirective(D.getDirectiveKind());
llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
*this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
const Expr *TaskRedRef = nullptr;
switch (D.getDirectiveKind()) {
case OMPD_parallel:
TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
break;
case OMPD_for:
TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
break;
case OMPD_sections:
TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
break;
case OMPD_parallel_for:
TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
break;
case OMPD_parallel_master:
TaskRedRef =
cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
break;
case OMPD_parallel_sections:
TaskRedRef =
cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
break;
case OMPD_target_parallel:
TaskRedRef =
cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
break;
case OMPD_target_parallel_for:
TaskRedRef =
cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
break;
case OMPD_distribute_parallel_for:
TaskRedRef =
cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
break;
case OMPD_teams_distribute_parallel_for:
TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
.getTaskReductionRefExpr();
break;
case OMPD_target_teams_distribute_parallel_for:
TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
.getTaskReductionRefExpr();
break;
case OMPD_simd:
case OMPD_for_simd:
case OMPD_section:
case OMPD_single:
case OMPD_master:
case OMPD_critical:
case OMPD_parallel_for_simd:
case OMPD_task:
case OMPD_taskyield:
case OMPD_barrier:
case OMPD_taskwait:
case OMPD_taskgroup:
case OMPD_flush:
case OMPD_depobj:
case OMPD_scan:
case OMPD_ordered:
case OMPD_atomic:
case OMPD_teams:
case OMPD_target:
case OMPD_cancellation_point:
case OMPD_cancel:
case OMPD_target_data:
case OMPD_target_enter_data:
case OMPD_target_exit_data:
case OMPD_taskloop:
case OMPD_taskloop_simd:
case OMPD_master_taskloop:
case OMPD_master_taskloop_simd:
case OMPD_parallel_master_taskloop:
case OMPD_parallel_master_taskloop_simd:
case OMPD_distribute:
case OMPD_target_update:
case OMPD_distribute_parallel_for_simd:
case OMPD_distribute_simd:
case OMPD_target_parallel_for_simd:
case OMPD_target_simd:
case OMPD_teams_distribute:
case OMPD_teams_distribute_simd:
case OMPD_teams_distribute_parallel_for_simd:
case OMPD_target_teams:
case OMPD_target_teams_distribute:
case OMPD_target_teams_distribute_parallel_for_simd:
case OMPD_target_teams_distribute_simd:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_threadprivate:
case OMPD_allocate:
case OMPD_declare_reduction:
case OMPD_declare_mapper:
case OMPD_declare_simd:
case OMPD_requires:
case OMPD_declare_variant:
case OMPD_begin_declare_variant:
case OMPD_end_declare_variant:
case OMPD_unknown:
default:
llvm_unreachable("Enexpected directive with task reductions.");
}
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
EmitVarDecl(*VD);
EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
/*Volatile=*/false, TaskRedRef->getType());
}
}
void CodeGenFunction::EmitOMPReductionClauseFinal(
const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
if (!HaveInsertPoint())
return;
llvm::SmallVector<const Expr *, 8> Privates;
llvm::SmallVector<const Expr *, 8> LHSExprs;
llvm::SmallVector<const Expr *, 8> RHSExprs;
llvm::SmallVector<const Expr *, 8> ReductionOps;
bool HasAtLeastOneReduction = false;
bool IsReductionWithTaskMod = false;
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
// Do not emit for inscan reductions.
if (C->getModifier() == OMPC_REDUCTION_inscan)
continue;
HasAtLeastOneReduction = true;
Privates.append(C->privates().begin(), C->privates().end());
LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
IsReductionWithTaskMod =
IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
}
if (HasAtLeastOneReduction) {
if (IsReductionWithTaskMod) {
CGM.getOpenMPRuntime().emitTaskReductionFini(
*this, D.getBeginLoc(),
isOpenMPWorksharingDirective(D.getDirectiveKind()));
}
bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
isOpenMPParallelDirective(D.getDirectiveKind()) ||
ReductionKind == OMPD_simd;
bool SimpleReduction = ReductionKind == OMPD_simd;
// Emit a nowait reduction if the nowait clause is present or the directive
// is a parallel directive (it always has an implicit barrier).
CGM.getOpenMPRuntime().emitReduction(
*this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
{WithNowait, SimpleReduction, ReductionKind});
}
}
static void emitPostUpdateForReductionClause(
CodeGenFunction &CGF, const OMPExecutableDirective &D,
const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
if (!CGF.HaveInsertPoint())
return;
llvm::BasicBlock *DoneBB = nullptr;
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
if (!DoneBB) {
if (llvm::Value *Cond = CondGen(CGF)) {
// When the first post-update expression is found, emit the conditional
// block if it was requested.
llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
CGF.EmitBlock(ThenBB);
}
}
CGF.EmitIgnoredExpr(PostUpdate);
}
}
if (DoneBB)
CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
namespace {
/// Codegen lambda for appending distribute lower and upper bounds to the
/// outlined parallel function. This is necessary for combined constructs
/// such as 'distribute parallel for'.
typedef llvm::function_ref<void(CodeGenFunction &,
const OMPExecutableDirective &,
llvm::SmallVectorImpl<llvm::Value *> &)>
CodeGenBoundParametersTy;
} // anonymous namespace
static void
checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
const OMPExecutableDirective &S) {
if (CGF.getLangOpts().OpenMP < 50)
return;
llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
for (const Expr *Ref : C->varlists()) {
if (!Ref->getType()->isScalarType())
continue;
const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
if (!DRE)
continue;
PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
}
}
for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
for (const Expr *Ref : C->varlists()) {
if (!Ref->getType()->isScalarType())
continue;
const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
if (!DRE)
continue;
PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
}
}
for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
for (const Expr *Ref : C->varlists()) {
if (!Ref->getType()->isScalarType())
continue;
const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
if (!DRE)
continue;
PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
}
}
// Privates need not be analyzed since they are not captured at all.
// Task reductions may be skipped since tasks are ignored.
// Firstprivates do not return a value but may be passed by reference; there
// is no need to check for an updated lastprivate conditional.
for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
for (const Expr *Ref : C->varlists()) {
if (!Ref->getType()->isScalarType())
continue;
const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
if (!DRE)
continue;
PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
}
}
CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
CGF, S, PrivateDecls);
}
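// Example (a sketch of the emitted runtime calls, assuming the default
// libomp entry points): '#pragma omp parallel num_threads(4) if(cond)'
// roughly becomes
//
//   __kmpc_push_num_threads(&loc, tid, 4);
//   if (cond)
//     __kmpc_fork_call(&loc, ..., .omp_outlined., <captured vars>);
//   else
//     <serialized parallel region>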
static void emitCommonOMPParallelDirective(
CodeGenFunction &CGF, const OMPExecutableDirective &S,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
const CodeGenBoundParametersTy &CodeGenBoundParameters) {
const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
llvm::Function *OutlinedFn =
CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
llvm::Value *NumThreads =
CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
/*IgnoreResultAssign=*/true);
CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
CGF, NumThreads, NumThreadsClause->getBeginLoc());
}
if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
CGF.CGM.getOpenMPRuntime().emitProcBindClause(
CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
}
const Expr *IfCond = nullptr;
for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
if (C->getNameModifier() == OMPD_unknown ||
C->getNameModifier() == OMPD_parallel) {
IfCond = C->getCondition();
break;
}
}
OMPParallelScope Scope(CGF, S);
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
// Combining 'distribute' with 'for' requires sharing each 'distribute'
// chunk's lower and upper bounds with the 'for' chunking mechanism.
// The following lambda takes care of appending the lower and upper bound
// parameters when necessary.
CodeGenBoundParameters(CGF, S, CapturedVars);
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
CapturedVars, IfCond);
}
static bool isAllocatableDecl(const VarDecl *VD) {
const VarDecl *CVD = VD->getCanonicalDecl();
if (!CVD->hasAttr<OMPAllocateDeclAttr>())
return false;
const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
// The default (or null) allocator without an allocator expression means the
// default allocation is used, so no dedicated allocator call is needed.
return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
!AA->getAllocator());
}
static void emitEmptyBoundParameters(CodeGenFunction &,
const OMPExecutableDirective &,
llvm::SmallVectorImpl<llvm::Value *> &) {}
Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
CodeGenFunction &CGF, const VarDecl *VD) {
CodeGenModule &CGM = CGF.CGM;
auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
if (!VD)
return Address::invalid();
const VarDecl *CVD = VD->getCanonicalDecl();
if (!isAllocatableDecl(CVD))
return Address::invalid();
llvm::Value *Size;
CharUnits Align = CGM.getContext().getDeclAlign(CVD);
if (CVD->getType()->isVariablyModifiedType()) {
Size = CGF.getTypeSize(CVD->getType());
// Align the size: ((size + align - 1) / align) * align
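// For instance, size = 10 with align = 8 rounds up to ((10 + 7) / 8) * 8 == 16.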
Size = CGF.Builder.CreateNUWAdd(
Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
} else {
CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
Size = CGM.getSize(Sz.alignTo(Align));
}
const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
assert(AA->getAllocator() &&
"Expected allocator expression for non-default allocator.");
llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
// According to the standard, the original allocator type is an enum
// (integer). Convert to pointer type, if required.
if (Allocator->getType()->isIntegerTy())
Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
else if (Allocator->getType()->isPointerTy())
Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
CGM.VoidPtrTy);
llvm::Value *Addr = OMPBuilder.createOMPAlloc(
CGF.Builder, Size, Allocator,
getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
llvm::CallInst *FreeCI =
OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);
CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Addr,
CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
return Address(Addr, Align);
}
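// Sketch of the intended behavior: when TLS can be used, the variable's own
// address is returned unchanged; otherwise the OMPIRBuilder emits a call to
// the runtime's cached threadprivate lookup (conceptually
// __kmpc_threadprivate_cached) and the address of the per-thread copy is
// returned instead.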
Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
SourceLocation Loc) {
CodeGenModule &CGM = CGF.CGM;
if (CGM.getLangOpts().OpenMPUseTLS &&
CGM.getContext().getTargetInfo().isTLSSupported())
return VDAddr;
llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
llvm::Type *VarTy = VDAddr.getElementType();
llvm::Value *Data =
CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
std::string Suffix = getNameWithSeparators({"cache", ""});
llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);
llvm::CallInst *ThreadPrivateCacheCall =
OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);
return Address(ThreadPrivateCacheCall, VDAddr.getAlignment());
}
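// Usage example (illustrative): getNameWithSeparators({"x", "addr"}, ".", ".")
// produces ".x.addr" - the first separator precedes the first part, and the
// regular separator joins the remaining parts.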
std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
SmallString<128> Buffer;
llvm::raw_svector_ostream OS(Buffer);
StringRef Sep = FirstSeparator;
for (StringRef Part : Parts) {
OS << Sep << Part;
Sep = Separator;
}
return OS.str().str();
}
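// For example (illustrative), for:
//   #pragma omp parallel if(cond) num_threads(n) proc_bind(spread)
// the OpenMPIRBuilder path below evaluates the clause operands eagerly and
// hands them to createParallel, while the classic codegen path outlines the
// region and emits the clause handling via emitCommonOMPParallelDirective.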
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
if (CGM.getLangOpts().OpenMPIRBuilder) {
llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
// Check if we have any if clause associated with the directive.
llvm::Value *IfCond = nullptr;
if (const auto *C = S.getSingleClause<OMPIfClause>())
IfCond = EmitScalarExpr(C->getCondition(),
/*IgnoreResultAssign=*/true);
llvm::Value *NumThreads = nullptr;
if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
/*IgnoreResultAssign=*/true);
ProcBindKind ProcBind = OMP_PROC_BIND_default;
if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
ProcBind = ProcBindClause->getProcBindKind();
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
// The cleanup callback that finalizes all variables at the given location,
// and thus calls destructors etc.
auto FiniCB = [this](InsertPointTy IP) {
OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
};
// Privatization callback that performs appropriate action for
// shared/private/firstprivate/lastprivate/copyin/... variables.
//
// TODO: This defaults to shared right now.
auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
// The next line is appropriate only for variables (Val) with the
// data-sharing attribute "shared".
ReplVal = &Val;
return CodeGenIP;
};
const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
auto BodyGenCB = [ParallelRegionBodyStmt,
this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
llvm::BasicBlock &ContinuationBB) {
OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP,
ContinuationBB);
OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt,
CodeGenIP, ContinuationBB);
};
CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
Builder.restoreIP(
OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
IfCond, NumThreads, ProcBind, S.hasCancel()));
return;
}
// Emit parallel region as a standalone region.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
OMPPrivateScope PrivateScope(CGF);
bool Copyins = CGF.EmitOMPCopyinClause(S);
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
if (Copyins) {
// Emit an implicit barrier to synchronize threads and avoid data races on
// propagation of the master thread's values of threadprivate variables to
// the local instances of those variables in all other implicit threads.
CGF.CGM.getOpenMPRuntime().emitBarrierCall(
CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
/*ForceSimpleCall=*/true);
}
CGF.EmitOMPPrivateClause(S, PrivateScope);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
};
{
auto LPCRegion =
CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
emitEmptyBoundParameters);
emitPostUpdateForReductionClause(*this, S,
[](CodeGenFunction &) { return nullptr; });
}
// Check for outer lastprivate conditional update.
checkForLastprivateConditionalUpdate(*this, S);
}
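// For metadirectives, e.g. (illustrative):
//   #pragma omp metadirective when(user={condition(flag)}: parallel) \
//       default(simd)
// the applicable variant is selected at compile time and stored on the AST
// node (retrieved via getIfStmt()); codegen simply emits that pre-selected
// statement.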
void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) {
EmitStmt(S.getIfStmt());
}
namespace {
/// RAII to handle scopes for loop transformation directives.
class OMPTransformDirectiveScopeRAII {
OMPLoopScope *Scope = nullptr;
CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;
public:
OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
Scope = new OMPLoopScope(CGF, *Dir);
CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
}
}
~OMPTransformDirectiveScopeRAII() {
if (!Scope)
return;
delete CapInfoRAII;
delete CGSI;
delete Scope;
}
};
} // namespace
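// Recursively emits a (possibly imperfectly nested) collapsed loop body. For
// example (illustrative), with:
//   #pragma omp for collapse(2)
//   for (int i = 0; i < n; ++i) {
//     before(i);                 // imperfectly nested statement
//     for (int j = 0; j < m; ++j)
//       body(i, j);
//   }
// the walk descends through the 'i' loop (Level 0) into the 'j' loop
// (Level 1 == MaxLevel - 1), emitting the intervening statements on the way.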
static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
int MaxLevel, int Level = 0) {
assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
const Stmt *SimplifiedS = S->IgnoreContainers();
if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
PrettyStackTraceLoc CrashInfo(
CGF.getContext().getSourceManager(), CS->getLBracLoc(),
"LLVM IR generation of compound statement ('{}')");
// Keep track of the current cleanup stack depth, including debug scopes.
CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
for (const Stmt *CurStmt : CS->body())
emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
return;
}
if (SimplifiedS == NextLoop) {
if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS))
SimplifiedS = Dir->getTransformedStmt();
if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
SimplifiedS = CanonLoop->getLoopStmt();
if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
S = For->getBody();
} else {
assert(isa<CXXForRangeStmt>(SimplifiedS) &&
"Expected canonical for loop or range-based for loop.");
const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
CGF.EmitStmt(CXXFor->getLoopVarStmt());
S = CXXFor->getBody();
}
if (Level + 1 < MaxLevel) {
NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
S, /*TryImperfectlyNestedLoops=*/true);
emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
return;
}
}
CGF.EmitStmt(S);
}
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
JumpDest LoopExit) {
RunCleanupsScope BodyScope(*this);
// Update the counters' values on the current iteration.
for (const Expr *UE : D.updates())
EmitIgnoredExpr(UE);
// Update the linear variables.
// In distribute directives only loop counters may be marked as linear; there
// is no need to generate the code for them.
if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
for (const Expr *UE : C->updates())
EmitIgnoredExpr(UE);
}
}
// On a continue in the body, jump to the end.
JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
for (const Expr *E : D.finals_conditions()) {
if (!E)
continue;
// Check that the loop counter in a non-rectangular nest fits into the
// iteration space.
llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
getProfileCount(D.getBody()));
EmitBlock(NextBB);
}
OMPPrivateScope InscanScope(*this);
EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
bool IsInscanRegion = InscanScope.Privatize();
if (IsInscanRegion) {
// We need to remember the blocks before and after the scan directive so that
// they can be dispatched correctly depending on the clause used in this
// directive (inclusive or exclusive). For inclusive scan the natural order of
// the blocks is used; for the exclusive clause the blocks must be executed in
// reverse order.
OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
// No need to allocate the inscan exit block; in simd mode it is selected in
// the codegen for the scan directive.
if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
EmitBranch(OMPScanDispatch);
EmitBlock(OMPBeforeScanBlock);
}
// Emit loop variables for C++ range loops.
const Stmt *Body =
D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
// Emit loop body.
emitBody(*this, Body,
OMPLoopBasedDirective::tryToFindNextInnerLoop(
Body, /*TryImperfectlyNestedLoops=*/true),
D.getLoopsNumber());
// Jump to the dispatcher at the end of the loop body.
if (IsInscanRegion)
EmitBranch(OMPScanExitBlock);
// The end (updates/cleanups).
EmitBlock(Continue.getBlock());
BreakContinueStack.pop_back();
}
using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;
/// Emit a captured statement and return the function as well as its captured
/// closure context.
static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
const CapturedStmt *S) {
LValue CapStruct = ParentCGF.InitCapturedStruct(*S);
CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S);
return {F, CapStruct.getPointer(ParentCGF)};
}
/// Emit a call to a previously captured closure.
static llvm::CallInst *
emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
llvm::ArrayRef<llvm::Value *> Args) {
// Append the closure context to the argument.
SmallVector<llvm::Value *> EffectiveArgs;
EffectiveArgs.reserve(Args.size() + 1);
llvm::append_range(EffectiveArgs, Args);
EffectiveArgs.push_back(Cap.second);
return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs);
}
llvm::CanonicalLoopInfo *
CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
// The caller is processing the loop-associated directive containing the \p
// Depth loops nested in \p S. Put the previous pending loop-associated
// directive on the stack. If the current loop-associated directive is a loop
// transformation directive, it will push its generated loops onto the stack
// such that, together with the loops left here, they form the combined loop
// nest for the parent loop-associated directive.
int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
ExpectedOMPLoopDepth = Depth;
EmitStmt(S);
assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");
// The last added loop is the outermost one.
llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();
// Pop the \p Depth loops requested by the call from that stack and restore
// the previous context.
OMPLoopNestStack.set_size(OMPLoopNestStack.size() - Depth);
ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;
return Result;
}
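// Conceptually, for a canonical loop such as:
//   for (int i = a; i < b; i += c)
// Sema has synthesized two captured helpers: a "distance" function that
// computes the trip count (roughly (b - a + c - 1) / c for positive c) and a
// "loop variable" function that reconstructs i from a logical iteration
// number (i = a + logical * c). The code below calls the former once and the
// latter on every iteration.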
void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
const Stmt *SyntacticalLoop = S->getLoopStmt();
if (!getLangOpts().OpenMPIRBuilder) {
// Ignore if OpenMPIRBuilder is not enabled.
EmitStmt(SyntacticalLoop);
return;
}
LexicalScope ForScope(*this, S->getSourceRange());
// Emit init statements. The Distance/LoopVar functions may reference variable
// declarations that these init statements contain.
const Stmt *BodyStmt;
if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) {
if (const Stmt *InitStmt = For->getInit())
EmitStmt(InitStmt);
BodyStmt = For->getBody();
} else if (const auto *RangeFor =
dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) {
if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
EmitStmt(RangeStmt);
if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
EmitStmt(BeginStmt);
if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
EmitStmt(EndStmt);
if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
EmitStmt(LoopVarStmt);
BodyStmt = RangeFor->getBody();
} else
llvm_unreachable("Expected for-stmt or range-based for-stmt");
// Emit closure for later use. By-value captures will be captured here.
const CapturedStmt *DistanceFunc = S->getDistanceFunc();
EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc);
const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc);
// Call the distance function to get the number of iterations of the loop to
// come.
QualType LogicalTy = DistanceFunc->getCapturedDecl()
->getParam(0)
->getType()
.getNonReferenceType();
Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr");
emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()});
llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count");
// Emit the loop structure.
llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
llvm::Value *IndVar) {
Builder.restoreIP(CodeGenIP);
// Emit the loop body: Convert the logical iteration number to the loop
// variable and emit the body.
const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
LValue LCVal = EmitLValue(LoopVarRef);
Address LoopVarAddress = LCVal.getAddress(*this);
emitCapturedStmtCall(*this, LoopVarClosure,
{LoopVarAddress.getPointer(), IndVar});
RunCleanupsScope BodyScope(*this);
EmitStmt(BodyStmt);
};
llvm::CanonicalLoopInfo *CL =
OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal);
// Finish up the loop.
Builder.restoreIP(CL->getAfterIP());
ForScope.ForceCleanup();
// Remember the CanonicalLoopInfo for parent AST nodes consuming it.
OMPLoopNestStack.push_back(CL);
}
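// The emitted control flow follows the usual top-tested loop shape; roughly:
//   omp.inner.for.cond: if (LoopCond) goto body; else goto (cleanup|end)
//   omp.inner.for.body: BodyGen(...)
//   omp.inner.for.inc:  IV = IV + 1; PostIncGen(...); goto cond
//   omp.inner.for.end:  fall-through continuation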
void CodeGenFunction::EmitOMPInnerLoop(
const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
const Expr *IncExpr,
const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
// Start the loop with a block that tests the condition.
auto CondBlock = createBasicBlock("omp.inner.for.cond");
EmitBlock(CondBlock);
const SourceRange R = S.getSourceRange();
// If loop attributes are attached, push the basic block together with them
// onto the loop stack.
const auto &OMPED = cast<OMPExecutableDirective>(S);
const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
const Stmt *SS = ICS->getCapturedStmt();
const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
OMPLoopNestStack.clear();
if (AS)
LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
SourceLocToDebugLoc(R.getEnd()));
else
LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
SourceLocToDebugLoc(R.getEnd()));
// If there are any cleanups between here and the loop-exit scope,
// create a block to stage a loop exit along.
llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
if (RequiresCleanup)
ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");
// Emit condition.
EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
if (ExitBlock != LoopExit.getBlock()) {
EmitBlock(ExitBlock);
EmitBranchThroughCleanup(LoopExit);
}
EmitBlock(LoopBody);
incrementProfileCounter(&S);
// Create a block for the increment.
JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
BodyGen(*this);
// Emit "IV = IV + 1" and a back-edge to the condition block.
EmitBlock(Continue.getBlock());
EmitIgnoredExpr(IncExpr);
PostIncGen(*this);
BreakContinueStack.pop_back();
EmitBranch(CondBlock);
LoopStack.pop();
// Emit the fall-through block.
EmitBlock(LoopExit.getBlock());
}
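// For example (illustrative), for:
//   #pragma omp simd linear(x : step)
// this emits the initializer of the privatized 'x' and, when 'step' is not a
// compile-time constant, a helper variable holding the pre-computed step
// value so that it is evaluated once before the loop.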
bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
if (!HaveInsertPoint())
return false;
// Emit inits for the linear variables.
bool HasLinears = false;
for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
for (const Expr *Init : C->inits()) {
HasLinears = true;
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
if (const auto *Ref =
dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
CapturedStmtInfo->lookup(OrigVD) != nullptr,
VD->getInit()->getType(), VK_LValue,
VD->getInit()->getExprLoc());
EmitExprAsInit(
&DRE, VD,
MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
/*capturedByInit=*/false);
EmitAutoVarCleanups(Emission);
} else {
EmitVarDecl(*VD);
}
}
// Emit the linear steps for the linear clauses.
// If a step is not constant, it is pre-calculated before the loop.
if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
// Emit calculation of the linear step.
EmitIgnoredExpr(CS);
}
}
return HasLinears;
}
void CodeGenFunction::EmitOMPLinearClauseFinal(
const OMPLoopDirective &D,
const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
if (!HaveInsertPoint())
return;
llvm::BasicBlock *DoneBB = nullptr;
// Emit the final values of the linear variables.
for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
auto IC = C->varlist_begin();
for (const Expr *F : C->finals()) {
if (!DoneBB) {
if (llvm::Value *Cond = CondGen(*this)) {
// When the first final expression is reached, emit the conditional
// block if one was requested.
llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
DoneBB = createBasicBlock(".omp.linear.pu.done");
Builder.CreateCondBr(Cond, ThenBB, DoneBB);
EmitBlock(ThenBB);
}
}
const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
CapturedStmtInfo->lookup(OrigVD) != nullptr,
(*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
Address OrigAddr = EmitLValue(&DRE).getAddress(*this);
CodeGenFunction::OMPPrivateScope VarScope(*this);
VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
(void)VarScope.Privatize();
EmitIgnoredExpr(F);
++IC;
}
if (const Expr *PostUpdate = C->getPostUpdateExpr())
EmitIgnoredExpr(PostUpdate);
}
if (DoneBB)
EmitBlock(DoneBB, /*IsFinished=*/true);
}
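// For example (illustrative), for:
//   #pragma omp simd aligned(p : 32)
// this emits an alignment assumption (an llvm.assume on the pointer value)
// telling the vectorizer that 'p' points to 32-byte-aligned storage; without
// an explicit alignment, the target's default SIMD alignment is assumed.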
static void emitAlignedClause(CodeGenFunction &CGF,
const OMPExecutableDirective &D) {
if (!CGF.HaveInsertPoint())
return;
for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
llvm::APInt ClauseAlignment(64, 0);
if (const Expr *AlignmentExpr = Clause->getAlignment()) {
auto *AlignmentCI =
cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
ClauseAlignment = AlignmentCI->getValue();
}
for (const Expr *E : Clause->varlists()) {
llvm::APInt Alignment(ClauseAlignment);
if (Alignment == 0) {
// OpenMP [2.8.1, Description]
// If no optional parameter is specified, implementation-defined default
// alignments for SIMD instructions on the target platforms are assumed.
Alignment =
CGF.getContext()
.toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
E->getType()->getPointeeType()))
.getQuantity();
}
assert((Alignment == 0 || Alignment.isPowerOf2()) &&
"alignment is not power of 2");
if (Alignment != 0) {
llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
CGF.emitAlignmentAssumption(
PtrValue, E, /*No second loc needed*/ SourceLocation(),
llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
}
}
}
}
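// E.g. (illustrative), for '#pragma omp for' over 'for (int i = ...)' the
// counter 'i' gets a private copy per thread; the mapping below makes later
// references to 'i' inside the region resolve to that private copy.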
void CodeGenFunction::EmitOMPPrivateLoopCounters(
const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
if (!HaveInsertPoint())
return;
auto I = S.private_counters().begin();
for (const Expr *E : S.counters()) {
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
// Emit var without initialization.
AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
EmitAutoVarCleanups(VarEmission);
LocalDeclMap.erase(PrivateVD);
(void)LoopScope.addPrivate(
VD, [&VarEmission]() { return VarEmission.getAllocatedAddress(); });
if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
VD->hasGlobalStorage()) {
(void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
E->getType(), VK_LValue, E->getExprLoc());
return EmitLValue(&DRE).getAddress(*this);
});
} else {
(void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
return VarEmission.getAllocatedAddress();
});
}
++I;
}
// Privatize extra loop counters used in loops for ordered(n) clauses.
for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
if (!C->getNumForLoops())
continue;
for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
I < E; ++I) {
const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
const auto *VD = cast<VarDecl>(DRE->getDecl());
// Override only those variables that can be captured to avoid re-emission
// of the variables declared within the loops.
if (DRE->refersToEnclosingVariableOrCapture()) {
(void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
return CreateMemTemp(DRE->getType(), VD->getName());
});
}
}
}
}
static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
const Expr *Cond, llvm::BasicBlock *TrueBlock,
llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
if (!CGF.HaveInsertPoint())
return;
{
CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
(void)PreCondScope.Privatize();
// Get initial values of real counters.
for (const Expr *I : S.inits()) {
CGF.EmitIgnoredExpr(I);
}
}
// Create temp loop control variables with their init values to support
// non-rectangular loops.
CodeGenFunction::OMPMapVars PreCondVars;
for (const Expr *E : S.dependent_counters()) {
if (!E)
continue;
assert(!E->getType().getNonReferenceType()->isRecordType() &&
"dependent counter must not be an iterator.");
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
Address CounterAddr =
CGF.CreateMemTemp(VD->getType().getNonReferenceType());
(void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
}
(void)PreCondVars.apply(CGF);
for (const Expr *E : S.dependent_inits()) {
if (!E)
continue;
CGF.EmitIgnoredExpr(E);
}
// Check that the loop is executed at least once.
CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
PreCondVars.restore(CGF);
}
void CodeGenFunction::EmitOMPLinearClause(
const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
if (!HaveInsertPoint())
return;
llvm::DenseSet<const VarDecl *> SIMDLCVs;
if (isOpenMPSimdDirective(D.getDirectiveKind())) {
const auto *LoopDirective = cast<OMPLoopDirective>(&D);
for (const Expr *C : LoopDirective->counters()) {
SIMDLCVs.insert(
cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
}
}
for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
auto CurPrivate = C->privates().begin();
for (const Expr *E : C->varlists()) {
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
const auto *PrivateVD =
cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() {
// Emit private VarDecl with copy init.
EmitVarDecl(*PrivateVD);
return GetAddrOfLocalVar(PrivateVD);
});
assert(IsRegistered && "linear var already registered as private");
// Silence the warning about unused variable.
(void)IsRegistered;
} else {
EmitVarDecl(*PrivateVD);
}
++CurPrivate;
}
}
}
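// For example (illustrative):
//   #pragma omp simd simdlen(8)   -> vectorize width 8; memory accesses may
//                                    still be marked parallel
//   #pragma omp simd safelen(16)  -> vectorize width 16; accesses are not
//                                    marked parallel, since loop-carried
//                                    dependences up to 16 iterations apart
//                                    are allowed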
static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
const OMPExecutableDirective &D) {
if (!CGF.HaveInsertPoint())
return;
if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
/*ignoreResult=*/true);
auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
// In the presence of a finite 'safelen', it may be unsafe to mark all
// the memory instructions parallel, because loop-carried
// dependences of up to 'safelen' iterations are possible.
CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
} else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
/*ignoreResult=*/true);
auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
// In the presence of a finite 'safelen', it may be unsafe to mark all
// the memory instructions parallel, because loop-carried
// dependences of up to 'safelen' iterations are possible.
CGF.LoopStack.setParallel(/*Enable=*/false);
}
}
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
// Walk clauses and process safelen/simdlen, order(concurrent), and inscan
// reductions.
LoopStack.setParallel(/*Enable=*/true);
LoopStack.setVectorizeEnable();
emitSimdlenSafelenClause(*this, D);
if (const auto *C = D.getSingleClause<OMPOrderClause>())
if (C->getKind() == OMPC_ORDER_concurrent)
LoopStack.setParallel(/*Enable=*/true);
if ((D.getDirectiveKind() == OMPD_simd ||
(getLangOpts().OpenMPSimd &&
isOpenMPSimdDirective(D.getDirectiveKind()))) &&
llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
[](const OMPReductionClause *C) {
return C->getModifier() == OMPC_REDUCTION_inscan;
}))
// Disable parallel access in case of prefix sum.
LoopStack.setParallel(/*Enable=*/false);
}
void CodeGenFunction::EmitOMPSimdFinal(
const OMPLoopDirective &D,
const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
if (!HaveInsertPoint())
return;
llvm::BasicBlock *DoneBB = nullptr;
auto IC = D.counters().begin();
auto IPC = D.private_counters().begin();
for (const Expr *F : D.finals()) {
const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
OrigVD->hasGlobalStorage() || CED) {
if (!DoneBB) {
if (llvm::Value *Cond = CondGen(*this)) {
// When the first final expression is reached, emit the conditional
// block if one was requested.
llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
DoneBB = createBasicBlock(".omp.final.done");
Builder.CreateCondBr(Cond, ThenBB, DoneBB);
EmitBlock(ThenBB);
}
}
Address OrigAddr = Address::invalid();
if (CED) {
OrigAddr =
EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this);
} else {
DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
/*RefersToEnclosingVariableOrCapture=*/false,
(*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
OrigAddr = EmitLValue(&DRE).getAddress(*this);
}
OMPPrivateScope VarScope(*this);
VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
(void)VarScope.Privatize();
EmitIgnoredExpr(F);
}
++IC;
++IPC;
}
if (DoneBB)
EmitBlock(DoneBB, /*IsFinished=*/true);
}
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
const OMPLoopDirective &S,
CodeGenFunction::JumpDest LoopExit) {
CGF.EmitOMPLoopBody(S, LoopExit);
CGF.EmitStopPoint(&S);
}
/// Emit a helper variable and return corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,