[OpenMP] Added codegen for masked directive
Reviewed By: ABataev
Differential Revision: https://reviews.llvm.org/D100514
GitOrigin-RevId: e0c2125d1d1e72039b8e071d468d9f740c7dbfbd
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index 3714698..18ad506 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -2279,6 +2279,35 @@
Action.Done(CGF);
}
+void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
+ const RegionCodeGenTy &MaskedOpGen,
+ SourceLocation Loc, const Expr *Filter) {
+ if (!CGF.HaveInsertPoint())
+ return;
+ // if(__kmpc_masked(ident_t *, gtid, filter)) {
+ // MaskedOpGen();
+ // __kmpc_end_masked(iden_t *, gtid);
+ // }
+ // Prepare arguments and build a call to __kmpc_masked
+ llvm::Value *FilterVal = Filter
+ ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
+ : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
+ llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
+ FilterVal};
+ llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
+ getThreadID(CGF, Loc)};
+ CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_masked),
+ Args,
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_end_masked),
+ ArgsEnd,
+ /*Conditional=*/true);
+ MaskedOpGen.setAction(Action);
+ emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
+ Action.Done(CGF);
+}
+
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
SourceLocation Loc) {
if (!CGF.HaveInsertPoint())
@@ -6232,7 +6261,8 @@
return;
InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
InnerKind != OMPD_critical &&
- InnerKind != OMPD_master);
+ InnerKind != OMPD_master &&
+ InnerKind != OMPD_masked);
CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
@@ -12596,6 +12626,13 @@
llvm_unreachable("Not supported in SIMD-only mode");
}
+void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
+ const RegionCodeGenTy &MasterOpGen,
+ SourceLocation Loc,
+ const Expr *Filter) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
SourceLocation Loc) {
llvm_unreachable("Not supported in SIMD-only mode");
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h
index 541904a..c35202a 100644
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -1012,6 +1012,14 @@
const RegionCodeGenTy &MasterOpGen,
SourceLocation Loc);
+ /// Emits a masked region.
+ /// \param MaskedOpGen Generator for the statement associated with the given
+ /// masked region.
+ virtual void emitMaskedRegion(CodeGenFunction &CGF,
+ const RegionCodeGenTy &MaskedOpGen,
+ SourceLocation Loc,
+ const Expr *Filter = nullptr);
+
/// Emits code for a taskyield directive.
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc);
@@ -1984,6 +1992,17 @@
const RegionCodeGenTy &MasterOpGen,
SourceLocation Loc) override;
+ /// Emits a masked region.
+ /// \param MaskedOpGen Generator for the statement associated with the given
+ /// masked region.
+ void emitMaskedRegion(CodeGenFunction &CGF,
+ const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc,
+ const Expr *Filter = nullptr) override;
+
+ /// Emits a masked region.
+ /// \param MaskedOpGen Generator for the statement associated with the given
+ /// masked region.
+
/// Emits code for a taskyield directive.
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override;
diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp
index daecb44..c2b64c8 100644
--- a/lib/CodeGen/CGStmt.cpp
+++ b/lib/CodeGen/CGStmt.cpp
@@ -382,7 +382,7 @@
llvm_unreachable("Dispatch directive not supported yet.");
break;
case Stmt::OMPMaskedDirectiveClass:
- llvm_unreachable("Masked directive not supported yet.");
+ EmitOMPMaskedDirective(cast<OMPMaskedDirective>(*S));
break;
}
}
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
index 4b93fdd..827102b 100644
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -3844,6 +3844,55 @@
emitMaster(*this, S);
}
+static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ CGF.EmitStmt(S.getRawStmt());
+ };
+ Expr *Filter = nullptr;
+ if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
+ Filter = FilterClause->getThreadID();
+ CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(),
+ Filter);
+}
+
+void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
+ if (CGM.getLangOpts().OpenMPIRBuilder) {
+ llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+
+ const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
+ const Expr *Filter = nullptr;
+ if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
+ Filter = FilterClause->getThreadID();
+ llvm::Value *FilterVal = Filter
+ ? EmitScalarExpr(Filter, CGM.Int32Ty)
+ : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
+
+ auto FiniCB = [this](InsertPointTy IP) {
+ OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+ };
+
+ auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP,
+ llvm::BasicBlock &FiniBB) {
+ OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
+ OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MaskedRegionBodyStmt,
+ CodeGenIP, FiniBB);
+ };
+
+ LexicalScope Scope(*this, S.getSourceRange());
+ EmitStopPoint(&S);
+ Builder.restoreIP(
+ OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal));
+
+ return;
+ }
+ LexicalScope Scope(*this, S.getSourceRange());
+ EmitStopPoint(&S);
+ emitMasked(*this, S);
+}
+
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
if (CGM.getLangOpts().OpenMPIRBuilder) {
llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
@@ -6930,7 +6979,8 @@
if (D.getDirectiveKind() == OMPD_atomic ||
D.getDirectiveKind() == OMPD_critical ||
D.getDirectiveKind() == OMPD_section ||
- D.getDirectiveKind() == OMPD_master) {
+ D.getDirectiveKind() == OMPD_master ||
+ D.getDirectiveKind() == OMPD_masked) {
EmitStmt(D.getAssociatedStmt());
} else {
auto LPCRegion =
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index ceb161f..0b3da9e 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -3421,6 +3421,7 @@
void EmitOMPSectionDirective(const OMPSectionDirective &S);
void EmitOMPSingleDirective(const OMPSingleDirective &S);
void EmitOMPMasterDirective(const OMPMasterDirective &S);
+ void EmitOMPMaskedDirective(const OMPMaskedDirective &S);
void EmitOMPCriticalDirective(const OMPCriticalDirective &S);
void EmitOMPParallelForDirective(const OMPParallelForDirective &S);
void EmitOMPParallelForSimdDirective(const OMPParallelForSimdDirective &S);
diff --git a/test/OpenMP/masked_codegen.cpp b/test/OpenMP/masked_codegen.cpp
new file mode 100644
index 0000000..97cb037
--- /dev/null
+++ b/test/OpenMP/masked_codegen.cpp
@@ -0,0 +1,143 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=ALL,NORMAL
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=ALL,NORMAL
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=51 -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=ALL,IRBUILDER
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=ALL,IRBUILDER
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=51 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fopenmp-version=51 -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// ALL: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
+
+// ALL: define {{.*}}void [[FOO:@.+]]()
+
+void foo() { extern void mayThrow(); mayThrow(); }
+
+// ALL-LABEL: @main
+// TERM_DEBUG-LABEL: @main
+int main() {
+ // ALL: [[A_ADDR:%.+]] = alloca i8
+ char a;
+
+// ALL: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
+// ALL: [[RES:%.+]] = call {{.*}}i32 @__kmpc_masked([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 0)
+// ALL-NEXT: [[IS_MASKED:%.+]] = icmp ne i32 [[RES]], 0
+// ALL-NEXT: br i1 [[IS_MASKED]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+// ALL: [[THEN]]
+// ALL-NEXT: store i8 2, i8* [[A_ADDR]]
+// ALL-NEXT: call {{.*}}void @__kmpc_end_masked([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// ALL-NEXT: br label {{%?}}[[EXIT]]
+// ALL: [[EXIT]]
+#pragma omp masked
+ a = 2;
+// IRBUILDER: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
+// ALL: [[RES:%.+]] = call {{.*}}i32 @__kmpc_masked([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 2)
+// ALL-NEXT: [[IS_MASKED:%.+]] = icmp ne i32 [[RES]], 0
+// ALL-NEXT: br i1 [[IS_MASKED]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+// ALL: [[THEN]]
+// IRBUILDER-NEXT: call {{.*}}void [[FOO]]()
+// NORMAL-NEXT: invoke {{.*}}void [[FOO]]()
+// ALL: call {{.*}}void @__kmpc_end_masked([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// ALL-NEXT: br label {{%?}}[[EXIT]]
+// ALL: [[EXIT]]
+#pragma omp masked filter(2)
+ foo();
+// ALL: store i32 9, i32* [[X:.+]],
+// ALL: [[X_VAL:%.+]] = load i32, i32* [[X]]
+// IRBUILDER: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
+// ALL: [[RES:%.+]] = call {{.*}}i32 @__kmpc_masked([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 [[X_VAL]])
+// ALL-NEXT: [[IS_MASKED:%.+]] = icmp ne i32 [[RES]], 0
+// ALL-NEXT: br i1 [[IS_MASKED]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+// ALL: [[THEN]]
+// IRBUILDER-NEXT: call {{.*}}void [[FOO]]()
+// NORMAL-NEXT: invoke {{.*}}void [[FOO]]()
+// ALL: call {{.*}}void @__kmpc_end_masked([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// ALL-NEXT: br label {{%?}}[[EXIT]]
+// ALL: [[EXIT]]
+ int x = 9;
+#pragma omp masked filter(x)
+ foo();
+ // ALL-NOT: call i32 @__kmpc_masked
+ // ALL-NOT: call void @__kmpc_end_masked
+ return a;
+}
+
+// ALL-LABEL: lambda_masked
+// TERM_DEBUG-LABEL: lambda_masked
+void lambda_masked(int a, int b) {
+ auto l = [=]() {
+#pragma omp masked
+ {
+ // ALL: call i32 @__kmpc_masked(
+ int c = a + b;
+ }
+ };
+
+ l();
+
+ auto l1 = [=]() {
+#pragma omp parallel
+#pragma omp masked filter(1)
+ {
+ // ALL: call i32 @__kmpc_masked(
+ int c = a + b;
+ }
+ };
+
+ l1();
+
+ int y = 1;
+ auto l2 = [=](int yy) {
+#pragma omp parallel
+#pragma omp masked filter(yy)
+ {
+ // ALL: call i32 @__kmpc_masked(
+ int c = a + b;
+ }
+ };
+
+ l2(y);
+}
+
+// ALL-LABEL: parallel_masked
+// TERM_DEBUG-LABEL: parallel_masked
+void parallel_masked() {
+#pragma omp parallel
+#pragma omp masked filter(1)
+ // TERM_DEBUG-NOT: __kmpc_global_thread_num
+ // TERM_DEBUG: call i32 @__kmpc_masked({{.+}}), !dbg [[DBG_LOC_START:![0-9]+]]
+ // TERM_DEBUG: invoke void {{.*}}foo{{.*}}()
+ // TERM_DEBUG: unwind label %[[TERM_LPAD:.+]],
+ // TERM_DEBUG-NOT: __kmpc_global_thread_num
+ // TERM_DEBUG: call void @__kmpc_end_masked({{.+}}), !dbg [[DBG_LOC_END:![0-9]+]]
+ // TERM_DEBUG: [[TERM_LPAD]]
+ // TERM_DEBUG: call void @__clang_call_terminate
+ // TERM_DEBUG: unreachable
+ foo();
+
+ int x;
+#pragma omp parallel
+#pragma omp masked filter(x)
+ // TERM_DEBUG-NOT: __kmpc_global_thread_num
+ // TERM_DEBUG: call i32 @__kmpc_masked({{.+}}), !dbg [[DBG_LOC_START:![0-9]+]]
+ // TERM_DEBUG: invoke void {{.*}}foo{{.*}}()
+ // TERM_DEBUG: unwind label %[[TERM_LPAD:.+]],
+ // TERM_DEBUG-NOT: __kmpc_global_thread_num
+ // TERM_DEBUG: call void @__kmpc_end_masked({{.+}}), !dbg [[DBG_LOC_END:![0-9]+]]
+ // TERM_DEBUG: [[TERM_LPAD]]
+ // TERM_DEBUG: call void @__clang_call_terminate
+ // TERM_DEBUG: unreachable
+ foo();
+}
+// TERM_DEBUG-DAG: [[DBG_LOC_START]] = !DILocation(line: [[@LINE-12]],
+// TERM_DEBUG-DAG: [[DBG_LOC_END]] = !DILocation(line: [[@LINE-3]],
+
+#endif