[OpenMP] Added codegen for masked directive

Reviewed By: ABataev

Differential Revision: https://reviews.llvm.org/D100514

GitOrigin-RevId: e0c2125d1d1e72039b8e071d468d9f740c7dbfbd
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index 3714698..18ad506 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -2279,6 +2279,35 @@
   Action.Done(CGF);
 }
 
+void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
+                                       const RegionCodeGenTy &MaskedOpGen,
+                                       SourceLocation Loc, const Expr *Filter) {
+  if (!CGF.HaveInsertPoint())
+    return;
+  // if(__kmpc_masked(ident_t *, gtid, filter)) {
+  //   MaskedOpGen();
+  //   __kmpc_end_masked(iden_t *, gtid);
+  // }
+  // Prepare arguments and build a call to __kmpc_masked
+  llvm::Value *FilterVal = Filter
+                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
+                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
+  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
+                         FilterVal};
+  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
+                            getThreadID(CGF, Loc)};
+  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
+                            CGM.getModule(), OMPRTL___kmpc_masked),
+                        Args,
+                        OMPBuilder.getOrCreateRuntimeFunction(
+                            CGM.getModule(), OMPRTL___kmpc_end_masked),
+                        ArgsEnd,
+                        /*Conditional=*/true);
+  MaskedOpGen.setAction(Action);
+  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
+  Action.Done(CGF);
+}
+
 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                         SourceLocation Loc) {
   if (!CGF.HaveInsertPoint())
@@ -6232,7 +6261,8 @@
     return;
   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                  InnerKind != OMPD_critical &&
-                                     InnerKind != OMPD_master);
+                                     InnerKind != OMPD_master &&
+                                     InnerKind != OMPD_masked);
   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
 }
 
@@ -12596,6 +12626,13 @@
   llvm_unreachable("Not supported in SIMD-only mode");
 }
 
+void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
+                                           const RegionCodeGenTy &MasterOpGen,
+                                           SourceLocation Loc,
+                                           const Expr *Filter) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
   llvm_unreachable("Not supported in SIMD-only mode");
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h
index 541904a..c35202a 100644
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -1012,6 +1012,14 @@
                                 const RegionCodeGenTy &MasterOpGen,
                                 SourceLocation Loc);
 
+  /// Emits a masked region.
+  /// \param MaskedOpGen Generator for the statement associated with the given
+  /// masked region.
+  virtual void emitMaskedRegion(CodeGenFunction &CGF,
+                                const RegionCodeGenTy &MaskedOpGen,
+                                SourceLocation Loc,
+                                const Expr *Filter = nullptr);
+
   /// Emits code for a taskyield directive.
   virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc);
 
@@ -1984,6 +1992,17 @@
                         const RegionCodeGenTy &MasterOpGen,
                         SourceLocation Loc) override;
 
+  /// Emits a masked region.
+  /// \param MaskedOpGen Generator for the statement associated with the given
+  /// masked region.
+  void emitMaskedRegion(CodeGenFunction &CGF,
+                        const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc,
+                        const Expr *Filter = nullptr) override;
+
+  /// Emits a masked region.
+  /// \param MaskedOpGen Generator for the statement associated with the given
+  /// masked region.
+
   /// Emits code for a taskyield directive.
   void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override;
 
diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp
index daecb44..c2b64c8 100644
--- a/lib/CodeGen/CGStmt.cpp
+++ b/lib/CodeGen/CGStmt.cpp
@@ -382,7 +382,7 @@
     llvm_unreachable("Dispatch directive not supported yet.");
     break;
   case Stmt::OMPMaskedDirectiveClass:
-    llvm_unreachable("Masked directive not supported yet.");
+    EmitOMPMaskedDirective(cast<OMPMaskedDirective>(*S));
     break;
   }
 }
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
index 4b93fdd..827102b 100644
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -3844,6 +3844,55 @@
   emitMaster(*this, S);
 }
 
+static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    Action.Enter(CGF);
+    CGF.EmitStmt(S.getRawStmt());
+  };
+  Expr *Filter = nullptr;
+  if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
+    Filter = FilterClause->getThreadID();
+  CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(),
+                                              Filter);
+}
+
+void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
+  if (CGM.getLangOpts().OpenMPIRBuilder) {
+    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+
+    const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
+    const Expr *Filter = nullptr;
+    if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
+      Filter = FilterClause->getThreadID();
+    llvm::Value *FilterVal = Filter
+                                 ? EmitScalarExpr(Filter, CGM.Int32Ty)
+                                 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
+
+    auto FiniCB = [this](InsertPointTy IP) {
+      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+    };
+
+    auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
+                                                  InsertPointTy CodeGenIP,
+                                                  llvm::BasicBlock &FiniBB) {
+      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
+      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MaskedRegionBodyStmt,
+                                             CodeGenIP, FiniBB);
+    };
+
+    LexicalScope Scope(*this, S.getSourceRange());
+    EmitStopPoint(&S);
+    Builder.restoreIP(
+        OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal));
+
+    return;
+  }
+  LexicalScope Scope(*this, S.getSourceRange());
+  EmitStopPoint(&S);
+  emitMasked(*this, S);
+}
+
 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
   if (CGM.getLangOpts().OpenMPIRBuilder) {
     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
@@ -6930,7 +6979,8 @@
   if (D.getDirectiveKind() == OMPD_atomic ||
       D.getDirectiveKind() == OMPD_critical ||
       D.getDirectiveKind() == OMPD_section ||
-      D.getDirectiveKind() == OMPD_master) {
+      D.getDirectiveKind() == OMPD_master ||
+      D.getDirectiveKind() == OMPD_masked) {
     EmitStmt(D.getAssociatedStmt());
   } else {
     auto LPCRegion =
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index ceb161f..0b3da9e 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -3421,6 +3421,7 @@
   void EmitOMPSectionDirective(const OMPSectionDirective &S);
   void EmitOMPSingleDirective(const OMPSingleDirective &S);
   void EmitOMPMasterDirective(const OMPMasterDirective &S);
+  void EmitOMPMaskedDirective(const OMPMaskedDirective &S);
   void EmitOMPCriticalDirective(const OMPCriticalDirective &S);
   void EmitOMPParallelForDirective(const OMPParallelForDirective &S);
   void EmitOMPParallelForSimdDirective(const OMPParallelForSimdDirective &S);
diff --git a/test/OpenMP/masked_codegen.cpp b/test/OpenMP/masked_codegen.cpp
new file mode 100644
index 0000000..97cb037
--- /dev/null
+++ b/test/OpenMP/masked_codegen.cpp
@@ -0,0 +1,143 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=ALL,NORMAL
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=ALL,NORMAL
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=51 -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=ALL,IRBUILDER
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=ALL,IRBUILDER
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=51 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fopenmp-version=51 -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// ALL:       [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
+
+// ALL:       define {{.*}}void [[FOO:@.+]]()
+
+void foo() { extern void mayThrow(); mayThrow(); }
+
+// ALL-LABEL: @main
+// TERM_DEBUG-LABEL: @main
+int main() {
+  // ALL:      			[[A_ADDR:%.+]] = alloca i8
+  char a;
+
+// ALL:       			[[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
+// ALL:       			[[RES:%.+]] = call {{.*}}i32 @__kmpc_masked([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 0)
+// ALL-NEXT:  			[[IS_MASKED:%.+]] = icmp ne i32 [[RES]], 0
+// ALL-NEXT:  			br i1 [[IS_MASKED]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+// ALL:       			[[THEN]]
+// ALL-NEXT:  			store i8 2, i8* [[A_ADDR]]
+// ALL-NEXT:  			call {{.*}}void @__kmpc_end_masked([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// ALL-NEXT:  			br label {{%?}}[[EXIT]]
+// ALL:       			[[EXIT]]
+#pragma omp masked
+  a = 2;
+// IRBUILDER: 			[[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
+// ALL:       			[[RES:%.+]] = call {{.*}}i32 @__kmpc_masked([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 2)
+// ALL-NEXT:  			[[IS_MASKED:%.+]] = icmp ne i32 [[RES]], 0
+// ALL-NEXT:  			br i1 [[IS_MASKED]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+// ALL:       			[[THEN]]
+// IRBUILDER-NEXT:  call {{.*}}void [[FOO]]()
+// NORMAL-NEXT:  		invoke {{.*}}void [[FOO]]()
+// ALL:       			call {{.*}}void @__kmpc_end_masked([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// ALL-NEXT:  			br label {{%?}}[[EXIT]]
+// ALL:       			[[EXIT]]
+#pragma omp masked filter(2)
+  foo();
+// ALL:                         store i32 9, i32* [[X:.+]],
+// ALL:                         [[X_VAL:%.+]] = load i32, i32* [[X]]
+// IRBUILDER: 			[[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
+// ALL:       			[[RES:%.+]] = call {{.*}}i32 @__kmpc_masked([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 [[X_VAL]])
+// ALL-NEXT:  			[[IS_MASKED:%.+]] = icmp ne i32 [[RES]], 0
+// ALL-NEXT:  			br i1 [[IS_MASKED]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+// ALL:       			[[THEN]]
+// IRBUILDER-NEXT:  call {{.*}}void [[FOO]]()
+// NORMAL-NEXT:  		invoke {{.*}}void [[FOO]]()
+// ALL:       			call {{.*}}void @__kmpc_end_masked([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// ALL-NEXT:  			br label {{%?}}[[EXIT]]
+// ALL:       			[[EXIT]]
+  int x = 9;
+#pragma omp masked filter(x)
+  foo();
+  // ALL-NOT:   call i32 @__kmpc_masked
+  // ALL-NOT:   call void @__kmpc_end_masked
+  return a;
+}
+
+// ALL-LABEL:        lambda_masked
+// TERM_DEBUG-LABEL: lambda_masked
+void lambda_masked(int a, int b) {
+  auto l = [=]() {
+#pragma omp masked
+    {
+      // ALL: call i32 @__kmpc_masked(
+      int c = a + b;
+    }
+  };
+
+  l();
+
+  auto l1 = [=]() {
+#pragma omp parallel
+#pragma omp masked filter(1)
+    {
+      // ALL: call i32 @__kmpc_masked(
+      int c = a + b;
+    }
+  };
+
+  l1();
+
+  int y = 1;
+  auto l2 = [=](int yy) {
+#pragma omp parallel
+#pragma omp masked filter(yy)
+    {
+      // ALL: call i32 @__kmpc_masked(
+      int c = a + b;
+    }
+  };
+
+  l2(y);
+}
+
+// ALL-LABEL:      parallel_masked
+// TERM_DEBUG-LABEL: parallel_masked
+void parallel_masked() {
+#pragma omp parallel
+#pragma omp masked filter(1)
+  // TERM_DEBUG-NOT: __kmpc_global_thread_num
+  // TERM_DEBUG:     call i32 @__kmpc_masked({{.+}}), !dbg [[DBG_LOC_START:![0-9]+]]
+  // TERM_DEBUG:     invoke void {{.*}}foo{{.*}}()
+  // TERM_DEBUG:     unwind label %[[TERM_LPAD:.+]],
+  // TERM_DEBUG-NOT: __kmpc_global_thread_num
+  // TERM_DEBUG:     call void @__kmpc_end_masked({{.+}}), !dbg [[DBG_LOC_END:![0-9]+]]
+  // TERM_DEBUG:     [[TERM_LPAD]]
+  // TERM_DEBUG:     call void @__clang_call_terminate
+  // TERM_DEBUG:     unreachable
+  foo();
+
+  int x;
+#pragma omp parallel
+#pragma omp masked filter(x)
+  // TERM_DEBUG-NOT: __kmpc_global_thread_num
+  // TERM_DEBUG:     call i32 @__kmpc_masked({{.+}}), !dbg [[DBG_LOC_START:![0-9]+]]
+  // TERM_DEBUG:     invoke void {{.*}}foo{{.*}}()
+  // TERM_DEBUG:     unwind label %[[TERM_LPAD:.+]],
+  // TERM_DEBUG-NOT: __kmpc_global_thread_num
+  // TERM_DEBUG:     call void @__kmpc_end_masked({{.+}}), !dbg [[DBG_LOC_END:![0-9]+]]
+  // TERM_DEBUG:     [[TERM_LPAD]]
+  // TERM_DEBUG:     call void @__clang_call_terminate
+  // TERM_DEBUG:     unreachable
+  foo();
+}
+// TERM_DEBUG-DAG: [[DBG_LOC_START]] = !DILocation(line: [[@LINE-12]],
+// TERM_DEBUG-DAG: [[DBG_LOC_END]] = !DILocation(line: [[@LINE-3]],
+
+#endif