[OPENMP50]Support for declare variant directive for NVPTX target.

NVPTX does not support global aliases. Instead, we have to copy the full
body of the variant function for the original function.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@374387 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index 2ad6d01..7626f7a 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1264,9 +1264,10 @@
   loadOffloadInfoMetadata();
 }
 
-static bool tryEmitAlias(CodeGenModule &CGM, const GlobalDecl &NewGD,
-                         const GlobalDecl &OldGD, llvm::GlobalValue *OrigAddr,
-                         bool IsForDefinition) {
+bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD,
+                                            const GlobalDecl &OldGD,
+                                            llvm::GlobalValue *OrigAddr,
+                                            bool IsForDefinition) {
   // Emit at least a definition for the aliasee if the the address of the
   // original function is requested.
   if (IsForDefinition || OrigAddr)
@@ -1327,8 +1328,8 @@
     StringRef MangledName = CGM.getMangledName(Pair.second.second);
     llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName);
     // If not able to emit alias, just emit original declaration.
-    (void)tryEmitAlias(CGM, Pair.second.first, Pair.second.second, Addr,
-                       /*IsForDefinition=*/false);
+    (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr,
+                                /*IsForDefinition=*/false);
   }
 }
 
@@ -11273,7 +11274,7 @@
   if (NewFD == D)
     return false;
   GlobalDecl NewGD = GD.getWithDecl(NewFD);
-  if (tryEmitAlias(CGM, NewGD, GD, Orig, IsForDefinition)) {
+  if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) {
     DeferredVariantFunction.erase(D);
     return true;
   }
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h
index b8137a2..9215bd6 100644
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -291,6 +291,17 @@
   /// default location.
   virtual unsigned getDefaultLocationReserved2Flags() const { return 0; }
 
+  /// Tries to emit declare variant function for \p OldGD from \p NewGD.
+  /// \param OrigAddr LLVM IR value for \p OldGD.
+  /// \param IsForDefinition true, if requested emission for the definition of
+  /// \p OldGD.
+  /// \returns true, was able to emit a definition function for \p OldGD, which
+  /// points to \p NewGD.
+  virtual bool tryEmitDeclareVariant(const GlobalDecl &NewGD,
+                                     const GlobalDecl &OldGD,
+                                     llvm::GlobalValue *OrigAddr,
+                                     bool IsForDefinition);
+
   /// Returns default flags for the barriers depending on the directive, for
   /// which this barier is going to be emitted.
   static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind);
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 9e70a5a..83f74fe 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -1895,6 +1895,19 @@
   llvm_unreachable("Unknown flags are requested.");
 }
 
+bool CGOpenMPRuntimeNVPTX::tryEmitDeclareVariant(const GlobalDecl &NewGD,
+                                                 const GlobalDecl &OldGD,
+                                                 llvm::GlobalValue *OrigAddr,
+                                                 bool IsForDefinition) {
+  // Emit the function in OldGD with the body from NewGD, if NewGD is defined.
+  auto *NewFD = cast<FunctionDecl>(NewGD.getDecl());
+  if (NewFD->isDefined()) {
+    CGM.emitOpenMPDeviceFunctionRedefinition(OldGD, NewGD, OrigAddr);
+    return true;
+  }
+  return false;
+}
+
 CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
     : CGOpenMPRuntime(CGM, "_", "$") {
   if (!CGM.getLangOpts().OpenMPIsDevice)
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index e7fd458..0f78627 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -193,6 +193,18 @@
   /// Full/Lightweight runtime mode. Used for better optimization.
   unsigned getDefaultLocationReserved2Flags() const override;
 
+  /// Tries to emit declare variant function for \p OldGD from \p NewGD.
+  /// \param OrigAddr LLVM IR value for \p OldGD.
+  /// \param IsForDefinition true, if requested emission for the definition of
+  /// \p OldGD.
+  /// \returns true, was able to emit a definition function for \p OldGD, which
+  /// points to \p NewGD.
+  /// NVPTX backend does not support global aliases, so just use the function,
+  /// emitted for \p NewGD instead of \p OldGD.
+  bool tryEmitDeclareVariant(const GlobalDecl &NewGD, const GlobalDecl &OldGD,
+                             llvm::GlobalValue *OrigAddr,
+                             bool IsForDefinition) override;
+
 public:
   explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
   void clear() override;
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index 080914a..eab48cc 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -2742,6 +2742,50 @@
     EmitGlobalFunctionDefinition(GD, GV);
 }
 
+void CodeGenModule::emitOpenMPDeviceFunctionRedefinition(
+    GlobalDecl OldGD, GlobalDecl NewGD, llvm::GlobalValue *GV) {
+  assert(getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice &&
+         OpenMPRuntime && "Expected OpenMP device mode.");
+  const auto *D = cast<FunctionDecl>(OldGD.getDecl());
+
+  // Compute the function info and LLVM type.
+  const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(OldGD);
+  llvm::FunctionType *Ty = getTypes().GetFunctionType(FI);
+
+  // Get or create the prototype for the function.
+  if (!GV || (GV->getType()->getElementType() != Ty)) {
+    GV = cast<llvm::GlobalValue>(GetOrCreateLLVMFunction(
+        getMangledName(OldGD), Ty, GlobalDecl(), /*ForVTable=*/false,
+        /*DontDefer=*/true, /*IsThunk=*/false, llvm::AttributeList(),
+        ForDefinition));
+    SetFunctionAttributes(OldGD, cast<llvm::Function>(GV),
+                          /*IsIncompleteFunction=*/false,
+                          /*IsThunk=*/false);
+  }
+  // We need to set linkage and visibility on the function before
+  // generating code for it because various parts of IR generation
+  // want to propagate this information down (e.g. to local static
+  // declarations).
+  auto *Fn = cast<llvm::Function>(GV);
+  setFunctionLinkage(OldGD, Fn);
+
+  // FIXME: this is redundant with part of
+  // setFunctionDefinitionAttributes
+  setGVProperties(Fn, OldGD);
+
+  MaybeHandleStaticInExternC(D, Fn);
+
+  maybeSetTrivialComdat(*D, *Fn);
+
+  CodeGenFunction(*this).GenerateCode(NewGD, Fn, FI);
+
+  setNonAliasAttributes(OldGD, Fn);
+  SetLLVMFunctionAttributesForDefinition(D, Fn);
+
+  if (D->hasAttr<AnnotateAttr>())
+    AddGlobalAnnotations(D, Fn);
+}
+
 void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
   const auto *D = cast<ValueDecl>(GD.getDecl());
 
diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h
index 95964af..597b8d7 100644
--- a/lib/CodeGen/CodeGenModule.h
+++ b/lib/CodeGen/CodeGenModule.h
@@ -1270,6 +1270,11 @@
   /// \param D Requires declaration
   void EmitOMPRequiresDecl(const OMPRequiresDecl *D);
 
+  /// Emits the definition of \p OldGD function with body from \p NewGD.
+  /// Required for proper handling of declare variant directive on the GPU.
+  void emitOpenMPDeviceFunctionRedefinition(GlobalDecl OldGD, GlobalDecl NewGD,
+                                            llvm::GlobalValue *GV);
+
   /// Returns whether the given record has hidden LTO visibility and therefore
   /// may participate in (single-module) CFI and whole-program vtable
   /// optimization.
diff --git a/test/OpenMP/nvptx_declare_variant_implementation_vendor_codegen.cpp b/test/OpenMP/nvptx_declare_variant_implementation_vendor_codegen.cpp
new file mode 100644
index 0000000..04870f0
--- /dev/null
+++ b/test/OpenMP/nvptx_declare_variant_implementation_vendor_codegen.cpp
@@ -0,0 +1,158 @@
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=50
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -fopenmp-version=50 | FileCheck %s --implicit-check-not='ret i32 {{1|81|84}}'
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t -fopenmp-version=50
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - -fopenmp-version=50 | FileCheck %s --implicit-check-not='ret i32 {{1|81|84}}'
+// expected-no-diagnostics
+
+// CHECK-NOT: ret i32 {{1|81|84}}
+// CHECK-DAG: define {{.*}}i32 @_Z3barv()
+// CHECK-DAG: define {{.*}}i32 @_ZN16SpecSpecialFuncs6MethodEv(%struct.SpecSpecialFuncs* %{{.+}})
+// CHECK-DAG: define {{.*}}i32 @_ZN12SpecialFuncs6MethodEv(%struct.SpecialFuncs* %{{.+}})
+// CHECK-DAG: define linkonce_odr {{.*}}i32 @_ZN16SpecSpecialFuncs6methodEv(%struct.SpecSpecialFuncs* %{{.+}})
+// CHECK-DAG: define linkonce_odr {{.*}}i32 @_ZN12SpecialFuncs6methodEv(%struct.SpecialFuncs* %{{.+}})
+// CHECK-DAG: define {{.*}}i32 @_Z5prio_v()
+// CHECK-DAG: define internal i32 @_ZL6prio1_v()
+// CHECK-DAG: define {{.*}}i32 @_Z4callv()
+// CHECK-DAG: define internal i32 @_ZL9stat_usedv()
+// CHECK-DAG: define {{.*}}i32 @fn_linkage()
+// CHECK-DAG: define {{.*}}i32 @_Z11fn_linkage1v()
+
+// CHECK-DAG: ret i32 2
+// CHECK-DAG: ret i32 3
+// CHECK-DAG: ret i32 4
+// CHECK-DAG: ret i32 5
+// CHECK-DAG: ret i32 6
+// CHECK-DAG: ret i32 7
+// CHECK-DAG: ret i32 82
+// CHECK-DAG: ret i32 83
+// CHECK-DAG: ret i32 85
+// CHECK-DAG: ret i32 86
+// CHECK-DAG: ret i32 87
+
+// Outputs for function members
+// CHECK-DAG: ret i32 6
+// CHECK-DAG: ret i32 7
+// CHECK-NOT: ret i32 {{1|81|84}}
+
+#ifndef HEADER
+#define HEADER
+
+int foo() { return 2; }
+int bazzz();
+int test();
+static int stat_unused_();
+static int stat_used_();
+
+#pragma omp declare target
+
+#pragma omp declare variant(foo) match(implementation = {vendor(llvm)})
+int bar() { return 1; }
+
+#pragma omp declare variant(bazzz) match(implementation = {vendor(llvm)})
+int baz() { return 1; }
+
+#pragma omp declare variant(test) match(implementation = {vendor(llvm)})
+int call() { return 1; }
+
+#pragma omp declare variant(stat_unused_) match(implementation = {vendor(llvm)})
+static int stat_unused() { return 1; }
+
+#pragma omp declare variant(stat_used_) match(implementation = {vendor(llvm)})
+static int stat_used() { return 1; }
+
+#pragma omp end declare target
+
+int main() {
+  int res;
+#pragma omp target map(from \
+                       : res)
+  res = bar() + baz() + call();
+  return res;
+}
+
+int test() { return 3; }
+static int stat_unused_() { return 4; }
+static int stat_used_() { return 5; }
+
+#pragma omp declare target
+
+struct SpecialFuncs {
+  void vd() {}
+  SpecialFuncs();
+  ~SpecialFuncs();
+
+  int method_() { return 6; }
+#pragma omp declare variant(SpecialFuncs::method_) \
+    match(implementation = {vendor(llvm)})
+  int method() { return 1; }
+#pragma omp declare variant(SpecialFuncs::method_) \
+    match(implementation = {vendor(llvm)})
+  int Method();
+} s;
+
+int SpecialFuncs::Method() { return 1; }
+
+struct SpecSpecialFuncs {
+  void vd() {}
+  SpecSpecialFuncs();
+  ~SpecSpecialFuncs();
+
+  int method_();
+#pragma omp declare variant(SpecSpecialFuncs::method_) \
+    match(implementation = {vendor(llvm)})
+  int method() { return 1; }
+#pragma omp declare variant(SpecSpecialFuncs::method_) \
+    match(implementation = {vendor(llvm)})
+  int Method();
+} s1;
+
+#pragma omp end declare target
+
+int SpecSpecialFuncs::method_() { return 7; }
+int SpecSpecialFuncs::Method() { return 1; }
+
+int prio() { return 81; }
+int prio1() { return 82; }
+static int prio2() { return 83; }
+static int prio3() { return 84; }
+static int prio4() { return 84; }
+int fn_linkage_variant() { return 85; }
+extern "C" int fn_linkage_variant1() { return 86; }
+int fn_variant2() { return 1; }
+
+#pragma omp declare target
+
+void xxx() {
+  (void)s.method();
+  (void)s1.method();
+}
+
+#pragma omp declare variant(prio) match(implementation = {vendor(llvm)})
+#pragma omp declare variant(prio1) match(implementation = {vendor(score(1) \
+                                                                  : llvm)})
+int prio_() { return 1; }
+
+#pragma omp declare variant(prio4) match(implementation = {vendor(score(3) \
+                                                                  : llvm)})
+#pragma omp declare variant(prio2) match(implementation = {vendor(score(5) \
+                                                                  : llvm)})
+#pragma omp declare variant(prio3) match(implementation = {vendor(score(1) \
+                                                                  : llvm)})
+static int prio1_() { return 1; }
+
+int int_fn() { return prio1_(); }
+
+extern "C" {
+#pragma omp declare variant(fn_linkage_variant) match(implementation = {vendor(llvm)})
+int fn_linkage() { return 1; }
+}
+
+#pragma omp declare variant(fn_linkage_variant1) match(implementation = {vendor(llvm)})
+int fn_linkage1() { return 1; }
+
+#pragma omp declare variant(fn_variant2) match(implementation = {vendor(llvm, ibm)})
+int fn2() { return 87; }
+
+#pragma omp end declare target
+
+#endif // HEADER