[OpenMP][libomptarget] Enable usage of unified memory for declare target link variables

Summary: This patch lets device code access the host copy of a declare target link variable directly when unified shared memory is available, instead of requiring a separate device copy.

Reviewers: ABataev, caomhin, grokos

Reviewed By: grokos

Subscribers: Hahnfeld, guansong, jdoerfert, openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D60884

git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@362505 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/libomptarget/include/omptargetplugin.h b/libomptarget/include/omptargetplugin.h
index 2876bfb..e03416c 100644
--- a/libomptarget/include/omptargetplugin.h
+++ b/libomptarget/include/omptargetplugin.h
@@ -31,6 +31,9 @@
 // having to load the library, which can be expensive.
 int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image);
 
+// Initialize the requires flags for the plugin. Optional; takes no device ID.
+int64_t __tgt_rtl_init_requires(int64_t RequiresFlags);
+
 // Initialize the specified device. In case of success return 0; otherwise
 // return an error code.
 int32_t __tgt_rtl_init_device(int32_t ID);
diff --git a/libomptarget/plugins/cuda/src/rtl.cpp b/libomptarget/plugins/cuda/src/rtl.cpp
index fc0c1ec..844afa1 100644
--- a/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/libomptarget/plugins/cuda/src/rtl.cpp
@@ -111,6 +111,9 @@
   int EnvNumTeams;
   int EnvTeamLimit;
 
+  // OpenMP Requires Flags
+  int64_t RequiresFlags;
+
   //static int EnvNumThreads;
   static const int HardTeamLimit = 1<<16; // 64k
   static const int HardThreadLimit = 1024;
@@ -227,6 +230,9 @@
     } else {
       EnvNumTeams = -1;
     }
+
+    // Default state.
+    RequiresFlags = OMP_REQ_UNDEFINED;
   }
 
   ~RTLDeviceInfoTy() {
@@ -264,6 +270,12 @@
 
 int32_t __tgt_rtl_number_of_devices() { return DeviceInfo.NumberOfDevices; }
 
+int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) {
+  DP("Init requires flags to %lld\n", static_cast<long long>(RequiresFlags));
+  DeviceInfo.RequiresFlags = RequiresFlags; // Plugin-wide, not per device.
+  return RequiresFlags; // Echo back the flags that were stored.
+}
+
 int32_t __tgt_rtl_init_device(int32_t device_id) {
 
   CUdevice cuDevice;
@@ -436,6 +448,17 @@
           DPxPTR(e - HostBegin), e->name, DPxPTR(cuptr));
       entry.addr = (void *)cuptr;
 
+      if ((DeviceInfo.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) &&
+          (e->flags & OMP_DECLARE_TARGET_LINK)) {
+        // With unified shared memory, target link variables can use host
+        // addresses directly, so copy the pointer stored at e->addr to cuptr.
+        // NOTE(review): the cuMemcpyHtoD result is not checked here.
+        cuMemcpyHtoD(cuptr, e->addr, sizeof(void *));
+        DP("Copy linked variable host address (" DPxMOD ") "
+           "to device address (" DPxMOD ")\n",
+           DPxPTR(*((void **)e->addr)), DPxPTR(cuptr));
+      }
+
       DeviceInfo.addOffloadEntry(device_id, entry);
 
       continue;
diff --git a/libomptarget/plugins/exports b/libomptarget/plugins/exports
index 3f9f7d4..a14bedf 100644
--- a/libomptarget/plugins/exports
+++ b/libomptarget/plugins/exports
@@ -2,6 +2,7 @@
   global:
     __tgt_rtl_is_valid_binary;
     __tgt_rtl_number_of_devices;
+    __tgt_rtl_init_requires;
     __tgt_rtl_init_device;
     __tgt_rtl_load_binary;
     __tgt_rtl_data_alloc;
diff --git a/libomptarget/src/device.cpp b/libomptarget/src/device.cpp
index a946b92..5ecba57 100644
--- a/libomptarget/src/device.cpp
+++ b/libomptarget/src/device.cpp
@@ -275,6 +275,9 @@
 
 /// Init device, should not be called directly.
 void DeviceTy::init() {
+  // Make call to init_requires if it exists for this plugin.
+  if (RTL->init_requires)
+    RTL->init_requires(RTLRequiresFlags);
   int32_t rc = RTL->init_device(RTLDeviceID);
   if (rc == OFFLOAD_SUCCESS) {
     IsInit = true;
diff --git a/libomptarget/src/rtl.cpp b/libomptarget/src/rtl.cpp
index 770ae36..4eb7ab7 100644
--- a/libomptarget/src/rtl.cpp
+++ b/libomptarget/src/rtl.cpp
@@ -107,6 +107,10 @@
               dynlib_handle, "__tgt_rtl_run_target_team_region")))
       continue;
 
+    // Optional functions
+    *((void**) &R.init_requires) = dlsym(
+        dynlib_handle, "__tgt_rtl_init_requires");
+
     // No devices are supported by this RTL?
     if (!(R.NumberOfDevices = R.number_of_devices())) {
       DP("No devices supported in this RTL\n");
diff --git a/libomptarget/src/rtl.h b/libomptarget/src/rtl.h
index 381f23e..8148e81 100644
--- a/libomptarget/src/rtl.h
+++ b/libomptarget/src/rtl.h
@@ -36,6 +36,7 @@
                                  int32_t);
   typedef int32_t(run_team_region_ty)(int32_t, void *, void **, ptrdiff_t *,
                                       int32_t, int32_t, int32_t, uint64_t);
+  typedef int64_t(init_requires_ty)(int64_t);
 
   int32_t Idx;                     // RTL index, index is the number of devices
                                    // of other RTLs that were registered before,
@@ -60,6 +61,7 @@
   data_delete_ty *data_delete;
   run_region_ty *run_region;
   run_team_region_ty *run_team_region;
+  init_requires_ty *init_requires;
 
   // Are there images associated with this RTL.
   bool isUsed;
@@ -78,8 +80,8 @@
 #endif
         is_valid_binary(0), number_of_devices(0), init_device(0),
         load_binary(0), data_alloc(0), data_submit(0), data_retrieve(0),
-        data_delete(0), run_region(0), run_team_region(0), isUsed(false),
-        Mtx() {}
+        data_delete(0), run_region(0), run_team_region(0),
+        init_requires(0), isUsed(false), Mtx() {}
 
   RTLInfoTy(const RTLInfoTy &r) : Mtx() {
     Idx = r.Idx;
@@ -98,6 +100,7 @@
     data_delete = r.data_delete;
     run_region = r.run_region;
     run_team_region = r.run_team_region;
+    init_requires = r.init_requires;
     isUsed = r.isUsed;
   }
 };