[OpenMP][Libomptarget] Introduce changes to support remote plugin

In order to support remote execution, we need to be able to send the
target binary description to the remote host for registration (and
subsequent deregistration). To support this, I added these two
optional new functions to the plugin API:
- `__tgt_rtl_register_lib`
- `__tgt_rtl_unregister_lib`

These functions will be called to properly manage the instance of
libomptarget running on the remote host.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D93293

GitOrigin-RevId: 683719bc0cc8e12a5f9c06135fc97a13ef414f69
diff --git a/libomptarget/src/interface.cpp b/libomptarget/src/interface.cpp
index 85a289c..2395709 100644
--- a/libomptarget/src/interface.cpp
+++ b/libomptarget/src/interface.cpp
@@ -94,6 +94,14 @@
 /// adds a target shared library to the target execution image
 EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) {
   TIMESCOPE();
+  // Load the plugins exactly once, then hand the descriptor to every RTL that
+  // exports the optional register_lib hook; failures are only reported via DP.
+  std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, PM->RTLs);
+  for (auto &RTL : PM->RTLs.AllRTLs) {
+    if (RTL.register_lib && (*RTL.register_lib)(desc) != OFFLOAD_SUCCESS) {
+      DP("Could not register library with %s\n", RTL.RTLName.c_str());
+    }
+  }
   PM->RTLs.RegisterLib(desc);
 }
 
@@ -102,6 +110,13 @@
 EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) {
   TIMESCOPE();
   PM->RTLs.UnregisterLib(desc);
+  // Also notify every used RTL that exports the optional unregister_lib hook.
+  for (auto &RTL : PM->RTLs.UsedRTLs) {
+    if (RTL->unregister_lib &&
+        (*RTL->unregister_lib)(desc) != OFFLOAD_SUCCESS) {
+      DP("Could not unregister library with %s\n", RTL->RTLName.c_str());
+    }
+  }
 }
 
 /// creates host-to-target data mapping, stores it in the
diff --git a/libomptarget/src/rtl.cpp b/libomptarget/src/rtl.cpp
index 4a2a6d9..45ac5bb 100644
--- a/libomptarget/src/rtl.cpp
+++ b/libomptarget/src/rtl.cpp
@@ -29,6 +29,7 @@
     /* AArch64 target       */ "libomptarget.rtl.aarch64.so",
     /* SX-Aurora VE target  */ "libomptarget.rtl.ve.so",
     /* AMDGPU target        */ "libomptarget.rtl.amdgpu.so",
+    /* Remote target        */ "libomptarget.rtl.rpc.so",
 };
 
 PluginManager *PM;
@@ -168,6 +169,10 @@
         dlsym(dynlib_handle, "__tgt_rtl_data_exchange_async");
     *((void **)&R.is_data_exchangable) =
         dlsym(dynlib_handle, "__tgt_rtl_is_data_exchangable");
+    *((void **)&R.register_lib) =
+        dlsym(dynlib_handle, "__tgt_rtl_register_lib");
+    *((void **)&R.unregister_lib) =
+        dlsym(dynlib_handle, "__tgt_rtl_unregister_lib");
   }
 
   DP("RTLs loaded!\n");
@@ -277,9 +282,6 @@
 }
 
 void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
-  // Attempt to load all plugins available in the system.
-  std::call_once(initFlag, &RTLsTy::LoadRTLs, this);
-
   PM->RTLsMtx.lock();
   // Register the images with the RTLs that understand them, if any.
   for (int32_t i = 0; i < desc->NumDeviceImages; ++i) {
diff --git a/libomptarget/src/rtl.h b/libomptarget/src/rtl.h
index b9ead48..8daafe1 100644
--- a/libomptarget/src/rtl.h
+++ b/libomptarget/src/rtl.h
@@ -53,6 +53,7 @@
                                             __tgt_async_info *);
   typedef int64_t(init_requires_ty)(int64_t);
   typedef int64_t(synchronize_ty)(int32_t, __tgt_async_info *);
+  typedef int32_t (*register_lib_ty)(__tgt_bin_desc *);
 
   int32_t Idx = -1;             // RTL index, index is the number of devices
                                 // of other RTLs that were registered before,
@@ -86,6 +87,8 @@
   run_team_region_async_ty *run_team_region_async = nullptr;
   init_requires_ty *init_requires = nullptr;
   synchronize_ty *synchronize = nullptr;
+  register_lib_ty register_lib = nullptr;
+  register_lib_ty unregister_lib = nullptr;
 
   // Are there images associated with this RTL.
   bool isUsed = false;
@@ -97,14 +100,7 @@
 };
 
 /// RTLs identified in the system.
-class RTLsTy {
-private:
-  // Mutex-like object to guarantee thread-safety and unique initialization
-  // (i.e. the library attempts to load the RTLs (plugins) only once).
-  std::once_flag initFlag;
-  void LoadRTLs(); // not thread-safe
-
-public:
+struct RTLsTy {
   // List of the detected runtime libraries.
   std::list<RTLInfoTy> AllRTLs;
 
@@ -124,8 +120,12 @@
 
   // Unregister a shared library from all RTLs.
   void UnregisterLib(__tgt_bin_desc *desc);
-};
 
+  // Mutex-like object to guarantee thread-safety and unique initialization
+  // (i.e. the library attempts to load the RTLs (plugins) only once).
+  std::once_flag initFlag;
+  void LoadRTLs(); // not thread-safe
+};
 
 /// Map between the host entry begin and the translation table. Each
 /// registered library gets one TranslationTable. Use the map from