[libomptarget] Load images in order of registration

This makes sure that images are loaded in the order in which they are registered with libomptarget.

If a target can load multiple images and these images depend on each other (for example if one image contains the program's target regions and one image contains library code), then the order in which images are loaded can be important for symbol resolution (for example, in the VE plugin).
In this case, because the same code exists in the host binaries, the order in which the host linker loads them (which is also the order in which images are registered with libomptarget) is the order in which the images have to be loaded onto the device.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D95530

GitOrigin-RevId: 542d9c21541d64da7e1c63a67563aa93ebc93446
diff --git a/libomptarget/include/omptargetplugin.h b/libomptarget/include/omptargetplugin.h
index 6785e77..b67a441 100644
--- a/libomptarget/include/omptargetplugin.h
+++ b/libomptarget/include/omptargetplugin.h
@@ -36,6 +36,11 @@
 // function to move data from source device to destination device directly.
 int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDevId, int32_t DstDevId);
 
+// Return an integer other than zero if the plugin can handle images which do
+// not contain target regions and global variables (but can contain other
+// functions)
+int32_t __tgt_rtl_supports_empty_images();
+
 // Initialize the requires flags for the device.
 int64_t __tgt_rtl_init_requires(int64_t RequiresFlags);
 
diff --git a/libomptarget/plugins/exports b/libomptarget/plugins/exports
index 48f81c0..63042a0 100644
--- a/libomptarget/plugins/exports
+++ b/libomptarget/plugins/exports
@@ -21,6 +21,7 @@
     __tgt_rtl_synchronize;
     __tgt_rtl_register_lib;
     __tgt_rtl_unregister_lib;
+    __tgt_rtl_supports_empty_images;
   local:
     *;
 };
diff --git a/libomptarget/plugins/ve/src/rtl.cpp b/libomptarget/plugins/ve/src/rtl.cpp
index 186247d..a77cd31 100644
--- a/libomptarget/plugins/ve/src/rtl.cpp
+++ b/libomptarget/plugins/ve/src/rtl.cpp
@@ -444,3 +444,5 @@
   return __tgt_rtl_run_target_team_region(ID, Entry, Args, Offsets, NumArgs, 1,
                                           1, 0);
 }
+
+int32_t __tgt_rtl_supports_empty_images() { return 1; }
diff --git a/libomptarget/src/device.h b/libomptarget/src/device.h
index 84a0987..bdd269e 100644
--- a/libomptarget/src/device.h
+++ b/libomptarget/src/device.h
@@ -241,6 +241,8 @@
   /// Translation table retreived from the binary
   HostEntriesBeginToTransTableTy HostEntriesBeginToTransTable;
   std::mutex TrlTblMtx; ///< For Translation Table
+  /// Host offload entries in order of image registration
+  std::vector<__tgt_offload_entry *> HostEntriesBeginRegistrationOrder;
 
   /// Map from ptrs on the host to an entry in the Translation Table
   HostPtrToTableMapTy HostPtrToTableMap;
diff --git a/libomptarget/src/omptarget.cpp b/libomptarget/src/omptarget.cpp
index 3d218ab..f273b66 100644
--- a/libomptarget/src/omptarget.cpp
+++ b/libomptarget/src/omptarget.cpp
@@ -75,18 +75,21 @@
    */
   int32_t device_id = Device.DeviceID;
   int rc = OFFLOAD_SUCCESS;
+  bool supportsEmptyImages = Device.RTL->supports_empty_images &&
+                             Device.RTL->supports_empty_images() > 0;
 
   Device.PendingGlobalsMtx.lock();
   PM->TrlTblMtx.lock();
-  for (HostEntriesBeginToTransTableTy::iterator entry_it =
-           PM->HostEntriesBeginToTransTable.begin();
-       entry_it != PM->HostEntriesBeginToTransTable.end(); ++entry_it) {
-    TranslationTable *TransTable = &entry_it->second;
+  for (auto *HostEntriesBegin : PM->HostEntriesBeginRegistrationOrder) {
+    TranslationTable *TransTable =
+        &PM->HostEntriesBeginToTransTable[HostEntriesBegin];
     if (TransTable->HostTable.EntriesBegin ==
-        TransTable->HostTable.EntriesEnd) {
+            TransTable->HostTable.EntriesEnd &&
+        !supportsEmptyImages) {
       // No host entry so no need to proceed
       continue;
     }
+
     if (TransTable->TargetsTable[device_id] != 0) {
       // Library entries have already been processed
       continue;
diff --git a/libomptarget/src/rtl.cpp b/libomptarget/src/rtl.cpp
index 60f3d49..7bf4f9b 100644
--- a/libomptarget/src/rtl.cpp
+++ b/libomptarget/src/rtl.cpp
@@ -173,6 +173,8 @@
         dlsym(dynlib_handle, "__tgt_rtl_register_lib");
     *((void **)&R.unregister_lib) =
         dlsym(dynlib_handle, "__tgt_rtl_unregister_lib");
+    *((void **)&R.supports_empty_images) =
+        dlsym(dynlib_handle, "__tgt_rtl_supports_empty_images");
   }
 
   DP("RTLs loaded!\n");
@@ -334,6 +336,7 @@
       // Initialize (if necessary) translation table for this library.
       PM->TrlTblMtx.lock();
       if (!PM->HostEntriesBeginToTransTable.count(desc->HostEntriesBegin)) {
+        PM->HostEntriesBeginRegistrationOrder.push_back(desc->HostEntriesBegin);
         TranslationTable &TransTable =
             (PM->HostEntriesBeginToTransTable)[desc->HostEntriesBegin];
         TransTable.HostTable.EntriesBegin = desc->HostEntriesBegin;
diff --git a/libomptarget/src/rtl.h b/libomptarget/src/rtl.h
index 8daafe1..a67b868 100644
--- a/libomptarget/src/rtl.h
+++ b/libomptarget/src/rtl.h
@@ -54,6 +54,7 @@
   typedef int64_t(init_requires_ty)(int64_t);
   typedef int64_t(synchronize_ty)(int32_t, __tgt_async_info *);
   typedef int32_t (*register_lib_ty)(__tgt_bin_desc *);
+  typedef int32_t(supports_empty_images_ty)();
 
   int32_t Idx = -1;             // RTL index, index is the number of devices
                                 // of other RTLs that were registered before,
@@ -89,6 +90,7 @@
   synchronize_ty *synchronize = nullptr;
   register_lib_ty register_lib = nullptr;
   register_lib_ty unregister_lib = nullptr;
+  supports_empty_images_ty *supports_empty_images = nullptr;
 
   // Are there images associated with this RTL.
   bool isUsed = false;