[OpenMP][NFC] Unify the `target` API with the others by passing a `__tgt_async_info` pointer

Reviewed By: tianshilei1992

Differential Revision: https://reviews.llvm.org/D96430

GitOrigin-RevId: 942728763b8e8a6ec440d29f0f8608a1ac1f9cee
diff --git a/libomptarget/src/interface.cpp b/libomptarget/src/interface.cpp
index 6c20819..0140c55 100644
--- a/libomptarget/src/interface.cpp
+++ b/libomptarget/src/interface.cpp
@@ -336,7 +336,7 @@
 
   DeviceTy &Device = PM->Devices[device_id];
   int rc = targetDataUpdate(loc, Device, arg_num, args_base, args, arg_sizes,
-                            arg_types, arg_names, arg_mappers);
+                            arg_types, arg_names, arg_mappers, nullptr);
   HandleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
 }
 
@@ -408,8 +408,9 @@
 #endif
 
   DeviceTy &Device = PM->Devices[device_id];
-  int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes,
-                  arg_types, arg_names, arg_mappers, 0, 0, false /*team*/);
+  int rc =
+      target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes,
+             arg_types, arg_names, arg_mappers, 0, 0, false /*team*/, nullptr);
   HandleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
   return rc;
 }
@@ -491,7 +492,7 @@
   DeviceTy &Device = PM->Devices[device_id];
   int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes,
                   arg_types, arg_names, arg_mappers, team_num, thread_limit,
-                  true /*team*/);
+                  true /*team*/, nullptr);
   HandleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
   return rc;
 }
diff --git a/libomptarget/src/omptarget.cpp b/libomptarget/src/omptarget.cpp
index 8c1ed5b..e133012 100644
--- a/libomptarget/src/omptarget.cpp
+++ b/libomptarget/src/omptarget.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "omptarget.h"
 #include "device.h"
 #include "private.h"
 #include "rtl.h"
@@ -159,8 +160,9 @@
         DP("Has pending ctors... call now\n");
         for (auto &entry : lib.second.PendingCtors) {
           void *ctor = entry;
-          int rc = target(nullptr, Device, ctor, 0, nullptr, nullptr, nullptr,
-                          nullptr, nullptr, nullptr, 1, 1, true /*team*/);
+          int rc =
+              target(nullptr, Device, ctor, 0, nullptr, nullptr, nullptr,
+                     nullptr, nullptr, nullptr, 1, 1, true /*team*/, nullptr);
           if (rc != OFFLOAD_SUCCESS) {
             REPORT("Running ctor " DPxMOD " failed.\n", DPxPTR(ctor));
             Device.PendingGlobalsMtx.unlock();
@@ -255,7 +257,7 @@
 int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
                     void **args_base, void **args, int64_t *arg_sizes,
                     int64_t *arg_types, map_var_info_t *arg_names,
-                    void **arg_mappers, __tgt_async_info *async_info_ptr) {
+                    void **arg_mappers, __tgt_async_info *AsyncInfo) {
   // process each input.
   for (int32_t i = 0; i < arg_num; ++i) {
     // Ignore private variables and arrays - there is no mapping for them.
@@ -401,8 +403,8 @@
       if (copy && !IsHostPtr) {
         DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
            data_size, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
-        int rt = Device.submitData(TgtPtrBegin, HstPtrBegin, data_size,
-                                   async_info_ptr);
+        int rt =
+            Device.submitData(TgtPtrBegin, HstPtrBegin, data_size, AsyncInfo);
         if (rt != OFFLOAD_SUCCESS) {
           REPORT("Copying data to device failed.\n");
           return OFFLOAD_FAIL;
@@ -416,7 +418,7 @@
       uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
       void *TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta);
       int rt = Device.submitData(PointerTgtPtrBegin, &TgtPtrBase,
-                                 sizeof(void *), async_info_ptr);
+                                 sizeof(void *), AsyncInfo);
       if (rt != OFFLOAD_SUCCESS) {
         REPORT("Copying data to device failed.\n");
         return OFFLOAD_FAIL;
@@ -791,12 +793,12 @@
 }
 
 /// Internal function to pass data to/from the target.
-// async_info_ptr is currently unused, added here so targetDataUpdate has the
+// AsyncInfo is currently unused, added here so targetDataUpdate has the
 // same signature as targetDataBegin and targetDataEnd.
 int targetDataUpdate(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
                      void **ArgsBase, void **Args, int64_t *ArgSizes,
                      int64_t *ArgTypes, map_var_info_t *ArgNames,
-                     void **ArgMappers, __tgt_async_info *AsyncInfoPtr) {
+                     void **ArgMappers, __tgt_async_info *AsyncInfo) {
   // process each input.
   for (int32_t I = 0; I < ArgNum; ++I) {
     if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) ||
@@ -1240,7 +1242,8 @@
 int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
            void **ArgBases, void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
            map_var_info_t *ArgNames, void **ArgMappers, int32_t TeamNum,
-           int32_t ThreadLimit, int IsTeamConstruct) {
+           int32_t ThreadLimit, int IsTeamConstruct,
+           __tgt_async_info *AsyncInfo) {
   int32_t DeviceId = Device.DeviceID;
 
   TableMap *TM = getTableMap(HostPtr);
@@ -1261,19 +1264,23 @@
   }
   assert(TargetTable && "Global data has not been mapped\n");
 
-  __tgt_async_info AsyncInfo;
+  // TODO: This will go away as soon as we consistently pass in async info
+  // objects (as references).
+  __tgt_async_info InternalAsyncInfo;
+  if (!AsyncInfo)
+    AsyncInfo = &InternalAsyncInfo;
 
   std::vector<void *> TgtArgs;
   std::vector<ptrdiff_t> TgtOffsets;
 
-  PrivateArgumentManagerTy PrivateArgumentManager(Device, &AsyncInfo);
+  PrivateArgumentManagerTy PrivateArgumentManager(Device, AsyncInfo);
 
   int Ret;
   if (ArgNum) {
     // Process data, such as data mapping, before launching the kernel
     Ret = processDataBefore(loc, DeviceId, HostPtr, ArgNum, ArgBases, Args,
                             ArgSizes, ArgTypes, ArgNames, ArgMappers, TgtArgs,
-                            TgtOffsets, PrivateArgumentManager, &AsyncInfo);
+                            TgtOffsets, PrivateArgumentManager, AsyncInfo);
     if (Ret != OFFLOAD_SUCCESS) {
       REPORT("Failed to process data before launching the kernel.\n");
       return OFFLOAD_FAIL;
@@ -1294,10 +1301,10 @@
     if (IsTeamConstruct)
       Ret = Device.runTeamRegion(TgtEntryPtr, &TgtArgs[0], &TgtOffsets[0],
                                  TgtArgs.size(), TeamNum, ThreadLimit,
-                                 LoopTripCount, &AsyncInfo);
+                                 LoopTripCount, AsyncInfo);
     else
       Ret = Device.runRegion(TgtEntryPtr, &TgtArgs[0], &TgtOffsets[0],
-                             TgtArgs.size(), &AsyncInfo);
+                             TgtArgs.size(), AsyncInfo);
   }
 
   if (Ret != OFFLOAD_SUCCESS) {
@@ -1310,16 +1317,16 @@
     // variables
     Ret = processDataAfter(loc, DeviceId, HostPtr, ArgNum, ArgBases, Args,
                            ArgSizes, ArgTypes, ArgNames, ArgMappers,
-                           PrivateArgumentManager, &AsyncInfo);
+                           PrivateArgumentManager, AsyncInfo);
     if (Ret != OFFLOAD_SUCCESS) {
       REPORT("Failed to process data after launching the kernel.\n");
       return OFFLOAD_FAIL;
     }
-  } else if (AsyncInfo.Queue) {
+  } else if (AsyncInfo->Queue) {
     // If ArgNum is zero, but AsyncInfo.Queue is valid, then the kernel doesn't
     // hava any argument, and the device supports async operations, so we need a
     // sync at this point.
-    return syncDevice(Device, &AsyncInfo);
+    return syncDevice(Device, AsyncInfo);
   }
 
   return OFFLOAD_SUCCESS;
diff --git a/libomptarget/src/private.h b/libomptarget/src/private.h
index 36d7539..43d9d4a 100644
--- a/libomptarget/src/private.h
+++ b/libomptarget/src/private.h
@@ -23,8 +23,7 @@
 extern int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
                            void **args_base, void **args, int64_t *arg_sizes,
                            int64_t *arg_types, map_var_info_t *arg_names,
-                           void **arg_mappers,
-                           __tgt_async_info *async_info_ptr);
+                           void **arg_mappers, __tgt_async_info *AsyncInfo);
 
 extern int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
                          void **ArgBases, void **Args, int64_t *ArgSizes,
@@ -34,14 +33,13 @@
 extern int targetDataUpdate(ident_t *loc, DeviceTy &Device, int32_t arg_num,
                             void **args_base, void **args, int64_t *arg_sizes,
                             int64_t *arg_types, map_var_info_t *arg_names,
-                            void **arg_mappers,
-                            __tgt_async_info *async_info_ptr = nullptr);
+                            void **arg_mappers, __tgt_async_info *AsyncInfo);
 
 extern int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
                   void **ArgBases, void **Args, int64_t *ArgSizes,
                   int64_t *ArgTypes, map_var_info_t *arg_names,
                   void **ArgMappers, int32_t TeamNum, int32_t ThreadLimit,
-                  int IsTeamConstruct);
+                  int IsTeamConstruct, __tgt_async_info *AsyncInfo);
 
 extern int CheckDeviceAndCtors(int64_t device_id);
 
diff --git a/libomptarget/src/rtl.cpp b/libomptarget/src/rtl.cpp
index fbe4d7d..efbac2c 100644
--- a/libomptarget/src/rtl.cpp
+++ b/libomptarget/src/rtl.cpp
@@ -401,8 +401,9 @@
         Device.PendingGlobalsMtx.lock();
         if (Device.PendingCtorsDtors[desc].PendingCtors.empty()) {
           for (auto &dtor : Device.PendingCtorsDtors[desc].PendingDtors) {
-            int rc = target(nullptr, Device, dtor, 0, nullptr, nullptr, nullptr,
-                            nullptr, nullptr, nullptr, 1, 1, true /*team*/);
+            int rc =
+                target(nullptr, Device, dtor, 0, nullptr, nullptr, nullptr,
+                       nullptr, nullptr, nullptr, 1, 1, true /*team*/, nullptr);
             if (rc != OFFLOAD_SUCCESS) {
               DP("Running destructor " DPxMOD " failed.\n", DPxPTR(dtor));
             }