[OpenMP] Add source location information to the libomptarget profile
Much of the libomptarget interface now receives an ident_t object. If it
is not null, we can use it to improve the profile output. For now, we
simply use the ident_t "source information string" as generated by the
front end (FE).
Reviewed By: tianshilei1992
Differential Revision: https://reviews.llvm.org/D95282
GitOrigin-RevId: 8c7fdc4c61bff94a3ac1bb4877d1c00e01ee53be
diff --git a/libomptarget/include/SourceInfo.h b/libomptarget/include/SourceInfo.h
index 32f1159..7d30a04 100644
--- a/libomptarget/include/SourceInfo.h
+++ b/libomptarget/include/SourceInfo.h
@@ -91,6 +91,7 @@
const char *getName() const { return Name.c_str(); }
const char *getFilename() const { return Filename.c_str(); }
+ const char *getProfileLocation() const { return SourceStr.data(); }
int32_t getLine() const { return Line; }
int32_t getColumn() const { return Column; }
bool isAvailible() const { return (Line || Column); }
diff --git a/libomptarget/src/interface.cpp b/libomptarget/src/interface.cpp
index c773e1f..85a289c 100644
--- a/libomptarget/src/interface.cpp
+++ b/libomptarget/src/interface.cpp
@@ -132,7 +132,7 @@
int64_t *arg_types,
map_var_info_t *arg_names,
void **arg_mappers) {
- TIMESCOPE();
+ TIMESCOPE_WITH_IDENT(loc);
if (IsOffloadDisabled()) return;
DP("Entering data begin region for device %" PRId64 " with %d mappings\n",
@@ -164,7 +164,7 @@
}
#endif
- int rc = targetDataBegin(Device, arg_num, args_base, args, arg_sizes,
+ int rc = targetDataBegin(loc, Device, arg_num, args_base, args, arg_sizes,
arg_types, arg_names, arg_mappers, nullptr);
HandleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
}
@@ -174,7 +174,7 @@
void **args, int64_t *arg_sizes, int64_t *arg_types,
map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
- TIMESCOPE();
+ TIMESCOPE_WITH_IDENT(loc);
if (depNum + noAliasDepNum > 0)
__kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
@@ -210,7 +210,7 @@
int64_t *arg_types,
map_var_info_t *arg_names,
void **arg_mappers) {
- TIMESCOPE();
+ TIMESCOPE_WITH_IDENT(loc);
if (IsOffloadDisabled()) return;
DP("Entering data end region with %d mappings\n", arg_num);
@@ -247,8 +247,8 @@
}
#endif
- int rc = targetDataEnd(Device, arg_num, args_base, args, arg_sizes, arg_types,
- arg_names, arg_mappers, nullptr);
+ int rc = targetDataEnd(loc, Device, arg_num, args_base, args, arg_sizes,
+ arg_types, arg_names, arg_mappers, nullptr);
HandleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
}
@@ -257,7 +257,7 @@
void **args, int64_t *arg_sizes, int64_t *arg_types,
map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
- TIMESCOPE();
+ TIMESCOPE_WITH_IDENT(loc);
if (depNum + noAliasDepNum > 0)
__kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
@@ -290,7 +290,7 @@
int64_t *arg_types,
map_var_info_t *arg_names,
void **arg_mappers) {
- TIMESCOPE();
+ TIMESCOPE_WITH_IDENT(loc);
if (IsOffloadDisabled()) return;
DP("Entering data update with %d mappings\n", arg_num);
@@ -310,7 +310,7 @@
arg_names, "Updating OpenMP data");
DeviceTy &Device = PM->Devices[device_id];
- int rc = targetDataUpdate(Device, arg_num, args_base, args, arg_sizes,
+ int rc = targetDataUpdate(loc, Device, arg_num, args_base, args, arg_sizes,
arg_types, arg_names, arg_mappers);
HandleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
}
@@ -320,7 +320,7 @@
void **args, int64_t *arg_sizes, int64_t *arg_types,
map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
- TIMESCOPE();
+ TIMESCOPE_WITH_IDENT(loc);
if (depNum + noAliasDepNum > 0)
__kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
@@ -351,7 +351,7 @@
int32_t arg_num, void **args_base, void **args,
int64_t *arg_sizes, int64_t *arg_types,
map_var_info_t *arg_names, void **arg_mappers) {
- TIMESCOPE();
+ TIMESCOPE_WITH_IDENT(loc);
if (IsOffloadDisabled()) return OFFLOAD_FAIL;
DP("Entering target region with entry point " DPxMOD " and device Id %"
PRId64 "\n", DPxPTR(host_ptr), device_id);
@@ -378,7 +378,7 @@
}
#endif
- int rc = target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
+ int rc = target(loc, device_id, host_ptr, arg_num, args_base, args, arg_sizes,
arg_types, arg_names, arg_mappers, 0, 0, false /*team*/);
HandleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
return rc;
@@ -389,7 +389,7 @@
void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
- TIMESCOPE();
+ TIMESCOPE_WITH_IDENT(loc);
if (depNum + noAliasDepNum > 0)
__kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
@@ -426,7 +426,6 @@
map_var_info_t *arg_names,
void **arg_mappers, int32_t team_num,
int32_t thread_limit) {
- TIMESCOPE();
if (IsOffloadDisabled()) return OFFLOAD_FAIL;
DP("Entering target region with entry point " DPxMOD " and device Id %"
PRId64 "\n", DPxPTR(host_ptr), device_id);
@@ -453,7 +452,7 @@
}
#endif
- int rc = target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
+ int rc = target(loc, device_id, host_ptr, arg_num, args_base, args, arg_sizes,
arg_types, arg_names, arg_mappers, team_num, thread_limit,
true /*team*/);
HandleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
@@ -466,7 +465,7 @@
map_var_info_t *arg_names, void **arg_mappers, int32_t team_num,
int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum,
void *noAliasDepList) {
- TIMESCOPE();
+ TIMESCOPE_WITH_IDENT(loc);
if (depNum + noAliasDepNum > 0)
__kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
@@ -502,7 +501,7 @@
EXTERN void __kmpc_push_target_tripcount(ident_t *loc, int64_t device_id,
uint64_t loop_tripcount) {
- TIMESCOPE();
+ TIMESCOPE_WITH_IDENT(loc);
if (IsOffloadDisabled())
return;
diff --git a/libomptarget/src/omptarget.cpp b/libomptarget/src/omptarget.cpp
index 8cb16a4..85e3cf6 100644
--- a/libomptarget/src/omptarget.cpp
+++ b/libomptarget/src/omptarget.cpp
@@ -51,7 +51,7 @@
static const int64_t Alignment = 8;
/// Map global data and execute pending ctors
-static int InitLibrary(DeviceTy& Device) {
+static int InitLibrary(DeviceTy &Device) {
/*
* Map global data
*/
@@ -84,8 +84,8 @@
break;
}
// 2) load image into the target table.
- __tgt_target_table *TargetTable =
- TransTable->TargetsTable[device_id] = Device.load_binary(img);
+ __tgt_target_table *TargetTable = TransTable->TargetsTable[device_id] =
+ Device.load_binary(img);
// Unable to get table for this image: invalidate image and fail.
if (!TargetTable) {
REPORT("Unable to generate entries table for device id %d.\n", device_id);
@@ -129,8 +129,9 @@
if (Device.getTgtPtrBegin(CurrHostEntry->addr, CurrHostEntry->size))
continue;
DP("Add mapping from host " DPxMOD " to device " DPxMOD " with size %zu"
- "\n", DPxPTR(CurrHostEntry->addr), DPxPTR(CurrDeviceEntry->addr),
- CurrDeviceEntry->size);
+ "\n",
+ DPxPTR(CurrHostEntry->addr), DPxPTR(CurrDeviceEntry->addr),
+ CurrDeviceEntry->size);
Device.HostDataToTargetMap.emplace(
(uintptr_t)CurrHostEntry->addr /*HstPtrBase*/,
(uintptr_t)CurrHostEntry->addr /*HstPtrBegin*/,
@@ -158,8 +159,9 @@
DP("Has pending ctors... call now\n");
for (auto &entry : lib.second.PendingCtors) {
void *ctor = entry;
- int rc = target(device_id, ctor, 0, nullptr, nullptr, nullptr,
- nullptr, nullptr, nullptr, 1, 1, true /*team*/);
+ int rc =
+ target(nullptr, device_id, ctor, 0, nullptr, nullptr, nullptr,
+ nullptr, nullptr, nullptr, 1, 1, true /*team*/);
if (rc != OFFLOAD_SUCCESS) {
REPORT("Running ctor " DPxMOD " failed.\n", DPxPTR(ctor));
Device.PendingGlobalsMtx.unlock();
@@ -208,10 +210,11 @@
/// Call the user-defined mapper function followed by the appropriate
// target_data_* function (target_data_{begin,end,update}).
-int targetDataMapper(DeviceTy &Device, void *arg_base, void *arg,
+int targetDataMapper(ident_t *loc, DeviceTy &Device, void *arg_base, void *arg,
int64_t arg_size, int64_t arg_type,
map_var_info_t arg_names, void *arg_mapper,
TargetDataFuncPtrTy target_data_function) {
+ TIMESCOPE_WITH_IDENT(loc);
DP("Calling the mapper function " DPxMOD "\n", DPxPTR(arg_mapper));
// The mapper function fills up Components.
@@ -240,7 +243,7 @@
MapperArgNames[I] = C.Name;
}
- int rc = target_data_function(Device, MapperComponents.Components.size(),
+ int rc = target_data_function(loc, Device, MapperComponents.Components.size(),
MapperArgsBase.data(), MapperArgs.data(),
MapperArgSizes.data(), MapperArgTypes.data(),
MapperArgNames.data(), /*arg_mappers*/ nullptr,
@@ -250,10 +253,10 @@
}
/// Internal function to do the mapping and transfer the data to the device
-int targetDataBegin(DeviceTy &Device, int32_t arg_num, void **args_base,
- void **args, int64_t *arg_sizes, int64_t *arg_types,
- map_var_info_t *arg_names, void **arg_mappers,
- __tgt_async_info *async_info_ptr) {
+int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
+ void **args_base, void **args, int64_t *arg_sizes,
+ int64_t *arg_types, map_var_info_t *arg_names,
+ void **arg_mappers, __tgt_async_info *async_info_ptr) {
// process each input.
for (int32_t i = 0; i < arg_num; ++i) {
// Ignore private variables and arrays - there is no mapping for them.
@@ -268,7 +271,7 @@
DP("Calling targetDataMapper for the %dth argument\n", i);
map_var_info_t arg_name = (!arg_names) ? nullptr : arg_names[i];
- int rc = targetDataMapper(Device, args_base[i], args[i], arg_sizes[i],
+ int rc = targetDataMapper(loc, Device, args_base[i], args[i], arg_sizes[i],
arg_types[i], arg_name, arg_mappers[i],
targetDataBegin);
@@ -291,14 +294,15 @@
// Look at the next argument - if that is MEMBER_OF this one, then this one
// is a combined entry.
int64_t padding = 0;
- const int next_i = i+1;
+ const int next_i = i + 1;
if (getParentIndex(arg_types[i]) < 0 && next_i < arg_num &&
getParentIndex(arg_types[next_i]) == i) {
padding = (int64_t)HstPtrBegin % Alignment;
if (padding) {
DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD
- "\n", padding, DPxPTR(HstPtrBegin));
- HstPtrBegin = (char *) HstPtrBegin - padding;
+ "\n",
+ padding, DPxPTR(HstPtrBegin));
+ HstPtrBegin = (char *)HstPtrBegin - padding;
data_size += padding;
}
}
@@ -344,8 +348,9 @@
return OFFLOAD_FAIL;
}
DP("There are %zu bytes allocated at target address " DPxMOD " - is%s new"
- "\n", sizeof(void *), DPxPTR(PointerTgtPtrBegin),
- (Pointer_IsNew ? "" : " not"));
+ "\n",
+ sizeof(void *), DPxPTR(PointerTgtPtrBegin),
+ (Pointer_IsNew ? "" : " not"));
Pointer_HstPtrBegin = HstPtrBase;
// modify current entry.
HstPtrBase = *(void **)HstPtrBase;
@@ -364,8 +369,8 @@
return OFFLOAD_FAIL;
}
DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
- " - is%s new\n", data_size, DPxPTR(TgtPtrBegin),
- (IsNew ? "" : " not"));
+ " - is%s new\n",
+ data_size, DPxPTR(TgtPtrBegin), (IsNew ? "" : " not"));
if (arg_types[i] & OMP_TGT_MAPTYPE_RETURN_PARAM) {
uintptr_t Delta = (uintptr_t)HstPtrBegin - (uintptr_t)HstPtrBase;
@@ -449,10 +454,10 @@
} // namespace
/// Internal function to undo the mapping and retrieve the data from the device.
-int targetDataEnd(DeviceTy &Device, int32_t ArgNum, void **ArgBases,
- void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
- map_var_info_t *ArgNames, void **ArgMappers,
- __tgt_async_info *AsyncInfo) {
+int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
+ void **ArgBases, void **Args, int64_t *ArgSizes,
+ int64_t *ArgTypes, map_var_info_t *ArgNames,
+ void **ArgMappers, __tgt_async_info *AsyncInfo) {
int Ret;
std::vector<DeallocTgtPtrInfo> DeallocTgtPtrs;
// process each input.
@@ -471,7 +476,7 @@
map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I];
Ret =
- targetDataMapper(Device, ArgBases[I], Args[I], ArgSizes[I],
+ targetDataMapper(loc, Device, ArgBases[I], Args[I], ArgSizes[I],
ArgTypes[I], ArgName, ArgMappers[I], targetDataEnd);
if (Ret != OFFLOAD_SUCCESS) {
@@ -646,9 +651,10 @@
return OFFLOAD_SUCCESS;
}
-static int targetDataContiguous(DeviceTy &Device, void *ArgsBase,
+static int targetDataContiguous(ident_t *loc, DeviceTy &Device, void *ArgsBase,
void *HstPtrBegin, int64_t ArgSize,
int64_t ArgType) {
+ TIMESCOPE_WITH_IDENT(loc);
bool IsLast, IsHostPtr;
void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, ArgSize, IsLast, false,
IsHostPtr, /*MustContain=*/true);
@@ -732,11 +738,13 @@
return OFFLOAD_SUCCESS;
}
-static int targetDataNonContiguous(DeviceTy &Device, void *ArgsBase,
+static int targetDataNonContiguous(ident_t *loc, DeviceTy &Device,
+ void *ArgsBase,
__tgt_target_non_contig *NonContig,
uint64_t Size, int64_t ArgType,
int CurrentDim, int DimSize,
uint64_t Offset) {
+ TIMESCOPE_WITH_IDENT(loc);
int Ret = OFFLOAD_SUCCESS;
if (CurrentDim < DimSize) {
for (unsigned int I = 0; I < NonContig[CurrentDim].Count; ++I) {
@@ -745,7 +753,7 @@
// we only need to transfer the first element for the last dimension
// since we've already got a contiguous piece.
if (CurrentDim != DimSize - 1 || I == 0) {
- Ret = targetDataNonContiguous(Device, ArgsBase, NonContig, Size,
+ Ret = targetDataNonContiguous(loc, Device, ArgsBase, NonContig, Size,
ArgType, CurrentDim + 1, DimSize,
Offset + CurOffset);
// Stop the whole process if any contiguous piece returns anything
@@ -758,7 +766,7 @@
char *Ptr = (char *)ArgsBase + Offset;
DP("Transfer of non-contiguous : host ptr %lx offset %ld len %ld\n",
(uint64_t)Ptr, Offset, Size);
- Ret = targetDataContiguous(Device, ArgsBase, Ptr, Size, ArgType);
+ Ret = targetDataContiguous(loc, Device, ArgsBase, Ptr, Size, ArgType);
}
return Ret;
}
@@ -776,10 +784,10 @@
/// Internal function to pass data to/from the target.
// async_info_ptr is currently unused, added here so targetDataUpdate has the
// same signature as targetDataBegin and targetDataEnd.
-int targetDataUpdate(DeviceTy &Device, int32_t ArgNum, void **ArgsBase,
- void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
- map_var_info_t *ArgNames, void **ArgMappers,
- __tgt_async_info *AsyncInfoPtr) {
+int targetDataUpdate(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
+ void **ArgsBase, void **Args, int64_t *ArgSizes,
+ int64_t *ArgTypes, map_var_info_t *ArgNames,
+ void **ArgMappers, __tgt_async_info *AsyncInfoPtr) {
// process each input.
for (int32_t I = 0; I < ArgNum; ++I) {
if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) ||
@@ -793,7 +801,7 @@
DP("Calling targetDataMapper for the %dth argument\n", I);
map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I];
- int Ret = targetDataMapper(Device, ArgsBase[I], Args[I], ArgSizes[I],
+ int Ret = targetDataMapper(loc, Device, ArgsBase[I], Args[I], ArgSizes[I],
ArgTypes[I], ArgName, ArgMappers[I],
targetDataUpdate);
@@ -816,10 +824,10 @@
NonContig[DimSize - 1].Count * NonContig[DimSize - 1].Stride;
int32_t MergedDim = getNonContigMergedDimension(NonContig, DimSize);
Ret = targetDataNonContiguous(
- Device, ArgsBase[I], NonContig, Size, ArgTypes[I],
+ loc, Device, ArgsBase[I], NonContig, Size, ArgTypes[I],
/*current_dim=*/0, DimSize - MergedDim, /*offset=*/0);
} else {
- Ret = targetDataContiguous(Device, ArgsBase[I], Args[I], ArgSizes[I],
+ Ret = targetDataContiguous(loc, Device, ArgsBase[I], Args[I], ArgSizes[I],
ArgTypes[I]);
}
if (Ret == OFFLOAD_FAIL)
@@ -1063,16 +1071,18 @@
/// Process data before launching the kernel, including calling targetDataBegin
/// to map and transfer data to target device, transferring (first-)private
/// variables.
-int processDataBefore(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
- void **ArgBases, void **Args, int64_t *ArgSizes,
- int64_t *ArgTypes, map_var_info_t *ArgNames,
- void **ArgMappers, std::vector<void *> &TgtArgs,
- std::vector<ptrdiff_t> &TgtOffsets,
- PrivateArgumentManagerTy &PrivateArgumentManager,
- __tgt_async_info *AsyncInfo) {
+static int processDataBefore(ident_t *loc, int64_t DeviceId, void *HostPtr,
+ int32_t ArgNum, void **ArgBases, void **Args,
+ int64_t *ArgSizes, int64_t *ArgTypes,
+ map_var_info_t *ArgNames, void **ArgMappers,
+ std::vector<void *> &TgtArgs,
+ std::vector<ptrdiff_t> &TgtOffsets,
+ PrivateArgumentManagerTy &PrivateArgumentManager,
+ __tgt_async_info *AsyncInfo) {
+ TIMESCOPE_WITH_NAME_AND_IDENT("mappingBeforeTargetRegion", loc);
DeviceTy &Device = PM->Devices[DeviceId];
- int Ret = targetDataBegin(Device, ArgNum, ArgBases, Args, ArgSizes, ArgTypes,
- ArgNames, ArgMappers, AsyncInfo);
+ int Ret = targetDataBegin(loc, Device, ArgNum, ArgBases, Args, ArgSizes,
+ ArgTypes, ArgNames, ArgMappers, AsyncInfo);
if (Ret != OFFLOAD_SUCCESS) {
REPORT("Call to targetDataBegin failed, abort target.\n");
return OFFLOAD_FAIL;
@@ -1184,17 +1194,18 @@
/// Process data after launching the kernel, including transferring data back to
/// host if needed and deallocating target memory of (first-)private variables.
-int processDataAfter(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
- void **ArgBases, void **Args, int64_t *ArgSizes,
- int64_t *ArgTypes, map_var_info_t *ArgNames,
- void **ArgMappers,
- PrivateArgumentManagerTy &PrivateArgumentManager,
- __tgt_async_info *AsyncInfo) {
+static int processDataAfter(ident_t *loc, int64_t DeviceId, void *HostPtr,
+ int32_t ArgNum, void **ArgBases, void **Args,
+ int64_t *ArgSizes, int64_t *ArgTypes,
+ map_var_info_t *ArgNames, void **ArgMappers,
+ PrivateArgumentManagerTy &PrivateArgumentManager,
+ __tgt_async_info *AsyncInfo) {
+ TIMESCOPE_WITH_NAME_AND_IDENT("mappingAfterTargetRegion", loc);
DeviceTy &Device = PM->Devices[DeviceId];
// Move data from device.
- int Ret = targetDataEnd(Device, ArgNum, ArgBases, Args, ArgSizes, ArgTypes,
- ArgNames, ArgMappers, AsyncInfo);
+ int Ret = targetDataEnd(loc, Device, ArgNum, ArgBases, Args, ArgSizes,
+ ArgTypes, ArgNames, ArgMappers, AsyncInfo);
if (Ret != OFFLOAD_SUCCESS) {
REPORT("Call to targetDataEnd failed, abort target.\n");
return OFFLOAD_FAIL;
@@ -1217,8 +1228,8 @@
/// performs the same action as data_update and data_end above. This function
/// returns 0 if it was able to transfer the execution to a target and an
/// integer different from zero otherwise.
-int target(int64_t DeviceId, void *HostPtr, int32_t ArgNum, void **ArgBases,
- void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
+int target(ident_t *loc, int64_t DeviceId, void *HostPtr, int32_t ArgNum,
+ void **ArgBases, void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
map_var_info_t *ArgNames, void **ArgMappers, int32_t TeamNum,
int32_t ThreadLimit, int IsTeamConstruct) {
DeviceTy &Device = PM->Devices[DeviceId];
@@ -1248,13 +1259,16 @@
PrivateArgumentManagerTy PrivateArgumentManager(Device, &AsyncInfo);
- // Process data, such as data mapping, before launching the kernel
- int Ret = processDataBefore(DeviceId, HostPtr, ArgNum, ArgBases, Args,
- ArgSizes, ArgTypes, ArgNames, ArgMappers, TgtArgs,
- TgtOffsets, PrivateArgumentManager, &AsyncInfo);
- if (Ret != OFFLOAD_SUCCESS) {
- REPORT("Failed to process data before launching the kernel.\n");
- return OFFLOAD_FAIL;
+ int Ret;
+ if (ArgNum) {
+ // Process data, such as data mapping, before launching the kernel
+ Ret = processDataBefore(loc, DeviceId, HostPtr, ArgNum, ArgBases, Args,
+ ArgSizes, ArgTypes, ArgNames, ArgMappers, TgtArgs,
+ TgtOffsets, PrivateArgumentManager, &AsyncInfo);
+ if (Ret != OFFLOAD_SUCCESS) {
+ REPORT("Failed to process data before launching the kernel.\n");
+ return OFFLOAD_FAIL;
+ }
}
// Get loop trip count
@@ -1265,27 +1279,33 @@
DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n",
TargetTable->EntriesBegin[TM->Index].name, DPxPTR(TgtEntryPtr), TM->Index);
- if (IsTeamConstruct)
- Ret = Device.runTeamRegion(TgtEntryPtr, &TgtArgs[0], &TgtOffsets[0],
- TgtArgs.size(), TeamNum, ThreadLimit,
- LoopTripCount, &AsyncInfo);
- else
- Ret = Device.runRegion(TgtEntryPtr, &TgtArgs[0], &TgtOffsets[0],
- TgtArgs.size(), &AsyncInfo);
+ {
+ TIMESCOPE_WITH_NAME_AND_IDENT(
+ IsTeamConstruct ? "runTargetTeamRegion" : "runTargetRegion", loc);
+ if (IsTeamConstruct)
+ Ret = Device.runTeamRegion(TgtEntryPtr, &TgtArgs[0], &TgtOffsets[0],
+ TgtArgs.size(), TeamNum, ThreadLimit,
+ LoopTripCount, &AsyncInfo);
+ else
+ Ret = Device.runRegion(TgtEntryPtr, &TgtArgs[0], &TgtOffsets[0],
+ TgtArgs.size(), &AsyncInfo);
+ }
if (Ret != OFFLOAD_SUCCESS) {
REPORT("Executing target region abort target.\n");
return OFFLOAD_FAIL;
}
- // Transfer data back and deallocate target memory for (first-)private
- // variables
- Ret = processDataAfter(DeviceId, HostPtr, ArgNum, ArgBases, Args, ArgSizes,
- ArgTypes, ArgNames, ArgMappers, PrivateArgumentManager,
- &AsyncInfo);
- if (Ret != OFFLOAD_SUCCESS) {
- REPORT("Failed to process data after launching the kernel.\n");
- return OFFLOAD_FAIL;
+ if (ArgNum) {
+ // Transfer data back and deallocate target memory for (first-)private
+ // variables
+ Ret = processDataAfter(loc, DeviceId, HostPtr, ArgNum, ArgBases, Args,
+ ArgSizes, ArgTypes, ArgNames, ArgMappers,
+ PrivateArgumentManager, &AsyncInfo);
+ if (Ret != OFFLOAD_SUCCESS) {
+ REPORT("Failed to process data after launching the kernel.\n");
+ return OFFLOAD_FAIL;
+ }
}
return OFFLOAD_SUCCESS;
diff --git a/libomptarget/src/private.h b/libomptarget/src/private.h
index 6a44dd9..340ed23 100644
--- a/libomptarget/src/private.h
+++ b/libomptarget/src/private.h
@@ -19,22 +19,24 @@
#include <cstdint>
-extern int targetDataBegin(DeviceTy &Device, int32_t arg_num, void **args_base,
- void **args, int64_t *arg_sizes, int64_t *arg_types,
- map_var_info_t *arg_names, void **arg_mappers,
+extern int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
+ void **args_base, void **args, int64_t *arg_sizes,
+ int64_t *arg_types, map_var_info_t *arg_names,
+ void **arg_mappers,
__tgt_async_info *async_info_ptr);
-extern int targetDataEnd(DeviceTy &Device, int32_t ArgNum, void **ArgBases,
- void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
- map_var_info_t *arg_names, void **ArgMappers,
- __tgt_async_info *AsyncInfo);
+extern int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
+ void **ArgBases, void **Args, int64_t *ArgSizes,
+ int64_t *ArgTypes, map_var_info_t *arg_names,
+ void **ArgMappers, __tgt_async_info *AsyncInfo);
-extern int targetDataUpdate(DeviceTy &Device, int32_t arg_num, void **args_base,
- void **args, int64_t *arg_sizes, int64_t *arg_types,
- map_var_info_t *arg_names, void **arg_mappers,
+extern int targetDataUpdate(ident_t *loc, DeviceTy &Device, int32_t arg_num,
+ void **args_base, void **args, int64_t *arg_sizes,
+ int64_t *arg_types, map_var_info_t *arg_names,
+ void **arg_mappers,
__tgt_async_info *async_info_ptr = nullptr);
-extern int target(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
+extern int target(ident_t *loc, int64_t DeviceId, void *HostPtr, int32_t ArgNum,
void **ArgBases, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *arg_names,
void **ArgMappers, int32_t TeamNum, int32_t ThreadLimit,
@@ -73,9 +75,10 @@
// Function pointer type for target_data_* functions (targetDataBegin,
// targetDataEnd and targetDataUpdate).
-typedef int (*TargetDataFuncPtrTy)(DeviceTy &, int32_t, void **, void **,
- int64_t *, int64_t *, map_var_info_t *,
- void **, __tgt_async_info *);
+typedef int (*TargetDataFuncPtrTy)(ident_t *, DeviceTy &, int32_t, void **,
+ void **, int64_t *, int64_t *,
+ map_var_info_t *, void **,
+ __tgt_async_info *);
// Implemented in libomp, they are called from within __tgt_* functions.
#ifdef __cplusplus
@@ -157,8 +160,16 @@
#ifdef OMPTARGET_PROFILE_ENABLED
#include "llvm/Support/TimeProfiler.h"
#define TIMESCOPE() llvm::TimeTraceScope TimeScope(__FUNCTION__)
+#define TIMESCOPE_WITH_IDENT(IDENT) \
+ SourceInfo SI(IDENT); \
+ llvm::TimeTraceScope TimeScope(__FUNCTION__, SI.getProfileLocation())
+#define TIMESCOPE_WITH_NAME_AND_IDENT(NAME, IDENT) \
+ SourceInfo SI(IDENT); \
+ llvm::TimeTraceScope TimeScope(NAME, SI.getProfileLocation())
#else
#define TIMESCOPE()
+#define TIMESCOPE_WITH_IDENT(IDENT)
+#define TIMESCOPE_WITH_NAME_AND_IDENT(NAME, IDENT)
#endif
#endif
diff --git a/libomptarget/src/rtl.cpp b/libomptarget/src/rtl.cpp
index 443359a..4a2a6d9 100644
--- a/libomptarget/src/rtl.cpp
+++ b/libomptarget/src/rtl.cpp
@@ -396,8 +396,9 @@
Device.PendingGlobalsMtx.lock();
if (Device.PendingCtorsDtors[desc].PendingCtors.empty()) {
for (auto &dtor : Device.PendingCtorsDtors[desc].PendingDtors) {
- int rc = target(Device.DeviceID, dtor, 0, nullptr, nullptr, nullptr,
- nullptr, nullptr, nullptr, 1, 1, true /*team*/);
+ int rc =
+ target(nullptr, Device.DeviceID, dtor, 0, nullptr, nullptr,
+ nullptr, nullptr, nullptr, nullptr, 1, 1, true /*team*/);
if (rc != OFFLOAD_SUCCESS) {
DP("Running destructor " DPxMOD " failed.\n", DPxPTR(dtor));
}