[OpenMP][libomptarget] Fix master warp check
Summary: The check for the master warp must take into account the actual number of warps: the master warp is the last active warp, which is not necessarily warp WARPSIZE - 1.
Reviewers: grokos, carlo.bertolli, ABataev, caomhin
Reviewed By: grokos
Subscribers: guansong, openmp-commits
Differential Revision: https://reviews.llvm.org/D44537
git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@328146 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
index aa97c00..e0256d3 100644
--- a/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
@@ -88,7 +88,7 @@
omptarget_nvptx_TeamDescr *teamDescr =
&omptarget_nvptx_threadPrivateContext->TeamContext();
- __kmpc_data_sharing_slot *RootS = teamDescr->RootS(WID);
+ __kmpc_data_sharing_slot *RootS = teamDescr->RootS(WID, IsMasterThread());
DataSharingState.SlotPtr[WID] = RootS;
DataSharingState.StackPtr[WID] = (void *)&RootS->Data[0];
@@ -337,17 +337,27 @@
// This function initializes the stack pointer with the pointer to the
// statically allocated shared memory slots. The size of a shared memory
// slot is pre-determined to be 256 bytes.
- unsigned WID = getWarpId();
- omptarget_nvptx_TeamDescr *teamDescr =
- &omptarget_nvptx_threadPrivateContext->TeamContext();
- __kmpc_data_sharing_slot *RootS = teamDescr->RootS(WID);
- DataSharingState.SlotPtr[WID] = RootS;
- DataSharingState.TailPtr[WID] = RootS;
- DataSharingState.StackPtr[WID] = (void *)&RootS->Data[0];
+ // Initialize the data sharing structures. This section should only be
+ // executed by the warp active master threads.
+ if (IsWarpMasterActiveThread()) {
+ unsigned WID = getWarpId();
+ omptarget_nvptx_TeamDescr *teamDescr =
+ &omptarget_nvptx_threadPrivateContext->TeamContext();
+ __kmpc_data_sharing_slot *RootS = teamDescr->RootS(WID, IsMasterThread());
- // We initialize the list of references to arguments here.
- omptarget_nvptx_globalArgs.Init();
+ DataSharingState.SlotPtr[WID] = RootS;
+ DataSharingState.TailPtr[WID] = RootS;
+ DataSharingState.StackPtr[WID] = (void *)&RootS->Data[0];
+ }
+
+ // Currently we only support the sharing of variables between master and
+ // workers. The list of references to shared variables exists only for
+ // the master thread.
+ if (IsMasterThread()) {
+ // Initialize the list of references to arguments.
+ omptarget_nvptx_globalArgs.Init();
+ }
}
// Called at the time of the kernel initialization. This is used to initilize
diff --git a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
index 5c5c88b..4a86104 100644
--- a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
@@ -168,7 +168,8 @@
if (RequiresDataSharing && threadId % WARPSIZE == 0) {
// Warp master innitializes data sharing environment.
unsigned WID = threadId / WARPSIZE;
- __kmpc_data_sharing_slot *RootS = currTeamDescr.RootS(WID);
+ __kmpc_data_sharing_slot *RootS = currTeamDescr.RootS(
+ WID, WID == WARPSIZE - 1);
DataSharingState.SlotPtr[WID] = RootS;
DataSharingState.StackPtr[WID] = (void *)&RootS->Data[0];
}
diff --git a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
index 8f4f1cd..1902861 100644
--- a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -259,10 +259,10 @@
// init
INLINE void InitTeamDescr();
- INLINE __kmpc_data_sharing_slot *RootS(int wid) {
+ INLINE __kmpc_data_sharing_slot *RootS(int wid, bool IsMasterThread) {
// If this is invoked by the master thread of the master warp then intialize
// it with a smaller slot.
- if (wid == WARPSIZE - 1) {
+ if (IsMasterThread) {
// Initialize the pointer to the end of the slot given the size of the
// data section. DataEnd is non-inclusive.
master_rootS[0].DataEnd = &master_rootS[0].Data[0] + DS_Slot_Size;