[libomptarget][nfc] Update remaining uint32 to use lanemask_t

Summary:
[libomptarget][nfc] Update remaining uint32 to use lanemask_t

Update a few functions in the API to use lanemask_t instead of i32. NFC for
nvptx. Also update the ActiveThreads type in DataSharingStateTy.
This removes a lot of #ifdef from the downstream amdgcn implementation.

Reviewers: ABataev, jdoerfert, grokos, ronlieb, RaviNarayanaswamy

Subscribers: openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D68513

git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@373806 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
index bd4cfec..5e936b0 100644
--- a/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
@@ -96,7 +96,7 @@
 
 EXTERN void *__kmpc_data_sharing_environment_begin(
     __kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack,
-    void **SavedSharedFrame, int32_t *SavedActiveThreads,
+    void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads,
     size_t SharingDataSize, size_t SharingDefaultDataSize,
     int16_t IsOMPRuntimeInitialized) {
 
@@ -117,7 +117,7 @@
   __kmpc_data_sharing_slot *&SlotP = DataSharingState.SlotPtr[WID];
   void *&StackP = DataSharingState.StackPtr[WID];
   void * volatile &FrameP = DataSharingState.FramePtr[WID];
-  int32_t &ActiveT = DataSharingState.ActiveThreads[WID];
+  __kmpc_impl_lanemask_t &ActiveT = DataSharingState.ActiveThreads[WID];
 
   DSPRINT0(DSFLAG, "Save current slot/stack values.\n");
   // Save the current values.
@@ -225,7 +225,7 @@
 
 EXTERN void __kmpc_data_sharing_environment_end(
     __kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack,
-    void **SavedSharedFrame, int32_t *SavedActiveThreads,
+    void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads,
     int32_t IsEntryPoint) {
 
   DSPRINT0(DSFLAG, "Entering __kmpc_data_sharing_environment_end\n");
@@ -260,7 +260,7 @@
   // assume that threads will converge right after the call site that started
   // the environment.
   if (IsWarpMasterActiveThread()) {
-    int32_t &ActiveT = DataSharingState.ActiveThreads[WID];
+    __kmpc_impl_lanemask_t &ActiveT = DataSharingState.ActiveThreads[WID];
 
     DSPRINT0(DSFLAG, "Before restoring the stack\n");
     // Zero the bits in the mask. If it is still different from zero, then we
diff --git a/libomptarget/deviceRTLs/nvptx/src/interface.h b/libomptarget/deviceRTLs/nvptx/src/interface.h
index ab57715..4a84922 100644
--- a/libomptarget/deviceRTLs/nvptx/src/interface.h
+++ b/libomptarget/deviceRTLs/nvptx/src/interface.h
@@ -19,6 +19,7 @@
 #define _INTERFACES_H_
 
 #include "option.h"
+#include "target_impl.h"
 
 ////////////////////////////////////////////////////////////////////////////////
 // OpenMP interface
@@ -422,9 +423,9 @@
 EXTERN void __kmpc_flush(kmp_Ident *loc);
 
 // vote
-EXTERN int32_t __kmpc_warp_active_thread_mask();
+EXTERN __kmpc_impl_lanemask_t __kmpc_warp_active_thread_mask();
 // syncwarp
-EXTERN void __kmpc_syncwarp(int32_t);
+EXTERN void __kmpc_syncwarp(__kmpc_impl_lanemask_t);
 
 // tasks
 EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(kmp_Ident *loc,
@@ -475,11 +476,13 @@
 EXTERN bool __kmpc_kernel_parallel(void **WorkFn,
                                    int16_t IsOMPRuntimeInitialized);
 EXTERN void __kmpc_kernel_end_parallel();
-EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer, uint32_t Mask,
+EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer,
+                                              __kmpc_impl_lanemask_t Mask,
                                               bool *IsFinal,
                                               int32_t *LaneSource);
 EXTERN void __kmpc_kernel_end_convergent_parallel(void *buffer);
-EXTERN bool __kmpc_kernel_convergent_simd(void *buffer, uint32_t Mask,
+EXTERN bool __kmpc_kernel_convergent_simd(void *buffer,
+                                          __kmpc_impl_lanemask_t Mask,
                                           bool *IsFinal, int32_t *LaneSource,
                                           int32_t *LaneId, int32_t *NumLanes);
 EXTERN void __kmpc_kernel_end_convergent_simd(void *buffer);
@@ -510,12 +513,13 @@
                                            size_t InitialDataSize);
 EXTERN void *__kmpc_data_sharing_environment_begin(
     __kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack,
-    void **SavedSharedFrame, int32_t *SavedActiveThreads,
+    void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads,
     size_t SharingDataSize, size_t SharingDefaultDataSize,
     int16_t IsOMPRuntimeInitialized);
 EXTERN void __kmpc_data_sharing_environment_end(
     __kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack,
-    void **SavedSharedFrame, int32_t *SavedActiveThreads, int32_t IsEntryPoint);
+    void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads,
+    int32_t IsEntryPoint);
 
 EXTERN void *
 __kmpc_get_data_sharing_environment_frame(int32_t SourceThreadID,
diff --git a/libomptarget/deviceRTLs/nvptx/src/loop.cu b/libomptarget/deviceRTLs/nvptx/src/loop.cu
index f9a3015..ee37846 100644
--- a/libomptarget/deviceRTLs/nvptx/src/loop.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/loop.cu
@@ -380,7 +380,8 @@
   ////////////////////////////////////////////////////////////////////////////////
   // Support for dispatch next
 
-  INLINE static int64_t Shuffle(unsigned active, int64_t val, int leader) {
+  INLINE static uint64_t Shuffle(__kmpc_impl_lanemask_t active, int64_t val,
+                                 int leader) {
     uint32_t lo, hi;
     __kmpc_impl_unpack(val, lo, hi);
     hi = __kmpc_impl_shfl_sync(active, hi, leader);
diff --git a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
index 2299d24..70e6c28 100644
--- a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -107,7 +107,7 @@
   __kmpc_data_sharing_slot *SlotPtr[DS_Max_Warp_Number];
   void *StackPtr[DS_Max_Warp_Number];
   void * volatile FramePtr[DS_Max_Warp_Number];
-  int32_t ActiveThreads[DS_Max_Warp_Number];
+  __kmpc_impl_lanemask_t ActiveThreads[DS_Max_Warp_Number];
 };
 // Additional worker slot type which is initialized with the default worker slot
 // size of 4*32 bytes.
diff --git a/libomptarget/deviceRTLs/nvptx/src/parallel.cu b/libomptarget/deviceRTLs/nvptx/src/parallel.cu
index 24a235d..016ded8 100644
--- a/libomptarget/deviceRTLs/nvptx/src/parallel.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/parallel.cu
@@ -44,13 +44,14 @@
 ////////////////////////////////////////////////////////////////////////////////
 // support for convergent simd (team of threads in a warp only)
 ////////////////////////////////////////////////////////////////////////////////
-EXTERN bool __kmpc_kernel_convergent_simd(void *buffer, uint32_t Mask,
+EXTERN bool __kmpc_kernel_convergent_simd(void *buffer,
+                                          __kmpc_impl_lanemask_t Mask,
                                           bool *IsFinal, int32_t *LaneSource,
                                           int32_t *LaneId, int32_t *NumLanes) {
   PRINT0(LD_IO, "call to __kmpc_kernel_convergent_simd\n");
-  uint32_t ConvergentMask = Mask;
+  __kmpc_impl_lanemask_t ConvergentMask = Mask;
   int32_t ConvergentSize = __kmpc_impl_popc(ConvergentMask);
-  uint32_t WorkRemaining = ConvergentMask >> (*LaneSource + 1);
+  __kmpc_impl_lanemask_t WorkRemaining = ConvergentMask >> (*LaneSource + 1);
   *LaneSource += __kmpc_impl_ffs(WorkRemaining);
   *IsFinal = __kmpc_impl_popc(WorkRemaining) == 1;
   __kmpc_impl_lanemask_t lanemask_lt = __kmpc_impl_lanemask_lt();
@@ -117,13 +118,14 @@
 ////////////////////////////////////////////////////////////////////////////////
 // support for convergent parallelism (team of threads in a warp only)
 ////////////////////////////////////////////////////////////////////////////////
-EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer, uint32_t Mask,
+EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer,
+                                              __kmpc_impl_lanemask_t Mask,
                                               bool *IsFinal,
                                               int32_t *LaneSource) {
   PRINT0(LD_IO, "call to __kmpc_kernel_convergent_parallel\n");
-  uint32_t ConvergentMask = Mask;
+  __kmpc_impl_lanemask_t ConvergentMask = Mask;
   int32_t ConvergentSize = __kmpc_impl_popc(ConvergentMask);
-  uint32_t WorkRemaining = ConvergentMask >> (*LaneSource + 1);
+  __kmpc_impl_lanemask_t WorkRemaining = ConvergentMask >> (*LaneSource + 1);
   *LaneSource += __kmpc_impl_ffs(WorkRemaining);
   *IsFinal = __kmpc_impl_popc(WorkRemaining) == 1;
   __kmpc_impl_lanemask_t lanemask_lt = __kmpc_impl_lanemask_lt();
diff --git a/libomptarget/deviceRTLs/nvptx/src/sync.cu b/libomptarget/deviceRTLs/nvptx/src/sync.cu
index 343293e..28a5419 100644
--- a/libomptarget/deviceRTLs/nvptx/src/sync.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/sync.cu
@@ -140,7 +140,7 @@
 // Vote
 ////////////////////////////////////////////////////////////////////////////////
 
-EXTERN int32_t __kmpc_warp_active_thread_mask() {
+EXTERN __kmpc_impl_lanemask_t __kmpc_warp_active_thread_mask() {
   PRINT0(LD_IO, "call __kmpc_warp_active_thread_mask\n");
   return __kmpc_impl_activemask();
 }
@@ -149,7 +149,7 @@
 // Syncwarp
 ////////////////////////////////////////////////////////////////////////////////
 
-EXTERN void __kmpc_syncwarp(int32_t Mask) {
+EXTERN void __kmpc_syncwarp(__kmpc_impl_lanemask_t Mask) {
   PRINT0(LD_IO, "call __kmpc_syncwarp\n");
   __kmpc_impl_syncwarp(Mask);
 }