Revert "[clang][openmp][NFC] Remove arch-specific CGOpenMPRuntimeGPU files"

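This reverts commit 81a7cad2ffc18f15b732f69d991c8398c979c5ca.

In this tree, the revert removes `__kmpc_get_warp_size()` from
libomptarget/DeviceRTL (Mapping.cpp, Utils.cpp) and from the amdgcn and
nvptx deviceRTLs, and restores the `GetWarpSize()` helper that
deviceRTLs/common/include/target/shuffle.h calls.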

GitOrigin-RevId: ef717f385232562047acdc25ad355c3f29dfbe7d
diff --git a/libomptarget/DeviceRTL/src/Mapping.cpp b/libomptarget/DeviceRTL/src/Mapping.cpp
index 75a500f..bece294 100644
--- a/libomptarget/DeviceRTL/src/Mapping.cpp
+++ b/libomptarget/DeviceRTL/src/Mapping.cpp
@@ -277,10 +277,5 @@
   FunctionTracingRAII();
   return impl::getNumHardwareThreadsInBlock();
 }
-
-__attribute__((noinline)) uint32_t __kmpc_get_warp_size() {
-  FunctionTracingRAII();
-  return impl::getWarpSize();
-}
 }
 #pragma omp end declare target
diff --git a/libomptarget/DeviceRTL/src/Utils.cpp b/libomptarget/DeviceRTL/src/Utils.cpp
index 8cd633b..df57497 100644
--- a/libomptarget/DeviceRTL/src/Utils.cpp
+++ b/libomptarget/DeviceRTL/src/Utils.cpp
@@ -24,7 +24,6 @@
 __attribute__((used, weak, optnone)) void keepAlive() {
   __kmpc_get_hardware_thread_id_in_block();
   __kmpc_get_hardware_num_threads_in_block();
-  __kmpc_get_warp_size();
   __kmpc_barrier_simple_spmd(nullptr, 0);
   __kmpc_barrier_simple_generic(nullptr, 0);
 }
diff --git a/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip b/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
index 3747c87..2c6b888 100644
--- a/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
+++ b/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
@@ -133,11 +133,8 @@
                            __builtin_amdgcn_workgroup_size_x());
 }
 
-EXTERN unsigned __kmpc_get_warp_size() {
-  return WARPSIZE;
-}
-
 EXTERN unsigned GetWarpId() { return __kmpc_get_hardware_thread_id_in_block() / WARPSIZE; }
+EXTERN unsigned GetWarpSize() { return WARPSIZE; }
 EXTERN unsigned GetLaneId() {
   return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
 }
diff --git a/libomptarget/deviceRTLs/common/include/target/shuffle.h b/libomptarget/deviceRTLs/common/include/target/shuffle.h
index ed8d97a..5841b11 100644
--- a/libomptarget/deviceRTLs/common/include/target/shuffle.h
+++ b/libomptarget/deviceRTLs/common/include/target/shuffle.h
@@ -35,7 +35,7 @@
 ///{
 extern "C" {
 unsigned GetLaneId();
-unsigned __kmpc_get_warp_size();
+unsigned GetWarpSize();
 void __kmpc_impl_unpack(uint64_t val, uint32_t &lo, uint32_t &hi);
 uint64_t __kmpc_impl_pack(uint32_t lo, uint32_t hi);
 }
@@ -60,7 +60,7 @@
 
 inline int32_t __kmpc_impl_shfl_sync(uint64_t Mask, int32_t Var,
                                      int32_t SrcLane) {
-  int Width = __kmpc_get_warp_size();
+  int Width = GetWarpSize();
   int Self = GetLaneId();
   int Index = SrcLane + (Self & ~(Width - 1));
   return __builtin_amdgcn_ds_bpermute(Index << 2, Var);
@@ -90,7 +90,7 @@
 
 inline int32_t __kmpc_impl_shfl_down_sync(uint64_t Mask, int32_t Var,
                                           uint32_t Delta, int32_t Width) {
-  int32_t T = ((__kmpc_get_warp_size() - Width) << 8) | 0x1f;
+  int32_t T = ((GetWarpSize() - Width) << 8) | 0x1f;
   return __nvvm_shfl_sync_down_i32(Mask, Var, Delta, T);
 }
 
diff --git a/libomptarget/deviceRTLs/nvptx/src/target_impl.cu b/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
index ee58369..11f017c 100644
--- a/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
@@ -102,10 +102,10 @@
 EXTERN int __kmpc_get_hardware_num_threads_in_block() {
   return __nvvm_read_ptx_sreg_ntid_x();
 }
-EXTERN unsigned __kmpc_get_warp_size() { return WARPSIZE; }
 EXTERN unsigned GetWarpId() {
   return __kmpc_get_hardware_thread_id_in_block() / WARPSIZE;
 }
+EXTERN unsigned GetWarpSize() { return WARPSIZE; }
 EXTERN unsigned GetLaneId() {
   return __kmpc_get_hardware_thread_id_in_block() & (WARPSIZE - 1);
 }