[clang][openmp][NFC] Remove arch-specific CGOpenMPRuntimeGPU files
The existing CGOpenMPRuntimeAMDGCN and CGOpenMPRuntimeNVPTX classes are
just code bloat. By removing them, the codebase gets a bit cleaner.
Reviewed By: jdoerfert, JonChesterfield, tianshilei1992
Differential Revision: https://reviews.llvm.org/D113421
GitOrigin-RevId: 81a7cad2ffc18f15b732f69d991c8398c979c5ca
diff --git a/libomptarget/DeviceRTL/src/Mapping.cpp b/libomptarget/DeviceRTL/src/Mapping.cpp
index bece294..75a500f 100644
--- a/libomptarget/DeviceRTL/src/Mapping.cpp
+++ b/libomptarget/DeviceRTL/src/Mapping.cpp
@@ -277,5 +277,10 @@
FunctionTracingRAII();
return impl::getNumHardwareThreadsInBlock();
}
+
+__attribute__((noinline)) uint32_t __kmpc_get_warp_size() {
+ FunctionTracingRAII();
+ return impl::getWarpSize();
+}
}
#pragma omp end declare target
diff --git a/libomptarget/DeviceRTL/src/Utils.cpp b/libomptarget/DeviceRTL/src/Utils.cpp
index df57497..8cd633b 100644
--- a/libomptarget/DeviceRTL/src/Utils.cpp
+++ b/libomptarget/DeviceRTL/src/Utils.cpp
@@ -24,6 +24,7 @@
__attribute__((used, weak, optnone)) void keepAlive() {
__kmpc_get_hardware_thread_id_in_block();
__kmpc_get_hardware_num_threads_in_block();
+ __kmpc_get_warp_size();
__kmpc_barrier_simple_spmd(nullptr, 0);
__kmpc_barrier_simple_generic(nullptr, 0);
}
diff --git a/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip b/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
index 2c6b888..3747c87 100644
--- a/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
+++ b/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
@@ -133,8 +133,11 @@
__builtin_amdgcn_workgroup_size_x());
}
+EXTERN unsigned __kmpc_get_warp_size() {
+ return WARPSIZE;
+}
+
EXTERN unsigned GetWarpId() { return __kmpc_get_hardware_thread_id_in_block() / WARPSIZE; }
-EXTERN unsigned GetWarpSize() { return WARPSIZE; }
EXTERN unsigned GetLaneId() {
return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
}
diff --git a/libomptarget/deviceRTLs/common/include/target/shuffle.h b/libomptarget/deviceRTLs/common/include/target/shuffle.h
index 5841b11..ed8d97a 100644
--- a/libomptarget/deviceRTLs/common/include/target/shuffle.h
+++ b/libomptarget/deviceRTLs/common/include/target/shuffle.h
@@ -35,7 +35,7 @@
///{
extern "C" {
unsigned GetLaneId();
-unsigned GetWarpSize();
+unsigned __kmpc_get_warp_size();
void __kmpc_impl_unpack(uint64_t val, uint32_t &lo, uint32_t &hi);
uint64_t __kmpc_impl_pack(uint32_t lo, uint32_t hi);
}
@@ -60,7 +60,7 @@
inline int32_t __kmpc_impl_shfl_sync(uint64_t Mask, int32_t Var,
int32_t SrcLane) {
- int Width = GetWarpSize();
+ int Width = __kmpc_get_warp_size();
int Self = GetLaneId();
int Index = SrcLane + (Self & ~(Width - 1));
return __builtin_amdgcn_ds_bpermute(Index << 2, Var);
@@ -90,7 +90,7 @@
inline int32_t __kmpc_impl_shfl_down_sync(uint64_t Mask, int32_t Var,
uint32_t Delta, int32_t Width) {
- int32_t T = ((GetWarpSize() - Width) << 8) | 0x1f;
+ int32_t T = ((__kmpc_get_warp_size() - Width) << 8) | 0x1f;
return __nvvm_shfl_sync_down_i32(Mask, Var, Delta, T);
}
diff --git a/libomptarget/deviceRTLs/nvptx/src/target_impl.cu b/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
index 11f017c..ee58369 100644
--- a/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
@@ -102,10 +102,10 @@
EXTERN int __kmpc_get_hardware_num_threads_in_block() {
return __nvvm_read_ptx_sreg_ntid_x();
}
+EXTERN unsigned __kmpc_get_warp_size() { return WARPSIZE; }
EXTERN unsigned GetWarpId() {
return __kmpc_get_hardware_thread_id_in_block() / WARPSIZE;
}
-EXTERN unsigned GetWarpSize() { return WARPSIZE; }
EXTERN unsigned GetLaneId() {
return __kmpc_get_hardware_thread_id_in_block() & (WARPSIZE - 1);
}