[OpenMP][FIX] Introduce and use a simple generic-mode barrier
Before we had aligned barriers, `__kmpc_barrier_simple_spmd` was
safe to use in the custom state machine. Now that SPMD barriers are
assumed to be aligned, we need to use a "generic" barrier in places
that are not aligned.
Reviewed By: tianshilei1992
Differential Revision: https://reviews.llvm.org/D112893
GitOrigin-RevId: 73720c8059cfcce12f0cc5b7e6ff2e4b635a9a61
diff --git a/libomptarget/DeviceRTL/include/Interface.h b/libomptarget/DeviceRTL/include/Interface.h
index da04e14..302e3eb 100644
--- a/libomptarget/DeviceRTL/include/Interface.h
+++ b/libomptarget/DeviceRTL/include/Interface.h
@@ -249,6 +249,8 @@
void __kmpc_barrier_simple_spmd(IdentTy *Loc_ref, int32_t TId);
+void __kmpc_barrier_simple_generic(IdentTy *Loc_ref, int32_t TId);
+
int32_t __kmpc_master(IdentTy *Loc, int32_t TId);
void __kmpc_end_master(IdentTy *Loc, int32_t TId);
diff --git a/libomptarget/DeviceRTL/src/Synchronization.cpp b/libomptarget/DeviceRTL/src/Synchronization.cpp
index e219c75..6b4bab0 100644
--- a/libomptarget/DeviceRTL/src/Synchronization.cpp
+++ b/libomptarget/DeviceRTL/src/Synchronization.cpp
@@ -348,6 +348,12 @@
synchronize::threadsAligned();
}
+__attribute__((noinline)) void __kmpc_barrier_simple_generic(IdentTy *Loc,
+ int32_t TId) { // Loc and TId are not used in this body.
+ FunctionTracingRAII();
+ synchronize::threads(); // Generic barrier: threads(), not threadsAligned().
+}
+
int32_t __kmpc_master(IdentTy *Loc, int32_t TId) {
FunctionTracingRAII();
return omp_get_team_num() == 0;
diff --git a/libomptarget/DeviceRTL/src/Utils.cpp b/libomptarget/DeviceRTL/src/Utils.cpp
index 8fcb96b..df57497 100644
--- a/libomptarget/DeviceRTL/src/Utils.cpp
+++ b/libomptarget/DeviceRTL/src/Utils.cpp
@@ -25,6 +25,7 @@
__kmpc_get_hardware_thread_id_in_block();
__kmpc_get_hardware_num_threads_in_block();
__kmpc_barrier_simple_spmd(nullptr, 0);
+ __kmpc_barrier_simple_generic(nullptr, 0);
}
} // namespace _OMP
diff --git a/libomptarget/deviceRTLs/common/src/sync.cu b/libomptarget/deviceRTLs/common/src/sync.cu
index 8711cd2..823c9fc 100644
--- a/libomptarget/deviceRTLs/common/src/sync.cu
+++ b/libomptarget/deviceRTLs/common/src/sync.cu
@@ -78,6 +78,9 @@
__kmpc_impl_syncthreads();
PRINT0(LD_SYNC, "completed kmpc_barrier_simple_spmd\n");
}
+EXTERN void __kmpc_barrier_simple_generic(kmp_Ident *loc_ref, int32_t tid) {
+ return __kmpc_barrier_simple_spmd(loc_ref, tid); // Forwards to SPMD barrier.
+}
////////////////////////////////////////////////////////////////////////////////
// KMP MASTER
diff --git a/libomptarget/deviceRTLs/interface.h b/libomptarget/deviceRTLs/interface.h
index cb193c9..00aa07c 100644
--- a/libomptarget/deviceRTLs/interface.h
+++ b/libomptarget/deviceRTLs/interface.h
@@ -380,6 +380,7 @@
// sync barrier
EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid);
EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid);
+EXTERN void __kmpc_barrier_simple_generic(kmp_Ident *loc_ref, int32_t tid);
EXTERN int32_t __kmpc_cancel_barrier(kmp_Ident *loc, int32_t global_tid);
// single