[OpenMP] Clean up load balancing dynamic mode
This patch cleans up the bookkeeping code for the load-balancing dynamic mode.
When a thread is moved to or from the thread pool, the th_active_in_pool flag
and the __kmp_thread_pool_active_nth global counter are now updated together
at that point. This removes the need for the corrective code in the main wait
loop. Another global counter, __kmp_thread_pool_nth, is removed completely: it
was used only for debugging, yet was not guarded by KMP_DEBUG.
Patch by Terry Wilmarth
Differential Revision: https://reviews.llvm.org/D59508
git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@357927 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/runtime/src/exports_so.txt b/runtime/src/exports_so.txt
index 4926697..dd0fa59 100644
--- a/runtime/src/exports_so.txt
+++ b/runtime/src/exports_so.txt
@@ -54,7 +54,6 @@
___kmp_allocate;
___kmp_free;
__kmp_thread_pool;
- __kmp_thread_pool_nth;
__kmp_reset_stats;
diff --git a/runtime/src/kmp.h b/runtime/src/kmp.h
index ae7cfa0..d152b43 100644
--- a/runtime/src/kmp.h
+++ b/runtime/src/kmp.h
@@ -2951,7 +2951,6 @@
/* total number of threads reachable from some root thread including all root
threads, and those in the thread pool */
extern volatile int __kmp_all_nth;
-extern int __kmp_thread_pool_nth;
extern std::atomic<int> __kmp_thread_pool_active_nth;
extern kmp_root_t **__kmp_root; /* root of thread hierarchy */
@@ -3340,6 +3339,7 @@
extern void __kmp_register_atfork(void);
#endif
extern void __kmp_suspend_initialize(void);
+extern void __kmp_suspend_initialize_thread(kmp_info_t *th);
extern void __kmp_suspend_uninitialize_thread(kmp_info_t *th);
extern kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
diff --git a/runtime/src/kmp_global.cpp b/runtime/src/kmp_global.cpp
index 1805ade..dfd598f 100644
--- a/runtime/src/kmp_global.cpp
+++ b/runtime/src/kmp_global.cpp
@@ -414,7 +414,6 @@
KMP_ALIGN_CACHE
volatile int __kmp_nth = 0;
volatile int __kmp_all_nth = 0;
-int __kmp_thread_pool_nth = 0;
volatile kmp_info_t *__kmp_thread_pool = NULL;
volatile kmp_team_t *__kmp_team_pool = NULL;
diff --git a/runtime/src/kmp_runtime.cpp b/runtime/src/kmp_runtime.cpp
index 6b6ebee..6672631 100644
--- a/runtime/src/kmp_runtime.cpp
+++ b/runtime/src/kmp_runtime.cpp
@@ -4264,15 +4264,24 @@
__kmp_thread_pool_insert_pt = NULL;
}
TCW_4(new_thr->th.th_in_pool, FALSE);
- // Don't touch th_active_in_pool or th_active.
- // The worker thread adjusts those flags as it sleeps/awakens.
- __kmp_thread_pool_nth--;
+ __kmp_suspend_initialize_thread(new_thr);
+ __kmp_lock_suspend_mx(new_thr);
+ if (new_thr->th.th_active_in_pool == TRUE) {
+ KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
+ KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
+ new_thr->th.th_active_in_pool = FALSE;
+ }
+#if KMP_DEBUG
+ else {
+ KMP_DEBUG_ASSERT(new_thr->th.th_active == FALSE);
+ }
+#endif
+ __kmp_unlock_suspend_mx(new_thr);
KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
__kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
KMP_ASSERT(!new_thr->th.th_team);
KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
- KMP_DEBUG_ASSERT(__kmp_thread_pool_nth >= 0);
/* setup the thread structure */
__kmp_initialize_info(new_thr, team, new_tid,
@@ -5705,7 +5714,18 @@
(this_th->th.th_info.ds.ds_gtid <
this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
TCW_4(this_th->th.th_in_pool, TRUE);
- __kmp_thread_pool_nth++;
+ __kmp_suspend_initialize_thread(this_th);
+ __kmp_lock_suspend_mx(this_th);
+ if (this_th->th.th_active == TRUE) {
+ KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
+ this_th->th.th_active_in_pool = TRUE;
+ }
+#if KMP_DEBUG
+ else {
+ KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
+ }
+#endif
+ __kmp_unlock_suspend_mx(this_th);
TCW_4(__kmp_nth, __kmp_nth - 1);
@@ -5954,10 +5974,6 @@
KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
}
-
- // Decrement # of [worker] threads in the pool.
- KMP_DEBUG_ASSERT(__kmp_thread_pool_nth > 0);
- --__kmp_thread_pool_nth;
}
__kmp_free_implicit_task(thread);
@@ -6099,6 +6115,8 @@
KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
thread->th.th_next_pool = NULL;
thread->th.th_in_pool = FALSE;
+ thread->th.th_active_in_pool = FALSE;
+ KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
__kmp_reap_thread(thread, 0);
}
__kmp_thread_pool_insert_pt = NULL;
diff --git a/runtime/src/kmp_wait_release.h b/runtime/src/kmp_wait_release.h
index 048e74e..3a4884c 100644
--- a/runtime/src/kmp_wait_release.h
+++ b/runtime/src/kmp_wait_release.h
@@ -326,7 +326,6 @@
// Main wait spin loop
while (flag->notdone_check()) {
- int in_pool;
kmp_task_team_t *task_team = NULL;
if (__kmp_tasking_mode != tskm_immediate_exec) {
task_team = this_thr->th.th_task_team;
@@ -371,27 +370,6 @@
// KMP_LIBRARY=throughput), then yield
KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
- // Check if this thread was transferred from a team
- // to the thread pool (or vice-versa) while spinning.
- in_pool = !!TCR_4(this_thr->th.th_in_pool);
- if (in_pool != !!this_thr->th.th_active_in_pool) {
- if (in_pool) { // Recently transferred from team to pool
- KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
- this_thr->th.th_active_in_pool = TRUE;
- /* Here, we cannot assert that:
- KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <=
- __kmp_thread_pool_nth);
- __kmp_thread_pool_nth is inc/dec'd by the master thread while the
- fork/join lock is held, whereas __kmp_thread_pool_active_nth is
- inc/dec'd asynchronously by the workers. The two can get out of sync
- for brief periods of time. */
- } else { // Recently transferred from pool to team
- KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
- KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
- this_thr->th.th_active_in_pool = FALSE;
- }
- }
-
#if KMP_STATS_ENABLED
// Check if thread has been signalled to idle state
// This indicates that the logical "join-barrier" has finished
diff --git a/runtime/src/z_Linux_util.cpp b/runtime/src/z_Linux_util.cpp
index df1c47b..9e08db3 100644
--- a/runtime/src/z_Linux_util.cpp
+++ b/runtime/src/z_Linux_util.cpp
@@ -1352,7 +1352,7 @@
KMP_CHECK_SYSFAIL("pthread_condattr_init", status);
}
-static void __kmp_suspend_initialize_thread(kmp_info_t *th) {
+void __kmp_suspend_initialize_thread(kmp_info_t *th) {
ANNOTATE_HAPPENS_AFTER(&th->th.th_suspend_init_count);
if (th->th.th_suspend_init_count <= __kmp_fork_count) {
/* this means we haven't initialized the suspension pthread objects for this
diff --git a/runtime/src/z_Windows_NT_util.cpp b/runtime/src/z_Windows_NT_util.cpp
index 33b0181..97d0e79 100644
--- a/runtime/src/z_Windows_NT_util.cpp
+++ b/runtime/src/z_Windows_NT_util.cpp
@@ -309,7 +309,7 @@
void __kmp_suspend_initialize(void) { /* do nothing */
}
-static void __kmp_suspend_initialize_thread(kmp_info_t *th) {
+void __kmp_suspend_initialize_thread(kmp_info_t *th) {
if (!TCR_4(th->th.th_suspend_init)) {
/* this means we haven't initialized the suspension pthread objects for this
thread in this instance of the process */