/*
 * kmp_barrier.cpp
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp_wait_release.h"
#include "kmp_barrier.h"
#include "kmp_itt.h"
#include "kmp_os.h"
#include "kmp_stats.h"
#include "ompt-specific.h"
// for distributed barrier
#include "kmp_affinity.h"

#if KMP_MIC
#include <immintrin.h>
#define USE_NGO_STORES 1
#endif // KMP_MIC

#if KMP_MIC && USE_NGO_STORES
// ICV copying
#define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src))
#define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_sync() __asm__ volatile("lock; addl $0,0(%%rsp)" ::: "memory")
#else
#define ngo_load(src) ((void)0)
#define ngo_store_icvs(dst, src) copy_icvs((dst), (src))
#define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE)
#define ngo_sync() ((void)0)
#endif /* KMP_MIC && USE_NGO_STORES */

void __kmp_print_structure(void); // Forward declaration

// ---------------------------- Barrier Algorithms ----------------------------
// Distributed barrier

// Compute how many threads to have polling each cache-line.
// We want to limit the number of writes to IDEAL_GO_RESOLUTION.
void distributedBarrier::computeVarsForN(size_t n) {
  int nsockets = 1;
  if (__kmp_topology) {
    int socket_level = __kmp_topology->get_level(KMP_HW_SOCKET);
    int core_level = __kmp_topology->get_level(KMP_HW_CORE);
    int ncores_per_socket =
        __kmp_topology->calculate_ratio(core_level, socket_level);
    nsockets = __kmp_topology->get_count(socket_level);

    if (nsockets <= 0)
      nsockets = 1;
    if (ncores_per_socket <= 0)
      ncores_per_socket = 1;

    threads_per_go = ncores_per_socket >> 1;
    if (!fix_threads_per_go) {
      // Minimize num_gos
      if (threads_per_go > 4) {
        if (KMP_OPTIMIZE_FOR_REDUCTIONS) {
          threads_per_go = threads_per_go >> 1;
        }
        if (threads_per_go > 4 && nsockets == 1)
          threads_per_go = threads_per_go >> 1;
      }
    }
    if (threads_per_go == 0)
      threads_per_go = 1;
    fix_threads_per_go = true;
    num_gos = n / threads_per_go;
    if (n % threads_per_go)
      num_gos++;
    if (nsockets == 1 || num_gos == 1)
      num_groups = 1;
    else {
      num_groups = num_gos / nsockets;
      if (num_gos % nsockets)
        num_groups++;
    }
    if (num_groups <= 0)
      num_groups = 1;
    gos_per_group = num_gos / num_groups;
    if (num_gos % num_groups)
      gos_per_group++;
    threads_per_group = threads_per_go * gos_per_group;
  } else {
    num_gos = n / threads_per_go;
    if (n % threads_per_go)
      num_gos++;
    if (num_gos == 1)
      num_groups = 1;
    else {
      num_groups = num_gos / 2;
      if (num_gos % 2)
        num_groups++;
    }
    gos_per_group = num_gos / num_groups;
    if (num_gos % num_groups)
      gos_per_group++;
    threads_per_group = threads_per_go * gos_per_group;
  }
}
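
// Worked example (illustrative only; actual values depend on the detected
// topology): with 2 sockets of 16 cores each, n = 64 threads, and
// KMP_OPTIMIZE_FOR_REDUCTIONS off, threads_per_go = 16 >> 1 = 8, so
// num_gos = 64 / 8 = 8, num_groups = 8 / 2 = 4, gos_per_group = 8 / 4 = 2, and
// threads_per_group = 8 * 2 = 16, i.e. each socket hosts two groups of 16
// threads, each group polling two go cache lines.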

void distributedBarrier::computeGo(size_t n) {
  // Minimize num_gos
  for (num_gos = 1;; num_gos++)
    if (IDEAL_CONTENTION * num_gos >= n)
      break;
  threads_per_go = n / num_gos;
  if (n % num_gos)
    threads_per_go++;
  while (num_gos > MAX_GOS) {
    threads_per_go++;
    num_gos = n / threads_per_go;
    if (n % threads_per_go)
      num_gos++;
  }
  computeVarsForN(n);
}
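
// Worked example (illustrative only; IDEAL_CONTENTION and MAX_GOS are defined
// in kmp_barrier.h): for n = 100 and an assumed IDEAL_CONTENTION of 16, the
// first loop stops at num_gos = 7 (16 * 7 = 112 >= 100), giving
// threads_per_go = 100 / 7 + 1 = 15; if num_gos still exceeded MAX_GOS, the
// while loop would keep widening threads_per_go until the go-flag count fit.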

// Resize the barrier arrays when the new number of threads exceeds
// max_threads, which is the current size of all the arrays.
void distributedBarrier::resize(size_t nthr) {
  KMP_DEBUG_ASSERT(nthr > max_threads);

  // expand to requested size * 2
  max_threads = nthr * 2;

  // allocate arrays to new max threads
  for (int i = 0; i < MAX_ITERS; ++i) {
    if (flags[i])
      flags[i] = (flags_s *)KMP_INTERNAL_REALLOC(flags[i],
                                                 max_threads * sizeof(flags_s));
    else
      flags[i] = (flags_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(flags_s));
  }

  if (go)
    go = (go_s *)KMP_INTERNAL_REALLOC(go, max_threads * sizeof(go_s));
  else
    go = (go_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(go_s));

  if (iter)
    iter = (iter_s *)KMP_INTERNAL_REALLOC(iter, max_threads * sizeof(iter_s));
  else
    iter = (iter_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(iter_s));

  if (sleep)
    sleep =
        (sleep_s *)KMP_INTERNAL_REALLOC(sleep, max_threads * sizeof(sleep_s));
  else
    sleep = (sleep_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(sleep_s));
}

// Set all the go flags that threads might be waiting on; when blocktime is not
// infinite, this should be followed by a wake-up call to each thread.
kmp_uint64 distributedBarrier::go_release() {
  kmp_uint64 next_go = iter[0].iter + distributedBarrier::MAX_ITERS;
  for (size_t j = 0; j < num_gos; j++) {
    go[j].go.store(next_go);
  }
  return next_go;
}

void distributedBarrier::go_reset() {
  for (size_t j = 0; j < max_threads; ++j) {
    for (size_t i = 0; i < distributedBarrier::MAX_ITERS; ++i) {
      flags[i][j].stillNeed = 1;
    }
    go[j].go.store(0);
    iter[j].iter = 0;
  }
}

// Initialize/re-initialize the distributed barrier for a particular number of
// threads, resizing the arrays first if more space is needed.
void distributedBarrier::init(size_t nthr) {
  size_t old_max = max_threads;
  if (nthr > max_threads) { // need more space in arrays
    resize(nthr);
  }

  for (size_t i = 0; i < max_threads; i++) {
    for (size_t j = 0; j < distributedBarrier::MAX_ITERS; j++) {
      flags[j][i].stillNeed = 1;
    }
    go[i].go.store(0);
    iter[i].iter = 0;
    if (i >= old_max)
      sleep[i].sleep = false;
  }

  // Recalculate num_gos, etc. based on new nthr
  computeVarsForN(nthr);

  num_threads = nthr;

  if (team_icvs == NULL)
    team_icvs = __kmp_allocate(sizeof(kmp_internal_control_t));
}

// This function is used only when KMP_BLOCKTIME is not infinite.
// static
void __kmp_dist_barrier_wakeup(enum barrier_type bt, kmp_team_t *team,
                               size_t start, size_t stop, size_t inc,
                               size_t tid) {
  KMP_DEBUG_ASSERT(__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME);
  if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
    return;

  kmp_info_t **other_threads = team->t.t_threads;
  for (size_t thr = start; thr < stop; thr += inc) {
    KMP_DEBUG_ASSERT(other_threads[thr]);
    int gtid = other_threads[thr]->th.th_info.ds.ds_gtid;
    // Wake up the worker regardless of whether it appears to be sleeping
    __kmp_atomic_resume_64(gtid, (kmp_atomic_flag_64<> *)NULL);
  }
}

static void __kmp_dist_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_gather);
  kmp_team_t *team;
  distributedBarrier *b;
  kmp_info_t **other_threads;
  kmp_uint64 my_current_iter, my_next_iter;
  kmp_uint32 nproc;
  bool group_leader;

  team = this_thr->th.th_team;
  nproc = this_thr->th.th_team_nproc;
  other_threads = team->t.t_threads;
  b = team->t.b;
  my_current_iter = b->iter[tid].iter;
  my_next_iter = (my_current_iter + 1) % distributedBarrier::MAX_ITERS;
  group_leader = ((tid % b->threads_per_group) == 0);

  KA_TRACE(20,
           ("__kmp_dist_barrier_gather: T#%d(%d:%d) enter; barrier type %d\n",
            gtid, team->t.t_id, tid, bt));

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif

  if (group_leader) {
    // Start from the thread after the group leader
    size_t group_start = tid + 1;
    size_t group_end = tid + b->threads_per_group;
    size_t threads_pending = 0;

    if (group_end > nproc)
      group_end = nproc;
    do { // wait for threads in my group
      threads_pending = 0;
      // Check all the flags every time to avoid branch mispredicts
      for (size_t thr = group_start; thr < group_end; thr++) {
        // Each thread uses a different cache line
        threads_pending += b->flags[my_current_iter][thr].stillNeed;
      }
      // Execute tasks here
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        kmp_task_team_t *task_team = this_thr->th.th_task_team;
        if (task_team != NULL) {
          if (TCR_SYNC_4(task_team->tt.tt_active)) {
            if (KMP_TASKING_ENABLED(task_team)) {
              int tasks_completed = FALSE;
              __kmp_atomic_execute_tasks_64(
                  this_thr, gtid, (kmp_atomic_flag_64<> *)NULL, FALSE,
                  &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
            } else
              this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
          }
        } else {
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } // if
      }
      if (TCR_4(__kmp_global.g.g_done)) {
        if (__kmp_global.g.g_abort)
          __kmp_abort_thread();
        break;
      } else if (__kmp_tasking_mode != tskm_immediate_exec &&
                 this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
        this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
      }
    } while (threads_pending > 0);

    if (reduce) { // Perform reduction if needed
      OMPT_REDUCTION_DECL(this_thr, gtid);
      OMPT_REDUCTION_BEGIN;
      // Group leader reduces all threads in group
      for (size_t thr = group_start; thr < group_end; thr++) {
        (*reduce)(this_thr->th.th_local.reduce_data,
                  other_threads[thr]->th.th_local.reduce_data);
      }
      OMPT_REDUCTION_END;
    }

    // Set flag for next iteration
    b->flags[my_next_iter][tid].stillNeed = 1;
    // Each thread uses a different cache line; reset stillNeed to 0 to
    // indicate that this thread has reached the barrier
    b->flags[my_current_iter][tid].stillNeed = 0;

    do { // wait for all group leaders
      threads_pending = 0;
      for (size_t thr = 0; thr < nproc; thr += b->threads_per_group) {
        threads_pending += b->flags[my_current_iter][thr].stillNeed;
      }
      // Execute tasks here
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        kmp_task_team_t *task_team = this_thr->th.th_task_team;
        if (task_team != NULL) {
          if (TCR_SYNC_4(task_team->tt.tt_active)) {
            if (KMP_TASKING_ENABLED(task_team)) {
              int tasks_completed = FALSE;
              __kmp_atomic_execute_tasks_64(
                  this_thr, gtid, (kmp_atomic_flag_64<> *)NULL, FALSE,
                  &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
            } else
              this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
          }
        } else {
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } // if
      }
      if (TCR_4(__kmp_global.g.g_done)) {
        if (__kmp_global.g.g_abort)
          __kmp_abort_thread();
        break;
      } else if (__kmp_tasking_mode != tskm_immediate_exec &&
                 this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
        this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
      }
    } while (threads_pending > 0);

    if (reduce) { // Perform reduction if needed
      if (KMP_MASTER_TID(tid)) { // Master reduces over group leaders
        OMPT_REDUCTION_DECL(this_thr, gtid);
        OMPT_REDUCTION_BEGIN;
        for (size_t thr = b->threads_per_group; thr < nproc;
             thr += b->threads_per_group) {
          (*reduce)(this_thr->th.th_local.reduce_data,
                    other_threads[thr]->th.th_local.reduce_data);
        }
        OMPT_REDUCTION_END;
      }
    }
  } else {
    // Set flag for next iteration
    b->flags[my_next_iter][tid].stillNeed = 1;
    // Each thread uses a different cache line; reset stillNeed to 0 to
    // indicate that this thread has reached the barrier
    b->flags[my_current_iter][tid].stillNeed = 0;
  }

  KMP_MFENCE();

  KA_TRACE(20,
           ("__kmp_dist_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
            gtid, team->t.t_id, tid, bt));
}
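
// Shape of the gather above (sketch, not normative): with threads_per_group =
// 16 and a team of 64 threads, the group leaders are tids 0, 16, 32 and 48.
// Each worker clears its own flags[iter][tid].stillNeed; each group leader
// first spins over the flags of the 15 workers in its group, clears its own
// flag, and then spins over the leaders' flags, so no cache line is ever
// polled by more than a group's worth of threads.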

static void __kmp_dist_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_release);
  kmp_team_t *team;
  distributedBarrier *b;
  kmp_bstate_t *thr_bar;
  kmp_uint64 my_current_iter, next_go;
  size_t my_go_index;
  bool group_leader;

  KA_TRACE(20, ("__kmp_dist_barrier_release: T#%d(%d) enter; barrier type %d\n",
                gtid, tid, bt));

  thr_bar = &this_thr->th.th_bar[bt].bb;

  if (!KMP_MASTER_TID(tid)) {
    // workers and non-master group leaders need to check their presence in team
    do {
      if (this_thr->th.th_used_in_team.load() != 1 &&
          this_thr->th.th_used_in_team.load() != 3) {
        // Thread is not in use in a team. Wait on location in tid's thread
        // struct. The 0 value tells anyone looking that this thread is spinning
        // or sleeping until this location becomes 3 again; 3 is the transition
        // state to get to 1 which is waiting on go and being in the team
        kmp_flag_32<false, false> my_flag(&(this_thr->th.th_used_in_team), 3);
        if (KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 2,
                                        0) ||
            this_thr->th.th_used_in_team.load() == 0) {
          my_flag.wait(this_thr, true USE_ITT_BUILD_ARG(itt_sync_obj));
        }
#if USE_ITT_BUILD && USE_ITT_NOTIFY
        if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
          // In fork barrier where we could not get the object reliably
          itt_sync_obj =
              __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
          // Cancel wait on previous parallel region...
          __kmp_itt_task_starting(itt_sync_obj);

          if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
            return;

          itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
          if (itt_sync_obj != NULL)
            // Call prepare as early as possible for "new" barrier
            __kmp_itt_task_finished(itt_sync_obj);
        } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
          if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
            return;
      }
      if (this_thr->th.th_used_in_team.load() != 1 &&
          this_thr->th.th_used_in_team.load() != 3) // spurious wake-up?
        continue;
      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      // At this point, the thread thinks it is in use in a team, or in
      // transition to be used in a team, but it might have reached this barrier
      // before it was marked unused by the team. Unused threads are awoken and
      // shifted to wait on local thread struct elsewhere. It also might reach
      // this point by being picked up for use by a different team. Either way,
      // we need to update the tid.
      tid = __kmp_tid_from_gtid(gtid);
      team = this_thr->th.th_team;
      KMP_DEBUG_ASSERT(tid >= 0);
      KMP_DEBUG_ASSERT(team);
      b = team->t.b;
      my_current_iter = b->iter[tid].iter;
      next_go = my_current_iter + distributedBarrier::MAX_ITERS;
      my_go_index = tid / b->threads_per_go;
      if (this_thr->th.th_used_in_team.load() == 3) {
        (void)KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 3,
                                          1);
      }
      // Check if go flag is set
      if (b->go[my_go_index].go.load() != next_go) {
        // Wait on go flag on team
        kmp_atomic_flag_64<false, true> my_flag(
            &(b->go[my_go_index].go), next_go, &(b->sleep[tid].sleep));
        my_flag.wait(this_thr, true USE_ITT_BUILD_ARG(itt_sync_obj));
        KMP_DEBUG_ASSERT(my_current_iter == b->iter[tid].iter ||
                         b->iter[tid].iter == 0);
        KMP_DEBUG_ASSERT(b->sleep[tid].sleep == false);
      }

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;
      // At this point, the thread's go location was set. This means the primary
      // thread is safely in the barrier, and so this thread's data is
      // up-to-date, but we should check again that this thread is really in
      // use in the team, as it could have been woken up for the purpose of
      // changing team size, or reaping threads at shutdown.
      if (this_thr->th.th_used_in_team.load() == 1)
        break;
    } while (1);

    if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return;

    group_leader = ((tid % b->threads_per_group) == 0);
    if (group_leader) {
      // Tell all the threads in my group they can go!
      for (size_t go_idx = my_go_index + 1;
           go_idx < my_go_index + b->gos_per_group; go_idx++) {
        b->go[go_idx].go.store(next_go);
      }
      // Fence added so that workers can see changes to go. sfence inadequate.
      KMP_MFENCE();
    }

#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs) { // copy ICVs to final dest
      __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team,
                               tid, FALSE);
      copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                (kmp_internal_control_t *)team->t.b->team_icvs);
      copy_icvs(&thr_bar->th_fixed_icvs,
                &team->t.t_implicit_task_taskdata[tid].td_icvs);
    }
#endif
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && group_leader) {
      // This thread is now awake and participating in the barrier;
      // wake up the other threads in the group
      size_t nproc = this_thr->th.th_team_nproc;
      size_t group_end = tid + b->threads_per_group;
      if (nproc < group_end)
        group_end = nproc;
      __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid);
    }
  } else { // Primary thread
    team = this_thr->th.th_team;
    b = team->t.b;
    my_current_iter = b->iter[tid].iter;
    next_go = my_current_iter + distributedBarrier::MAX_ITERS;
#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs) {
      // primary thread has ICVs in final destination; copy
      copy_icvs(&thr_bar->th_fixed_icvs,
                &team->t.t_implicit_task_taskdata[tid].td_icvs);
    }
#endif
    // Tell all the group leaders they can go!
    for (size_t go_idx = 0; go_idx < b->num_gos; go_idx += b->gos_per_group) {
      b->go[go_idx].go.store(next_go);
    }

    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
      // Wake-up the group leaders
      size_t nproc = this_thr->th.th_team_nproc;
      __kmp_dist_barrier_wakeup(bt, team, tid + b->threads_per_group, nproc,
                                b->threads_per_group, tid);
    }

    // Tell all the threads in my group they can go!
    for (size_t go_idx = 1; go_idx < b->gos_per_group; go_idx++) {
      b->go[go_idx].go.store(next_go);
    }

    // Fence added so that workers can see changes to go. sfence inadequate.
    KMP_MFENCE();

    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
      // Wake-up the other threads in my group
      size_t nproc = this_thr->th.th_team_nproc;
      size_t group_end = tid + b->threads_per_group;
      if (nproc < group_end)
        group_end = nproc;
      __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid);
    }
  }
  // Update to next iteration
  KMP_ASSERT(my_current_iter == b->iter[tid].iter);
  b->iter[tid].iter = (b->iter[tid].iter + 1) % distributedBarrier::MAX_ITERS;

  KA_TRACE(
      20, ("__kmp_dist_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
}
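
// Release fan-out (sketch): the primary thread stores next_go into the first
// go slot of every group and into the remaining slots of its own group; each
// non-primary group leader, once its own slot is set, forwards next_go to the
// other go slots of its group, and the workers polling those slots then fall
// through their wait.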

// Linear Barrier
template <bool cancellable = false>
static bool __kmp_linear_barrier_gather_template(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads = team->t.t_threads;

  KA_TRACE(
      20,
      ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  // We now perform a linear reduction to signal that all of the threads have
  // arrived.
  if (!KMP_MASTER_TID(tid)) {
    KA_TRACE(20,
             ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)"
              "arrived(%p): %llu => %llu\n",
              gtid, team->t.t_id, tid, __kmp_gtid_from_tid(0, team),
              team->t.t_id, 0, &thr_bar->b_arrived, thr_bar->b_arrived,
              thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
    // Mark arrival to primary thread
    /* After performing this write, a worker thread may not assume that the team
       is valid any more - it could be deallocated by the primary thread at any
       time. */
    kmp_flag_64<> flag(&thr_bar->b_arrived, other_threads[0]);
    flag.release();
  } else {
    kmp_balign_team_t *team_bar = &team->t.t_bar[bt];
    int nproc = this_thr->th.th_team_nproc;
    int i;
    // Don't have to worry about sleep bit here or atomic since team setting
    kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP;

    // Collect all the worker team member threads.
    for (i = 1; i < nproc; ++i) {
#if KMP_CACHE_MANAGE
      // Prefetch next thread's arrived count
      if (i + 1 < nproc)
        KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
                    "arrived(%p) == %llu\n",
                    gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
                    team->t.t_id, i,
                    &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state));

      // Wait for worker thread to arrive
      if (cancellable) {
        kmp_flag_64<true, false> flag(
            &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state);
        if (flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)))
          return true;
      } else {
        kmp_flag_64<> flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived,
                           new_state);
        flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      }
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and the other thread
      // time to the thread.
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(
            this_thr->th.th_bar_min_time, other_threads[i]->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100,
                 ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
                  team->t.t_id, i));
        OMPT_REDUCTION_DECL(this_thr, gtid);
        OMPT_REDUCTION_BEGIN;
        (*reduce)(this_thr->th.th_local.reduce_data,
                  other_threads[i]->th.th_local.reduce_data);
        OMPT_REDUCTION_END;
      }
    }
    // Don't have to worry about sleep bit here or atomic since team setting
    team_bar->b_arrived = new_state;
    KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived,
                  new_state));
  }
  KA_TRACE(
      20,
      ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
  return false;
}

template <bool cancellable = false>
static bool __kmp_linear_barrier_release_template(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release);
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_team_t *team;

  if (KMP_MASTER_TID(tid)) {
    unsigned int i;
    kmp_uint32 nproc = this_thr->th.th_team_nproc;
    kmp_info_t **other_threads;

    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    other_threads = team->t.t_threads;

    KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) primary enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));

    if (nproc > 1) {
#if KMP_BARRIER_ICV_PUSH
      {
        KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
        if (propagate_icvs) {
          ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
          for (i = 1; i < nproc; ++i) {
            __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i],
                                     team, i, FALSE);
            ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
                           &team->t.t_implicit_task_taskdata[0].td_icvs);
          }
          ngo_sync();
        }
      }
#endif // KMP_BARRIER_ICV_PUSH

      // Now, release all of the worker threads
      for (i = 1; i < nproc; ++i) {
#if KMP_CACHE_MANAGE
        // Prefetch next thread's go flag
        if (i + 1 < nproc)
          KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */
        KA_TRACE(
            20,
            ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
             "go(%p): %u => %u\n",
             gtid, team->t.t_id, tid, other_threads[i]->th.th_info.ds.ds_gtid,
             team->t.t_id, i, &other_threads[i]->th.th_bar[bt].bb.b_go,
             other_threads[i]->th.th_bar[bt].bb.b_go,
             other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP));
        kmp_flag_64<> flag(&other_threads[i]->th.th_bar[bt].bb.b_go,
                           other_threads[i]);
        flag.release();
      }
    }
  } else { // Wait for the PRIMARY thread to release us
    KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
                  gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    if (cancellable) {
      kmp_flag_64<true, false> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
      if (flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)))
        return true;
    } else {
      kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
      flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is
      // disabled)
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      // Cancel wait on previous parallel region...
      __kmp_itt_task_starting(itt_sync_obj);

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return false;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        // Call prepare as early as possible for "new" barrier
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
      // Early exit for reaping threads releasing forkjoin barrier
      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return false;
    // The worker thread may now assume that the team is valid.
#ifdef KMP_DEBUG
    tid = __kmp_tid_from_gtid(gtid);
    team = __kmp_threads[gtid]->th.th_team;
#endif
    KMP_DEBUG_ASSERT(team != NULL);
    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
              gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  }
  KA_TRACE(
      20,
      ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
  return false;
}

static void __kmp_linear_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  __kmp_linear_barrier_gather_template<false>(
      bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));
}

static bool __kmp_linear_barrier_gather_cancellable(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  return __kmp_linear_barrier_gather_template<true>(
      bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));
}

static void __kmp_linear_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  __kmp_linear_barrier_release_template<false>(
      bt, this_thr, gtid, tid, propagate_icvs USE_ITT_BUILD_ARG(itt_sync_obj));
}

static bool __kmp_linear_barrier_release_cancellable(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  return __kmp_linear_barrier_release_template<true>(
      bt, this_thr, gtid, tid, propagate_icvs USE_ITT_BUILD_ARG(itt_sync_obj));
}

// Tree barrier
static void __kmp_tree_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads = team->t.t_threads;
  kmp_uint32 nproc = this_thr->th.th_team_nproc;
  kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 child;
  kmp_uint32 child_tid;
  kmp_uint64 new_state = 0;

  KA_TRACE(
      20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  // Perform tree gather to wait until all threads have arrived; reduce any
  // required data as we go
  child_tid = (tid << branch_bits) + 1;
  if (child_tid < nproc) {
    // Parent threads wait for all their children to arrive
    new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    child = 1;
    do {
      kmp_info_t *child_thr = other_threads[child_tid];
      kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      // Prefetch next thread's arrived count
      if (child + 1 <= branch_factor && child_tid + 1 < nproc)
        KMP_CACHE_PREFETCH(
            &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20,
               ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                "arrived(%p) == %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
      // Wait for child to arrive
      kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
      flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and a child time to
      // the thread.
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                               child_thr->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100,
                 ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                  team->t.t_id, child_tid));
        OMPT_REDUCTION_DECL(this_thr, gtid);
        OMPT_REDUCTION_BEGIN;
        (*reduce)(this_thr->th.th_local.reduce_data,
                  child_thr->th.th_local.reduce_data);
        OMPT_REDUCTION_END;
      }
      child++;
      child_tid++;
    } while (child <= branch_factor && child_tid < nproc);
  }

  if (!KMP_MASTER_TID(tid)) { // Worker threads
    kmp_int32 parent_tid = (tid - 1) >> branch_bits;

    KA_TRACE(20,
             ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
              "arrived(%p): %llu => %llu\n",
              gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
              team->t.t_id, parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
              thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));

    // Mark arrival to parent thread
    /* After performing this write, a worker thread may not assume that the team
       is valid any more - it could be deallocated by the primary thread at any
       time. */
    kmp_flag_64<> flag(&thr_bar->b_arrived, other_threads[parent_tid]);
    flag.release();
  } else {
    // Need to update the team arrived pointer if we are the primary thread
    if (nproc > 1) // New value was already computed above
      team->t.t_bar[bt].b_arrived = new_state;
    else
      team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
    KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(20,
           ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
            gtid, team->t.t_id, tid, bt));
}
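
// Tree indexing used above (illustration): with branch_bits = 2 the children
// of tid t are t*4+1 .. t*4+4 and the parent of a worker t is (t-1) >> 2, so
// for a 16-thread team tid 0 gathers tids 1-4, tid 1 gathers 5-8, tid 2
// gathers 9-12, tid 3 gathers 13-15, and tids 1-4 then report to tid 0.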

static void __kmp_tree_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release);
  kmp_team_t *team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_uint32 nproc;
  kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 child;
  kmp_uint32 child_tid;

  // Perform a tree release for all of the threads that have been gathered
  if (!KMP_MASTER_TID(
          tid)) { // Handle fork barrier workers who aren't part of a team yet
    KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n", gtid,
                  &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    // Wait for parent thread to release us
    kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In fork barrier where we could not get the object reliably (or
      // ITTNOTIFY is disabled)
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      // Cancel wait on previous parallel region...
      __kmp_itt_task_starting(itt_sync_obj);

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        // Call prepare as early as possible for "new" barrier
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
      // Early exit for reaping threads releasing forkjoin barrier
      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

    // The worker thread may now assume that the team is valid.
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    tid = __kmp_tid_from_gtid(gtid);

    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", gtid,
              team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  } else {
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) primary enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
  }
  nproc = this_thr->th.th_team_nproc;
  child_tid = (tid << branch_bits) + 1;

  if (child_tid < nproc) {
    kmp_info_t **other_threads = team->t.t_threads;
    child = 1;
    // Parent threads release all their children
    do {
      kmp_info_t *child_thr = other_threads[child_tid];
      kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      // Prefetch next thread's go count
      if (child + 1 <= branch_factor && child_tid + 1 < nproc)
        KMP_CACHE_PREFETCH(
            &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */

#if KMP_BARRIER_ICV_PUSH
      {
        KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
        if (propagate_icvs) {
          __kmp_init_implicit_task(team->t.t_ident,
                                   team->t.t_threads[child_tid], team,
                                   child_tid, FALSE);
          copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
                    &team->t.t_implicit_task_taskdata[0].td_icvs);
        }
      }
#endif // KMP_BARRIER_ICV_PUSH
      KA_TRACE(20,
               ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
                "go(%p): %u => %u\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
                child_bar->b_go + KMP_BARRIER_STATE_BUMP));
      // Release child from barrier
      kmp_flag_64<> flag(&child_bar->b_go, child_thr);
      flag.release();
      child++;
      child_tid++;
    } while (child <= branch_factor && child_tid < nproc);
  }
  KA_TRACE(
      20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
}

// Hyper Barrier
static void __kmp_hyper_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads = team->t.t_threads;
  kmp_uint64 new_state = KMP_BARRIER_UNUSED_STATE;
  kmp_uint32 num_threads = this_thr->th.th_team_nproc;
  kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 offset;
  kmp_uint32 level;

  KA_TRACE(
      20,
      ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  /* Perform a hypercube-embedded tree gather to wait until all of the threads
     have arrived, and reduce any required data as we go. */
  kmp_flag_64<> p_flag(&thr_bar->b_arrived);
  for (level = 0, offset = 1; offset < num_threads;
       level += branch_bits, offset <<= branch_bits) {
    kmp_uint32 child;
    kmp_uint32 child_tid;

    if (((tid >> level) & (branch_factor - 1)) != 0) {
      kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) - 1);

      KMP_MB(); // Synchronize parent and child threads.
      KA_TRACE(20,
               ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                "arrived(%p): %llu => %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
                team->t.t_id, parent_tid, &thr_bar->b_arrived,
                thr_bar->b_arrived,
                thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
      // Mark arrival to parent thread
      /* After performing this write (in the last iteration of the enclosing for
         loop), a worker thread may not assume that the team is valid any more
         - it could be deallocated by the primary thread at any time. */
      p_flag.set_waiter(other_threads[parent_tid]);
      p_flag.release();
      break;
    }

    // Parent threads wait for children to arrive
    if (new_state == KMP_BARRIER_UNUSED_STATE)
      new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    for (child = 1, child_tid = tid + (1 << level);
         child < branch_factor && child_tid < num_threads;
         child++, child_tid += (1 << level)) {
      kmp_info_t *child_thr = other_threads[child_tid];
      kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      kmp_uint32 next_child_tid = child_tid + (1 << level);
      // Prefetch next thread's arrived count
      if (child + 1 < branch_factor && next_child_tid < num_threads)
        KMP_CACHE_PREFETCH(
            &other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20,
               ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                "arrived(%p) == %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
      // Wait for child to arrive
      kmp_flag_64<> c_flag(&child_bar->b_arrived, new_state);
      c_flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      KMP_MB(); // Synchronize parent and child threads.
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and a child time to
      // the thread.
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                               child_thr->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100,
                 ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                  team->t.t_id, child_tid));
        OMPT_REDUCTION_DECL(this_thr, gtid);
        OMPT_REDUCTION_BEGIN;
        (*reduce)(this_thr->th.th_local.reduce_data,
                  child_thr->th.th_local.reduce_data);
        OMPT_REDUCTION_END;
      }
    }
  }

  if (KMP_MASTER_TID(tid)) {
    // Need to update the team arrived pointer if we are the primary thread
    if (new_state == KMP_BARRIER_UNUSED_STATE)
      team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
    else
      team->t.t_bar[bt].b_arrived = new_state;
    KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(
      20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
}
1123
1124// The reverse versions seem to beat the forward versions overall
1125#define KMP_REVERSE_HYPER_BAR
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001126static void __kmp_hyper_barrier_release(
1127 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
1128 int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
1129 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release);
Ed Mastee632f142017-07-07 21:06:05 +00001130 kmp_team_t *team;
1131 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
1132 kmp_info_t **other_threads;
1133 kmp_uint32 num_threads;
1134 kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
1135 kmp_uint32 branch_factor = 1 << branch_bits;
1136 kmp_uint32 child;
1137 kmp_uint32 child_tid;
1138 kmp_uint32 offset;
1139 kmp_uint32 level;
Jim Cownie5f037e12014-10-07 16:25:50 +00001140
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001141 /* Perform a hypercube-embedded tree release for all of the threads that have
1142 been gathered. If KMP_REVERSE_HYPER_BAR is defined (default) the threads
1143 are released in the reverse order of the corresponding gather, otherwise
1144 threads are released in the same order. */
tlwilmaracdce612021-03-05 09:21:39 -06001145 if (KMP_MASTER_TID(tid)) { // primary thread
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001146 team = __kmp_threads[gtid]->th.th_team;
1147 KMP_DEBUG_ASSERT(team != NULL);
tlwilmaracdce612021-03-05 09:21:39 -06001148 KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) primary enter for "
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001149 "barrier type %d\n",
1150 gtid, team->t.t_id, tid, bt));
Jim Cownie5f037e12014-10-07 16:25:50 +00001151#if KMP_BARRIER_ICV_PUSH
tlwilmaracdce612021-03-05 09:21:39 -06001152 if (propagate_icvs) { // primary already has ICVs in final destination; copy
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001153 copy_icvs(&thr_bar->th_fixed_icvs,
1154 &team->t.t_implicit_task_taskdata[tid].td_icvs);
Jim Cownie5f037e12014-10-07 16:25:50 +00001155 }
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001156#endif
1157 } else { // Handle fork barrier workers who aren't part of a team yet
1158 KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", gtid,
1159 &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
1160 // Wait for parent thread to release us
Terry Wilmarth71399132020-12-01 14:03:40 -06001161 kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001162 flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
Jim Cownie5f037e12014-10-07 16:25:50 +00001163#if USE_ITT_BUILD && USE_ITT_NOTIFY
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001164 if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
1165 // In fork barrier where we could not get the object reliably
1166 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
1167 // Cancel wait on previous parallel region...
1168 __kmp_itt_task_starting(itt_sync_obj);
Jim Cownie5f037e12014-10-07 16:25:50 +00001169
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001170 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
1171 return;
Jim Cownie5f037e12014-10-07 16:25:50 +00001172
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001173 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1174 if (itt_sync_obj != NULL)
1175 // Call prepare as early as possible for "new" barrier
1176 __kmp_itt_task_finished(itt_sync_obj);
1177 } else
Jim Cownie5f037e12014-10-07 16:25:50 +00001178#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1179 // Early exit for reaping threads releasing forkjoin barrier
1180 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001181 return;
Jim Cownie5f037e12014-10-07 16:25:50 +00001182
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001183 // The worker thread may now assume that the team is valid.
1184 team = __kmp_threads[gtid]->th.th_team;
1185 KMP_DEBUG_ASSERT(team != NULL);
1186 tid = __kmp_tid_from_gtid(gtid);
Jim Cownie5f037e12014-10-07 16:25:50 +00001187
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001188 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
1189 KA_TRACE(20,
1190 ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1191 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
1192 KMP_MB(); // Flush all pending memory write invalidates.
1193 }
1194 num_threads = this_thr->th.th_team_nproc;
1195 other_threads = team->t.t_threads;
Jim Cownie5f037e12014-10-07 16:25:50 +00001196
1197#ifdef KMP_REVERSE_HYPER_BAR
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001198 // Count up to correct level for parent
1199 for (level = 0, offset = 1;
1200 offset < num_threads && (((tid >> level) & (branch_factor - 1)) == 0);
1201 level += branch_bits, offset <<= branch_bits)
1202 ;
Jim Cownie5f037e12014-10-07 16:25:50 +00001203
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001204 // Now go down from there
1205 for (level -= branch_bits, offset >>= branch_bits; offset != 0;
1206 level -= branch_bits, offset >>= branch_bits)
Jim Cownie5f037e12014-10-07 16:25:50 +00001207#else
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001208 // Go down the tree, level by level
1209 for (level = 0, offset = 1; offset < num_threads;
1210 level += branch_bits, offset <<= branch_bits)
1211#endif // KMP_REVERSE_HYPER_BAR
1212 {
1213#ifdef KMP_REVERSE_HYPER_BAR
1214 /* Now go in reverse order through the children, highest to lowest.
1215 Initial setting of child is conservative here. */
1216 child = num_threads >> ((level == 0) ? level : level - 1);
1217 for (child = (child < branch_factor - 1) ? child : branch_factor - 1,
1218 child_tid = tid + (child << level);
1219 child >= 1; child--, child_tid -= (1 << level))
1220#else
1221 if (((tid >> level) & (branch_factor - 1)) != 0)
1222 // No need to go lower than this, since this is the level at which the
1223 // parent would be notified
1224 break;
1225 // Iterate through children on this level of the tree
1226 for (child = 1, child_tid = tid + (1 << level);
1227 child < branch_factor && child_tid < num_threads;
1228 child++, child_tid += (1 << level))
Jim Cownie5f037e12014-10-07 16:25:50 +00001229#endif // KMP_REVERSE_HYPER_BAR
1230 {
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001231 if (child_tid >= num_threads)
1232 continue; // Child doesn't exist so keep going
1233 else {
Ed Mastee632f142017-07-07 21:06:05 +00001234 kmp_info_t *child_thr = other_threads[child_tid];
1235 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
Jim Cownie5f037e12014-10-07 16:25:50 +00001236#if KMP_CACHE_MANAGE
Ed Mastee632f142017-07-07 21:06:05 +00001237 kmp_uint32 next_child_tid = child_tid - (1 << level);
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001238// Prefetch next thread's go count
1239#ifdef KMP_REVERSE_HYPER_BAR
1240 if (child - 1 >= 1 && next_child_tid < num_threads)
1241#else
1242 if (child + 1 < branch_factor && next_child_tid < num_threads)
1243#endif // KMP_REVERSE_HYPER_BAR
1244 KMP_CACHE_PREFETCH(
1245 &other_threads[next_child_tid]->th.th_bar[bt].bb.b_go);
Jim Cownie5f037e12014-10-07 16:25:50 +00001246#endif /* KMP_CACHE_MANAGE */
1247
1248#if KMP_BARRIER_ICV_PUSH
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001249 if (propagate_icvs) // push my fixed ICVs to my child
1250 copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
Jim Cownie5f037e12014-10-07 16:25:50 +00001251#endif // KMP_BARRIER_ICV_PUSH
1252
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001253 KA_TRACE(
1254 20,
1255 ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
1256 "go(%p): %u => %u\n",
1257 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
1258 team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
1259 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1260 // Release child from barrier
Terry Wilmarth71399132020-12-01 14:03:40 -06001261 kmp_flag_64<> flag(&child_bar->b_go, child_thr);
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001262 flag.release();
1263 }
Jim Cownie5f037e12014-10-07 16:25:50 +00001264 }
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001265 }
Jim Cownie5f037e12014-10-07 16:25:50 +00001266#if KMP_BARRIER_ICV_PUSH
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001267 if (propagate_icvs &&
1268 !KMP_MASTER_TID(tid)) { // copy ICVs locally to final dest
1269 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
1270 FALSE);
1271 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1272 &thr_bar->th_fixed_icvs);
1273 }
Jim Cownie5f037e12014-10-07 16:25:50 +00001274#endif
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001275 KA_TRACE(
1276 20,
1277 ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
1278 gtid, team->t.t_id, tid, bt));
Jim Cownie5f037e12014-10-07 16:25:50 +00001279}
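/* The loops above can be hard to follow out of context.  As a reading aid
   only (not part of the runtime), the self-contained sketch below mirrors the
   non-reversed release structure (KMP_REVERSE_HYPER_BAR not defined) and
   prints which tids a given tid would release at each level of the hypercube;
   the function name is hypothetical.

     #include <cstdio>

     // Enumerate the release fan-out of 'tid' in a hypercube barrier over
     // 'nthr' threads with 2^branch_bits children per level.
     static void print_hyper_release(int tid, int nthr, int branch_bits) {
       int branch_factor = 1 << branch_bits;
       for (int level = 0, offset = 1; offset < nthr;
            level += branch_bits, offset <<= branch_bits) {
         // A nonzero remainder means our own parent is notified at this
         // level, so there is nothing below us left to release.
         if (((tid >> level) & (branch_factor - 1)) != 0)
           break;
         for (int child = 1, child_tid = tid + (1 << level);
              child < branch_factor && child_tid < nthr;
              child++, child_tid += (1 << level))
           std::printf("level %d: T#%d releases T#%d\n", level, tid, child_tid);
       }
     }

   For example, print_hyper_release(0, 8, 1) reports 0->1 at level 0, 0->2 at
   level 1 and 0->4 at level 2, matching the tree embedded in the hypercube. */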
1280
1281// Hierarchical Barrier
1282
1283// Initialize thread barrier data
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001284/* Initializes/re-initializes the hierarchical barrier data stored on a thread.
1285 Performs the minimum amount of initialization required based on how the team
1286 has changed. Returns true if leaf children will require both on-core and
1287 traditional wake-up mechanisms. For example, if the team size increases,
1288 threads already in the team will respond to on-core wakeup on their parent
1289 thread, but threads newly added to the team will only be listening on
1290 their local b_go. */
1291static bool __kmp_init_hierarchical_barrier_thread(enum barrier_type bt,
1292 kmp_bstate_t *thr_bar,
1293 kmp_uint32 nproc, int gtid,
1294 int tid, kmp_team_t *team) {
1295 // Checks to determine if (re-)initialization is needed
1296 bool uninitialized = thr_bar->team == NULL;
1297 bool team_changed = team != thr_bar->team;
1298 bool team_sz_changed = nproc != thr_bar->nproc;
1299 bool tid_changed = tid != thr_bar->old_tid;
1300 bool retval = false;
Jim Cownie5f037e12014-10-07 16:25:50 +00001301
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001302 if (uninitialized || team_sz_changed) {
1303 __kmp_get_hierarchy(nproc, thr_bar);
1304 }
Jim Cownie5f037e12014-10-07 16:25:50 +00001305
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001306 if (uninitialized || team_sz_changed || tid_changed) {
tlwilmaracdce612021-03-05 09:21:39 -06001307 thr_bar->my_level = thr_bar->depth - 1; // default for primary thread
1308 thr_bar->parent_tid = -1; // default for primary thread
1309 if (!KMP_MASTER_TID(tid)) {
1310 // if not primary thread, find parent thread in hierarchy
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001311 kmp_uint32 d = 0;
1312 while (d < thr_bar->depth) { // find parent based on level of thread in
1313 // hierarchy, and note level
1314 kmp_uint32 rem;
tlwilmaracdce612021-03-05 09:21:39 -06001315 if (d == thr_bar->depth - 2) { // reached level right below the primary
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001316 thr_bar->parent_tid = 0;
1317 thr_bar->my_level = d;
1318 break;
Terry Wilmarth8f222cb2021-01-13 10:09:39 -06001319 } else if ((rem = tid % thr_bar->skip_per_level[d + 1]) != 0) {
1320 // TODO: can we make the above op faster?
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001321 // thread is not a subtree root at next level, so this is max
1322 thr_bar->parent_tid = tid - rem;
1323 thr_bar->my_level = d;
1324 break;
Jim Cownie5f037e12014-10-07 16:25:50 +00001325 }
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001326 ++d;
1327 }
Jim Cownie5f037e12014-10-07 16:25:50 +00001328 }
Terry Wilmarth8f222cb2021-01-13 10:09:39 -06001329 __kmp_type_convert(7 - ((tid - thr_bar->parent_tid) /
1330 (thr_bar->skip_per_level[thr_bar->my_level])),
1331 &(thr_bar->offset));
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001332 thr_bar->old_tid = tid;
1333 thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
1334 thr_bar->team = team;
1335 thr_bar->parent_bar =
1336 &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
1337 }
1338 if (uninitialized || team_changed || tid_changed) {
1339 thr_bar->team = team;
1340 thr_bar->parent_bar =
1341 &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
1342 retval = true;
1343 }
1344 if (uninitialized || team_sz_changed || tid_changed) {
1345 thr_bar->nproc = nproc;
1346 thr_bar->leaf_kids = thr_bar->base_leaf_kids;
1347 if (thr_bar->my_level == 0)
1348 thr_bar->leaf_kids = 0;
1349 if (thr_bar->leaf_kids && (kmp_uint32)tid + thr_bar->leaf_kids + 1 > nproc)
Terry Wilmarthc3686782020-12-31 00:39:48 +03001350 __kmp_type_convert(nproc - tid - 1, &(thr_bar->leaf_kids));
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001351 thr_bar->leaf_state = 0;
1352 for (int i = 0; i < thr_bar->leaf_kids; ++i)
1353 ((char *)&(thr_bar->leaf_state))[7 - i] = 1;
1354 }
1355 return retval;
Jim Cownie5f037e12014-10-07 16:25:50 +00001356}
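/* A note on the leaf_state byte flags set up above: a parent with up to eight
   leaf children dedicates one byte of its 64-bit b_arrived word to each of
   them, so every leaf can check in with a plain one-byte store and the parent
   can test "everyone arrived" with a single 64-bit compare.  The sketch below
   shows just that byte-flag idea in plain C++; it is illustrative only, the
   names are hypothetical, and it omits the memory ordering and flag classes
   the runtime actually uses.

     #include <cstdint>

     static volatile uint64_t arrived = 0; // stands in for parent's b_arrived

     // Leaf child i (0 <= i < 8) checks in by setting one byte of the word,
     // mirroring ((char *)&(thr_bar->leaf_state))[7 - i] = 1 above.
     static void leaf_check_in(int i) {
       ((volatile char *)&arrived)[7 - i] = 1;
     }

     // Mask the parent waits for when it has 'nkids' leaf children.
     static uint64_t leaf_mask(int nkids) {
       uint64_t m = 0;
       for (int i = 0; i < nkids; ++i)
         ((char *)&m)[7 - i] = 1;
       return m;
     }

     // Parent-side test: true once every leaf byte has been written.
     static bool leaves_arrived(int nkids) {
       uint64_t m = leaf_mask(nkids);
       return (arrived & m) == m;
     }
*/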
1357
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001358static void __kmp_hierarchical_barrier_gather(
1359 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
1360 void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
1361 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather);
Ed Mastee632f142017-07-07 21:06:05 +00001362 kmp_team_t *team = this_thr->th.th_team;
1363 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
1364 kmp_uint32 nproc = this_thr->th.th_team_nproc;
1365 kmp_info_t **other_threads = team->t.t_threads;
Hansang Baea3918ee2021-03-11 17:34:06 -06001366 kmp_uint64 new_state = 0;
Jim Cownie5f037e12014-10-07 16:25:50 +00001367
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001368 int level = team->t.t_level;
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001369 if (other_threads[0]
1370 ->th.th_teams_microtask) // are we inside the teams construct?
1371 if (this_thr->th.th_teams_size.nteams > 1)
1372 ++level; // level was not increased in teams construct for team_of_masters
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001373 if (level == 1)
1374 thr_bar->use_oncore_barrier = 1;
1375 else
1376 thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested
Jim Cownie5f037e12014-10-07 16:25:50 +00001377
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001378 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for "
1379 "barrier type %d\n",
1380 gtid, team->t.t_id, tid, bt));
1381 KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
Jim Cownie5f037e12014-10-07 16:25:50 +00001382
Andrey Churbanov6a1be002015-05-06 18:34:15 +00001383#if USE_ITT_BUILD && USE_ITT_NOTIFY
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001384 // Barrier imbalance - save arrive time to the thread
1385 if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
1386 this_thr->th.th_bar_arrive_time = __itt_get_timestamp();
1387 }
Andrey Churbanov6a1be002015-05-06 18:34:15 +00001388#endif
1389
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001390 (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid,
1391 team);
Jim Cownie5f037e12014-10-07 16:25:50 +00001392
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001393 if (thr_bar->my_level) { // not a leaf (my_level==0 means leaf)
Ed Mastee632f142017-07-07 21:06:05 +00001394 kmp_int32 child_tid;
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001395 new_state =
1396 (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
1397 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
1398 thr_bar->use_oncore_barrier) {
Andrey Churbanovb87d16a2017-07-17 09:03:14 +00001399 if (thr_bar->leaf_kids) {
1400 // First, wait for leaf children to check-in on my b_arrived flag
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001401 kmp_uint64 leaf_state =
1402 KMP_MASTER_TID(tid)
1403 ? thr_bar->b_arrived | thr_bar->leaf_state
1404 : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state;
1405 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting "
1406 "for leaf kids\n",
1407 gtid, team->t.t_id, tid));
Terry Wilmarth71399132020-12-01 14:03:40 -06001408 kmp_flag_64<> flag(&thr_bar->b_arrived, leaf_state);
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001409 flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
1410 if (reduce) {
protze@itc.rwth-aachen.de5210bae2019-12-27 14:39:50 +01001411 OMPT_REDUCTION_DECL(this_thr, gtid);
1412 OMPT_REDUCTION_BEGIN;
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001413 for (child_tid = tid + 1; child_tid <= tid + thr_bar->leaf_kids;
1414 ++child_tid) {
1415 KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
1416 "T#%d(%d:%d)\n",
1417 gtid, team->t.t_id, tid,
1418 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1419 child_tid));
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001420 (*reduce)(this_thr->th.th_local.reduce_data,
1421 other_threads[child_tid]->th.th_local.reduce_data);
1422 }
protze@itc.rwth-aachen.de5210bae2019-12-27 14:39:50 +01001423 OMPT_REDUCTION_END;
Jim Cownie5f037e12014-10-07 16:25:50 +00001424 }
Andrey Churbanov2356d3d2017-07-03 11:24:08 +00001425 // clear leaf_state bits
Andrey Churbanovb87d16a2017-07-17 09:03:14 +00001426 KMP_TEST_THEN_AND64(&thr_bar->b_arrived, ~(thr_bar->leaf_state));
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001427 }
1428 // Next, wait for higher level children on each child's b_arrived flag
1429 for (kmp_uint32 d = 1; d < thr_bar->my_level;
1430 ++d) { // gather lowest level threads first, but skip 0
1431 kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
1432 skip = thr_bar->skip_per_level[d];
1433 if (last > nproc)
1434 last = nproc;
1435 for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
Ed Mastee632f142017-07-07 21:06:05 +00001436 kmp_info_t *child_thr = other_threads[child_tid];
1437 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001438 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
1439 "T#%d(%d:%d) "
1440 "arrived(%p) == %llu\n",
1441 gtid, team->t.t_id, tid,
1442 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1443 child_tid, &child_bar->b_arrived, new_state));
Terry Wilmarth71399132020-12-01 14:03:40 -06001444 kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001445 flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001446 if (reduce) {
1447 KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
1448 "T#%d(%d:%d)\n",
1449 gtid, team->t.t_id, tid,
1450 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1451 child_tid));
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001452 (*reduce)(this_thr->th.th_local.reduce_data,
1453 child_thr->th.th_local.reduce_data);
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001454 }
Jim Cownie5f037e12014-10-07 16:25:50 +00001455 }
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001456 }
1457 } else { // Blocktime is not infinite
1458 for (kmp_uint32 d = 0; d < thr_bar->my_level;
1459 ++d) { // Gather lowest level threads first
1460 kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
1461 skip = thr_bar->skip_per_level[d];
1462 if (last > nproc)
1463 last = nproc;
1464 for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
Ed Mastee632f142017-07-07 21:06:05 +00001465 kmp_info_t *child_thr = other_threads[child_tid];
1466 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001467 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
1468 "T#%d(%d:%d) "
1469 "arrived(%p) == %llu\n",
1470 gtid, team->t.t_id, tid,
1471 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1472 child_tid, &child_bar->b_arrived, new_state));
Terry Wilmarth71399132020-12-01 14:03:40 -06001473 kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001474 flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001475 if (reduce) {
1476 KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
1477 "T#%d(%d:%d)\n",
1478 gtid, team->t.t_id, tid,
1479 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1480 child_tid));
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001481 (*reduce)(this_thr->th.th_local.reduce_data,
1482 child_thr->th.th_local.reduce_data);
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001483 }
1484 }
1485 }
Jim Cownie5f037e12014-10-07 16:25:50 +00001486 }
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001487 }
tlwilmaracdce612021-03-05 09:21:39 -06001488 // All subordinates are gathered; now release parent if not primary thread
Jim Cownie5f037e12014-10-07 16:25:50 +00001489
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001490 if (!KMP_MASTER_TID(tid)) { // worker threads release parent in hierarchy
Jonathan Peytonb2603202018-07-09 17:36:22 +00001491 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing"
1492 " T#%d(%d:%d) arrived(%p): %llu => %llu\n",
1493 gtid, team->t.t_id, tid,
1494 __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id,
1495 thr_bar->parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
1496 thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001497 /* Mark arrival to parent: After performing this write, a worker thread may
1498 not assume that the team is valid any more - it could be deallocated by
tlwilmaracdce612021-03-05 09:21:39 -06001499 the primary thread at any time. */
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001500 if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
1501 !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived
1502 // flag; release it
Terry Wilmarth71399132020-12-01 14:03:40 -06001503 kmp_flag_64<> flag(&thr_bar->b_arrived,
1504 other_threads[thr_bar->parent_tid]);
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001505 flag.release();
Jonathan Peytonb2603202018-07-09 17:36:22 +00001506 } else {
1507 // Leaf does special release on "offset" bits of parent's b_arrived flag
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001508 thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
Terry Wilmarth8f222cb2021-01-13 10:09:39 -06001509 kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived,
1510 thr_bar->offset + 1);
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001511 flag.set_waiter(other_threads[thr_bar->parent_tid]);
1512 flag.release();
Jim Cownie5f037e12014-10-07 16:25:50 +00001513 }
tlwilmaracdce612021-03-05 09:21:39 -06001514 } else { // Primary thread needs to update the team's b_arrived value
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001515 team->t.t_bar[bt].b_arrived = new_state;
1516 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d "
1517 "arrived(%p) = %llu\n",
1518 gtid, team->t.t_id, tid, team->t.t_id,
1519 &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
1520 }
1521 // Is the team access below unsafe or just technically invalid?
1522 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for "
1523 "barrier type %d\n",
1524 gtid, team->t.t_id, tid, bt));
Jim Cownie5f037e12014-10-07 16:25:50 +00001525}
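/* The gather loops above walk skip_per_level[]: at level d a non-leaf thread
   owns the tids in [tid, tid + skip_per_level[d + 1]) and waits on every
   skip_per_level[d]-th one of them.  The sketch below enumerates that
   traversal following the finite-blocktime branch (where leaf children go
   through the same loop) for an assumed two-level machine with
   skip_per_level = {1, 4, 16}; the real values come from
   __kmp_get_hierarchy() and the function name is hypothetical.

     #include <cstdio>

     static void print_hier_gather(int tid, int nproc, int my_level,
                                   const unsigned *skip_per_level) {
       for (int d = 0; d < my_level; ++d) {
         unsigned last = tid + skip_per_level[d + 1];
         unsigned skip = skip_per_level[d];
         if (last > (unsigned)nproc)
           last = (unsigned)nproc;
         for (unsigned child = tid + skip; child < last; child += skip)
           std::printf("level %d: T#%d waits on T#%u\n", d, tid, child);
       }
     }

   With skip_per_level = {1, 4, 16} and nproc = 16, thread 0 (my_level == 2)
   gathers 1, 2, 3 at level 0 and 4, 8, 12 at level 1, while thread 4
   (my_level == 1) gathers only 5, 6, 7. */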
1526
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001527static void __kmp_hierarchical_barrier_release(
1528 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
1529 int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
1530 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release);
Ed Mastee632f142017-07-07 21:06:05 +00001531 kmp_team_t *team;
1532 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
1533 kmp_uint32 nproc;
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001534 bool team_change = false; // indicates on-core barrier shouldn't be used
Jim Cownie5f037e12014-10-07 16:25:50 +00001535
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001536 if (KMP_MASTER_TID(tid)) {
1537 team = __kmp_threads[gtid]->th.th_team;
1538 KMP_DEBUG_ASSERT(team != NULL);
tlwilmaracdce612021-03-05 09:21:39 -06001539 KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) primary "
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001540 "entered barrier type %d\n",
1541 gtid, team->t.t_id, tid, bt));
1542 } else { // Worker threads
1543 // Wait for parent thread to release me
1544 if (!thr_bar->use_oncore_barrier ||
1545 __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME || thr_bar->my_level != 0 ||
1546 thr_bar->team == NULL) {
1547 // Use traditional method of waiting on my own b_go flag
1548 thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG;
Terry Wilmarth71399132020-12-01 14:03:40 -06001549 kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001550 flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001551 TCW_8(thr_bar->b_go,
1552 KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
1553 } else { // Thread barrier data is initialized, this is a leaf, blocktime is
1554 // infinite, not nested
1555 // Wait on my "offset" bits on parent's b_go flag
1556 thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG;
1557 kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP,
Terry Wilmarth8f222cb2021-01-13 10:09:39 -06001558 thr_bar->offset + 1, bt,
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001559 this_thr USE_ITT_BUILD_ARG(itt_sync_obj));
1560 flag.wait(this_thr, TRUE);
1561 if (thr_bar->wait_flag ==
1562 KMP_BARRIER_SWITCHING) { // Thread was switched to own b_go
1563 TCW_8(thr_bar->b_go,
1564 KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
1565 } else { // Reset my bits on parent's b_go flag
Andrey Churbanov2356d3d2017-07-03 11:24:08 +00001566 (RCAST(volatile char *,
Terry Wilmarth8f222cb2021-01-13 10:09:39 -06001567 &(thr_bar->parent_bar->b_go)))[thr_bar->offset + 1] = 0;
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001568 }
Jim Cownie5f037e12014-10-07 16:25:50 +00001569 }
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001570 thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
1571 // Early exit for reaping threads releasing forkjoin barrier
1572 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
1573 return;
1574 // The worker thread may now assume that the team is valid.
1575 team = __kmp_threads[gtid]->th.th_team;
1576 KMP_DEBUG_ASSERT(team != NULL);
1577 tid = __kmp_tid_from_gtid(gtid);
Jim Cownie5f037e12014-10-07 16:25:50 +00001578
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001579 KA_TRACE(
1580 20,
1581 ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1582 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
1583 KMP_MB(); // Flush all pending memory write invalidates.
1584 }
Jim Cownie5f037e12014-10-07 16:25:50 +00001585
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001586 nproc = this_thr->th.th_team_nproc;
1587 int level = team->t.t_level;
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001588 if (team->t.t_threads[0]
1589 ->th.th_teams_microtask) { // are we inside the teams construct?
1590 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
1591 this_thr->th.th_teams_level == level)
1592 ++level; // level was not increased in teams construct for team_of_workers
1593 if (this_thr->th.th_teams_size.nteams > 1)
1594 ++level; // level was not increased in teams construct for team_of_masters
1595 }
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001596 if (level == 1)
1597 thr_bar->use_oncore_barrier = 1;
1598 else
1599 thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested
Jim Cownie5f037e12014-10-07 16:25:50 +00001600
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001601 // If the team size has increased, we still communicate with old leaves via
1602 // oncore barrier.
1603 unsigned short int old_leaf_kids = thr_bar->leaf_kids;
1604 kmp_uint64 old_leaf_state = thr_bar->leaf_state;
1605 team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid,
1606 tid, team);
1607 // But if the entire team changes, we won't use oncore barrier at all
1608 if (team_change)
1609 old_leaf_kids = 0;
Jim Cownie5f037e12014-10-07 16:25:50 +00001610
1611#if KMP_BARRIER_ICV_PUSH
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001612 if (propagate_icvs) {
1613 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
1614 FALSE);
1615 if (KMP_MASTER_TID(
tlwilmaracdce612021-03-05 09:21:39 -06001616 tid)) { // primary already has copy in final destination; copy
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001617 copy_icvs(&thr_bar->th_fixed_icvs,
1618 &team->t.t_implicit_task_taskdata[tid].td_icvs);
1619 } else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
1620 thr_bar->use_oncore_barrier) { // optimization for inf blocktime
1621 if (!thr_bar->my_level) // I'm a leaf in the hierarchy (my_level==0)
1622 // leaves (on-core children) pull parent's fixed ICVs directly to local
1623 // ICV store
1624 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1625 &thr_bar->parent_bar->th_fixed_icvs);
1626 // non-leaves will get ICVs piggybacked with b_go via NGO store
1627 } else { // blocktime is not infinite; pull ICVs from parent's fixed ICVs
1628 if (thr_bar->my_level) // not a leaf; copy ICVs to my fixed ICVs child can
1629 // access
1630 copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs);
1631 else // leaves copy parent's fixed ICVs directly to local ICV store
1632 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1633 &thr_bar->parent_bar->th_fixed_icvs);
Jim Cownie5f037e12014-10-07 16:25:50 +00001634 }
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001635 }
Jim Cownie5f037e12014-10-07 16:25:50 +00001636#endif // KMP_BARRIER_ICV_PUSH
1637
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001638 // Now, release my children
1639 if (thr_bar->my_level) { // not a leaf
Ed Mastee632f142017-07-07 21:06:05 +00001640 kmp_int32 child_tid;
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001641 kmp_uint32 last;
1642 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
1643 thr_bar->use_oncore_barrier) {
1644 if (KMP_MASTER_TID(tid)) { // do a flat release
1645 // Set local b_go to bump children via NGO store of the cache line
1646 // containing ICVs and b_go.
1647 thr_bar->b_go = KMP_BARRIER_STATE_BUMP;
1648 // Use ngo stores if available; b_go piggybacks in the last 8 bytes of
1649 // the cache line
1650 ngo_load(&thr_bar->th_fixed_icvs);
1651 // This loops over all the threads skipping only the leaf nodes in the
1652 // hierarchy
1653 for (child_tid = thr_bar->skip_per_level[1]; child_tid < (int)nproc;
1654 child_tid += thr_bar->skip_per_level[1]) {
Ed Mastee632f142017-07-07 21:06:05 +00001655 kmp_bstate_t *child_bar =
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001656 &team->t.t_threads[child_tid]->th.th_bar[bt].bb;
1657 KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
1658 "releasing T#%d(%d:%d)"
1659 " go(%p): %u => %u\n",
1660 gtid, team->t.t_id, tid,
1661 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1662 child_tid, &child_bar->b_go, child_bar->b_go,
1663 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1664 // Use ngo store (if available) to both store ICVs and release child
1665 // via child's b_go
1666 ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
Jim Cownie5f037e12014-10-07 16:25:50 +00001667 }
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001668 ngo_sync();
1669 }
1670 TCW_8(thr_bar->b_go,
1671 KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
1672 // Now, release leaf children
1673 if (thr_bar->leaf_kids) { // if there are any
1674 // We test team_change on the off-chance that the level 1 team changed.
1675 if (team_change ||
1676 old_leaf_kids < thr_bar->leaf_kids) { // some old, some new
1677 if (old_leaf_kids) { // release old leaf kids
1678 thr_bar->b_go |= old_leaf_state;
1679 }
1680 // Release new leaf kids
1681 last = tid + thr_bar->skip_per_level[1];
1682 if (last > nproc)
1683 last = nproc;
1684 for (child_tid = tid + 1 + old_leaf_kids; child_tid < (int)last;
1685 ++child_tid) { // skip_per_level[0]=1
Ed Mastee632f142017-07-07 21:06:05 +00001686 kmp_info_t *child_thr = team->t.t_threads[child_tid];
1687 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001688 KA_TRACE(
1689 20,
1690 ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing"
1691 " T#%d(%d:%d) go(%p): %u => %u\n",
1692 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
1693 team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
1694 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1695 // Release child using child's b_go flag
Terry Wilmarth71399132020-12-01 14:03:40 -06001696 kmp_flag_64<> flag(&child_bar->b_go, child_thr);
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001697 flag.release();
1698 }
1699 } else { // Release all children at once with leaf_state bits on my own
1700 // b_go flag
1701 thr_bar->b_go |= thr_bar->leaf_state;
Jim Cownie5f037e12014-10-07 16:25:50 +00001702 }
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001703 }
1704 } else { // Blocktime is not infinite; do a simple hierarchical release
1705 for (int d = thr_bar->my_level - 1; d >= 0;
1706 --d) { // Release highest level threads first
1707 last = tid + thr_bar->skip_per_level[d + 1];
1708 kmp_uint32 skip = thr_bar->skip_per_level[d];
1709 if (last > nproc)
1710 last = nproc;
1711 for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
Ed Mastee632f142017-07-07 21:06:05 +00001712 kmp_info_t *child_thr = team->t.t_threads[child_tid];
1713 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001714 KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
1715 "releasing T#%d(%d:%d) go(%p): %u => %u\n",
1716 gtid, team->t.t_id, tid,
1717 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1718 child_tid, &child_bar->b_go, child_bar->b_go,
1719 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1720 // Release child using child's b_go flag
Terry Wilmarth71399132020-12-01 14:03:40 -06001721 kmp_flag_64<> flag(&child_bar->b_go, child_thr);
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001722 flag.release();
1723 }
1724 }
Jim Cownie5f037e12014-10-07 16:25:50 +00001725 }
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001726#if KMP_BARRIER_ICV_PUSH
1727 if (propagate_icvs && !KMP_MASTER_TID(tid))
1728 // non-leaves copy ICVs from fixed ICVs to local dest
1729 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1730 &thr_bar->th_fixed_icvs);
1731#endif // KMP_BARRIER_ICV_PUSH
1732 }
1733 KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for "
1734 "barrier type %d\n",
1735 gtid, team->t.t_id, tid, bt));
Jim Cownie5f037e12014-10-07 16:25:50 +00001736}
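/* The "flat release" above relies on one layout invariant: b_go piggybacks in
   the last eight bytes of the cache line that holds th_fixed_icvs, so a single
   cache-line store (ngo_store_go) both publishes the ICVs and flips the
   child's go flag.  A minimal layout sketch, with hypothetical field names and
   a filler size chosen only to make the 64-byte math visible:

     #include <cstdint>

     struct alignas(64) icvs_and_go_sketch {
       char icvs[56];          // stands in for the copied ICV block
       volatile uint64_t b_go; // last 8 bytes of the line: the release flag
     };
     static_assert(sizeof(icvs_and_go_sketch) == 64,
                   "ICVs and go flag must share one 64-byte cache line");

   Copying 64 bytes from the parent's line (whose b_go was pre-set to
   KMP_BARRIER_STATE_BUMP) into the child's line is then a release in itself,
   which is what the ngo_store_go() calls above do when such stores are
   available. */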
1737
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001738// End of Barrier Algorithms
Jim Cownie5f037e12014-10-07 16:25:50 +00001739
Jonathan Peyton2d9c27e2019-02-19 18:47:57 +00001740// type traits for cancellable value
1741// if cancellable is true, then is_cancellable is a normal boolean variable
1742// if cancellable is false, then is_cancellable is a compile time constant
1743template <bool cancellable> struct is_cancellable {};
1744template <> struct is_cancellable<true> {
1745 bool value;
1746 is_cancellable() : value(false) {}
1747 is_cancellable(bool b) : value(b) {}
1748 is_cancellable &operator=(bool b) {
1749 value = b;
1750 return *this;
1751 }
1752 operator bool() const { return value; }
1753};
1754template <> struct is_cancellable<false> {
1755 is_cancellable &operator=(bool b) { return *this; }
1756 constexpr operator bool() const { return false; }
1757};
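/* Why two specializations: when cancellable is false, operator bool() is a
   constexpr returning false, so every "if (cancelled)" in the barrier template
   below folds away at compile time and the non-cancellable barrier pays
   nothing for cancellation support.  A small illustrative sketch of that
   effect (poll_cancellation and barrier_sketch are hypothetical names, not
   runtime functions):

     static bool poll_cancellation() { return false; } // stand-in check

     template <bool cancellable> static int barrier_sketch() {
       is_cancellable<cancellable> cancelled;
       cancelled = poll_cancellation(); // real store only when cancellable
       if (cancelled)   // constant false when cancellable == false, so the
         return 1;      // compiler drops this branch entirely
       return 0;
     }

   barrier_sketch<false>() therefore compiles down to the plain "return 0"
   path, which is how __kmp_barrier_template<false> is meant to behave. */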
1758
Jim Cownie5f037e12014-10-07 16:25:50 +00001759// Internal function to do a barrier.
1760/* If is_split is true, do a split barrier, otherwise, do a plain barrier
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001761 If reduce is non-NULL, do a split reduction barrier, otherwise, do a split
1762 barrier
Jonathan Peyton2d9c27e2019-02-19 18:47:57 +00001763 When cancellable = false,
tlwilmaracdce612021-03-05 09:21:39 -06001764 Returns 0 if primary thread, 1 if worker thread.
Jonathan Peyton2d9c27e2019-02-19 18:47:57 +00001765 When cancellable = true,
1766 Returns 0 if not cancelled, 1 if cancelled. */
1767template <bool cancellable = false>
1768static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split,
1769 size_t reduce_size, void *reduce_data,
1770 void (*reduce)(void *, void *)) {
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001771 KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier);
1772 KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
Ed Mastee632f142017-07-07 21:06:05 +00001773 int tid = __kmp_tid_from_gtid(gtid);
1774 kmp_info_t *this_thr = __kmp_threads[gtid];
1775 kmp_team_t *team = this_thr->th.th_team;
1776 int status = 0;
Jonathan Peyton2d9c27e2019-02-19 18:47:57 +00001777 is_cancellable<cancellable> cancelled;
Joachim Protze62a68bd2018-09-10 14:34:54 +00001778#if OMPT_SUPPORT && OMPT_OPTIONAL
Joachim Protze6caf9352017-11-01 10:08:30 +00001779 ompt_data_t *my_task_data;
1780 ompt_data_t *my_parallel_data;
1781 void *return_address;
Jonathan Peyton6f8ffce2019-02-28 20:55:39 +00001782 ompt_sync_region_t barrier_kind;
Andrey Churbanov82ccfaa2015-04-29 16:42:24 +00001783#endif
Jim Cownie5f037e12014-10-07 16:25:50 +00001784
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001785 KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n", gtid,
1786 __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
Jim Cownie5f037e12014-10-07 16:25:50 +00001787
Jonathan Peyton9e4474d2015-06-29 17:28:57 +00001788#if OMPT_SUPPORT
Joachim Protze6caf9352017-11-01 10:08:30 +00001789 if (ompt_enabled.enabled) {
1790#if OMPT_OPTIONAL
1791 my_task_data = OMPT_CUR_TASK_DATA(this_thr);
1792 my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
1793 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
Jonathan Peyton6f8ffce2019-02-28 20:55:39 +00001794 barrier_kind = __ompt_get_barrier_kind(bt, this_thr);
Joachim Protze6caf9352017-11-01 10:08:30 +00001795 if (ompt_enabled.ompt_callback_sync_region) {
1796 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
Jonathan Peyton6f8ffce2019-02-28 20:55:39 +00001797 barrier_kind, ompt_scope_begin, my_parallel_data, my_task_data,
1798 return_address);
Andrey Churbanov82ccfaa2015-04-29 16:42:24 +00001799 }
Joachim Protze6caf9352017-11-01 10:08:30 +00001800 if (ompt_enabled.ompt_callback_sync_region_wait) {
1801 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
Jonathan Peyton6f8ffce2019-02-28 20:55:39 +00001802 barrier_kind, ompt_scope_begin, my_parallel_data, my_task_data,
1803 return_address);
Andrey Churbanov82ccfaa2015-04-29 16:42:24 +00001804 }
Jonathan Peyton9e4474d2015-06-29 17:28:57 +00001805#endif
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001806 // It is OK to report the barrier state after the barrier begin callback.
1807 // According to the OMPT specification, a compliant implementation may
1808 // even delay reporting this state until the barrier begins to wait.
Hansang Bae3a33e252024-07-16 09:52:20 -05001809 auto *ompt_thr_info = &this_thr->th.ompt_thread_info;
1810 switch (barrier_kind) {
1811 case ompt_sync_region_barrier_explicit:
1812 ompt_thr_info->state = ompt_state_wait_barrier_explicit;
1813 break;
1814 case ompt_sync_region_barrier_implicit_workshare:
1815 ompt_thr_info->state = ompt_state_wait_barrier_implicit_workshare;
1816 break;
1817 case ompt_sync_region_barrier_implicit_parallel:
1818 ompt_thr_info->state = ompt_state_wait_barrier_implicit_parallel;
1819 break;
1820 case ompt_sync_region_barrier_teams:
1821 ompt_thr_info->state = ompt_state_wait_barrier_teams;
1822 break;
1823 case ompt_sync_region_barrier_implementation:
1824 [[fallthrough]];
1825 default:
1826 ompt_thr_info->state = ompt_state_wait_barrier_implementation;
1827 }
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001828 }
Andrey Churbanov82ccfaa2015-04-29 16:42:24 +00001829#endif
1830
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001831 if (!team->t.t_serialized) {
1832#if USE_ITT_BUILD
1833 // This value will be used in itt notify events below.
1834 void *itt_sync_obj = NULL;
1835#if USE_ITT_NOTIFY
1836 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1837 itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
1838#endif
1839#endif /* USE_ITT_BUILD */
Jim Cownie5f037e12014-10-07 16:25:50 +00001840 if (__kmp_tasking_mode == tskm_extra_barrier) {
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001841 __kmp_tasking_barrier(team, this_thr, gtid);
1842 KA_TRACE(15,
1843 ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n", gtid,
1844 __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
Jim Cownie5f037e12014-10-07 16:25:50 +00001845 }
Jim Cownie5f037e12014-10-07 16:25:50 +00001846
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001847 /* Copy the blocktime info to the thread, where __kmp_wait_template() can
1848 access it when the team struct is not guaranteed to exist. */
1849 // See note about the corresponding code in __kmp_join_barrier() being
1850 // performance-critical.
Jim Cownie5f037e12014-10-07 16:25:50 +00001851 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
Jonathan Peyton786babf2016-10-07 18:12:19 +00001852#if KMP_USE_MONITOR
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001853 this_thr->th.th_team_bt_intervals =
1854 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1855 this_thr->th.th_team_bt_set =
1856 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
Jonathan Peytond5c70582017-01-27 17:54:31 +00001857#else
Jonathan Peyton52430c62017-09-05 15:45:48 +00001858 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
Jonathan Peytond5c70582017-01-27 17:54:31 +00001859#endif
Jim Cownie5f037e12014-10-07 16:25:50 +00001860 }
1861
1862#if USE_ITT_BUILD
1863 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001864 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
Jim Cownie5f037e12014-10-07 16:25:50 +00001865#endif /* USE_ITT_BUILD */
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001866#if USE_DEBUGGER
1867 // Let the debugger know: the thread arrived at the barrier and is waiting.
tlwilmaracdce612021-03-05 09:21:39 -06001868 if (KMP_MASTER_TID(tid)) { // Primary thread counter stored in team struct
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001869 team->t.t_bar[bt].b_master_arrived += 1;
1870 } else {
1871 this_thr->th.th_bar[bt].bb.b_worker_arrived += 1;
1872 } // if
1873#endif /* USE_DEBUGGER */
1874 if (reduce != NULL) {
1875 // KMP_DEBUG_ASSERT( is_split == TRUE ); // #C69956
1876 this_thr->th.th_local.reduce_data = reduce_data;
1877 }
Jim Cownie5f037e12014-10-07 16:25:50 +00001878
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001879 if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec)
Jonathan Peytona64c7e62024-05-07 08:41:51 -05001880 __kmp_task_team_setup(this_thr, team);
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001881
Jonathan Peyton2d9c27e2019-02-19 18:47:57 +00001882 if (cancellable) {
1883 cancelled = __kmp_linear_barrier_gather_cancellable(
1884 bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));
1885 } else {
1886 switch (__kmp_barrier_gather_pattern[bt]) {
Terry Wilmarth70439502021-07-15 10:28:47 -05001887 case bp_dist_bar: {
1888 __kmp_dist_barrier_gather(bt, this_thr, gtid, tid,
1889 reduce USE_ITT_BUILD_ARG(itt_sync_obj));
1890 break;
1891 }
Jonathan Peyton2d9c27e2019-02-19 18:47:57 +00001892 case bp_hyper_bar: {
1893 // don't set branch bits to 0; use linear
1894 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]);
1895 __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid,
1896 reduce USE_ITT_BUILD_ARG(itt_sync_obj));
1897 break;
1898 }
1899 case bp_hierarchical_bar: {
1900 __kmp_hierarchical_barrier_gather(
1901 bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));
1902 break;
1903 }
1904 case bp_tree_bar: {
1905 // don't set branch bits to 0; use linear
1906 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]);
1907 __kmp_tree_barrier_gather(bt, this_thr, gtid, tid,
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001908 reduce USE_ITT_BUILD_ARG(itt_sync_obj));
Jonathan Peyton2d9c27e2019-02-19 18:47:57 +00001909 break;
1910 }
1911 default: {
1912 __kmp_linear_barrier_gather(bt, this_thr, gtid, tid,
1913 reduce USE_ITT_BUILD_ARG(itt_sync_obj));
1914 }
1915 }
Jim Cownie5f037e12014-10-07 16:25:50 +00001916 }
1917
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001918 KMP_MB();
1919
Jim Cownie5f037e12014-10-07 16:25:50 +00001920 if (KMP_MASTER_TID(tid)) {
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001921 status = 0;
Jonathan Peyton2d9c27e2019-02-19 18:47:57 +00001922 if (__kmp_tasking_mode != tskm_immediate_exec && !cancelled) {
Andrey Churbanov2356d3d2017-07-03 11:24:08 +00001923 __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001924 }
1925#if USE_DEBUGGER
1926 // Let the debugger know: All threads have arrived and are starting to
1927 // leave the barrier.
1928 team->t.t_bar[bt].b_team_arrived += 1;
1929#endif
1930
Jonathan Peyton2d9c27e2019-02-19 18:47:57 +00001931 if (__kmp_omp_cancellation) {
1932 kmp_int32 cancel_request = KMP_ATOMIC_LD_RLX(&team->t.t_cancel_request);
1933 // Reset cancellation flag for worksharing constructs
1934 if (cancel_request == cancel_loop ||
1935 cancel_request == cancel_sections) {
1936 KMP_ATOMIC_ST_RLX(&team->t.t_cancel_request, cancel_noreq);
1937 }
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001938 }
Jim Cownie5f037e12014-10-07 16:25:50 +00001939#if USE_ITT_BUILD
tlwilmaracdce612021-03-05 09:21:39 -06001940 /* TODO: In case of split reduction barrier, primary thread may send
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001941 acquired event early, before the final summation into the shared
1942 variable is done (final summation can be a long operation for array
1943 reductions). */
1944 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1945 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1946#endif /* USE_ITT_BUILD */
1947#if USE_ITT_BUILD && USE_ITT_NOTIFY
1948 // Barrier - report frame end (only if active_level == 1)
1949 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
1950 __kmp_forkjoin_frames_mode &&
AndreyChurbanov967ebf12020-07-17 21:10:25 +03001951 (this_thr->th.th_teams_microtask == NULL || // either not in teams
1952 this_thr->th.th_teams_size.nteams == 1) && // or inside single team
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001953 team->t.t_active_level == 1) {
Jonathan Peytone9a23022018-08-09 22:04:30 +00001954 ident_t *loc = __kmp_threads[gtid]->th.th_ident;
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001955 kmp_uint64 cur_time = __itt_get_timestamp();
1956 kmp_info_t **other_threads = team->t.t_threads;
1957 int nproc = this_thr->th.th_team_nproc;
1958 int i;
1959 switch (__kmp_forkjoin_frames_mode) {
1960 case 1:
1961 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
1962 loc, nproc);
1963 this_thr->th.th_frame_time = cur_time;
1964 break;
1965 case 2: // AC 2015-01-19: currently does not work for hierarchical (to
1966 // be fixed)
1967 __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time,
1968 1, loc, nproc);
1969 break;
1970 case 3:
1971 if (__itt_metadata_add_ptr) {
tlwilmaracdce612021-03-05 09:21:39 -06001972 // Initialize with primary thread's wait time
Jonathan Peytona770c6d2017-05-12 18:01:32 +00001973 kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
1974 // Set arrive time to zero to be able to check it in
1975 // __kmp_invoke_task(); the same is done inside the loop below
1976 this_thr->th.th_bar_arrive_time = 0;
1977 for (i = 1; i < nproc; ++i) {
1978 delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
1979 other_threads[i]->th.th_bar_arrive_time = 0;
1980 }
1981 __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
1982 cur_time, delta,
1983 (kmp_uint64)(reduce != NULL));
1984 }
1985 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
1986 loc, nproc);
1987 this_thr->th.th_frame_time = cur_time;
1988 break;
1989 }
1990 }
1991#endif /* USE_ITT_BUILD */
1992 } else {
1993 status = 1;
1994#if USE_ITT_BUILD
1995 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1996 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1997#endif /* USE_ITT_BUILD */
1998 }
Jonathan Peyton2d9c27e2019-02-19 18:47:57 +00001999 if ((status == 1 || !is_split) && !cancelled) {
2000 if (cancellable) {
2001 cancelled = __kmp_linear_barrier_release_cancellable(
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002002 bt, this_thr, gtid, tid, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
Jonathan Peyton2d9c27e2019-02-19 18:47:57 +00002003 } else {
2004 switch (__kmp_barrier_release_pattern[bt]) {
Terry Wilmarth70439502021-07-15 10:28:47 -05002005 case bp_dist_bar: {
2006 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
2007 __kmp_dist_barrier_release(bt, this_thr, gtid, tid,
2008 FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
2009 break;
2010 }
Jonathan Peyton2d9c27e2019-02-19 18:47:57 +00002011 case bp_hyper_bar: {
2012 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
2013 __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
2014 FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
2015 break;
2016 }
2017 case bp_hierarchical_bar: {
2018 __kmp_hierarchical_barrier_release(
2019 bt, this_thr, gtid, tid, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
2020 break;
2021 }
2022 case bp_tree_bar: {
2023 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
2024 __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002025 FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
Jonathan Peyton2d9c27e2019-02-19 18:47:57 +00002026 break;
2027 }
2028 default: {
2029 __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
2030 FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
2031 }
2032 }
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002033 }
Jonathan Peyton2d9c27e2019-02-19 18:47:57 +00002034 if (__kmp_tasking_mode != tskm_immediate_exec && !cancelled) {
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002035 __kmp_task_team_sync(this_thr, team);
2036 }
2037 }
2038
2039#if USE_ITT_BUILD
2040 /* GEH: TODO: Move this under if-condition above and also include in
2041 __kmp_end_split_barrier(). This will more accurately represent the actual
2042 release time of the threads for split barriers. */
2043 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
2044 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
2045#endif /* USE_ITT_BUILD */
2046 } else { // Team is serialized.
2047 status = 0;
2048 if (__kmp_tasking_mode != tskm_immediate_exec) {
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002049 if (this_thr->th.th_task_team != NULL) {
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002050#if USE_ITT_NOTIFY
Jonathan Peytonc90278f2018-08-24 18:03:27 +00002051 void *itt_sync_obj = NULL;
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002052 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
2053 itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
2054 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
2055 }
2056#endif
2057
Shilei Tiane34c7a32021-12-29 23:22:37 -05002058 KMP_DEBUG_ASSERT(
2059 this_thr->th.th_task_team->tt.tt_found_proxy_tasks == TRUE ||
2060 this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered ==
2061 TRUE);
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002062 __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
Jonathan Peytona64c7e62024-05-07 08:41:51 -05002063 __kmp_task_team_setup(this_thr, team);
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002064
2065#if USE_ITT_BUILD
Jim Cownie5f037e12014-10-07 16:25:50 +00002066 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002067 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
2068#endif /* USE_ITT_BUILD */
2069 }
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002070 }
2071 }
2072 KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
2073 gtid, __kmp_team_from_gtid(gtid)->t.t_id,
2074 __kmp_tid_from_gtid(gtid), status));
2075
2076#if OMPT_SUPPORT
Joachim Protze6caf9352017-11-01 10:08:30 +00002077 if (ompt_enabled.enabled) {
2078#if OMPT_OPTIONAL
2079 if (ompt_enabled.ompt_callback_sync_region_wait) {
2080 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
Jonathan Peyton6f8ffce2019-02-28 20:55:39 +00002081 barrier_kind, ompt_scope_end, my_parallel_data, my_task_data,
2082 return_address);
Joachim Protze6caf9352017-11-01 10:08:30 +00002083 }
2084 if (ompt_enabled.ompt_callback_sync_region) {
2085 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
Jonathan Peyton6f8ffce2019-02-28 20:55:39 +00002086 barrier_kind, ompt_scope_end, my_parallel_data, my_task_data,
2087 return_address);
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002088 }
2089#endif
Joachim Protze96ae9972018-12-18 08:52:30 +00002090 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002091 }
2092#endif
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002093
Jonathan Peyton2d9c27e2019-02-19 18:47:57 +00002094 if (cancellable)
2095 return (int)cancelled;
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002096 return status;
2097}
2098
tlwilmaracdce612021-03-05 09:21:39 -06002099// Returns 0 if primary thread, 1 if worker thread.
Jonathan Peyton2d9c27e2019-02-19 18:47:57 +00002100int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
2101 size_t reduce_size, void *reduce_data,
2102 void (*reduce)(void *, void *)) {
2103 return __kmp_barrier_template<>(bt, gtid, is_split, reduce_size, reduce_data,
2104 reduce);
2105}
2106
2107#if defined(KMP_GOMP_COMPAT)
2108// Returns 1 if cancelled, 0 otherwise
2109int __kmp_barrier_gomp_cancel(int gtid) {
2110 if (__kmp_omp_cancellation) {
2111 int cancelled = __kmp_barrier_template<true>(bs_plain_barrier, gtid, FALSE,
2112 0, NULL, NULL);
2113 if (cancelled) {
2114 int tid = __kmp_tid_from_gtid(gtid);
2115 kmp_info_t *this_thr = __kmp_threads[gtid];
2116 if (KMP_MASTER_TID(tid)) {
tlwilmaracdce612021-03-05 09:21:39 -06002117 // Primary thread does not need to revert anything
Jonathan Peyton2d9c27e2019-02-19 18:47:57 +00002118 } else {
2119 // Workers need to revert their private b_arrived flag
2120 this_thr->th.th_bar[bs_plain_barrier].bb.b_arrived -=
2121 KMP_BARRIER_STATE_BUMP;
2122 }
2123 }
2124 return cancelled;
2125 }
2126 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2127 return FALSE;
2128}
2129#endif
2130
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002131void __kmp_end_split_barrier(enum barrier_type bt, int gtid) {
2132 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier);
2133 KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
Hansang Baea3918ee2021-03-11 17:34:06 -06002134 KMP_DEBUG_ASSERT(bt < bs_last_barrier);
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002135 int tid = __kmp_tid_from_gtid(gtid);
2136 kmp_info_t *this_thr = __kmp_threads[gtid];
2137 kmp_team_t *team = this_thr->th.th_team;
2138
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002139 if (!team->t.t_serialized) {
2140 if (KMP_MASTER_GTID(gtid)) {
2141 switch (__kmp_barrier_release_pattern[bt]) {
Terry Wilmarth70439502021-07-15 10:28:47 -05002142 case bp_dist_bar: {
2143 __kmp_dist_barrier_release(bt, this_thr, gtid, tid,
2144 FALSE USE_ITT_BUILD_ARG(NULL));
2145 break;
2146 }
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002147 case bp_hyper_bar: {
2148 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
2149 __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
2150 FALSE USE_ITT_BUILD_ARG(NULL));
2151 break;
2152 }
2153 case bp_hierarchical_bar: {
2154 __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid,
2155 FALSE USE_ITT_BUILD_ARG(NULL));
2156 break;
2157 }
2158 case bp_tree_bar: {
2159 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
2160 __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
2161 FALSE USE_ITT_BUILD_ARG(NULL));
2162 break;
2163 }
2164 default: {
2165 __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
2166 FALSE USE_ITT_BUILD_ARG(NULL));
2167 }
2168 }
2169 if (__kmp_tasking_mode != tskm_immediate_exec) {
2170 __kmp_task_team_sync(this_thr, team);
2171 } // if
2172 }
2173 }
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002174}
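/* Taken together, __kmp_barrier(..., is_split = TRUE, ...) and
   __kmp_end_split_barrier() form the split-barrier protocol used for
   reductions: every thread gathers and contributes its partial result, the
   thread that gets status 0 sees the combined value, and the release half is
   deferred until that thread has finished the final summation.  A hedged,
   caller-side sketch of that usage (the combiner and wrapper are hypothetical
   illustrations, not runtime entry points):

     // Hypothetical combiner matching the reduce callback signature.
     static void add_ints(void *lhs, void *rhs) {
       *(int *)lhs += *(int *)rhs;
     }

     static void split_reduce_sketch(int gtid, int *my_partial) {
       int status = __kmp_barrier(bs_plain_barrier, gtid, /*is_split=*/TRUE,
                                  sizeof(int), my_partial, add_ints);
       if (status == 0) {
         // Only one thread reaches this point before the others are released;
         // its my_partial now holds the fully combined value.
         __kmp_end_split_barrier(bs_plain_barrier, gtid);
       }
     }
*/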
2175
2176void __kmp_join_barrier(int gtid) {
2177 KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier);
2178 KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
Hansang Baea3918ee2021-03-11 17:34:06 -06002179
2180 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
2181
Ed Mastee632f142017-07-07 21:06:05 +00002182 kmp_info_t *this_thr = __kmp_threads[gtid];
2183 kmp_team_t *team;
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002184 int tid;
2185#ifdef KMP_DEBUG
2186 int team_id;
2187#endif /* KMP_DEBUG */
2188#if USE_ITT_BUILD
2189 void *itt_sync_obj = NULL;
2190#if USE_ITT_NOTIFY
2191 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) // Don't call routine without need
2192 // Get object created at fork_barrier
2193 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
2194#endif
2195#endif /* USE_ITT_BUILD */
Jonathan Peytonedc7f6d2022-01-31 10:04:49 -06002196#if ((USE_ITT_BUILD && USE_ITT_NOTIFY) || defined KMP_DEBUG)
2197 int nproc = this_thr->th.th_team_nproc;
2198#endif
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002199 KMP_MB();
2200
2201 // Get current info
2202 team = this_thr->th.th_team;
Jonathan Peytonedc7f6d2022-01-31 10:04:49 -06002203 KMP_DEBUG_ASSERT(nproc == team->t.t_nproc);
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002204 tid = __kmp_tid_from_gtid(gtid);
2205#ifdef KMP_DEBUG
2206 team_id = team->t.t_id;
AndreyChurbanovbb76e132021-07-30 17:04:42 +03002207 kmp_info_t *master_thread = this_thr->th.th_team_master;
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002208 if (master_thread != team->t.t_threads[0]) {
2209 __kmp_print_structure();
2210 }
2211#endif /* KMP_DEBUG */
2212 KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0]);
2213 KMP_MB();
2214
2215 // Verify state
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002216 KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team));
2217 KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root));
2218 KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid]);
2219 KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n",
2220 gtid, team_id, tid));
2221
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002222#if OMPT_SUPPORT
Joachim Protze6caf9352017-11-01 10:08:30 +00002223 if (ompt_enabled.enabled) {
2224#if OMPT_OPTIONAL
Joachim Protze62a68bd2018-09-10 14:34:54 +00002225 ompt_data_t *my_task_data;
2226 ompt_data_t *my_parallel_data;
Joachim Protze6caf9352017-11-01 10:08:30 +00002227 void *codeptr = NULL;
2228 int ds_tid = this_thr->th.th_info.ds.ds_tid;
2229 if (KMP_MASTER_TID(ds_tid) &&
2230 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
2231 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
2232 codeptr = team->t.ompt_team_info.master_return_address;
2233 my_task_data = OMPT_CUR_TASK_DATA(this_thr);
2234 my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
Hansang Bae3a33e252024-07-16 09:52:20 -05002235 ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
2236 ompt_state_t ompt_state = ompt_state_wait_barrier_implicit_parallel;
2237 if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league) {
2238 sync_kind = ompt_sync_region_barrier_teams;
2239 ompt_state = ompt_state_wait_barrier_teams;
2240 }
Joachim Protze6caf9352017-11-01 10:08:30 +00002241 if (ompt_enabled.ompt_callback_sync_region) {
2242 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
Hansang Bae3a33e252024-07-16 09:52:20 -05002243 sync_kind, ompt_scope_begin, my_parallel_data, my_task_data, codeptr);
Joachim Protze6caf9352017-11-01 10:08:30 +00002244 }
2245 if (ompt_enabled.ompt_callback_sync_region_wait) {
2246 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
Hansang Bae3a33e252024-07-16 09:52:20 -05002247 sync_kind, ompt_scope_begin, my_parallel_data, my_task_data, codeptr);
Joachim Protze6caf9352017-11-01 10:08:30 +00002248 }
Joachim Protze201c6ca2018-01-10 12:51:27 +00002249 if (!KMP_MASTER_TID(ds_tid))
2250 this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr);
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002251#endif
Hansang Bae3a33e252024-07-16 09:52:20 -05002252 this_thr->th.ompt_thread_info.state = ompt_state;
Joachim Protze6caf9352017-11-01 10:08:30 +00002253 }
Jonathan Peytona770c6d2017-05-12 18:01:32 +00002254#endif

  if (__kmp_tasking_mode == tskm_extra_barrier) {
    __kmp_tasking_barrier(team, this_thr, gtid);
    KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n",
                  gtid, team_id, tid));
  }
#ifdef KMP_DEBUG
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KA_TRACE(20, ("__kmp_join_barrier: T#%d, old team = %d, old task_team = "
                  "%p, th_task_team = %p\n",
                  __kmp_gtid_from_thread(this_thr), team_id,
                  team->t.t_task_team[this_thr->th.th_task_state],
                  this_thr->th.th_task_team));
    KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, this_thr);
  }
#endif /* KMP_DEBUG */

  /* Copy the blocktime info to the thread, where __kmp_wait_template() can
     access it when the team struct is not guaranteed to exist. Doing these
     loads causes a cache miss that slows down EPCC parallel by 2x. As a
     workaround, we do not perform the copy if blocktime=infinite, since the
     values are not used by __kmp_wait_template() in that case. */
  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
    this_thr->th.th_team_bt_intervals =
        team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
    this_thr->th.th_team_bt_set =
        team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
#else
    this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
#endif
  }
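  // For reference, the copy above is only needed for finite blocktimes: with
  // KMP_BLOCKTIME=infinite (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) the
  // workers spin indefinitely and never consult bt_intervals/bt_set. A
  // hypothetical user-side sketch follows (guarded by "#if 0"; it assumes the
  // kmp_set_blocktime() entry point declared in kmp.h and is illustrative
  // only, not part of this file's logic).
#if 0
#include <omp.h>
extern "C" void kmp_set_blocktime(int msec); // assumed user-level API

int main() {
  kmp_set_blocktime(0); // workers give up the CPU as soon as a wait starts
#pragma omp parallel
  {
    // ... work ...
  } // the join barrier here still performs the copy above (finite blocktime)
  return 0;
}
#endif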

#if USE_ITT_BUILD
  if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
    __kmp_itt_barrier_starting(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */

  switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {
  case bp_dist_bar: {
    __kmp_dist_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                              NULL USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_hyper_bar: {
    KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
    __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                               NULL USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_hierarchical_bar: {
    __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                                      NULL USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_tree_bar: {
    KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
    __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                              NULL USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  default: {
    __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                                NULL USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  }
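  // The gather algorithm used above is selected per barrier type through
  // __kmp_barrier_gather_pattern[]; for the fork/join barrier it is normally
  // populated from the runtime's settings code (e.g. the
  // KMP_FORKJOIN_BARRIER_PATTERN environment variable), with the linear
  // gather serving as the default fallback case.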

  /* From this point on, the team data structure may be deallocated at any time
     by the primary thread - it is unsafe to reference it in any of the worker
     threads. Any per-team data items that need to be referenced before the
     end of the barrier should be moved to the kmp_task_team_t structs. */
  if (KMP_MASTER_TID(tid)) {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    if (__kmp_display_affinity) {
      KMP_CHECK_UPDATE(team->t.t_display_affinity, 0);
    }
#if KMP_STATS_ENABLED
    // Have the primary thread flag the workers to indicate they are now
    // waiting for the next parallel region. Also wake them up so they switch
    // their timers to idle.
    for (int i = 0; i < team->t.t_nproc; ++i) {
      kmp_info_t *team_thread = team->t.t_threads[i];
      if (team_thread == this_thr)
        continue;
      team_thread->th.th_stats->setIdleFlag();
      if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME &&
          team_thread->th.th_sleep_loc != NULL)
        __kmp_null_resume_wrapper(team_thread);
    }
#endif
#if USE_ITT_BUILD
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
      __kmp_itt_barrier_middle(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    // Join barrier - report frame end
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode &&
        (this_thr->th.th_teams_microtask == NULL || // either not in teams
         this_thr->th.th_teams_size.nteams == 1) && // or inside single team
        team->t.t_active_level == 1) {
      kmp_uint64 cur_time = __itt_get_timestamp();
      ident_t *loc = team->t.t_ident;
      kmp_info_t **other_threads = team->t.t_threads;
      switch (__kmp_forkjoin_frames_mode) {
      case 1:
        __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
                               loc, nproc);
        break;
      case 2:
        __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1,
                               loc, nproc);
        break;
      case 3:
        if (__itt_metadata_add_ptr) {
          // Initialize with the primary thread's wait time
          kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
          // Set arrive time to zero to be able to check it in
          // __kmp_invoke_task(); the same is done inside the loop below
          this_thr->th.th_bar_arrive_time = 0;
          for (int i = 1; i < nproc; ++i) {
            delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
            other_threads[i]->th.th_bar_arrive_time = 0;
          }
          __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
                                       cur_time, delta, 0);
        }
        __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
                               loc, nproc);
        this_thr->th.th_frame_time = cur_time;
        break;
      }
    }
#endif /* USE_ITT_BUILD */
  }
#if USE_ITT_BUILD
  else {
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
      __kmp_itt_barrier_middle(gtid, itt_sync_obj);
  }
#endif /* USE_ITT_BUILD */

#if KMP_DEBUG
  if (KMP_MASTER_TID(tid)) {
    KA_TRACE(
        15,
        ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
         gtid, team_id, tid, nproc));
  }
#endif /* KMP_DEBUG */

  // TODO now, mark worker threads as done so they may be disbanded
  KMP_MB(); // Flush all pending memory write invalidates.
  KA_TRACE(10,
           ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid));
}
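// A minimal sketch of where __kmp_join_barrier() is reached from user code
// (guarded by "#if 0"; illustrative only): every thread of the team executes
// the implicit barrier at the end of a parallel region, and only the primary
// thread continues past the join into serial code.
#if 0
#include <omp.h>
#include <stdio.h>

int main() {
#pragma omp parallel
  {
    printf("hello from thread %d\n", omp_get_thread_num());
  } // <-- implicit join barrier: each team thread passes through
    //     __kmp_join_barrier() in this runtime before the region ends
  return 0;
}
#endif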

// TODO release worker threads' fork barriers as we are ready instead of all at
// once
void __kmp_fork_barrier(int gtid, int tid) {
  KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier);
  KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
#if USE_ITT_BUILD
  void *itt_sync_obj = NULL;
#endif /* USE_ITT_BUILD */
#ifdef KMP_DEBUG
  if (team)
    KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n", gtid,
                  (team != NULL) ? team->t.t_id : -1, tid));
#endif
  // th_team pointer only valid for primary thread here
  if (KMP_MASTER_TID(tid)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
      // Create itt barrier object
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1);
      __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing
    }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */

#ifdef KMP_DEBUG
    KMP_DEBUG_ASSERT(team);
    kmp_info_t **other_threads = team->t.t_threads;
    int i;

    // Verify state
    KMP_MB();

    for (i = 1; i < team->t.t_nproc; ++i) {
      KA_TRACE(500,
               ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "
                "== %u.\n",
                gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,
                team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,
                other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go));
      KMP_DEBUG_ASSERT(
          (TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) &
           ~(KMP_BARRIER_SLEEP_STATE)) == KMP_INIT_BARRIER_STATE);
      KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team);
    }
#endif

    if (__kmp_tasking_mode != tskm_immediate_exec)
      __kmp_task_team_setup(this_thr, team);

    /* The primary thread may have changed its blocktime between the join
       barrier and the fork barrier. Copy the blocktime info to the thread,
       where __kmp_wait_template() can access it when the team struct is not
       guaranteed to exist. */
    // See note about the corresponding code in __kmp_join_barrier() being
    // performance-critical
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
      this_thr->th.th_team_bt_intervals =
          team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
      this_thr->th.th_team_bt_set =
          team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
#else
      this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
#endif
    }
  } // primary thread

  switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {
  case bp_dist_bar: {
    __kmp_dist_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                               TRUE USE_ITT_BUILD_ARG(NULL));
    break;
  }
  case bp_hyper_bar: {
    KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
    __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                                TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_hierarchical_bar: {
    __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                                       TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_tree_bar: {
    KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
    __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                               TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  default: {
    __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                                 TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  }

#if OMPT_SUPPORT
  ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled &&
      (ompt_state == ompt_state_wait_barrier_teams ||
       ompt_state == ompt_state_wait_barrier_implicit_parallel)) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    ompt_data_t *task_data = (team)
                                 ? OMPT_CUR_TASK_DATA(this_thr)
                                 : &(this_thr->th.ompt_thread_info.task_data);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = team ? team->t.ompt_team_info.master_return_address : NULL;
    ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
    if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
      sync_kind = ompt_sync_region_barrier_teams;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          sync_kind, ompt_scope_end, NULL, task_data, codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          sync_kind, ompt_scope_end, NULL, task_data, codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid,
          ompt_task_implicit); // TODO: Can this be ompt_task_initial?
    }
  }
#endif
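  // Note: these scope_end callbacks pair with the scope_begin callbacks
  // issued in __kmp_join_barrier(); for worker threads the implicit-barrier
  // sync region spans from the previous region's join barrier to this
  // fork-barrier release, and the implicit task end is reported here as well.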

  // Early exit for reaping threads releasing forkjoin barrier
  if (TCR_4(__kmp_global.g.g_done)) {
    this_thr->th.th_task_team = NULL;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
      if (!KMP_MASTER_TID(tid)) {
        itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
        if (itt_sync_obj)
          __kmp_itt_barrier_finished(gtid, itt_sync_obj);
      }
    }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
    KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid));
    return;
  }

  /* We can now assume that a valid team structure has been allocated by the
     primary thread and propagated to all worker threads. The current thread,
     however, may not be part of the team, so we can't blindly assume that the
     team pointer is non-null. */
  team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team);
  KMP_DEBUG_ASSERT(team != NULL);
  tid = __kmp_tid_from_gtid(gtid);

#if KMP_BARRIER_ICV_PULL
  /* Primary thread's copy of the ICVs was set up on the implicit taskdata in
     __kmp_reinitialize_team. __kmp_fork_call() assumes the primary thread's
     implicit task has this data before this function is called. We cannot
     modify __kmp_fork_call() to look at the fixed ICVs in the primary thread's
     thread struct, because it is not always the case that the threads arrays
     have been allocated when __kmp_fork_call() is executed. */
  {
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
    if (!KMP_MASTER_TID(tid)) { // primary thread already has ICVs
      // Copy the initial ICVs from the primary thread's thread struct to the
      // implicit task for this tid.
      KA_TRACE(10,
               ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
      __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team,
                               tid, FALSE);
      copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                &team->t.t_threads[0]
                     ->th.th_bar[bs_forkjoin_barrier]
                     .bb.th_fixed_icvs);
    }
  }
#endif // KMP_BARRIER_ICV_PULL

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    __kmp_task_team_sync(this_thr, team);
  }

#if KMP_AFFINITY_SUPPORTED
  kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
  if (proc_bind == proc_bind_intel) {
    // Call dynamic affinity settings
    if (__kmp_affinity.type == affinity_balanced && team->t.t_size_changed) {
      __kmp_balanced_affinity(this_thr, team->t.t_nproc);
    }
  } else if (proc_bind != proc_bind_false) {
    if (this_thr->th.th_new_place == this_thr->th.th_current_place) {
      KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n",
                     __kmp_gtid_from_thread(this_thr),
                     this_thr->th.th_current_place));
    } else {
      __kmp_affinity_bind_place(gtid);
    }
  }
#endif // KMP_AFFINITY_SUPPORTED
  // Perform the display affinity functionality
  if (__kmp_display_affinity) {
    if (team->t.t_display_affinity
#if KMP_AFFINITY_SUPPORTED
        || (__kmp_affinity.type == affinity_balanced && team->t.t_size_changed)
#endif
    ) {
      // NULL means use the affinity-format-var ICV
      __kmp_aux_display_affinity(gtid, NULL);
      this_thr->th.th_prev_num_threads = team->t.t_nproc;
      this_thr->th.th_prev_level = team->t.t_level;
    }
  }
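  // The block above services OMP_DISPLAY_AFFINITY / the affinity-format-var
  // ICV. A user-side sketch of the equivalent OpenMP 5.0 API is kept below
  // (guarded by "#if 0"; illustrative only, not a description of this
  // runtime's internal path):
#if 0
#include <omp.h>

int main() {
#pragma omp parallel
  {
    // NULL means: use the current affinity-format-var ICV (settable via
    // OMP_AFFINITY_FORMAT or omp_set_affinity_format()).
    omp_display_affinity(NULL);
  }
  return 0;
}
#endif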
  if (!KMP_MASTER_TID(tid))
    KMP_CHECK_UPDATE(this_thr->th.th_def_allocator, team->t.t_def_allocator);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
    if (!KMP_MASTER_TID(tid)) {
      // Get correct barrier object
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      __kmp_itt_barrier_finished(gtid, itt_sync_obj); // Workers call acquired
    } // (prepare called inside barrier_release)
  }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
  KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid,
                team->t.t_id, tid));
}

void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy);

  KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);

/* Primary thread's copy of the ICVs was set up on the implicit taskdata in
   __kmp_reinitialize_team. __kmp_fork_call() assumes the primary thread's
   implicit task has this data before this function is called. */
#if KMP_BARRIER_ICV_PULL
  /* Copy ICVs to primary thread's thread structure into th_fixed_icvs (which
     remains untouched), where all of the worker threads can access them and
     make their own copies after the barrier. */
  KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be
  // allocated at this point
  copy_icvs(
      &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs,
      new_icvs);
  KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", 0,
                team->t.t_threads[0], team));
#elif KMP_BARRIER_ICV_PUSH
  // The ICVs will be propagated in the fork barrier, so nothing needs to be
  // done here.
  KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", 0,
                team->t.t_threads[0], team));
#else
  // Copy the ICVs to each of the non-primary threads. This takes O(nthreads)
  // time.
  ngo_load(new_icvs);
  KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be
  // allocated at this point
  for (int f = 1; f < new_nproc; ++f) { // Skip the primary thread
    // TODO: GEH - pass in better source location info since usually NULL here
    KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
                  f, team->t.t_threads[f], team));
    __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE);
    ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
    KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
                  f, team->t.t_threads[f], team));
  }
  ngo_sync();
#endif // KMP_BARRIER_ICV_PULL
}
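// Whichever propagation scheme is compiled in (PULL, PUSH, or the linear copy
// above), the user-visible contract is the same: the ICVs held by the forking
// thread at the fork (nthreads-var, dyn-var, blocktime, etc.) are what each
// worker's implicit task starts from. A small sketch of that contract is kept
// below (guarded by "#if 0"; illustrative only):
#if 0
#include <omp.h>
#include <stdio.h>

int main() {
  omp_set_dynamic(0); // dyn-var set on the initial thread ...
#pragma omp parallel num_threads(4)
  {
    // ... is inherited by every implicit task in the new team.
    printf("T%d sees dyn=%d\n", omp_get_thread_num(), omp_get_dynamic());
  }
  return 0;
}
#endif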